1 /*
2 * Copyright (c) 2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/exclaves.h>
30 #include <mach/mach_traps.h>
31 #include <kern/misc_protos.h>
32 #include <kern/assert.h>
33 #include <kern/recount.h>
34 #include <kern/startup.h>
35
36 #if CONFIG_EXCLAVES
37
38 #if CONFIG_SPTM
39 #include <arm64/sptm/sptm.h>
40 #include <arm64/hv/hv_vm.h>
41 #include <arm64/hv/hv_vcpu.h>
42 #else
43 #error Invalid configuration
44 #endif /* CONFIG_SPTM */
45
46 #include <arm/cpu_data_internal.h>
47 #include <arm/misc_protos.h>
48 #include <kern/epoch_sync.h>
49 #include <kern/ipc_kobject.h>
50 #include <kern/kalloc.h>
51 #include <kern/locks.h>
52 #include <kern/percpu.h>
53 #include <kern/task.h>
54 #include <kern/thread.h>
55 #include <kern/zalloc.h>
56 #include <kern/exclaves_stackshot.h>
57 #include <kern/exclaves_test_stackshot.h>
58 #include <vm/pmap.h>
59 #include <pexpert/pexpert.h>
60
61 #include <mach/exclaves_l4.h>
62 #include <mach/mach_port.h>
63
64 #include <Exclaves/Exclaves.h>
65
66 #include <IOKit/IOBSD.h>
67
68 #include <xnuproxy/messages.h>
69
70 #include "exclaves_debug.h"
71 #include "exclaves_panic.h"
72 #include "exclaves_xnuproxy.h"
73
74 /* External & generated headers */
75 #include <xrt_hosted_types/types.h>
76
77 #if __has_include(<Tightbeam/tightbeam.h>)
78 #include <Tightbeam/tightbeam.h>
79 #include <Tightbeam/tightbeam_private.h>
80 #endif
81
82 #include "exclaves_resource.h"
83 #include "exclaves_upcalls.h"
84 #include "exclaves_boot.h"
85 #include "exclaves_inspection.h"
86 #include "exclaves_memory.h"
87 #include "exclaves_internal.h"
88
89 LCK_GRP_DECLARE(exclaves_lck_grp, "exclaves");
90
91 /* Boot lock - only used here for assertions. */
92 extern lck_mtx_t exclaves_boot_lock;
93
94 /*
95 * Sent/latest offset for updating exclaves clocks
96 */
/*
 * Sent/latest offset for updating exclaves clocks.
 *
 * The two 64-bit offsets are deliberately overlaid with a single 128-bit
 * value so that both can be read/updated in one atomic operation where
 * needed, while still being addressable individually.
 */
typedef struct {
	union {
		/* atomic fields are used via atomic primitives */
		struct { _Atomic uint64_t sent_offset, latest_offset; } a_u64;
		/* both offsets as one atomic 128-bit quantity */
		_Atomic unsigned __int128 a_u128;
		/* non-atomic fields are used via local variable. this is needed
		 * to avoid undefined behavior with an atomic struct or
		 * accessing atomic fields non-atomically */
		struct { uint64_t sent_offset, latest_offset; } u64;
		unsigned __int128 u128;
	};
} exclaves_clock_t;
109
110
111 /*
112 * Two clocks indexed by their type.
113 * This makes things easy to lookup.
114 */
static exclaves_clock_t exclaves_clock[] = {
	[EXCLAVES_CLOCK_ABSOLUTE] = {},   /* mach absolute time offset */
	[EXCLAVES_CLOCK_CONTINUOUS] = {}, /* mach continuous time offset */
};
119
120 static kern_return_t
121 exclaves_endpoint_call_internal(ipc_port_t port, exclaves_id_t endpoint_id);
122
123 static kern_return_t
124 exclaves_enter(void);
125 static kern_return_t
126 exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter);
127
128 static kern_return_t
129 exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info);
130 OS_NORETURN OS_NOINLINE
131 static void
132 exclaves_wait_for_panic(void);
133
134 static inline bool
135 exclaves_clocks_need_update(void);
136
137 static kern_return_t
138 exclaves_scheduler_boot(void);
139
140 static kern_return_t
141 exclaves_hosted_error(bool success, XrtHosted_Error_t *error);
142
143 static kern_return_t
144 exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
145 uint64_t offset);
146
147 static kern_return_t
148 exclaves_scheduler_request_boot(void);
149
150
151 /*
152 * A static set of exclave epoch counters.
153 */
154 static os_atomic(uint64_t) epoch_counter[XrtHosted_Counter_limit] = {};
155
os_atomic(uint64_t)156 static inline os_atomic(uint64_t) *
157 exclaves_get_queue_counter(const uint64_t id)
158 {
159 return &epoch_counter[XrtHosted_Counter_fromQueueId(id)];
160 }
161
os_atomic(uint64_t)162 static inline os_atomic(uint64_t) *
163 exclaves_get_thread_counter(const uint64_t id)
164 {
165 return &epoch_counter[XrtHosted_Counter_fromThreadId(id)];
166 }
167
168
169 /* -------------------------------------------------------------------------- */
170 #pragma mark exclaves debug configuration
171
#if DEVELOPMENT || DEBUG
/* Boot-arg controlled debug verbosity; defaults to showing errors only. */
TUNABLE_WRITEABLE(unsigned int, exclaves_debug, "exclaves_debug",
    exclaves_debug_show_errors);

/*
 * Bitmask of exclaves requirements that may be relaxed for development;
 * settable via device-tree or boot-arg.
 */
TUNABLE_DT_WRITEABLE(exclaves_requirement_t, exclaves_relaxed_requirements,
    "/defaults", "kern.exclaves_relaxed_reqs", "exclaves_relaxed_requirements",
    0, TUNABLE_DT_NONE);
#else
/* On RELEASE no requirement may ever be relaxed. */
const exclaves_requirement_t exclaves_relaxed_requirements = 0;
#endif
182
183 #endif /* CONFIG_EXCLAVES */
184
185 /* -------------------------------------------------------------------------- */
186 #pragma mark userspace entry point
187
188 #if CONFIG_EXCLAVES
189 static kern_return_t
operation_boot(mach_port_name_t name,exclaves_boot_stage_t stage)190 operation_boot(mach_port_name_t name, exclaves_boot_stage_t stage)
191 {
192 if (name != MACH_PORT_NULL) {
193 /* Only accept MACH_PORT_NULL for now */
194 return KERN_INVALID_CAPABILITY;
195 }
196
197 /*
198 * As the boot operation itself happens outside the context of any
199 * conclave, it requires special privilege.
200 */
201 if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_BOOT)) {
202 return KERN_DENIED;
203 }
204
205 return exclaves_boot(stage);
206 }
207 #endif /* CONFIG_EXCLAVES */
208
/*
 * Userspace entry point for the exclaves_ctl Mach trap.
 *
 * Demultiplexes the operation encoded in uap->operation_and_flags and
 * performs privilege/boot-stage checks common to all operations before
 * dispatching. Returns a kern_return_t; several paths return raw copyin/
 * copyout errno values cast through the return (pre-existing convention).
 */
kern_return_t
_exclaves_ctl_trap(struct exclaves_ctl_trap_args *uap)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	int error = 0;

	mach_port_name_t name = uap->name;
	exclaves_id_t identifier = uap->identifier;
	mach_vm_address_t ubuffer = uap->buffer;
	mach_vm_size_t usize = uap->size;
	/* NOTE: uoffset aliases uap->identifier — some ops treat the
	 * identifier field as a buffer offset instead. */
	mach_vm_size_t uoffset = (mach_vm_size_t)uap->identifier;
	mach_vm_size_t usize2 = uap->size2;
	mach_vm_size_t uoffset2 = uap->offset;
	mach_vm_address_t ustatus = uap->status;

	task_t task = current_task();

	/*
	 * EXCLAVES_XNU_PROXY_CR_RETVAL comes from ExclavePlatform and is shared
	 * with xnu. That header is not shared with userspace. Make sure that
	 * the retval userspace picks up is the same as the one
	 * xnu/ExclavePlatform thinks it is.
	 */
	assert3p(&EXCLAVES_XNU_PROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0), ==,
	    &XNUPROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0));

	uint8_t operation = EXCLAVES_CTL_OP(uap->operation_and_flags);
	uint32_t flags = EXCLAVES_CTL_FLAGS(uap->operation_and_flags);
	if (flags != 0) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Deal with OP_BOOT up-front as it has slightly different restrictions
	 * than the other operations.
	 */
	if (operation == EXCLAVES_CTL_OP_BOOT) {
		return operation_boot(name, (uint32_t)identifier);
	}

	/*
	 * All other operations are restricted to properly entitled tasks which
	 * can operate in the kernel domain, or those which have joined
	 * conclaves (which has its own entitlement check).
	 * If requirements are relaxed during development, tasks with no
	 * conclaves are also allowed.
	 */
	if (task_get_conclave(task) == NULL &&
	    !exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN) &&
	    !exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES)) {
		return KERN_DENIED;
	}

	/*
	 * Wait for EXCLAVECORE boot to complete. If exclaves are unsupported,
	 * return immediately.
	 */
	kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVECORE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task_get_conclave(task) != NULL) {
		/*
		 * For calls from tasks that have joined conclaves, now wait until
		 * booted up to EXCLAVEKIT. If EXCLAVEKIT boot fails for some reason,
		 * KERN_NOT_SUPPORTED will be returned (on RELEASE this would
		 * panic). This is a separate call to the one above because we
		 * need to distinguish EXCLAVECORE being not supported and
		 * still wait for EXCLAVEKIT to boot if it *is* supported.
		 */
		kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVEKIT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	switch (operation) {
	/* Synchronous RPC into a conclave service via the per-thread IPC
	 * buffer: copyin request, call, copyout reply. */
	case EXCLAVES_CTL_OP_ENDPOINT_CALL: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		if (ubuffer == USER_ADDR_NULL || usize == 0 ||
		    usize != Exclaves_L4_IpcBuffer_Size) {
			return KERN_INVALID_ARGUMENT;
		}

		Exclaves_L4_IpcBuffer_t *ipcb = exclaves_get_ipc_buffer();
		/* TODO (rdar://123728529) - IPC buffer isn't freed until thread exit */
		if (!ipcb && (error = exclaves_allocate_ipc_buffer((void**)&ipcb))) {
			return error;
		}
		assert(ipcb != NULL);
		if ((error = copyin(ubuffer, ipcb, usize))) {
			return error;
		}

		if (identifier >= CONCLAVE_SERVICE_MAX) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Verify that the service actually exists in the current
		 * domain.
		 */
		if (!exclaves_conclave_has_service(task_get_conclave(task),
		    identifier)) {
			return KERN_INVALID_ARGUMENT;
		}

		kr = exclaves_endpoint_call_internal(IPC_PORT_NULL, identifier);
		/* Copy the reply back even if the call itself failed; the
		 * copyout error (if any) takes precedence below. */
		error = copyout(ipcb, ubuffer, usize);
		/*
		 * Endpoint call to conclave may have trigger a stop upcall,
		 * check if stop upcall completion handler needs to run.
		 */
		task_stop_conclave_upcall_complete();
		if (error) {
			return error;
		}
		break;
	}

	/* Map a named shared-memory buffer and hand a port for it back to
	 * userspace. */
	case EXCLAVES_CTL_OP_NAMED_BUFFER_CREATE: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}

		size_t len = 0;
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		/* identifier is a user pointer to the buffer name here. */
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX,
		    &len) != 0 || id_name[0] == '\0') {
			return KERN_INVALID_ARGUMENT;
		}

		exclaves_buffer_perm_t perm = (exclaves_buffer_perm_t)usize2;
		const exclaves_buffer_perm_t supported =
		    EXCLAVES_BUFFER_PERM_READ | EXCLAVES_BUFFER_PERM_WRITE;
		/* Must request at least one supported permission and nothing
		 * outside the supported set. */
		if ((perm & supported) == 0 || (perm & ~supported) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_shared_memory_map(domain, id_name, usize,
		    perm, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource,
		    current_space(), &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			/* Couldn't hand the name to userspace - drop it. */
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	/* Copy user data into a previously mapped shared-memory buffer. */
	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYIN: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyin(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	/* Copy data out of a previously mapped shared-memory buffer. */
	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYOUT: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	case EXCLAVES_CTL_OP_LAUNCH_CONCLAVE:
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		kr = task_launch_conclave(name);

		/*
		 * Conclave launch call to may have trigger a stop upcall,
		 * check if stop upcall completion handler needs to run.
		 */
		task_stop_conclave_upcall_complete();
		break;

	/* Look up a service id by name in the caller's domain (with DARWIN/
	 * KERNEL fallback for privileged/relaxed callers). */
	case EXCLAVES_CTL_OP_LOOKUP_SERVICES: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		struct exclaves_resource_user uresource = {};

		if (usize > (MAX_CONCLAVE_RESOURCE_NUM * sizeof(struct exclaves_resource_user)) ||
		    (usize % sizeof(struct exclaves_resource_user) != 0)) {
			return KERN_INVALID_ARGUMENT;
		}

		if ((ubuffer == USER_ADDR_NULL && usize != 0) ||
		    (usize == 0 && ubuffer != USER_ADDR_NULL)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		/* For the moment we only ever have to deal with one request. */
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}
		error = copyin(ubuffer, &uresource, usize);
		if (error) {
			return KERN_INVALID_ARGUMENT;
		}

		/* Reject unterminated names. */
		const size_t name_buf_len = sizeof(uresource.r_name);
		if (strnlen(uresource.r_name, name_buf_len) == name_buf_len) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Do the regular lookup first. If that fails, fallback to the
		 * DARWIN domain, finally fallback to the KERNEL domain.
		 */
		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		uint64_t id = exclaves_service_lookup(domain, uresource.r_name);

		if (exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES) ||
		    exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN)) {
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_DARWIN,
				    uresource.r_name);
			}
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_KERNEL,
				    uresource.r_name);
			}
		}

		if (id == EXCLAVES_INVALID_ID) {
			return KERN_NOT_FOUND;
		}

		uresource.r_id = id;
		uresource.r_port = MACH_PORT_NULL;

		error = copyout(&uresource, ubuffer, usize);
		if (error) {
			return KERN_INVALID_ADDRESS;
		}

		kr = KERN_SUCCESS;
		break;
	}

	/* Map a named arbitrated-audio buffer and return a port for it. */
	case EXCLAVES_CTL_OP_AUDIO_BUFFER_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_audio_memory_map(domain, id_name, usize,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			/* Couldn't hand the name to userspace - drop it. */
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_AUDIO_BUFFER_COPYOUT: {
		exclaves_resource_t *resource;

		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type !=
		    XNUPROXY_RESOURCETYPE_ARBITRATEDAUDIOMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_audio_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2, ustatus);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		break;
	}

	/* Open a named sensor resource and return a port for it. */
	case EXCLAVES_CTL_OP_SENSOR_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_sensor_open(domain, id_name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			/* No senders drops the reference. */
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_SENSOR_START: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_start(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		/* Report the post-start sensor status back to userspace. */
		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	case EXCLAVES_CTL_OP_SENSOR_STOP: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_stop(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	case EXCLAVES_CTL_OP_SENSOR_STATUS: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_status(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));
		break;
	}
	/* Look up a notification resource by name; on success the user struct
	 * is updated in place with type/id/port. Uses goto-cleanup to unwind
	 * the kalloc'd staging buffer and any created port name. */
	case EXCLAVES_CTL_OP_NOTIFICATION_RESOURCE_LOOKUP: {
		exclaves_resource_t *notification_resource = NULL;
		mach_port_name_t port_name = MACH_PORT_NULL;

		struct exclaves_resource_user *notification_resource_user = NULL;
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		notification_resource_user = (struct exclaves_resource_user *)
		    kalloc_data(usize, Z_WAITOK | Z_ZERO | Z_NOFAIL);

		error = copyin(ubuffer, notification_resource_user, usize);
		if (error) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		/* Reject unterminated names. */
		const size_t name_buf_len = sizeof(notification_resource_user->r_name);
		if (strnlen(notification_resource_user->r_name, name_buf_len)
		    == name_buf_len) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		kr = exclaves_notification_create(domain,
		    notification_resource_user->r_name, &notification_resource);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}

		kr = exclaves_resource_create_port_name(notification_resource,
		    current_space(), &port_name);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}
		notification_resource_user->r_type = notification_resource->r_type;
		notification_resource_user->r_id = notification_resource->r_id;
		notification_resource_user->r_port = port_name;
		error = copyout(notification_resource_user, ubuffer, usize);
		if (error) {
			kr = KERN_INVALID_ADDRESS;
			goto notification_resource_lookup_out;
		}

notification_resource_lookup_out:
		if (notification_resource_user != NULL) {
			kfree_data(notification_resource_user, usize);
		}
		if (kr != KERN_SUCCESS && port_name != MACH_PORT_NULL) {
			mach_port_deallocate(current_space(), port_name);
		}
		break;
	}

	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(uap)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
752
753 /* -------------------------------------------------------------------------- */
754 #pragma mark kernel entry points
755
/*
 * Kernel-internal endpoint call into an exclave.
 *
 * The request tag is passed in (and the reply tag returned via) the
 * calling thread's IPC buffer message register; the exclave-level error
 * is returned through *error. The caller must already hold an allocated
 * IPC buffer (asserted below). port must be IPC_PORT_NULL for now.
 */
kern_return_t
exclaves_endpoint_call(ipc_port_t port, exclaves_id_t endpoint_id,
    exclaves_tag_t *tag, exclaves_error_t *error)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	assert(port == IPC_PORT_NULL);

	Exclaves_L4_IpcBuffer_t *ipcb = Exclaves_L4_IpcBuffer();
	assert(ipcb != NULL);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call:\tendpoint id %lld tag 0x%llx\n",
	    endpoint_id, *tag);

	/* Stash the tag in the message-register area, make the call, then
	 * read back the (possibly updated) tag and the exclave retval. */
	ipcb->mr[Exclaves_L4_Ipc_Mr_Tag] = *tag;
	kr = exclaves_endpoint_call_internal(port, endpoint_id);
	*tag = ipcb->mr[Exclaves_L4_Ipc_Mr_Tag];
	*error = XNUPROXY_CR_RETVAL(ipcb);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call return:\tendpoint id %lld tag 0x%llx "
	    "error 0x%llx\n", endpoint_id, *tag, *error);

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(port, endpoint_id, tag, error)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
786
/*
 * Allocate (or take an additional reference on) the calling thread's
 * exclaves IPC buffer/context.
 *
 * The context is lazily allocated on first use and reference-counted via
 * usecnt; out_ipc_buffer (optional) receives the buffer pointer.
 * Returns KERN_SUCCESS or the xnuproxy allocation error.
 */
kern_return_t
exclaves_allocate_ipc_buffer(void **out_ipc_buffer)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		/* First user of this thread's context. */
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
		kr = exclaves_xnuproxy_ctx_alloc(&thread->th_exclaves_ipc_ctx);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
	}
	/* Per-thread state: no synchronization needed. */
	thread->th_exclaves_ipc_ctx.usecnt++;

	if (out_ipc_buffer != NULL) {
		*out_ipc_buffer = thread->th_exclaves_ipc_ctx.ipcb;
	}
	return KERN_SUCCESS;
#else /* CONFIG_EXCLAVES */
#pragma unused(out_ipc_buffer)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
813
/*
 * Drop a reference on the calling thread's exclaves IPC buffer/context,
 * freeing it when the last reference goes away.
 *
 * Buffers held by inspection (NOINSPECT) threads are deliberately never
 * freed here.
 */
kern_return_t
exclaves_free_ipc_buffer(void)
{
#if CONFIG_EXCLAVES

	/* The inspection thread's cached buffer should never be freed */
	thread_t thread = current_thread();

	/* Don't try to free unallocated contexts. */
	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		return KERN_SUCCESS;
	}

	const thread_exclaves_inspection_flags_t iflags =
	    os_atomic_load(&thread->th_exclaves_inspection_state, relaxed);
	if ((iflags & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		/* NOTE(review): returns without decrementing usecnt — looks
		 * intentional (buffer kept alive for inspection), confirm. */
		return KERN_SUCCESS;
	}

	assert(thread->th_exclaves_ipc_ctx.usecnt > 0);
	if (--thread->th_exclaves_ipc_ctx.usecnt > 0) {
		/* Still referenced by another user on this thread. */
		return KERN_SUCCESS;
	}

	return exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
#else /* CONFIG_EXCLAVES */
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
843
/*
 * Thread-teardown hook: release any exclaves IPC context the thread still
 * owns, regardless of its use count. Must be called on the terminating
 * thread itself, with no exclaves state outstanding (asserted).
 */
kern_return_t
exclaves_thread_terminate(__unused thread_t thread)
{
	kern_return_t kr = KERN_SUCCESS;

#if CONFIG_EXCLAVES
	assert(thread == current_thread());
	assert(thread->th_exclaves_intstate == 0);
	assert(thread->th_exclaves_state == 0);
	if (thread->th_exclaves_ipc_ctx.ipcb != NULL) {
		exclaves_debug_printf(show_progress,
		    "exclaves: thread_terminate freeing abandoned exclaves "
		    "ipc buffer\n");
		/* Unconditionally free context irrespective of usecount */
		thread->th_exclaves_ipc_ctx.usecnt = 0;
		kr = exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
		assert(kr == KERN_SUCCESS);
	}
#else
#pragma unused(thread)
#endif /* CONFIG_EXCLAVES */

	return kr;
}
868
869 OS_CONST
870 void*
exclaves_get_ipc_buffer(void)871 exclaves_get_ipc_buffer(void)
872 {
873 #if CONFIG_EXCLAVES
874 thread_t thread = current_thread();
875 Exclaves_L4_IpcBuffer_t *ipcb = thread->th_exclaves_ipc_ctx.ipcb;
876
877 return ipcb;
878 #else /* CONFIG_EXCLAVES */
879 return NULL;
880 #endif /* CONFIG_EXCLAVES */
881 }
882
883 #if CONFIG_EXCLAVES
884
/*
 * Pin the calling thread to the boot CPU (used for early exclaves entry).
 * Must be paired with unbind_from_boot_core().
 */
static void
bind_to_boot_core(void)
{
	/*
	 * First ensure the boot cluster isn't powered down preventing the
	 * thread from running at all.
	 */
	suspend_cluster_powerdown();
	const int cpu = ml_get_boot_cpu_number();
	processor_t processor = cpu_to_processor(cpu);
	assert3p(processor, !=, NULL);
	__assert_only processor_t old = thread_bind(processor);
	assert3p(old, ==, PROCESSOR_NULL);
	/* Block so the bind takes effect before returning. */
	thread_block(THREAD_CONTINUE_NULL);
}
900
/* Undo bind_to_boot_core(): unbind the thread and re-allow cluster
 * powerdown. */
static void
unbind_from_boot_core(void)
{
	/* Unbind the thread from the boot CPU. */
	thread_bind(PROCESSOR_NULL);
	/* Block so the unbind takes effect before resuming powerdown. */
	thread_block(THREAD_CONTINUE_NULL);
	resume_cluster_powerdown();
}
909
extern kern_return_t exclaves_boot_early(void);
/*
 * Early exclaves boot: fetch boot info, optionally perform the first
 * (boot-CPU-pinned) entry into the secure world, then bring up the
 * scheduler, xnuproxy, resources and the panic thread, in that order.
 * Caller must hold exclaves_boot_lock (asserted).
 */
kern_return_t
exclaves_boot_early(void)
{
	kern_return_t kr = KERN_FAILURE;
	uint64_t boot_info = 0;
	bool early_enter = false;

	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);

	kr = exclaves_bootinfo(&boot_info, &early_enter);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Get bootinfo failed\n");
		return kr;
	}

	if (early_enter) {
		thread_t thread = current_thread();
		/* Must not already be in any exclaves state. */
		assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

		/* The first entry must happen on the boot CPU, with
		 * preemption disabled for the duration. */
		bind_to_boot_core();

		disable_preemption_without_measurements();
		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;

		kr = exclaves_enter();

		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
		enable_preemption();

		unbind_from_boot_core();

		if (kr != KERN_SUCCESS) {
			exclaves_debug_printf(show_errors,
			    "exclaves: early exclaves enter failed\n");
			if (kr == KERN_ABORTED) {
				/* KERN_ABORTED means a panic was seen - that
				 * should never happen this early. */
				panic("Unexpected ringgate panic status");
			}
			return kr;
		}
	}

	uint64_t xnuproxy_boot_info = 0;
	kr = exclaves_scheduler_init(boot_info, &xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Init scheduler failed\n");
		return kr;
	}

	kr = exclaves_xnuproxy_init(xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy setup failed\n");
		return KERN_FAILURE;
	}

	kr = exclaves_resource_init();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: failed to initialize resources\n");
		return kr;
	}

	kr = exclaves_panic_thread_setup();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy panic thread setup failed\n");
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
984 #endif /* CONFIG_EXCLAVES */
985
986 #if CONFIG_EXCLAVES
987 static struct XrtHosted_Callbacks *exclaves_callbacks = NULL;
988 #endif /* CONFIG_EXCLAVES */
989
/*
 * Register the XrtHosted callback table provided by the exclaves support
 * kext. First registration wins; later calls are silently ignored.
 * NOTE(review): no locking around the check-then-set — presumably called
 * once during single-threaded startup; confirm.
 */
void
exclaves_register_xrt_hosted_callbacks(struct XrtHosted_Callbacks *callbacks)
{
#if CONFIG_EXCLAVES
	if (exclaves_callbacks == NULL) {
		exclaves_callbacks = callbacks;
	}
#else /* CONFIG_EXCLAVES */
#pragma unused(callbacks)
#endif /* CONFIG_EXCLAVES */
}
1001
/*
 * Record a new timebase offset for the given exclaves clock.
 *
 * Lock-free: publishes the offset into latest_offset with a CAS loop so
 * that a concurrent updater is never overwritten with stale data. The
 * offset is picked up and sent to the exclave elsewhere (sent_offset).
 */
void
exclaves_update_timebase(exclaves_clock_type_t type, uint64_t offset)
{
	assert(
		type == EXCLAVES_CLOCK_CONTINUOUS ||
		type == EXCLAVES_CLOCK_ABSOLUTE);
#if CONFIG_EXCLAVES
	exclaves_clock_t *clock = &exclaves_clock[type];
	uint64_t latest_offset = os_atomic_load(&clock->a_u64.latest_offset, relaxed);
	while (latest_offset != offset) {
		/* Update the latest offset with the new offset. If this fails, then a
		 * concurrent update occurred and our offset may be stale. */
		if (os_atomic_cmpxchgv(&clock->a_u64.latest_offset, latest_offset,
		    offset, &latest_offset, relaxed)) {
			break;
		}
	}
#else
#pragma unused(type, offset)
#endif /* CONFIG_EXCLAVES */
}
1023
1024 /* -------------------------------------------------------------------------- */
1025
1026 #pragma mark exclaves ipc internals
1027
1028 #if CONFIG_EXCLAVES
1029
1030 static kern_return_t
exclaves_endpoint_call_internal(__unused ipc_port_t port,exclaves_id_t endpoint_id)1031 exclaves_endpoint_call_internal(__unused ipc_port_t port,
1032 exclaves_id_t endpoint_id)
1033 {
1034 kern_return_t kr = KERN_SUCCESS;
1035
1036 assert(port == IPC_PORT_NULL);
1037
1038 kr = exclaves_xnuproxy_endpoint_call(endpoint_id);
1039
1040 return kr;
1041 }
1042
1043 /* -------------------------------------------------------------------------- */
1044 #pragma mark secure kernel communication
1045
1046 /* ringgate entry endpoints */
1047 enum {
1048 RINGGATE_EP_ENTER,
1049 RINGGATE_EP_INFO
1050 };
1051
1052 /* ringgate entry status codes */
1053 enum {
1054 RINGGATE_STATUS_SUCCESS,
1055 RINGGATE_STATUS_ERROR,
1056 RINGGATE_STATUS_PANIC, /* RINGGATE_EP_ENTER: Another core paniced */
1057 };
1058
1059 OS_NOINLINE
1060 static kern_return_t
exclaves_enter(void)1061 exclaves_enter(void)
1062 {
1063 uint32_t endpoint = RINGGATE_EP_ENTER;
1064 uint64_t result = RINGGATE_STATUS_ERROR;
1065
1066 sptm_call_regs_t regs = { };
1067
1068 __assert_only thread_t thread = current_thread();
1069
1070 /*
1071 * Should never re-enter exclaves.
1072 */
1073 if ((thread->th_exclaves_state & TH_EXCLAVES_UPCALL) != 0 ||
1074 (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) != 0) {
1075 panic("attempt to re-enter exclaves");
1076 }
1077
1078 /*
1079 * Must have one (and only one) of the flags set to enter exclaves.
1080 */
1081 __assert_only const thread_exclaves_state_flags_t mask = (
1082 TH_EXCLAVES_RPC |
1083 TH_EXCLAVES_XNUPROXY |
1084 TH_EXCLAVES_SCHEDULER_CALL |
1085 TH_EXCLAVES_RESUME_PANIC_THREAD);
1086 assert3u(thread->th_exclaves_state & mask, !=, 0);
1087 assert3u(thread->th_exclaves_intstate & TH_EXCLAVES_EXECUTION, ==, 0);
1088
1089 #if MACH_ASSERT
1090 /*
1091 * Set the ast to check that the thread doesn't return to userspace
1092 * while in an RPC or XNUPROXY call.
1093 */
1094 act_set_debug_assert();
1095 #endif /* MACH_ASSERT */
1096
1097 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
1098 | DBG_FUNC_START);
1099
1100 recount_enter_secure();
1101
1102 /* xnu_return_to_gl2 relies on this flag being present to correctly return
1103 * to SK from interrupts xnu handles on behalf of SK. */
1104 thread->th_exclaves_intstate |= TH_EXCLAVES_EXECUTION;
1105
1106 /*
1107 * Bracket with labels so stackshot can determine where exclaves are
1108 * entered from xnu.
1109 */
1110 __asm__ volatile (
1111 "EXCLAVES_ENTRY_START:\n\t"
1112 );
1113 result = sk_enter(endpoint, ®s);
1114 __asm__ volatile (
1115 "EXCLAVES_ENTRY_END:\n\t"
1116 );
1117
1118 thread->th_exclaves_intstate &= ~TH_EXCLAVES_EXECUTION;
1119
1120 recount_leave_secure();
1121
1122 #if CONFIG_SPTM
1123 /**
1124 * SPTM will return here with debug exceptions disabled (MDSCR_{KDE,MDE} == {0,0})
1125 * but SK might have clobbered individual breakpoints, etc. Invalidate the current CPU
1126 * debug state forcing a reload on the next return to user mode.
1127 */
1128 if (__improbable(getCpuDatap()->cpu_user_debug != NULL)) {
1129 arm_debug_set(NULL);
1130 }
1131 #endif /* CONFIG_SPTM */
1132
1133 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
1134 | DBG_FUNC_END);
1135
1136 switch (result) {
1137 case RINGGATE_STATUS_SUCCESS:
1138 return KERN_SUCCESS;
1139 case RINGGATE_STATUS_ERROR:
1140 return KERN_FAILURE;
1141 case RINGGATE_STATUS_PANIC:
1142 return KERN_ABORTED;
1143 default:
1144 assertf(false, "Unknown ringgate status %llu", result);
1145 __builtin_trap();
1146 }
1147 }
1148
1149
/*
 * A bit in the lower byte of the value returned by RINGGATE_EP_INFO. If set,
 * it indicates that we should immediately enter the ringgate once in order
 * to allow the scheduler to perform early boot initialisation.
 */
1155 #define EARLY_RINGGATE_ENTER 2
1156
1157 OS_NOINLINE
1158 static kern_return_t
exclaves_bootinfo(uint64_t * out_boot_info,bool * early_enter)1159 exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter)
1160 {
1161 uint32_t endpoint = RINGGATE_EP_INFO;
1162 uint64_t result = RINGGATE_STATUS_ERROR;
1163
1164 sptm_call_regs_t regs = { };
1165
1166 recount_enter_secure();
1167 result = sk_enter(endpoint, ®s);
1168 recount_leave_secure();
1169 if (result == RINGGATE_STATUS_ERROR) {
1170 return KERN_FAILURE;
1171 }
1172
1173 *early_enter = (result & EARLY_RINGGATE_ENTER) != 0;
1174 *out_boot_info = result & ~EARLY_RINGGATE_ENTER;
1175
1176 return KERN_SUCCESS;
1177 }
1178
1179 /* -------------------------------------------------------------------------- */
1180
1181 #pragma mark exclaves scheduler communication
1182
/*
 * Per-CPU pointers to this core's exclaves scheduler request/response
 * buffers, filled in by exclaves_init_multicore().
 */
static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_request);
static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_response);
1185
1186 static void
exclaves_init_multicore(void)1187 exclaves_init_multicore(void)
1188 {
1189 XrtHosted_Buffer_t **req, **res;
1190
1191 exclaves_wait_for_cpu_init();
1192
1193 exclaves_debug_printf(show_progress,
1194 "Using MPIDR for exclave scheduler core IDs\n");
1195
1196 /*
1197 * Match the hardwareID to the physical ID and stash the pointers to the
1198 * request/response buffers in per-cpu data for quick access.
1199 */
1200 size_t core_count = exclaves_callbacks->v1.cores();
1201 for (size_t i = 0; i < core_count; i++) {
1202 const XrtHosted_Core_t *core = exclaves_callbacks->v1.core(i);
1203 uint32_t dt_phys_id = (uint32_t)core->v2.hardwareId;
1204
1205 percpu_foreach(cpu_data, cpu_data) {
1206 if (cpu_data->cpu_phys_id != dt_phys_id) {
1207 continue;
1208 }
1209 req = PERCPU_GET_RELATIVE(exclaves_request, cpu_data, cpu_data);
1210 *req = exclaves_callbacks->v1.Core.request(i);
1211
1212 res = PERCPU_GET_RELATIVE(exclaves_response, cpu_data, cpu_data);
1213 *res = exclaves_callbacks->v1.Core.response(i);
1214
1215 break;
1216 }
1217 }
1218 }
1219
1220 static kern_return_t
exclaves_scheduler_init(uint64_t boot_info,uint64_t * xnuproxy_boot_info)1221 exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info)
1222 {
1223 kern_return_t kr = KERN_SUCCESS;
1224 XrtHosted_Error_t hosted_error;
1225
1226 lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);
1227
1228 if (!pmap_valid_address(boot_info)) {
1229 exclaves_debug_printf(show_errors,
1230 "exclaves: %s: 0x%012llx\n",
1231 "Invalid root physical address",
1232 boot_info);
1233 return KERN_FAILURE;
1234 }
1235
1236 if (exclaves_callbacks == NULL) {
1237 exclaves_debug_printf(show_errors,
1238 "exclaves: Callbacks not registered\n");
1239 return KERN_FAILURE;
1240 }
1241
1242 /* Initialise XrtHostedXnu kext */
1243 kr = exclaves_hosted_error(
1244 exclaves_callbacks->v1.init(
1245 XrtHosted_Version_current,
1246 phystokv(boot_info),
1247 &hosted_error),
1248 &hosted_error);
1249 if (kr != KERN_SUCCESS) {
1250 return kr;
1251 }
1252
1253 /* Record aperture addresses in buffer */
1254 size_t frames = exclaves_callbacks->v1.frames();
1255 XrtHosted_Mapped_t **pages = zalloc_permanent(
1256 frames * sizeof(XrtHosted_Mapped_t *),
1257 ZALIGN(XrtHosted_Mapped_t *));
1258 size_t index = 0;
1259 uint64_t phys = boot_info;
1260 while (index < frames) {
1261 if (!pmap_valid_address(phys)) {
1262 exclaves_debug_printf(show_errors,
1263 "exclaves: %s: 0x%012llx\n",
1264 "Invalid shared physical address",
1265 phys);
1266 return KERN_FAILURE;
1267 }
1268 pages[index] = (XrtHosted_Mapped_t *)phystokv(phys);
1269 kr = exclaves_hosted_error(
1270 exclaves_callbacks->v1.nextPhys(
1271 pages[index],
1272 &index,
1273 &phys,
1274 &hosted_error),
1275 &hosted_error);
1276 if (kr != KERN_SUCCESS) {
1277 return kr;
1278 }
1279 }
1280
1281 /* Initialise the mapped region */
1282 exclaves_callbacks->v1.setMapping(
1283 XrtHosted_Region_scattered(frames, pages));
1284
1285 /* Boot the scheduler. */
1286 kr = exclaves_scheduler_boot();
1287 if (kr != KERN_SUCCESS) {
1288 return kr;
1289 }
1290
1291 XrtHosted_Global_t *global = exclaves_callbacks->v1.global();
1292
1293 /* Only support MPIDR multicore. */
1294 if (global->v2.smpStatus != XrtHosted_SmpStatus_MulticoreMpidr) {
1295 exclaves_debug_printf(show_errors,
1296 "exclaves: exclaves scheduler doesn't support multicore");
1297 return KERN_FAILURE;
1298 }
1299 exclaves_init_multicore();
1300
1301 /* Initialise the XNU proxy */
1302 if (!pmap_valid_address(global->v1.proxyInit)) {
1303 exclaves_debug_printf(show_errors,
1304 "exclaves: %s: 0x%012llx\n",
1305 "Invalid xnu prpoxy physical address",
1306 phys);
1307 return KERN_FAILURE;
1308 }
1309 *xnuproxy_boot_info = global->v1.proxyInit;
1310
1311 return kr;
1312 }
1313
#if EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
/*
 * Debug-only helpers: snapshot a scheduler request/response buffer into
 * a local copy (the live buffer may be overwritten while in exclaves)
 * and dump both copies when the corresponding debug flag is enabled.
 */
#define exclaves_scheduler_debug_save_buffer(_buf) \
	XrtHosted_Buffer_t _buf##_copy = *(_buf)
#define exclaves_scheduler_debug_show_request_response(_request_buf, \
	_response_buf) ({ \
	if (exclaves_debug_enabled(show_scheduler_request_response)) { \
	        printf("exclaves: Scheduler request = %p\n", _request_buf); \
	        printf("exclaves: Scheduler request.tag = 0x%04llx\n", \
	            _request_buf##_copy.tag); \
	        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
	                printf("exclaves: Scheduler request.arguments[%02zu] = " \
	                    "0x%04llx\n", arg, \
	                    _request_buf##_copy.arguments[arg]); \
	        } \
	        printf("exclaves: Scheduler response = %p\n", _response_buf); \
	        printf("exclaves: Scheduler response.tag = 0x%04llx\n", \
	            _response_buf##_copy.tag); \
	        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
	                printf("exclaves: Scheduler response.arguments[%02zu] = " \
	                    "0x%04llx\n", arg, \
	                    _response_buf##_copy.arguments[arg]); \
	        } \
	}})
#else // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
#define exclaves_scheduler_debug_save_buffer(_buf) ({ })
#define exclaves_scheduler_debug_show_request_response(_request_buf, \
	_response_buf) ({ })
#endif // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
1342
/*
 * Emit the DBG_FUNC_START kdebug event for a scheduler request,
 * including per-request-type payload arguments. Paired with
 * request_trace_end(), which emits the matching DBG_FUNC_END.
 * Panics on a request tag this kernel does not know how to send.
 */
static void
request_trace_start(const XrtHosted_Request_t *request)
{
	switch (request->tag) {
	case XrtHosted_Request_ResumeWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_START,
		    request->ResumeWithHostId.hostId, request->ResumeWithHostId.thread);
		break;

	case XrtHosted_Request_InterruptWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_START,
		    request->InterruptWithHostId.hostId, request->InterruptWithHostId.thread);
		break;

	case XrtHosted_Request_UpdateTimerOffset:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_START,
		    request->UpdateTimerOffset.timer, request->UpdateTimerOffset.offset);
		break;

	case XrtHosted_Request_BootExclaves:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_START);
		break;

	case XrtHosted_Request_PmmEarlyAllocResponse:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_START,
		    request->PmmEarlyAllocResponse.a);
		break;

	case XrtHosted_Request_WatchdogPanic:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_START);
		break;

	default:
		panic("Unsupported exclaves scheduler request: %d", request->tag);
	}
}
1385
/*
 * Emit the DBG_FUNC_END kdebug event matching request_trace_start()
 * for the given scheduler request. Panics on an unknown request tag.
 */
static void
request_trace_end(const XrtHosted_Request_t *request)
{
	switch (request->tag) {
	case XrtHosted_Request_ResumeWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_InterruptWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_UpdateTimerOffset:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_BootExclaves:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_PmmEarlyAllocResponse:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_WatchdogPanic:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_END);
		break;

	default:
		panic("Unsupported exclaves scheduler request: %d", request->tag);
	}
}
1424
1425 __attribute__((always_inline))
1426 static kern_return_t
exclaves_scheduler_request(const XrtHosted_Request_t * request,XrtHosted_Response_t * response)1427 exclaves_scheduler_request(const XrtHosted_Request_t *request,
1428 XrtHosted_Response_t *response)
1429 {
1430 assert3u(request->tag, >, XrtHosted_Request_Invalid);
1431 assert3u(request->tag, <, XrtHosted_Request_Limit);
1432
1433 kern_return_t kr = KERN_SUCCESS;
1434 bool istate;
1435
1436 /*
1437 * Disable preemption and interrupts as the xrt hosted scheduler data
1438 * structures are per-core.
1439 * Preemption disabled and interrupt disabled timeouts are disabled for
1440 * now until we can co-ordinate the measurements with the exclaves side
1441 * of things.
1442 */
1443 istate = ml_set_interrupts_enabled_with_debug(false, false);
1444
1445 /* Interrupts should have been enabled entering this function. */
1446 assert(istate);
1447
1448 /*
1449 * This needs to be done with interrupts disabled, otherwise stackshot
1450 * could mark the thread blocked just after this function exits and a
1451 * thread marked as AST blocked would go into exclaves.
1452 */
1453
1454 while ((os_atomic_load(¤t_thread()->th_exclaves_inspection_state,
1455 relaxed) & ~TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
1456 /* Enable interrupts */
1457 (void) ml_set_interrupts_enabled_with_debug(true, false);
1458
1459 /* Wait until the thread is collected on exclaves side */
1460 exclaves_inspection_check_ast();
1461
1462 /* Disable interrupts and preemption before next AST check */
1463 ml_set_interrupts_enabled_with_debug(false, false);
1464 }
1465 /* Interrupts are disabled and exclaves_stackshot_ast is clean */
1466
1467 disable_preemption_without_measurements();
1468
1469 /*
1470 * Don't enter with a stale clock (unless updating the clock or
1471 * panicking).
1472 */
1473 if (request->tag != XrtHosted_Request_UpdateTimerOffset &&
1474 request->tag != XrtHosted_Request_WatchdogPanic &&
1475 exclaves_clocks_need_update()) {
1476 enable_preemption();
1477 (void) ml_set_interrupts_enabled_with_debug(istate, false);
1478 return KERN_POLICY_LIMIT;
1479 }
1480
1481 XrtHosted_Buffer_t *request_buf = *PERCPU_GET(exclaves_request);
1482 assert3p(request_buf, !=, NULL);
1483
1484 request_trace_start(request);
1485
1486 exclaves_callbacks->v1.Request.encode(request_buf, request);
1487 exclaves_scheduler_debug_save_buffer(request_buf);
1488
1489 kr = exclaves_enter();
1490
1491 /* The response may have come back on a different core. */
1492 XrtHosted_Buffer_t *response_buf = *PERCPU_GET(exclaves_response);
1493 assert3p(response_buf, !=, NULL);
1494
1495 exclaves_scheduler_debug_save_buffer(response_buf);
1496 exclaves_callbacks->v1.Response.decode(response_buf, response);
1497
1498 request_trace_end(request);
1499
1500 enable_preemption();
1501 (void) ml_set_interrupts_enabled_with_debug(istate, false);
1502
1503 exclaves_scheduler_debug_show_request_response(request_buf, response_buf);
1504
1505 if (kr == KERN_ABORTED) {
1506 /* RINGGATE_EP_ENTER returned RINGGATE_STATUS_PANIC indicating that
1507 * another core has paniced in exclaves and is on the way to call xnu
1508 * panic() via SPTM, so wait here for that to happen. */
1509 exclaves_wait_for_panic();
1510 }
1511
1512 return kr;
1513 }
1514
/*
 * Another core is panicking from inside exclaves; block here and wait
 * for that panic to take the system down. If it hasn't arrived within
 * one second, panic from this core instead so the system never hangs.
 */
OS_NORETURN OS_NOINLINE
static void
exclaves_wait_for_panic(void)
{
	/* Nothing ever wakes this event - only the 1s timeout can fire. */
	assert_wait_timeout((event_t)exclaves_wait_for_panic, THREAD_UNINT, 1,
	    NSEC_PER_SEC);
	wait_result_t wr = thread_block(THREAD_CONTINUE_NULL);
	panic("Unexpected wait for panic result: %d", wr);
}
1524
1525 static kern_return_t
handle_response_yield(bool early,__assert_only Exclaves_L4_Word_t scid,const XrtHosted_Yield_t * yield)1526 handle_response_yield(bool early, __assert_only Exclaves_L4_Word_t scid,
1527 const XrtHosted_Yield_t *yield)
1528 {
1529 Exclaves_L4_Word_t responding_scid = yield->thread;
1530 Exclaves_L4_Word_t yielded_to_scid = yield->yieldTo;
1531 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1532
1533 exclaves_debug_printf(show_progress,
1534 "exclaves: Scheduler: %s scid 0x%lx yielded to scid 0x%lx\n",
1535 early ? "(early yield)" : "", responding_scid, yielded_to_scid);
1536 /* TODO: 1. remember yielding scid if it isn't the xnu proxy's
1537 * th_exclaves_scheduling_context_id so we know to resume it later
1538 * 2. translate yield_to to thread_switch()-style handoff.
1539 */
1540 if (!early) {
1541 assert3u(responding_scid, ==, scid);
1542 assert3u(yield->threadHostId, ==, ctid);
1543 }
1544
1545 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1546 MACH_EXCLAVES_SCHEDULER_YIELD), yielded_to_scid, early);
1547
1548 return KERN_SUCCESS;
1549 }
1550
1551 static kern_return_t
handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,const XrtHosted_Spawned_t * spawned)1552 handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,
1553 const XrtHosted_Spawned_t *spawned)
1554 {
1555 Exclaves_L4_Word_t responding_scid = spawned->thread;
1556 thread_t thread = current_thread();
1557 __assert_only ctid_t ctid = thread_get_ctid(thread);
1558
1559 /*
1560 * There are only a few places an exclaves thread is expected to be
1561 * spawned. Any other cases are considered errors.
1562 */
1563 if ((thread->th_exclaves_state & TH_EXCLAVES_SPAWN_EXPECTED) == 0) {
1564 exclaves_debug_printf(show_errors,
1565 "exclaves: Scheduler: Unexpected thread spawn: "
1566 "scid 0x%lx spawned scid 0x%llx\n",
1567 responding_scid, spawned->spawned);
1568 return KERN_FAILURE;
1569 }
1570
1571 exclaves_debug_printf(show_progress,
1572 "exclaves: Scheduler: scid 0x%lx spawned scid 0x%lx\n",
1573 responding_scid, (unsigned long)spawned->spawned);
1574 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1575 MACH_EXCLAVES_SCHEDULER_SPAWNED), spawned->spawned);
1576
1577 assert3u(responding_scid, ==, scid);
1578 assert3u(spawned->threadHostId, ==, ctid);
1579
1580 return KERN_SUCCESS;
1581 }
1582
1583 static kern_return_t
handle_response_terminated(const XrtHosted_Terminated_t * terminated)1584 handle_response_terminated(const XrtHosted_Terminated_t *terminated)
1585 {
1586 Exclaves_L4_Word_t responding_scid = terminated->thread;
1587 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1588
1589 exclaves_debug_printf(show_errors,
1590 "exclaves: Scheduler: Unexpected thread terminate: "
1591 "scid 0x%lx terminated scid 0x%llx\n", responding_scid,
1592 terminated->terminated);
1593 assert3u(terminated->threadHostId, ==, ctid);
1594
1595 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1596 MACH_EXCLAVES_SCHEDULER_TERMINATED),
1597 terminated->terminated);
1598
1599 return KERN_TERMINATED;
1600 }
1601
1602 static kern_return_t
handle_response_wait(const XrtHosted_Wait_t * wait)1603 handle_response_wait(const XrtHosted_Wait_t *wait)
1604 {
1605 Exclaves_L4_Word_t responding_scid = wait->waiter;
1606 thread_t thread = current_thread();
1607 __assert_only ctid_t ctid = thread_get_ctid(thread);
1608
1609 exclaves_debug_printf(show_progress,
1610 "exclaves: Scheduler: Wait: "
1611 "scid 0x%lx wait on owner scid 0x%llx, queue id 0x%llx, "
1612 "epoch 0x%llx\n", responding_scid, wait->owner,
1613 wait->queueId, wait->epoch);
1614 assert3u(wait->waiterHostId, ==, ctid);
1615
1616 /* The exclaves inspection thread should never wait. */
1617 if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
1618 panic("Exclaves inspection thread tried to wait\n");
1619 }
1620
1621 /*
1622 * Note, "owner" may not be safe to access directly, for example
1623 * the thread may have exited and been freed. esync_wait will
1624 * only access it under a lock if the epoch is fresh thus
1625 * ensuring safety.
1626 */
1627 const ctid_t owner = (ctid_t)wait->ownerHostId;
1628 const XrtHosted_Word_t id = wait->queueId;
1629 const uint64_t epoch = wait->epoch;
1630
1631 wait_interrupt_t interruptible;
1632 esync_policy_t policy;
1633
1634 switch (wait->interruptible) {
1635 case XrtHosted_Interruptibility_None:
1636 interruptible = THREAD_UNINT;
1637 policy = ESYNC_POLICY_KERNEL;
1638 break;
1639
1640 case XrtHosted_Interruptibility_Voluntary:
1641 interruptible = THREAD_INTERRUPTIBLE;
1642 policy = ESYNC_POLICY_KERNEL;
1643 break;
1644
1645 case XrtHosted_Interruptibility_DynamicQueue:
1646 interruptible = THREAD_INTERRUPTIBLE;
1647 policy = ESYNC_POLICY_USER;
1648 break;
1649
1650 default:
1651 panic("Unknown exclaves interruptibility: %llu",
1652 wait->interruptible);
1653 }
1654
1655 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1656 MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_START, id, epoch, owner,
1657 wait->interruptible);
1658 const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1659 exclaves_get_queue_counter(id), owner, policy, interruptible);
1660 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1661 MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_END, wr);
1662
1663 switch (wr) {
1664 case THREAD_INTERRUPTED:
1665 return KERN_ABORTED;
1666
1667 case THREAD_NOT_WAITING:
1668 case THREAD_AWAKENED:
1669 return KERN_SUCCESS;
1670
1671 default:
1672 panic("Unexpected wait result from esync_wait: %d", wr);
1673 }
1674 }
1675
1676 static kern_return_t
handle_response_wake(const XrtHosted_Wake_t * wake)1677 handle_response_wake(const XrtHosted_Wake_t *wake)
1678 {
1679 Exclaves_L4_Word_t responding_scid = wake->waker;
1680 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1681
1682 exclaves_debug_printf(show_progress,
1683 "exclaves: Scheduler: Wake: "
1684 "scid 0x%lx wake of queue id 0x%llx, "
1685 "epoch 0x%llx, all 0x%llx\n", responding_scid,
1686 wake->queueId, wake->epoch, wake->all);
1687 assert3u(wake->wakerHostId, ==, ctid);
1688
1689 const XrtHosted_Word_t id = wake->queueId;
1690 const uint64_t epoch = wake->epoch;
1691 const esync_wake_mode_t mode = wake->all != 0 ?
1692 ESYNC_WAKE_ALL : ESYNC_WAKE_ONE;
1693
1694 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1695 MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, 0, mode);
1696
1697 kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1698 exclaves_get_queue_counter(id), mode, 0);
1699
1700 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1701 MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
1702 kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1703
1704 return KERN_SUCCESS;
1705 }
1706
1707 static kern_return_t
handle_response_wake_with_owner(const XrtHosted_WakeWithOwner_t * wake)1708 handle_response_wake_with_owner(const XrtHosted_WakeWithOwner_t *wake)
1709 {
1710 Exclaves_L4_Word_t responding_scid = wake->waker;
1711 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1712
1713 exclaves_debug_printf(show_progress,
1714 "exclaves: Scheduler: WakeWithOwner: "
1715 "scid 0x%lx wake of queue id 0x%llx, "
1716 "epoch 0x%llx, owner 0x%llx\n", responding_scid,
1717 wake->queueId, wake->epoch,
1718 wake->owner);
1719
1720 assert3u(wake->wakerHostId, ==, ctid);
1721
1722 const ctid_t owner = (ctid_t)wake->ownerHostId;
1723 const XrtHosted_Word_t id = wake->queueId;
1724 const uint64_t epoch = wake->epoch;
1725
1726 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1727 MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, owner,
1728 ESYNC_WAKE_ONE_WITH_OWNER);
1729
1730 kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1731 exclaves_get_queue_counter(id), ESYNC_WAKE_ONE_WITH_OWNER, owner);
1732
1733 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1734 MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
1735 kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1736
1737 return KERN_SUCCESS;
1738 }
1739
1740 static kern_return_t
handle_response_panic_wait(const XrtHosted_PanicWait_t * panic_wait)1741 handle_response_panic_wait(const XrtHosted_PanicWait_t *panic_wait)
1742 {
1743 Exclaves_L4_Word_t panic_thread_scid = panic_wait->handler;
1744 __assert_only thread_t thread = current_thread();
1745
1746 exclaves_debug_printf(show_progress,
1747 "exclaves: Scheduler: PanicWait: "
1748 "Panic thread SCID %lx\n",
1749 panic_thread_scid);
1750
1751 assert3u(panic_thread_scid, ==, thread->th_exclaves_ipc_ctx.scid);
1752
1753 exclaves_panic_thread_wait();
1754
1755 /* NOT REACHABLE */
1756 return KERN_SUCCESS;
1757 }
1758
1759 static kern_return_t
handle_response_suspended(const XrtHosted_Suspended_t * suspended)1760 handle_response_suspended(const XrtHosted_Suspended_t *suspended)
1761 {
1762 Exclaves_L4_Word_t responding_scid = suspended->suspended;
1763 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1764
1765 exclaves_debug_printf(show_progress,
1766 "exclaves: Scheduler: Suspended: "
1767 "scid 0x%lx epoch 0x%llx\n", responding_scid, suspended->epoch);
1768 assert3u(suspended->suspendedHostId, ==, ctid);
1769
1770 const uint64_t id = suspended->suspended;
1771 const uint64_t epoch = suspended->epoch;
1772
1773 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1774 MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_START, id, epoch);
1775
1776 const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_T, id, epoch,
1777 exclaves_get_thread_counter(id), 0, ESYNC_POLICY_KERNEL, THREAD_UNINT);
1778
1779 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1780 MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_END, wr);
1781
1782 switch (wr) {
1783 case THREAD_INTERRUPTED:
1784 return KERN_ABORTED;
1785
1786 case THREAD_NOT_WAITING:
1787 case THREAD_AWAKENED:
1788 return KERN_SUCCESS;
1789
1790 default:
1791 panic("Unexpected wait result from esync_wait: %d", wr);
1792 }
1793 }
1794
1795 static kern_return_t
handle_response_resumed(const XrtHosted_Resumed_t * resumed)1796 handle_response_resumed(const XrtHosted_Resumed_t *resumed)
1797 {
1798 Exclaves_L4_Word_t responding_scid = resumed->thread;
1799 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1800
1801 exclaves_debug_printf(show_progress,
1802 "exclaves: Scheduler: Resumed: scid 0x%lx resume of scid 0x%llx "
1803 "(ctid: 0x%llx), epoch 0x%llx\n", responding_scid, resumed->resumed,
1804 resumed->resumedHostId, resumed->epoch);
1805 assert3u(resumed->threadHostId, ==, ctid);
1806
1807 const ctid_t target = (ctid_t)resumed->resumedHostId;
1808 const XrtHosted_Word_t id = resumed->resumed;
1809 const uint64_t epoch = resumed->epoch;
1810
1811 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1812 MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_START, id, epoch,
1813 target);
1814
1815 kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_T, id, epoch,
1816 exclaves_get_thread_counter(id), ESYNC_WAKE_THREAD, target);
1817
1818 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1819 MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_END,
1820 kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1821
1822 return KERN_SUCCESS;
1823 }
1824
1825 static kern_return_t
handle_response_interrupted(const XrtHosted_Interrupted_t * interrupted)1826 handle_response_interrupted(const XrtHosted_Interrupted_t *interrupted)
1827 {
1828 Exclaves_L4_Word_t responding_scid = interrupted->thread;
1829 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1830
1831 exclaves_debug_printf(show_progress,
1832 "exclaves: Scheduler: Interrupted: "
1833 "scid 0x%lx interrupt on queue id 0x%llx, "
1834 "epoch 0x%llx, target 0x%llx\n", responding_scid,
1835 interrupted->queueId, interrupted->epoch,
1836 interrupted->interruptedHostId);
1837 assert3u(interrupted->threadHostId, ==, ctid);
1838
1839 const ctid_t target = (ctid_t)interrupted->interruptedHostId;
1840 const XrtHosted_Word_t id = interrupted->queueId;
1841 const uint64_t epoch = interrupted->epoch;
1842
1843 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1844 MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_START, id, epoch,
1845 target);
1846
1847 kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1848 exclaves_get_queue_counter(id), ESYNC_WAKE_THREAD, target);
1849
1850 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1851 MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_END,
1852 kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1853
1854 return KERN_SUCCESS;
1855 }
1856
/*
 * Handle a NothingScheduled response: the exclaves scheduler had no
 * runnable work to hand back. Nothing to do beyond tracing.
 */
static kern_return_t
handle_response_nothing_scheduled(
	__unused const XrtHosted_NothingScheduled_t *nothing_scheduled)
{
	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: nothing scheduled\n");

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_NOTHING_SCHEDULED));

	return KERN_SUCCESS;
}
1869
/*
 * Handle an AllExclavesBooted response: exclaves-side boot has
 * completed. Trace-only; the caller acts on the return value.
 */
static kern_return_t
handle_response_all_exclaves_booted(
	__unused const XrtHosted_AllExclavesBooted_t *all_exclaves_booted)
{
	exclaves_debug_printf(show_progress,
	    "exclaves: scheduler: all exclaves booted\n");

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_ALL_EXCLAVES_BOOTED));

	return KERN_SUCCESS;
}
1882
1883 /*
1884 * The Early Alloc response asks for npages to be allocated. The list of
1885 * allocated pages is written into the first allocated page in the form of 32bit
1886 * page numbers. The physical address of the first page is passed back to the
1887 * exclaves scheduler as part of the next request.
1888 */
/*
 * Handle a PmmEarlyAlloc response: allocate npages on behalf of the
 * exclaves PMM. The physical address of the first allocated page is
 * returned via pagelist_pa for delivery in the next request; the page
 * list itself is written into that page elsewhere (see the disabled
 * code below, pending a move to before the SPTM retype).
 *
 * Returns KERN_NO_SPACE if the request exceeds
 * EXCLAVES_MEMORY_MAX_REQUEST; KERN_SUCCESS otherwise (a zero-page
 * request is a no-op).
 */
static kern_return_t
handle_response_pmm_early_alloc(const XrtHosted_PmmEarlyAlloc_t *pmm_early_alloc,
    uint64_t *pagelist_pa)
{
	/* a = page count, b = allocation flags (scheduler-defined). */
	const uint32_t npages = (uint32_t)pmm_early_alloc->a;
	const uint64_t flags = pmm_early_alloc->b;

	exclaves_memory_pagekind_t kind = EXCLAVES_MEMORY_PAGEKIND_ROOTDOMAIN;
	exclaves_memory_page_flags_t alloc_flags = EXCLAVES_MEMORY_PAGE_FLAGS_NONE;

	exclaves_debug_printf(show_progress,
	    "exclaves: scheduler: pmm early alloc, npages: %u, flags: %llu\n",
	    npages, flags);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_EARLY_ALLOC), npages, flags);

	if (npages == 0) {
		return KERN_SUCCESS;
	}

	/* Bound the request so the stack buffer below is safe. */
	if (npages > EXCLAVES_MEMORY_MAX_REQUEST) {
		exclaves_debug_printf(show_errors,
		    "exclaves: request to allocate too many pages: %u\n",
		    npages);
		return KERN_NO_SPACE;
	}


	/*
	 * As npages must be relatively small (<= EXCLAVES_MEMORY_MAX_REQUEST),
	 * stack allocation is sufficient and fast. If
	 * EXCLAVES_MEMORY_MAX_REQUEST gets large, this should probably be moved
	 * to the heap.
	 */
	uint32_t page[EXCLAVES_MEMORY_MAX_REQUEST];
	exclaves_memory_alloc(npages, page, kind, alloc_flags);

	/* Now copy the list of pages into the first page. */
	uint64_t first_page_pa = ptoa(page[0]);
#if 0
	// move to before sptm retype
	uint32_t *first_page = (uint32_t *)phystokv(first_page_pa);
	for (int i = 0; i < npages; i++) {
		first_page[i] = page[i];
	}
#endif

	*pagelist_pa = first_page_pa;
	return KERN_SUCCESS;
}
1940
/*
 * Handle a WatchdogPanicComplete response: the exclaves side has
 * finished its watchdog-panic handling. Trace-only.
 */
static void
handle_response_watchdog_panic_complete(
	__unused const XrtHosted_WatchdogPanicComplete_t *panic_complete)
{
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WATCHDOG_PANIC_COMPLETE));
}
1948
/*
 * Handle a Panicking response: exclaves is panicking on another core.
 * Trace the event and park this thread waiting for the system-wide
 * panic to land. Never returns.
 */
OS_NORETURN
static void
handle_response_panicking(
	__unused const XrtHosted_Panicking_t *panicking)
{
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_PANICKING));

	exclaves_wait_for_panic();

	/* Not reached. */
}
1961
1962 static inline bool
exclaves_clocks_need_update(void)1963 exclaves_clocks_need_update(void)
1964 {
1965 const exclaves_clock_type_t clocks[] = {
1966 EXCLAVES_CLOCK_ABSOLUTE,
1967 EXCLAVES_CLOCK_CONTINUOUS
1968 };
1969
1970 for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
1971 const exclaves_clock_t *clock = &exclaves_clock[i];
1972 exclaves_clock_t local = {
1973 .u128 = os_atomic_load(&clock->a_u128, relaxed),
1974 };
1975
1976 if (local.u64.sent_offset != local.u64.latest_offset) {
1977 return true;
1978 }
1979 }
1980
1981 return false;
1982 }
1983
/*
 * Push any pending clock-offset updates to the exclaves scheduler.
 *
 * For each clock (absolute, continuous): while the latest offset differs
 * from the last offset sent, send an UpdateTimerOffset request and then
 * attempt to record that offset as sent.
 *
 * Returns KERN_SUCCESS once both clocks are up-to-date, or the first
 * error returned by the scheduler request.
 */
OS_NOINLINE
static kern_return_t
exclaves_clocks_update(void)
{
	const exclaves_clock_type_t clocks[] = {
		EXCLAVES_CLOCK_ABSOLUTE,
		EXCLAVES_CLOCK_CONTINUOUS
	};

	for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
		exclaves_clock_t local;
		exclaves_clock_t *clock = &exclaves_clock[i];

		/* Snapshot both offsets together with one 128-bit load. */
		local.u128 = os_atomic_load(&clock->a_u128, relaxed);
		while (local.u64.sent_offset != local.u64.latest_offset) {
			XrtHosted_Timer_t timer = i == EXCLAVES_CLOCK_ABSOLUTE ?
			    XrtHosted_Timer_Absolute :
			    XrtHosted_Timer_Continuous;

			kern_return_t kr =
			    exclaves_scheduler_request_update_timer(timer,
			    local.u64.latest_offset);
			if (kr != KERN_SUCCESS) {
				return kr;
			}

			/*
			 * Swap the sent offset with the local latest offset. If
			 * it fails, the sent offset will be reloaded (into
			 * local.u64.sent_offset) and the loop retries.
			 */
			os_atomic_cmpxchgv(&clock->a_u64.sent_offset,
			    local.u64.sent_offset, local.u64.latest_offset,
			    &local.u64.sent_offset, relaxed);

			/*
			 * Fetch the latest offset again, in case we are stale.
			 */
			local.u64.latest_offset = os_atomic_load(
			    &clock->a_u64.latest_offset, relaxed);
		}
	}

	return KERN_SUCCESS;
}
2028
/*
 * Boot the exclaves scheduler.
 *
 * Must execute on the boot CPU: binds the calling thread to the boot
 * core, points this CPU's request/response message buffers at the boot
 * core's buffers, issues the boot request, and unbinds before returning
 * the request's result.
 */
static kern_return_t
exclaves_scheduler_boot(void)
{
	/* This must happen on the boot CPU - bind the thread. */
	bind_to_boot_core();

	/*
	 * Set the request/response buffers. These may be overriden later when
	 * doing multicore setup.
	 */
	*PERCPU_GET(exclaves_request) =
	    exclaves_callbacks->v1.Core.request(XrtHosted_Core_bootIndex);
	*PERCPU_GET(exclaves_response) =
	    exclaves_callbacks->v1.Core.response(XrtHosted_Core_bootIndex);

	kern_return_t kr = exclaves_scheduler_request_boot();

	unbind_from_boot_core();

	return kr;
}
2050
/*
 * Send an UpdateTimerOffset request to the exclaves scheduler.
 *
 * @param timer   which scheduler timer to update (absolute/continuous).
 * @param offset  new offset value to send.
 *
 * Returns KERN_SUCCESS when the scheduler answers NothingScheduled,
 * KERN_FAILURE for any other response, or the scheduler-request error.
 * A KERN_POLICY_LIMIT result here is a contradiction (it signals a
 * pending timer update) and panics.
 */
static kern_return_t
exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
    uint64_t offset)
{
	thread_t thread = current_thread();

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to update timer\n");

	XrtHosted_Response_t response = {
		.tag = XrtHosted_Response_NothingScheduled,
	};

	const XrtHosted_Request_t request = XrtHosted_Request_UpdateTimerOffsetMsg(
		.timer = timer,
		.offset = offset,
		);

	/* Flag the thread as inside a scheduler call for the duration. */
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
	kern_return_t kr = exclaves_scheduler_request(&request, &response);
	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * POLICY_LIMIT should only happen if a timer update was pending
		 * (and thus should never happen when trying to update a
		 * timer).
		 */
		panic("exclaves: timer update requested when updating timer");

	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: scheduler request failed\n");
		return kr;
	}

	/* Flag the thread as handling a scheduler response. */
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	switch (response.tag) {
	case XrtHosted_Response_NothingScheduled:
		kr = handle_response_nothing_scheduled(&response.NothingScheduled);
		break;

	default:
		exclaves_debug_printf(show_errors, "exclaves: "
		    "unexpected scheduler response when updating timer\n");
		kr = KERN_FAILURE;
		break;
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

	return kr;
}
2108
/*
 * Drive the exclaves boot protocol to completion.
 *
 * Repeatedly sends BootExclaves requests - or a PmmEarlyAllocResponse
 * when the previous response asked for early page allocation - and
 * dispatches responses until the scheduler reports AllExclavesBooted or
 * an error occurs. A KERN_POLICY_LIMIT result means a timer update is
 * pending; it is flushed via exclaves_clocks_update() and the boot
 * request is retried.
 */
static kern_return_t
exclaves_scheduler_request_boot(void)
{
	kern_return_t kr = KERN_FAILURE;
	thread_t thread = current_thread();

	/* The calling thread must not already be in any exclaves state. */
	assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to boot exclave\n");

	XrtHosted_Response_t response = {
		.tag = XrtHosted_Response_Invalid,
	};
	uint64_t pagelist_pa = 0;

	while (response.tag != XrtHosted_Response_AllExclavesBooted) {
		/*
		 * A non-zero pagelist_pa was produced by a PmmEarlyAlloc
		 * response last iteration - answer it; otherwise (re)send
		 * the boot request.
		 */
		const XrtHosted_Request_t request = pagelist_pa != 0 ?
		    XrtHosted_Request_PmmEarlyAllocResponseMsg(.a = pagelist_pa):
		    XrtHosted_Request_BootExclavesMsg();
		pagelist_pa = 0;

		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
		kr = exclaves_scheduler_request(&request, &response);
		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

		switch (kr) {
		case KERN_SUCCESS:
			break;

		case KERN_POLICY_LIMIT:
			/* A timer update is pending - flush it and retry. */
			kr = exclaves_clocks_update();
			if (kr != KERN_SUCCESS) {
				return kr;
			}
			/*
			 * Don't try to process the response - we just updated
			 * the clock so continue with the boot request.
			 */
			continue;

		default:
			exclaves_debug_printf(show_errors,
			    "exclaves: scheduler request failed\n");
			return KERN_FAILURE;
		}

		/* Flag the thread as handling a scheduler response. */
		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

		switch (response.tag) {
		case XrtHosted_Response_Yield:
			kr = handle_response_yield(true, 0, &response.Yield);
			break;

		case XrtHosted_Response_NothingScheduled:
			kr = handle_response_nothing_scheduled(&response.NothingScheduled);
			break;

		case XrtHosted_Response_AllExclavesBooted:
			kr = handle_response_all_exclaves_booted(&response.AllExclavesBooted);
			break;

		case XrtHosted_Response_PmmEarlyAlloc:
			/* May set pagelist_pa for the next iteration. */
			kr = handle_response_pmm_early_alloc(&response.PmmEarlyAlloc, &pagelist_pa);
			break;

		case XrtHosted_Response_PanicBufferAddress:
			handle_response_panic_buffer_address(response.PanicBufferAddress.physical);
			break;

		case XrtHosted_Response_Panicking:
			handle_response_panicking(&response.Panicking);
			/* Not reached. */

		default:
			exclaves_debug_printf(show_errors,
			    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
			    response.tag);
			kr = KERN_FAILURE;
			break;
		}

		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

		if (kr != KERN_SUCCESS) {
			break;
		}
	}

	return kr;
}
2200
/*
 * Resume (or interrupt) an exclaves secure thread via the scheduler and
 * dispatch the resulting response.
 *
 * @param ctx          per-thread exclaves IPC context; ctx->scid names the
 *                     secure thread to resume.
 * @param interrupted  when true, send InterruptWithHostId instead of
 *                     ResumeWithHostId.
 *
 * The current thread's ctid is passed as the host identity. On
 * KERN_POLICY_LIMIT (a pending timer update; no response was produced)
 * the clocks are flushed and the caller re-enters as needed.
 *
 * NOTE(review): the EXCLAVES_SCHEDULER_REQUEST_START/END asm labels
 * appear to delimit the response-handling region for the stackshot range
 * globals set up below - confirm against the *_label symbol definitions.
 */
OS_INLINE
kern_return_t
exclaves_scheduler_request_resume(const exclaves_ctx_t *ctx, bool interrupted)
{
	thread_t thread = current_thread();
	const ctid_t ctid = thread_get_ctid(thread);

	/* Caller must be resuming a panic thread or inside an RPC. */
	assert3u(thread->th_exclaves_state &
	    (TH_EXCLAVES_RESUME_PANIC_THREAD | TH_EXCLAVES_RPC), !=, 0);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to resume scid 0x%lx\n", ctx->scid);

	XrtHosted_Response_t response = {};
	const XrtHosted_Request_t request = interrupted ?
	    XrtHosted_Request_InterruptWithHostIdMsg(
		.thread = ctx->scid,
		.hostId = ctid,
		) :
	    XrtHosted_Request_ResumeWithHostIdMsg(
		.thread = ctx->scid,
		.hostId = ctid,
		);

	kern_return_t kr = exclaves_scheduler_request(&request, &response);

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * Don't try to handle any response (as there isn't one), just
		 * return to the caller which will check MSG STATUS and re-enter
		 * if necessary.
		 */
		return exclaves_clocks_update();

	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: scheduler request failed\n");
		break;
	}

	if (kr != KERN_SUCCESS) {
		return kr;
	}

	__asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_START:\n\t");
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	/* Dispatch on the scheduler's response. */
	switch (response.tag) {
	case XrtHosted_Response_Wait:
		kr = handle_response_wait(&response.Wait);
		break;

	case XrtHosted_Response_Wake:
		kr = handle_response_wake(&response.Wake);
		break;

	case XrtHosted_Response_Yield:
		kr = handle_response_yield(false, ctx->scid, &response.Yield);
		break;

	case XrtHosted_Response_Spawned:
		kr = handle_response_spawned(ctx->scid, &response.Spawned);
		break;

	case XrtHosted_Response_Terminated:
		kr = handle_response_terminated(&response.Terminated);
		break;

	case XrtHosted_Response_WakeWithOwner:
		kr = handle_response_wake_with_owner(&response.WakeWithOwner);
		break;

	case XrtHosted_Response_PanicWait:
		kr = handle_response_panic_wait(&response.PanicWait);
		break;

	case XrtHosted_Response_Suspended:
		kr = handle_response_suspended(&response.Suspended);
		break;

	case XrtHosted_Response_Resumed:
		kr = handle_response_resumed(&response.Resumed);
		break;

	case XrtHosted_Response_Interrupted:
		kr = handle_response_interrupted(&response.Interrupted);
		break;

	case XrtHosted_Response_Panicking:
		handle_response_panicking(&response.Panicking);
		/* Not reached. */

	case XrtHosted_Response_Invalid:
	case XrtHosted_Response_Failure:
	case XrtHosted_Response_Pong:
	case XrtHosted_Response_SleepUntil:
	case XrtHosted_Response_Awaken:
	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
		    response.tag);
		kr = KERN_FAILURE;
		break;
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
	__asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_END:\n\t");

	return kr;
}
2315
2316 /* A friendly name to show up in backtraces. */
2317 OS_NOINLINE
2318 kern_return_t
exclaves_run(thread_t thread,bool interrupted)2319 exclaves_run(thread_t thread, bool interrupted)
2320 {
2321 return exclaves_scheduler_request_resume(&thread->th_exclaves_ipc_ctx,
2322 interrupted);
2323 }
2324
2325 /*
2326 * Note: this is called from a thread with RT priority which is on the way to
2327 * panicking and thus doesn't log.
2328 */
2329 kern_return_t
exclaves_scheduler_request_watchdog_panic(void)2330 exclaves_scheduler_request_watchdog_panic(void)
2331 {
2332 thread_t thread = current_thread();
2333
2334 XrtHosted_Response_t response = {};
2335 const XrtHosted_Request_t request = XrtHosted_Request_WatchdogPanicMsg();
2336
2337 /*
2338 * Check for consistent exclaves thread state to make sure we don't
2339 * accidentally block. This should normally never happen but if it does,
2340 * just return and allow the caller to panic without gathering an
2341 * exclaves stackshot.
2342 */
2343 if (os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) != 0 ||
2344 thread->th_exclaves_state != 0) {
2345 return KERN_FAILURE;
2346 }
2347
2348 thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
2349 kern_return_t kr = exclaves_scheduler_request(&request, &response);
2350 thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
2351
2352 switch (kr) {
2353 case KERN_SUCCESS:
2354 break;
2355
2356 case KERN_POLICY_LIMIT:
2357 /*
2358 * POLICY_LIMIT should only happen if a timer update was pending
2359 * (and thus should never happen when trying to send a watchdog
2360 * panic message.
2361 */
2362 panic("exclaves: "
2363 "timer update requested when calling watchdog panic");
2364
2365 default:
2366 return kr;
2367 }
2368
2369 thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;
2370
2371 switch (response.tag) {
2372 case XrtHosted_Response_WatchdogPanicComplete:
2373 handle_response_watchdog_panic_complete(&response.WatchdogPanicComplete);
2374 break;
2375
2376 case XrtHosted_Response_Panicking:
2377 handle_response_panicking(&response.Panicking);
2378 /* Not Reached. */
2379
2380 default:
2381 panic("exclaves: unexpected scheduler response "
2382 "when sending watchdog panic request: %d", response.tag);
2383 }
2384
2385 thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
2386
2387 return kr;
2388 }
2389
2390 /* -------------------------------------------------------------------------- */
2391
2392 #pragma mark exclaves xnu proxy communication
2393
2394 static kern_return_t
exclaves_hosted_error(bool success,XrtHosted_Error_t * error)2395 exclaves_hosted_error(bool success, XrtHosted_Error_t *error)
2396 {
2397 if (success) {
2398 return KERN_SUCCESS;
2399 } else {
2400 exclaves_debug_printf(show_errors,
2401 "exclaves: XrtHosted: %s[%d] (%s): %s\n",
2402 error->file,
2403 error->line,
2404 error->function,
2405 error->expression
2406 );
2407 return KERN_FAILURE;
2408 }
2409 }
2410
2411
2412 #pragma mark exclaves privilege management
2413
2414 /*
2415 * All entitlement checking enabled by default.
2416 */
2417 #define DEFAULT_ENTITLEMENT_FLAGS (~0)
2418
2419 /*
2420 * boot-arg to control the use of entitlements.
2421 * Eventually this should be removed and entitlement checking should be gated on
2422 * the EXCLAVES_R_ENTITLEMENTS requirement.
2423 * This will be addressed with rdar://125153460.
2424 */
2425 TUNABLE(unsigned int, exclaves_entitlement_flags,
2426 "exclaves_entitlement_flags", DEFAULT_ENTITLEMENT_FLAGS);
2427
2428 static bool
has_entitlement(task_t task,const exclaves_priv_t priv,const char * entitlement)2429 has_entitlement(task_t task, const exclaves_priv_t priv,
2430 const char *entitlement)
2431 {
2432 /* Skip the entitlement if not enabled. */
2433 if ((exclaves_entitlement_flags & priv) == 0) {
2434 return true;
2435 }
2436
2437 return IOTaskHasEntitlement(task, entitlement);
2438 }
2439
2440 static bool
has_entitlement_vnode(void * vnode,const int64_t off,const exclaves_priv_t priv,const char * entitlement)2441 has_entitlement_vnode(void *vnode, const int64_t off,
2442 const exclaves_priv_t priv, const char *entitlement)
2443 {
2444 /* Skip the entitlement if not enabled. */
2445 if ((exclaves_entitlement_flags & priv) == 0) {
2446 return true;
2447 }
2448
2449 return IOVnodeHasEntitlement(vnode, off, entitlement);
2450 }
2451
/*
 * Check whether a task holds the given exclaves privilege.
 *
 * launchd (pid 1) implicitly holds CONCLAVE_SPAWN and BOOT; the kernel
 * task implicitly holds KERNEL_DOMAIN. Otherwise the relevant
 * com.apple.private.exclaves.* entitlement is consulted (subject to
 * exclaves_entitlement_flags). CONCLAVE_HOST must be checked via
 * exclaves_has_priv_vnode() and panics here.
 */
bool
exclaves_has_priv(task_t task, exclaves_priv_t priv)
{
	const bool is_kernel = task == kernel_task;
	const bool is_launchd = task_pid(task) == 1;

	switch (priv) {
	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
		/* Both launchd and entitled tasks can spawn new conclaves. */
		if (is_launchd) {
			return true;
		}
		return has_entitlement(task, priv,
		    "com.apple.private.exclaves.conclave-spawn");

	case EXCLAVES_PRIV_KERNEL_DOMAIN:
		/*
		 * Both the kernel itself and user tasks with the right
		 * privilege can access exclaves resources in the kernel domain.
		 */
		if (is_kernel) {
			return true;
		}

		/*
		 * If the task was entitled and has been through this path
		 * before, it will have set the TFRO_HAS_KD_ACCESS flag.
		 */
		if ((task_ro_flags_get(task) & TFRO_HAS_KD_ACCESS) != 0) {
			return true;
		}

		/* Cache a successful entitlement check in the RO task flags. */
		if (has_entitlement(task, priv,
		    "com.apple.private.exclaves.kernel-domain")) {
			task_ro_flags_set(task, TFRO_HAS_KD_ACCESS);
			return true;
		}

		return false;

	case EXCLAVES_PRIV_BOOT:
		/* Both launchd and entitled tasks can boot exclaves. */
		if (is_launchd) {
			return true;
		}
		/* BEGIN IGNORE CODESTYLE */
		return has_entitlement(task, priv,
		    "com.apple.private.exclaves.boot");
		/* END IGNORE CODESTYLE */

	/* The CONCLAVE HOST priv is always checked by vnode. */
	case EXCLAVES_PRIV_CONCLAVE_HOST:
	default:
		panic("bad exclaves privilege (%u)", priv);
	}
}
2508
/*
 * Check an exclaves privilege against the entitlements of the executable
 * backed by (vnode, off). Supports CONCLAVE_HOST and CONCLAVE_SPAWN;
 * other privileges panic.
 */
bool
exclaves_has_priv_vnode(void *vnode, int64_t off, exclaves_priv_t priv)
{
	switch (priv) {
	case EXCLAVES_PRIV_CONCLAVE_HOST: {
		const bool has_conclave_host = has_entitlement_vnode(vnode,
		    off, priv, "com.apple.private.exclaves.conclave-host");

		/*
		 * Tasks should never have both EXCLAVES_PRIV_CONCLAVE_HOST
		 * *and* EXCLAVES_PRIV_KERNEL_DOMAIN.
		 */

		/*
		 * Skip the both-entitlements check if either entitlement is
		 * not being enforced.
		 */
		if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0 ||
		    (exclaves_entitlement_flags & EXCLAVES_PRIV_KERNEL_DOMAIN) == 0) {
			return has_conclave_host;
		}

		const bool has_domain_kernel = has_entitlement_vnode(vnode, off,
		    EXCLAVES_PRIV_KERNEL_DOMAIN,
		    "com.apple.private.exclaves.kernel-domain");

		/* See if it has both. */
		if (has_conclave_host && has_domain_kernel) {
			exclaves_debug_printf(show_errors,
			    "exclaves: task has both conclave-host and "
			    "kernel-domain entitlements which is forbidden\n");
			return false;
		}

		return has_conclave_host;
	}

	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
		return has_entitlement_vnode(vnode, off, priv,
		    "com.apple.private.exclaves.conclave-spawn");

	default:
		panic("bad exclaves privilege (%u)", priv);
	}
}
2551
2552
2553 #pragma mark exclaves stackshot range
2554
/*
 * Unslid pointers defining the range of code which switches threads into
 * the secure world (exclaves_enter). Consumed by exclaves_stack_offset()
 * when placing exclaves stacks in stackshots.
 */
uintptr_t exclaves_enter_range_start;
uintptr_t exclaves_enter_range_end;

/*
 * Unslid pointers defining the range of code which handles exclaves
 * scheduler requests.
 */
uintptr_t exclaves_scheduler_request_range_start;
uintptr_t exclaves_scheduler_request_range_end;
2563
2564
/*
 * Capture the unslid start/end addresses of the exclaves-enter and
 * scheduler-request code ranges at early boot. Each label must resolve to
 * a non-zero address.
 */
__startup_func
static void
initialize_exclaves_ranges(void)
{
	exclaves_enter_range_start = VM_KERNEL_UNSLIDE(&exclaves_enter_start_label);
	assert3u(exclaves_enter_range_start, !=, 0);
	exclaves_enter_range_end = VM_KERNEL_UNSLIDE(&exclaves_enter_end_label);
	assert3u(exclaves_enter_range_end, !=, 0);

	exclaves_scheduler_request_range_start = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_start_label);
	assert3u(exclaves_scheduler_request_range_start, !=, 0);
	exclaves_scheduler_request_range_end = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_end_label);
	assert3u(exclaves_scheduler_request_range_end, !=, 0);
}
STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, initialize_exclaves_ranges);
2580
2581 /*
2582 * Return true if the specified address is in exclaves_enter.
2583 */
2584 static bool
exclaves_enter_in_range(uintptr_t addr,bool slid)2585 exclaves_enter_in_range(uintptr_t addr, bool slid)
2586 {
2587 return slid ?
2588 exclaves_in_range(addr, (uintptr_t)&exclaves_enter_start_label, (uintptr_t)&exclaves_enter_end_label) :
2589 exclaves_in_range(addr, exclaves_enter_range_start, exclaves_enter_range_end);
2590 }
2591
2592 /*
2593 * Return true if the specified address is in scheduler request handlers.
2594 */
2595 static bool
exclaves_scheduler_request_in_range(uintptr_t addr,bool slid)2596 exclaves_scheduler_request_in_range(uintptr_t addr, bool slid)
2597 {
2598 return slid ?
2599 exclaves_in_range(addr, (uintptr_t)&exclaves_scheduler_request_start_label, (uintptr_t)&exclaves_scheduler_request_end_label) :
2600 exclaves_in_range(addr, exclaves_scheduler_request_range_start, exclaves_scheduler_request_range_end);
2601 }
2602
/*
 * Decide where to splice exclaves stack frames into a kernel backtrace.
 *
 * Scans the frames in priority order and returns the insertion index:
 * just above the first scheduler-request frame, else just above the first
 * upcall frame, else at the first exclaves-enter frame, else 0 (on top of
 * the kernel stack).
 */
uint32_t
exclaves_stack_offset(const uintptr_t *addr, size_t nframes, bool slid)
{
	size_t idx;

	/* Insert just above a scheduler-request frame, if present. */
	for (idx = 0; idx < nframes; idx++) {
		if (exclaves_scheduler_request_in_range(addr[idx], slid)) {
			return (uint32_t)(idx + 1);
		}
	}

	/* Otherwise just above an upcall frame, if present. */
	for (idx = 0; idx < nframes; idx++) {
		if (exclaves_upcall_in_range(addr[idx], slid)) {
			return (uint32_t)(idx + 1);
		}
	}

	/* Otherwise at an exclaves-enter frame, if present. */
	for (idx = 0; idx < nframes; idx++) {
		if (exclaves_enter_in_range(addr[idx], slid)) {
			return (uint32_t)idx;
		}
	}

	/* Default: put exclaves stacks on top of the kernel stack. */
	return 0;
}
2645
2646 #if DEVELOPMENT || DEBUG
2647
/* Tweak the set of relaxed requirements on startup (DEVELOPMENT/DEBUG only). */
__startup_func
static void
exclaves_requirement_startup(void)
{
	/*
	 * The medium-term plan is that the boot-arg controlling entitlements
	 * goes away entirely and is replaced with EXCLAVES_R_ENTITLEMENTS.
	 * Until that happens, for historical reasons, if the entitlement
	 * boot-arg has disabled EXCLAVES_PRIV_CONCLAVE_HOST, then relax
	 * EXCLAVES_R_CONCLAVE_RESOURCES here too.
	 */
	if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0) {
		exclaves_requirement_relax(EXCLAVES_R_CONCLAVE_RESOURCES);
	}

	/* EXCLAVES_R_EIC is unconditionally relaxed on these kernels. */
	exclaves_requirement_relax(EXCLAVES_R_EIC);
}
STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, exclaves_requirement_startup);
2667
2668 #endif /* DEVELOPMENT || DEBUG */
2669
2670 #endif /* CONFIG_EXCLAVES */
2671
2672
2673 #ifndef CONFIG_EXCLAVES
/* Stubs for sensor functions which are not compiled in from exclaves.c when
 * CONFIG_EXCLAVES is disabled. */
2676
2677 kern_return_t
exclaves_sensor_start(exclaves_sensor_type_t sensor_type,uint64_t flags,exclaves_sensor_status_t * status)2678 exclaves_sensor_start(exclaves_sensor_type_t sensor_type, uint64_t flags,
2679 exclaves_sensor_status_t *status)
2680 {
2681 #pragma unused(sensor_type, flags, status)
2682 return KERN_NOT_SUPPORTED;
2683 }
2684
2685 kern_return_t
exclaves_sensor_stop(exclaves_sensor_type_t sensor_type,uint64_t flags,exclaves_sensor_status_t * status)2686 exclaves_sensor_stop(exclaves_sensor_type_t sensor_type, uint64_t flags,
2687 exclaves_sensor_status_t *status)
2688 {
2689 #pragma unused(sensor_type, flags, status)
2690 return KERN_NOT_SUPPORTED;
2691 }
2692
2693 kern_return_t
exclaves_sensor_status(exclaves_sensor_type_t sensor_type,uint64_t flags,exclaves_sensor_status_t * status)2694 exclaves_sensor_status(exclaves_sensor_type_t sensor_type, uint64_t flags,
2695 exclaves_sensor_status_t *status)
2696 {
2697 #pragma unused(sensor_type, flags, status)
2698 return KERN_NOT_SUPPORTED;
2699 }
2700
2701 #endif /* ! CONFIG_EXCLAVES */
2702