/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/exclaves.h>
#include <mach/mach_traps.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <kern/recount.h>
#include <kern/startup.h>

#if CONFIG_EXCLAVES

#if CONFIG_SPTM
#include <arm64/sptm/sptm.h>
#else
#error Invalid configuration
#endif /* CONFIG_SPTM */

#include <arm/cpu_data_internal.h>
#include <arm/misc_protos.h>
#include <kern/epoch_sync.h>
#include <kern/ipc_kobject.h>
#include <kern/kalloc.h>
#include <kern/locks.h>
#include <kern/percpu.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/exclaves_stackshot.h>
#include <kern/exclaves_test_stackshot.h>
#include <vm/pmap.h>
#include <pexpert/pexpert.h>

#include <mach/exclaves_l4.h>
#include <mach/mach_port.h>

#include <Exclaves/Exclaves.h>

#include <IOKit/IOBSD.h>

#include <xnuproxy/messages.h>

#include "exclaves_debug.h"
#include "exclaves_panic.h"
#include "exclaves_xnuproxy.h"

/* External & generated headers */
#include <xrt_hosted_types/types.h>

#if __has_include(<Tightbeam/tightbeam.h>)
#include <Tightbeam/tightbeam.h>
#include <Tightbeam/tightbeam_private.h>
#endif

#include "exclaves_resource.h"
#include "exclaves_upcalls.h"
#include "exclaves_boot.h"
#include "exclaves_inspection.h"
#include "exclaves_memory.h"
#include "exclaves_internal.h"
#include "exclaves_aoe.h"
#include "exclaves_sensor.h"

LCK_GRP_DECLARE(exclaves_lck_grp, "exclaves");

/* Boot lock - only used here for assertions. */
extern lck_mtx_t exclaves_boot_lock;

/*
 * Sent/latest offset for updating exclaves clocks
 */
typedef struct {
	union {
		/* atomic fields are used via atomic primitives */
		struct { _Atomic uint64_t sent_offset, latest_offset; } a_u64;
		_Atomic unsigned __int128 a_u128;
		/* non-atomic fields are used via local variable. this is needed
		 * to avoid undefined behavior with an atomic struct or
		 * accessing atomic fields non-atomically */
		struct { uint64_t sent_offset, latest_offset; } u64;
		unsigned __int128 u128;
	};
} exclaves_clock_t;
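
/*
 * Illustrative sketch (the real check lives in exclaves_clocks_need_update()):
 * overlaying a 128-bit field lets both offsets be snapshotted in a single
 * atomic load and then compared through the non-atomic view, along the
 * lines of:
 *
 *	exclaves_clock_t snap;
 *	snap.u128 = os_atomic_load(&clock->a_u128, relaxed);
 *	bool stale = (snap.u64.sent_offset != snap.u64.latest_offset);
 */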

/*
 * Two clocks indexed by their type.
 * This makes them easy to look up.
 */
static exclaves_clock_t exclaves_clock[] = {
	[EXCLAVES_CLOCK_ABSOLUTE] = {},
	[EXCLAVES_CLOCK_CONTINUOUS] = {},
};

static kern_return_t
exclaves_endpoint_call_internal(ipc_port_t port, exclaves_id_t endpoint_id);

static bool
exclaves_cpu_callback(__unused void *param, enum cpu_event event, __unused unsigned int cpu_or_cluster);

static kern_return_t
exclaves_enter(void);
static kern_return_t
exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter);

static kern_return_t
exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info);
OS_NORETURN OS_NOINLINE
static void
exclaves_wait_for_panic(void);

static inline bool
exclaves_clocks_need_update(void);

static kern_return_t
exclaves_scheduler_boot(void);

static kern_return_t
exclaves_hosted_error(bool success, XrtHosted_Error_t *error);

static kern_return_t
exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
    uint64_t offset);

static kern_return_t
exclaves_scheduler_request_boot(void);


/*
 * A static set of exclave epoch counters.
 */
static os_atomic(uint64_t) epoch_counter[XrtHosted_Counter_limit] = {};

static inline os_atomic(uint64_t) *
exclaves_get_queue_counter(const uint64_t id)
{
	return &epoch_counter[XrtHosted_Counter_fromQueueId(id)];
}

static inline os_atomic(uint64_t) *
exclaves_get_thread_counter(const uint64_t id)
{
	return &epoch_counter[XrtHosted_Counter_fromThreadId(id)];
}
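
/*
 * These counters back the esync wait/wake protocol used further down:
 * queue and thread ids are folded into a single index space via
 * XrtHosted_Counter_fromQueueId()/XrtHosted_Counter_fromThreadId(), and the
 * resulting counter is passed to esync_wait()/esync_wake() (see
 * handle_response_wait() and handle_response_wake()).
 */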

/* -------------------------------------------------------------------------- */
#pragma mark exclaves debug configuration

#if DEVELOPMENT || DEBUG
TUNABLE_WRITEABLE(unsigned int, exclaves_debug, "exclaves_debug",
    exclaves_debug_show_errors);

TUNABLE_DT_WRITEABLE(exclaves_requirement_t, exclaves_relaxed_requirements,
    "/defaults", "kern.exclaves_relaxed_reqs", "exclaves_relaxed_requirements",
    0, TUNABLE_DT_NONE);
#else
const exclaves_requirement_t exclaves_relaxed_requirements = 0;
#endif

#endif /* CONFIG_EXCLAVES */

/* -------------------------------------------------------------------------- */
#pragma mark userspace entry point

#if CONFIG_EXCLAVES
static kern_return_t
operation_boot(mach_port_name_t name, exclaves_boot_stage_t stage)
{
	if (name != MACH_PORT_NULL) {
		/* Only accept MACH_PORT_NULL for now */
		return KERN_INVALID_CAPABILITY;
	}

	/*
	 * As the boot operation itself happens outside the context of any
	 * conclave, it requires special privilege.
	 */
	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_BOOT)) {
		return KERN_DENIED;
	}

	return exclaves_boot(stage);
}
#endif /* CONFIG_EXCLAVES */

kern_return_t
_exclaves_ctl_trap(struct exclaves_ctl_trap_args *uap)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	int error = 0;

	mach_port_name_t name = uap->name;
	exclaves_id_t identifier = uap->identifier;
	mach_vm_address_t ubuffer = uap->buffer;
	mach_vm_size_t usize = uap->size;
	mach_vm_size_t uoffset = (mach_vm_size_t)uap->identifier;
	mach_vm_size_t usize2 = uap->size2;
	mach_vm_size_t uoffset2 = uap->offset;
	mach_vm_address_t ustatus = uap->status;

	task_t task = current_task();

	/*
	 * EXCLAVES_XNU_PROXY_CR_RETVAL comes from ExclavePlatform and is shared
	 * with xnu. That header is not shared with userspace. Make sure that
	 * the retval userspace picks up is the same as the one
	 * xnu/ExclavePlatform thinks it is.
	 */
	assert3p(&EXCLAVES_XNU_PROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0), ==,
	    &XNUPROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0));
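	/*
	 * (The assert above compares field offsets only: evaluating each
	 * macro's lvalue at a NULL base yields the field's offset as an
	 * address, so nothing is actually dereferenced.)
	 */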

	uint8_t operation = EXCLAVES_CTL_OP(uap->operation_and_flags);
	uint32_t flags = EXCLAVES_CTL_FLAGS(uap->operation_and_flags);
	if (flags != 0) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Deal with OP_BOOT up-front as it has slightly different restrictions
	 * than the other operations.
	 */
	if (operation == EXCLAVES_CTL_OP_BOOT) {
		return operation_boot(name, (uint32_t)identifier);
	}

	/*
	 * All other operations are restricted to properly entitled tasks which
	 * can operate in the kernel domain, or those which have joined
	 * conclaves (which has its own entitlement check).
	 * If requirements are relaxed during development, tasks with no
	 * conclaves are also allowed.
	 */
	if (operation == EXCLAVES_CTL_OP_SENSOR_MIN_ON_TIME) {
		if (!exclaves_has_priv(task, EXCLAVES_PRIV_INDICATOR_MIN_ON_TIME)) {
			return KERN_DENIED;
		}
	} else if (task_get_conclave(task) == NULL &&
	    !exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN) &&
	    !exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES)) {
		return KERN_DENIED;
	}

	/*
	 * Wait for EXCLAVECORE boot to complete. If exclaves are unsupported,
	 * return immediately.
	 */
	kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVECORE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task_get_conclave(task) != NULL) {
		/*
		 * For calls from tasks that have joined conclaves, now wait until
		 * booted up to EXCLAVEKIT. If EXCLAVEKIT boot fails for some reason,
		 * KERN_NOT_SUPPORTED will be returned (on RELEASE this would
		 * panic). This is a separate call to the one above because we
		 * need to distinguish EXCLAVECORE being not supported and
		 * still wait for EXCLAVEKIT to boot if it *is* supported.
		 */
		kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVEKIT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	switch (operation) {
	case EXCLAVES_CTL_OP_ENDPOINT_CALL: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		if (ubuffer == USER_ADDR_NULL || usize == 0 ||
		    usize != Exclaves_L4_IpcBuffer_Size) {
			return KERN_INVALID_ARGUMENT;
		}


		Exclaves_L4_IpcBuffer_t *ipcb = exclaves_get_ipc_buffer();
		/* TODO (rdar://123728529) - IPC buffer isn't freed until thread exit */
		if (!ipcb && (error = exclaves_allocate_ipc_buffer((void**)&ipcb))) {
			return error;
		}
		assert(ipcb != NULL);
		if ((error = copyin(ubuffer, ipcb, usize))) {
			return error;
		}

		if (identifier >= CONCLAVE_SERVICE_MAX) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Verify that the service actually exists in the current
		 * domain.
		 */
		if (!exclaves_conclave_has_service(task_get_conclave(task),
		    identifier)) {
			return KERN_INVALID_ARGUMENT;
		}

		kr = exclaves_endpoint_call_internal(IPC_PORT_NULL, identifier);
		error = copyout(ipcb, ubuffer, usize);
		/*
		 * An endpoint call to a conclave may have triggered a stop
		 * upcall; check if the stop upcall completion handler needs
		 * to run.
		 */
		task_stop_conclave_upcall_complete();
		if (error) {
			return error;
		}
		break;
	}

	case EXCLAVES_CTL_OP_NAMED_BUFFER_CREATE: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}

		size_t len = 0;
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX,
		    &len) != 0 || id_name[0] == '\0') {
			return KERN_INVALID_ARGUMENT;
		}

		exclaves_buffer_perm_t perm = (exclaves_buffer_perm_t)usize2;
		const exclaves_buffer_perm_t supported =
		    EXCLAVES_BUFFER_PERM_READ | EXCLAVES_BUFFER_PERM_WRITE;
		if ((perm & supported) == 0 || (perm & ~supported) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_shared_memory_map(domain, id_name, usize,
		    perm, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource,
		    current_space(), &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYIN: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyin(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYOUT: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	case EXCLAVES_CTL_OP_LAUNCH_CONCLAVE:
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		kr = task_launch_conclave(name);

		/*
		 * A conclave launch call may have triggered a stop upcall;
		 * check if the stop upcall completion handler needs to run.
		 */
		task_stop_conclave_upcall_complete();
		break;

	case EXCLAVES_CTL_OP_LOOKUP_SERVICES: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		struct exclaves_resource_user uresource = {};

		if (usize > (MAX_CONCLAVE_RESOURCE_NUM * sizeof(struct exclaves_resource_user)) ||
		    (usize % sizeof(struct exclaves_resource_user) != 0)) {
			return KERN_INVALID_ARGUMENT;
		}

		if ((ubuffer == USER_ADDR_NULL && usize != 0) ||
		    (usize == 0 && ubuffer != USER_ADDR_NULL)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		/* For the moment we only ever have to deal with one request. */
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}
		error = copyin(ubuffer, &uresource, usize);
		if (error) {
			return KERN_INVALID_ARGUMENT;
		}

		const size_t name_buf_len = sizeof(uresource.r_name);
		if (strnlen(uresource.r_name, name_buf_len) == name_buf_len) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Do the regular lookup first. If that fails, fall back to
		 * the DARWIN domain, and finally to the KERNEL domain.
		 */
		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		uint64_t id = exclaves_service_lookup(domain, uresource.r_name);

		if (exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES) ||
		    exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN)) {
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_DARWIN,
				    uresource.r_name);
			}
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_KERNEL,
				    uresource.r_name);
			}
		}

		if (id == EXCLAVES_INVALID_ID) {
			return KERN_NOT_FOUND;
		}

		/*
		 * Looking up a forwarding service verifies its existence, but
		 * doesn't return the id, since communication with it is not
		 * possible.
		 */
		if (id > EXCLAVES_FORWARDING_RESOURCE_ID_BASE) {
			return KERN_NAME_EXISTS;
		}

		uresource.r_id = id;
		uresource.r_port = MACH_PORT_NULL;

		error = copyout(&uresource, ubuffer, usize);
		if (error) {
			return KERN_INVALID_ADDRESS;
		}

		kr = KERN_SUCCESS;
		break;
	}

	case EXCLAVES_CTL_OP_AUDIO_BUFFER_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_audio_memory_map(domain, id_name, usize,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_AUDIO_BUFFER_COPYOUT: {
		exclaves_resource_t *resource;

		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type !=
		    XNUPROXY_RESOURCETYPE_ARBITRATEDAUDIOMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_audio_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2, ustatus);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_SENSOR_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_sensor_open(domain, id_name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			/* No senders drops the reference. */
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_SENSOR_START: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_start(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	case EXCLAVES_CTL_OP_SENSOR_STOP: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_stop(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	case EXCLAVES_CTL_OP_SENSOR_STATUS: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}


		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_status(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));
		break;
	}
	case EXCLAVES_CTL_OP_NOTIFICATION_RESOURCE_LOOKUP: {
		exclaves_resource_t *notification_resource = NULL;
		mach_port_name_t port_name = MACH_PORT_NULL;

		struct exclaves_resource_user *notification_resource_user = NULL;
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		notification_resource_user = (struct exclaves_resource_user *)
		    kalloc_data(usize, Z_WAITOK | Z_ZERO | Z_NOFAIL);

		error = copyin(ubuffer, notification_resource_user, usize);
		if (error) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		const size_t name_buf_len = sizeof(notification_resource_user->r_name);
		if (strnlen(notification_resource_user->r_name, name_buf_len)
		    == name_buf_len) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		kr = exclaves_notification_create(domain,
		    notification_resource_user->r_name, &notification_resource);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}

		kr = exclaves_resource_create_port_name(notification_resource,
		    current_space(), &port_name);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}
		notification_resource_user->r_type = notification_resource->r_type;
		notification_resource_user->r_id = notification_resource->r_id;
		notification_resource_user->r_port = port_name;
		error = copyout(notification_resource_user, ubuffer, usize);
		if (error) {
			kr = KERN_INVALID_ADDRESS;
			goto notification_resource_lookup_out;
		}

notification_resource_lookup_out:
		if (notification_resource_user != NULL) {
			kfree_data(notification_resource_user, usize);
		}
		if (kr != KERN_SUCCESS && port_name != MACH_PORT_NULL) {
			mach_port_deallocate(current_space(), port_name);
		}
		break;
	}

	case EXCLAVES_CTL_OP_AOE_SETUP: {
		uint8_t num_message = 0;
		uint8_t num_worker = 0;

		if (task_get_conclave(task) == NULL) {
			kr = KERN_FAILURE;
			break;
		}

		kr = exclaves_aoe_setup(&num_message, &num_worker);
		if (kr != KERN_SUCCESS) {
			break;
		}

		error = copyout(&num_message, ubuffer, sizeof(num_message));
		if (error != 0) {
			kr = KERN_INVALID_ADDRESS;
			break;
		}

		error = copyout(&num_worker, ustatus, sizeof(num_worker));
		if (error != 0) {
			kr = KERN_INVALID_ADDRESS;
			break;
		}

		break;
	}

	case EXCLAVES_CTL_OP_AOE_MESSAGE_LOOP: {
		if (task_get_conclave(task) == NULL) {
			kr = KERN_FAILURE;
			break;
		}

		kr = exclaves_aoe_message_loop();
		break;
	}

	case EXCLAVES_CTL_OP_AOE_WORK_LOOP: {
		if (task_get_conclave(task) == NULL) {
			kr = KERN_FAILURE;
			break;
		}

		kr = exclaves_aoe_work_loop();
		break;
	}

	case EXCLAVES_CTL_OP_SENSOR_MIN_ON_TIME: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}

		if (ubuffer == USER_ADDR_NULL || usize == 0 ||
		    usize != sizeof(struct exclaves_indicator_deadlines)) {
			return KERN_INVALID_ARGUMENT;
		}

		struct exclaves_indicator_deadlines udurations;
		error = copyin(ubuffer, &udurations, usize);
		if (error) {
			return KERN_INVALID_ARGUMENT;
		}

		kr = exclaves_indicator_min_on_time_deadlines(&udurations);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		error = copyout(&udurations, ubuffer, usize);
		if (error) {
			return KERN_INVALID_ADDRESS;
		}

		break;
	}

	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(uap)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}

/* -------------------------------------------------------------------------- */
#pragma mark kernel entry points

kern_return_t
exclaves_endpoint_call(ipc_port_t port, exclaves_id_t endpoint_id,
    exclaves_tag_t *tag, exclaves_error_t *error)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	assert(port == IPC_PORT_NULL);

	Exclaves_L4_IpcBuffer_t *ipcb = Exclaves_L4_IpcBuffer();
	assert(ipcb != NULL);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call:\tendpoint id %lld tag 0x%llx\n",
	    endpoint_id, *tag);

	ipcb->mr[Exclaves_L4_Ipc_Mr_Tag] = *tag;
	kr = exclaves_endpoint_call_internal(port, endpoint_id);
	*tag = ipcb->mr[Exclaves_L4_Ipc_Mr_Tag];
	*error = XNUPROXY_CR_RETVAL(ipcb);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call return:\tendpoint id %lld tag 0x%llx "
	    "error 0x%llx\n", endpoint_id, *tag, *error);

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(port, endpoint_id, tag, error)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}

kern_return_t
exclaves_allocate_ipc_buffer(void **out_ipc_buffer)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
		kr = exclaves_xnuproxy_ctx_alloc(&thread->th_exclaves_ipc_ctx);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
	}
	thread->th_exclaves_ipc_ctx.usecnt++;

	if (out_ipc_buffer != NULL) {
		*out_ipc_buffer = thread->th_exclaves_ipc_ctx.ipcb;
	}
	return KERN_SUCCESS;
#else /* CONFIG_EXCLAVES */
#pragma unused(out_ipc_buffer)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
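
/*
 * Note: each successful exclaves_allocate_ipc_buffer() bumps the per-thread
 * use count and is expected to be balanced by exclaves_free_ipc_buffer();
 * the underlying context is only handed back to xnuproxy when the count
 * drops to zero (or unconditionally at thread termination, see
 * exclaves_thread_terminate()).
 */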

kern_return_t
exclaves_free_ipc_buffer(void)
{
#if CONFIG_EXCLAVES

	/* The inspection thread's cached buffer should never be freed */
	thread_t thread = current_thread();

	/* Don't try to free unallocated contexts. */
	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		return KERN_SUCCESS;
	}

	const thread_exclaves_inspection_flags_t iflags =
	    os_atomic_load(&thread->th_exclaves_inspection_state, relaxed);
	if ((iflags & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		return KERN_SUCCESS;
	}

	assert(thread->th_exclaves_ipc_ctx.usecnt > 0);
	if (--thread->th_exclaves_ipc_ctx.usecnt > 0) {
		return KERN_SUCCESS;
	}

	return exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
#else /* CONFIG_EXCLAVES */
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}

kern_return_t
exclaves_thread_terminate(__unused thread_t thread)
{
	kern_return_t kr = KERN_SUCCESS;

#if CONFIG_EXCLAVES
	assert(thread == current_thread());
	assert(thread->th_exclaves_intstate == 0);
	assert(thread->th_exclaves_state == 0);
	if (thread->th_exclaves_ipc_ctx.ipcb != NULL) {
		exclaves_debug_printf(show_progress,
		    "exclaves: thread_terminate freeing abandoned exclaves "
		    "ipc buffer\n");
		/* Unconditionally free context irrespective of usecount */
		thread->th_exclaves_ipc_ctx.usecnt = 0;
		kr = exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
		assert(kr == KERN_SUCCESS);
	}
#else
#pragma unused(thread)
#endif /* CONFIG_EXCLAVES */

	return kr;
}

OS_CONST
void*
exclaves_get_ipc_buffer(void)
{
#if CONFIG_EXCLAVES
	thread_t thread = current_thread();
	Exclaves_L4_IpcBuffer_t *ipcb = thread->th_exclaves_ipc_ctx.ipcb;

	return ipcb;
#else /* CONFIG_EXCLAVES */
	return NULL;
#endif /* CONFIG_EXCLAVES */
}

#if CONFIG_EXCLAVES

static void
bind_to_boot_core(void)
{
	/*
	 * First ensure the boot cluster isn't powered down preventing the
	 * thread from running at all.
	 */
	suspend_cluster_powerdown();
	const int cpu = ml_get_boot_cpu_number();
	processor_t processor = cpu_to_processor(cpu);
	assert3p(processor, !=, NULL);
	__assert_only processor_t old = thread_bind(processor);
	assert3p(old, ==, PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);
}

static void
unbind_from_boot_core(void)
{
	/* Unbind the thread from the boot CPU. */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);
	resume_cluster_powerdown();
}

extern kern_return_t exclaves_boot_early(void);
kern_return_t
exclaves_boot_early(void)
{
	kern_return_t kr = KERN_FAILURE;
	uint64_t boot_info = 0;
	bool early_enter = false;

	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);

	kr = exclaves_bootinfo(&boot_info, &early_enter);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Get bootinfo failed\n");
		return kr;
	}

	if (early_enter) {
		thread_t thread = current_thread();
		assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

		bind_to_boot_core();

		disable_preemption_without_measurements();
		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;

		kr = exclaves_enter();

		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
		enable_preemption();

		unbind_from_boot_core();

		if (kr != KERN_SUCCESS) {
			exclaves_debug_printf(show_errors,
			    "exclaves: early exclaves enter failed\n");
			if (kr == KERN_ABORTED) {
				panic("Unexpected ringgate panic status");
			}
			return kr;
		}
	}

	uint64_t xnuproxy_boot_info = 0;
	kr = exclaves_scheduler_init(boot_info, &xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Init scheduler failed\n");
		return kr;
	}

	kr = exclaves_xnuproxy_init(xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy setup failed\n");
		return KERN_FAILURE;
	}

	kr = exclaves_resource_init();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: failed to initialize resources\n");
		return kr;
	}

	kr = exclaves_panic_thread_setup();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy panic thread setup failed\n");
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
#endif /* CONFIG_EXCLAVES */

#if CONFIG_EXCLAVES
static struct XrtHosted_Callbacks *exclaves_callbacks = NULL;
#endif /* CONFIG_EXCLAVES */

void
exclaves_register_xrt_hosted_callbacks(struct XrtHosted_Callbacks *callbacks)
{
#if CONFIG_EXCLAVES
	if (exclaves_callbacks == NULL) {
		exclaves_callbacks = callbacks;
	}
#else /* CONFIG_EXCLAVES */
#pragma unused(callbacks)
#endif /* CONFIG_EXCLAVES */
}

void
exclaves_update_timebase(exclaves_clock_type_t type, uint64_t offset)
{
	assert(
		type == EXCLAVES_CLOCK_CONTINUOUS ||
		type == EXCLAVES_CLOCK_ABSOLUTE);
#if CONFIG_EXCLAVES
	exclaves_clock_t *clock = &exclaves_clock[type];
	uint64_t latest_offset = os_atomic_load(&clock->a_u64.latest_offset, relaxed);
	while (latest_offset != offset) {
		/* Update the latest offset with the new offset. If this fails, then a
		 * concurrent update occurred and our offset may be stale. */
		if (os_atomic_cmpxchgv(&clock->a_u64.latest_offset, latest_offset,
		    offset, &latest_offset, relaxed)) {
			break;
		}
	}
#else
#pragma unused(type, offset)
#endif /* CONFIG_EXCLAVES */
}
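
/*
 * The new offset is consumed before the next scheduler entry:
 * exclaves_scheduler_request() refuses to enter with a stale clock (see
 * exclaves_clocks_need_update()) unless the request is itself an
 * UpdateTimerOffset or WatchdogPanic request.
 */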

/* -------------------------------------------------------------------------- */

#pragma mark exclaves ipc internals

#if CONFIG_EXCLAVES

static kern_return_t
exclaves_endpoint_call_internal(__unused ipc_port_t port,
    exclaves_id_t endpoint_id)
{
	kern_return_t kr = KERN_SUCCESS;

	assert(port == IPC_PORT_NULL);

	kr = exclaves_xnuproxy_endpoint_call(endpoint_id);

	return kr;
}

/* -------------------------------------------------------------------------- */
#pragma mark secure kernel communication

/** save SME state before entering exclaves */
static bool
exclaves_save_matrix_state(void)
{
	bool saved = false;
#if HAS_ARM_FEAT_SME
	/* Save only the ZA/ZT0 state. SPTM will save/restore TPIDR2. */
	if (arm_sme_version() > 0 && !!(__builtin_arm_rsr64("SVCR") & SVCR_ZA)) {
		arm_sme_saved_state_t *sme_state = machine_thread_get_sme_state(current_thread());
		arm_save_sme_za_zt0(&sme_state->context, sme_state->svl_b);
		asm volatile ("smstop za");
		saved = true;
	}
#endif /* HAS_ARM_FEAT_SME */
	return saved;
}

static void
exclaves_restore_matrix_state(bool did_save_sme __unused)
{
#if HAS_ARM_FEAT_SME
	if (did_save_sme) {
		arm_sme_saved_state_t *sme_state = machine_thread_get_sme_state(current_thread());
		asm volatile ("smstart za");
		arm_load_sme_za_zt0(&sme_state->context, sme_state->svl_b);
	}
#endif /* HAS_ARM_FEAT_SME */
}

/* ringgate entry endpoints */
enum {
	RINGGATE_EP_ENTER,
	RINGGATE_EP_INFO,
	RINGGATE_EP_CPU_ONLINE,
	RINGGATE_EP_CPU_OFFLINE,
};

/* ringgate entry status codes */
enum {
	RINGGATE_STATUS_SUCCESS,
	RINGGATE_STATUS_ERROR,
	RINGGATE_STATUS_PANIC, /* RINGGATE_EP_ENTER: Another core panicked */
};

static bool
exclaves_cpu_callback(__unused void *param, enum cpu_event event, __unused unsigned int cpu_or_cluster)
{
	uint32_t endpoint;
	sptm_call_regs_t regs = { };
	uint64_t result = RINGGATE_STATUS_ERROR;

	switch (event) {
	// Both events are guaranteed to fire on the affected CPU, which mirrors
	// the calls from SPTM on initial CPU boot.
	case CPU_BOOTED:
		endpoint = RINGGATE_EP_CPU_ONLINE;
		break;
	case CPU_DOWN:
		endpoint = RINGGATE_EP_CPU_OFFLINE;
		break;
	default:
		return true;
	}

	if (exclaves_boot_supported()) {
		result = sk_enter(endpoint, &regs);
		assert(result == RINGGATE_STATUS_SUCCESS);
	}

	return true;
}

void
exclaves_early_init(void)
{
	cpu_event_register_callback(exclaves_cpu_callback, NULL);
}

OS_NOINLINE
static kern_return_t
exclaves_enter(void)
{
	uint32_t endpoint = RINGGATE_EP_ENTER;
	uint64_t result = RINGGATE_STATUS_ERROR;

	sptm_call_regs_t regs = { };

	thread_t thread = current_thread();

	/*
	 * Should never re-enter exclaves.
	 */
	if ((thread->th_exclaves_state & TH_EXCLAVES_UPCALL) != 0 ||
	    (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) != 0) {
		panic("attempt to re-enter exclaves");
	}

	/*
	 * Must have one (and only one) of the flags set to enter exclaves.
	 */
	__assert_only const thread_exclaves_state_flags_t mask = (
		TH_EXCLAVES_RPC |
		TH_EXCLAVES_XNUPROXY |
		TH_EXCLAVES_SCHEDULER_CALL |
		TH_EXCLAVES_RESUME_PANIC_THREAD);
	assert3u(thread->th_exclaves_state & mask, !=, 0);
	assert3u(thread->th_exclaves_intstate & TH_EXCLAVES_EXECUTION, ==, 0);

	/*
	 * Save any SME matrix state before entering exclaves.
	 */
	bool did_save_sme = exclaves_save_matrix_state();

#if MACH_ASSERT
	/*
	 * Set the ast to check that the thread doesn't return to userspace
	 * while in an RPC or XNUPROXY call.
	 */
	act_set_debug_assert();
#endif /* MACH_ASSERT */

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
	    | DBG_FUNC_START);

	recount_enter_secure();

	/* xnu_return_to_gl2 relies on this flag being present to correctly return
	 * to SK from interrupts xnu handles on behalf of SK. */
	thread->th_exclaves_intstate |= TH_EXCLAVES_EXECUTION;

	/*
	 * Bracket with labels so stackshot can determine where exclaves are
	 * entered from xnu.
	 */
	__asm__ volatile (
		"EXCLAVES_ENTRY_START:\n\t"
	);
	result = sk_enter(endpoint, &regs);
	__asm__ volatile (
		"EXCLAVES_ENTRY_END:\n\t"
	);

	thread->th_exclaves_intstate &= ~TH_EXCLAVES_EXECUTION;

	recount_leave_secure();

#if CONFIG_SPTM
	/**
	 * SPTM will return here with debug exceptions disabled (MDSCR_{KDE,MDE} == {0,0})
	 * but SK might have clobbered individual breakpoints, etc. Invalidate the current CPU
	 * debug state forcing a reload on the next return to user mode.
	 */
	if (__improbable(getCpuDatap()->cpu_user_debug != NULL)) {
		arm_debug_set(NULL);
	}
#endif /* CONFIG_SPTM */

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
	    | DBG_FUNC_END);

	/*
	 * Restore SME matrix state, if it existed.
	 */
	exclaves_restore_matrix_state(did_save_sme);

	switch (result) {
	case RINGGATE_STATUS_SUCCESS:
		return KERN_SUCCESS;
	case RINGGATE_STATUS_ERROR:
		return KERN_FAILURE;
	case RINGGATE_STATUS_PANIC:
		return KERN_ABORTED;
	default:
		assertf(false, "Unknown ringgate status %llu", result);
		__builtin_trap();
	}
}
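
/*
 * Callers follow the pattern visible in exclaves_boot_early() and
 * exclaves_scheduler_request(): set exactly one TH_EXCLAVES_* state flag,
 * disable preemption, call exclaves_enter(), then undo both in reverse
 * order once it returns.
 */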

/*
 * A bit in the lower byte of the value returned by RINGGATE_EP_INFO. If set,
 * it indicates that we should immediately enter the ringgate once in order
 * to allow the scheduler to perform early boot initialisation.
 */
#define EARLY_RINGGATE_ENTER 2
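/*
 * The remaining bits carry the physical address of the scheduler boot info;
 * exclaves_bootinfo() masks EARLY_RINGGATE_ENTER off before returning the
 * address to the caller.
 */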

OS_NOINLINE
static kern_return_t
exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter)
{
	uint32_t endpoint = RINGGATE_EP_INFO;
	uint64_t result = RINGGATE_STATUS_ERROR;

	sptm_call_regs_t regs = { };

	recount_enter_secure();
	result = sk_enter(endpoint, &regs);
	recount_leave_secure();
	if (result == RINGGATE_STATUS_ERROR) {
		return KERN_FAILURE;
	}

	*early_enter = (result & EARLY_RINGGATE_ENTER) != 0;
	*out_boot_info = result & ~EARLY_RINGGATE_ENTER;

	return KERN_SUCCESS;
}

/* -------------------------------------------------------------------------- */

#pragma mark exclaves scheduler communication

static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_request);
static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_response);

static void
exclaves_init_multicore(void)
{
	XrtHosted_Buffer_t **req, **res;

	exclaves_wait_for_cpu_init();

	exclaves_debug_printf(show_progress,
	    "Using MPIDR for exclave scheduler core IDs\n");

	/*
	 * Match the hardwareID to the physical ID and stash the pointers to the
	 * request/response buffers in per-cpu data for quick access.
	 */
	size_t core_count = exclaves_callbacks->v1.cores();
	for (size_t i = 0; i < core_count; i++) {
		const XrtHosted_Core_t *core = exclaves_callbacks->v1.core(i);
		uint32_t dt_phys_id = (uint32_t)core->v2.hardwareId;

		percpu_foreach(cpu_data, cpu_data) {
			if (cpu_data->cpu_phys_id != dt_phys_id) {
				continue;
			}
			req = PERCPU_GET_RELATIVE(exclaves_request, cpu_data, cpu_data);
			*req = exclaves_callbacks->v1.Core.request(i);

			res = PERCPU_GET_RELATIVE(exclaves_response, cpu_data, cpu_data);
			*res = exclaves_callbacks->v1.Core.response(i);

			break;
		}
	}
}

static kern_return_t
exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info)
{
	kern_return_t kr = KERN_SUCCESS;
	XrtHosted_Error_t hosted_error;

	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);

	if (!pmap_valid_address(boot_info)) {
		exclaves_debug_printf(show_errors,
		    "exclaves: %s: 0x%012llx\n",
		    "Invalid root physical address",
		    boot_info);
		return KERN_FAILURE;
	}

	if (exclaves_callbacks == NULL) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Callbacks not registered\n");
		return KERN_FAILURE;
	}

	/* Initialise XrtHostedXnu kext */
	kr = exclaves_hosted_error(
		exclaves_callbacks->v1.init(
			XrtHosted_Version_current,
			phystokv(boot_info),
			&hosted_error),
		&hosted_error);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* Record aperture addresses in buffer */
	size_t frames = exclaves_callbacks->v1.frames();
	XrtHosted_Mapped_t **pages = zalloc_permanent(
		frames * sizeof(XrtHosted_Mapped_t *),
		ZALIGN(XrtHosted_Mapped_t *));
	size_t index = 0;
	uint64_t phys = boot_info;
	while (index < frames) {
		if (!pmap_valid_address(phys)) {
			exclaves_debug_printf(show_errors,
			    "exclaves: %s: 0x%012llx\n",
			    "Invalid shared physical address",
			    phys);
			return KERN_FAILURE;
		}
		pages[index] = (XrtHosted_Mapped_t *)phystokv(phys);
		kr = exclaves_hosted_error(
			exclaves_callbacks->v1.nextPhys(
				pages[index],
				&index,
				&phys,
				&hosted_error),
			&hosted_error);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	/* Initialise the mapped region */
	exclaves_callbacks->v1.setMapping(
		XrtHosted_Region_scattered(frames, pages));

	/* Boot the scheduler. */
	kr = exclaves_scheduler_boot();
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	XrtHosted_Global_t *global = exclaves_callbacks->v1.global();

	/* Only support MPIDR multicore. */
	if (global->v2.smpStatus != XrtHosted_SmpStatus_MulticoreMpidr) {
		exclaves_debug_printf(show_errors,
		    "exclaves: exclaves scheduler doesn't support multicore");
		return KERN_FAILURE;
	}
	exclaves_init_multicore();

	/* Initialise the XNU proxy */
	if (!pmap_valid_address(global->v1.proxyInit)) {
		exclaves_debug_printf(show_errors,
		    "exclaves: %s: 0x%012llx\n",
1474 "Invalid xnu prpoxy physical address",
		    phys);
		return KERN_FAILURE;
	}
	*xnuproxy_boot_info = global->v1.proxyInit;

	return kr;
}

#if EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
#define exclaves_scheduler_debug_save_buffer(_buf) \
	XrtHosted_Buffer_t _buf##_copy = *(_buf)
#define exclaves_scheduler_debug_show_request_response(_request_buf, \
	_response_buf) ({ \
	if (exclaves_debug_enabled(show_scheduler_request_response)) { \
		printf("exclaves: Scheduler request = %p\n", _request_buf); \
		printf("exclaves: Scheduler request.tag = 0x%04llx\n", \
		    _request_buf##_copy.tag); \
		for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
			printf("exclaves: Scheduler request.arguments[%02zu] = " \
			    "0x%04llx\n", arg, \
			    _request_buf##_copy.arguments[arg]); \
		} \
		printf("exclaves: Scheduler response = %p\n", _response_buf); \
		printf("exclaves: Scheduler response.tag = 0x%04llx\n", \
		    _response_buf##_copy.tag); \
		for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
			printf("exclaves: Scheduler response.arguments[%02zu] = " \
			    "0x%04llx\n", arg, \
			    _response_buf##_copy.arguments[arg]); \
		} \
	}})
#else // EXCLAVES_SHOW_SCHEDULER_REQUEST_RESPONSE
#define exclaves_scheduler_debug_save_buffer(_buf) ({ })
#define exclaves_scheduler_debug_show_request_response(_request_buf, \
	_response_buf) ({ })
#endif // EXCLAVES_SHOW_SCHEDULER_REQUEST_RESPONSE

static void
request_trace_start(const XrtHosted_Request_t *request)
{
	switch (request->tag) {
	case XrtHosted_Request_ResumeWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_START,
		    request->ResumeWithHostId.hostId, request->ResumeWithHostId.thread);
		break;

	case XrtHosted_Request_InterruptWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_START,
		    request->InterruptWithHostId.hostId, request->InterruptWithHostId.thread);
		break;

	case XrtHosted_Request_UpdateTimerOffset:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_START,
		    request->UpdateTimerOffset.timer, request->UpdateTimerOffset.offset);
		break;

	case XrtHosted_Request_BootExclaves:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_START);
		break;

	case XrtHosted_Request_PmmEarlyAllocResponse:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_START,
		    request->PmmEarlyAllocResponse.a);
		break;

	case XrtHosted_Request_WatchdogPanic:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_START);
		break;

	default:
		panic("Unsupported exclaves scheduler request: %d", request->tag);
	}
}

static void
request_trace_end(const XrtHosted_Request_t *request)
{
	switch (request->tag) {
	case XrtHosted_Request_ResumeWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_InterruptWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_UpdateTimerOffset:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_BootExclaves:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_PmmEarlyAllocResponse:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_WatchdogPanic:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_END);
		break;

	default:
		panic("Unsupported exclaves scheduler request: %d", request->tag);
	}
}

__attribute__((always_inline))
static kern_return_t
exclaves_scheduler_request(const XrtHosted_Request_t *request,
    XrtHosted_Response_t *response)
{
	assert3u(request->tag, >, XrtHosted_Request_Invalid);
	assert3u(request->tag, <, XrtHosted_Request_Limit);

	kern_return_t kr = KERN_SUCCESS;
	bool istate;

	/*
	 * Disable preemption and interrupts as the xrt hosted scheduler data
	 * structures are per-core.
	 * Preemption disabled and interrupt disabled timeouts are disabled for
	 * now until we can co-ordinate the measurements with the exclaves side
	 * of things.
	 */
	istate = ml_set_interrupts_enabled_with_debug(false, false);

	/* Interrupts should have been enabled entering this function. */
	assert(istate);

	/*
	 * This needs to be done with interrupts disabled, otherwise stackshot
	 * could mark the thread blocked just after this function exits and a
	 * thread marked as AST blocked would go into exclaves.
	 */

	while ((os_atomic_load(&current_thread()->th_exclaves_inspection_state,
	    relaxed) & ~TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		/* Enable interrupts */
		(void) ml_set_interrupts_enabled_with_debug(true, false);

		/* Wait until the thread is collected on exclaves side */
		exclaves_inspection_check_ast();

		/* Disable interrupts and preemption before next AST check */
		ml_set_interrupts_enabled_with_debug(false, false);
	}
	/* Interrupts are disabled and exclaves_stackshot_ast is clean */

	disable_preemption_without_measurements();

	/*
	 * Don't enter with a stale clock (unless updating the clock or
	 * panicking).
	 */
	if (request->tag != XrtHosted_Request_UpdateTimerOffset &&
	    request->tag != XrtHosted_Request_WatchdogPanic &&
	    exclaves_clocks_need_update()) {
		enable_preemption();
		(void) ml_set_interrupts_enabled_with_debug(istate, false);
		return KERN_POLICY_LIMIT;
	}

	XrtHosted_Buffer_t *request_buf = *PERCPU_GET(exclaves_request);
	assert3p(request_buf, !=, NULL);

	request_trace_start(request);

	exclaves_callbacks->v1.Request.encode(request_buf, request);
	exclaves_scheduler_debug_save_buffer(request_buf);

	kr = exclaves_enter();

	/* The response may have come back on a different core. */
	XrtHosted_Buffer_t *response_buf = *PERCPU_GET(exclaves_response);
	assert3p(response_buf, !=, NULL);

	exclaves_scheduler_debug_save_buffer(response_buf);
	exclaves_callbacks->v1.Response.decode(response_buf, response);

	request_trace_end(request);

	enable_preemption();
	(void) ml_set_interrupts_enabled_with_debug(istate, false);

	exclaves_scheduler_debug_show_request_response(request_buf, response_buf);

	if (kr == KERN_ABORTED) {
		/* RINGGATE_EP_ENTER returned RINGGATE_STATUS_PANIC indicating that
		 * another core has panicked in exclaves and is on the way to call xnu
		 * panic() via SPTM, so wait here for that to happen. */
		exclaves_wait_for_panic();
	}

	return kr;
}

OS_NORETURN OS_NOINLINE
static void
exclaves_wait_for_panic(void)
{
	assert_wait_timeout((event_t)exclaves_wait_for_panic, THREAD_UNINT, 1,
	    NSEC_PER_SEC);
	wait_result_t wr = thread_block(THREAD_CONTINUE_NULL);
	panic("Unexpected wait for panic result: %d", wr);
}
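
/*
 * Note: nothing ever wakes this wait; if the one second timeout expires
 * before the panicking core brings the system down, the wait result is
 * reported via the panic above.
 */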
1693
1694 static kern_return_t
handle_response_yield(bool early,__assert_only Exclaves_L4_Word_t scid,const XrtHosted_Yield_t * yield)1695 handle_response_yield(bool early, __assert_only Exclaves_L4_Word_t scid,
1696 const XrtHosted_Yield_t *yield)
1697 {
1698 Exclaves_L4_Word_t responding_scid = yield->thread;
1699 Exclaves_L4_Word_t yielded_to_scid = yield->yieldTo;
1700 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1701
1702 exclaves_debug_printf(show_progress,
1703 "exclaves: Scheduler: %s scid 0x%lx yielded to scid 0x%lx\n",
1704 early ? "(early yield)" : "", responding_scid, yielded_to_scid);
1705 /* TODO: 1. remember yielding scid if it isn't the xnu proxy's
1706 * th_exclaves_scheduling_context_id so we know to resume it later
1707 * 2. translate yield_to to thread_switch()-style handoff.
1708 */
1709 if (!early) {
1710 assert3u(responding_scid, ==, scid);
1711 assert3u(yield->threadHostId, ==, ctid);
1712 }
1713
1714 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1715 MACH_EXCLAVES_SCHEDULER_YIELD), yielded_to_scid, early);
1716
1717 return KERN_SUCCESS;
1718 }
1719
1720 static kern_return_t
handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,const XrtHosted_Spawned_t * spawned)1721 handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,
1722 const XrtHosted_Spawned_t *spawned)
1723 {
1724 Exclaves_L4_Word_t responding_scid = spawned->thread;
1725 thread_t thread = current_thread();
1726 __assert_only ctid_t ctid = thread_get_ctid(thread);
1727
1728 /*
1729 * There are only a few places an exclaves thread is expected to be
1730 * spawned. Any other cases are considered errors.
1731 */
1732 if ((thread->th_exclaves_state & TH_EXCLAVES_SPAWN_EXPECTED) == 0) {
1733 exclaves_debug_printf(show_errors,
1734 "exclaves: Scheduler: Unexpected thread spawn: "
1735 "scid 0x%lx spawned scid 0x%llx\n",
1736 responding_scid, spawned->spawned);
1737 return KERN_FAILURE;
1738 }
1739
1740 exclaves_debug_printf(show_progress,
1741 "exclaves: Scheduler: scid 0x%lx spawned scid 0x%lx\n",
1742 responding_scid, (unsigned long)spawned->spawned);
1743 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1744 MACH_EXCLAVES_SCHEDULER_SPAWNED), spawned->spawned);
1745
1746 assert3u(responding_scid, ==, scid);
1747 assert3u(spawned->threadHostId, ==, ctid);
1748
1749 return KERN_SUCCESS;
1750 }
1751
1752 static kern_return_t
handle_response_terminated(const XrtHosted_Terminated_t * terminated)1753 handle_response_terminated(const XrtHosted_Terminated_t *terminated)
1754 {
1755 Exclaves_L4_Word_t responding_scid = terminated->thread;
1756 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1757
1758 exclaves_debug_printf(show_errors,
1759 "exclaves: Scheduler: Unexpected thread terminate: "
1760 "scid 0x%lx terminated scid 0x%llx\n", responding_scid,
1761 terminated->terminated);
1762 assert3u(terminated->threadHostId, ==, ctid);
1763
1764 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1765 MACH_EXCLAVES_SCHEDULER_TERMINATED),
1766 terminated->terminated);
1767
1768 return KERN_TERMINATED;
1769 }
1770
1771 static kern_return_t
handle_response_wait(const XrtHosted_Wait_t * wait)1772 handle_response_wait(const XrtHosted_Wait_t *wait)
1773 {
1774 Exclaves_L4_Word_t responding_scid = wait->waiter;
1775 thread_t thread = current_thread();
1776 __assert_only ctid_t ctid = thread_get_ctid(thread);
1777
1778 exclaves_debug_printf(show_progress,
1779 "exclaves: Scheduler: Wait: "
1780 "scid 0x%lx wait on owner scid 0x%llx, queue id 0x%llx, "
1781 "epoch 0x%llx\n", responding_scid, wait->owner,
1782 wait->queueId, wait->epoch);
1783 assert3u(wait->waiterHostId, ==, ctid);
1784
1785 /* The exclaves inspection thread should never wait. */
1786 if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
1787 panic("Exclaves inspection thread tried to wait\n");
1788 }
1789
1790 /*
1791 * Note, "owner" may not be safe to access directly, for example
1792 * the thread may have exited and been freed. esync_wait will
1793 * only access it under a lock if the epoch is fresh thus
1794 * ensuring safety.
1795 */
1796 const ctid_t owner = (ctid_t)wait->ownerHostId;
1797 const XrtHosted_Word_t id = wait->queueId;
1798 const uint64_t epoch = wait->epoch;
1799
1800 wait_interrupt_t interruptible;
1801 esync_policy_t policy;
1802
	switch (wait->interruptible) {
	case XrtHosted_Interruptibility_None:
		interruptible = THREAD_UNINT;
		policy = ESYNC_POLICY_KERNEL;
		break;

	case XrtHosted_Interruptibility_Voluntary:
		interruptible = THREAD_INTERRUPTIBLE;
		policy = ESYNC_POLICY_KERNEL;
		break;

	case XrtHosted_Interruptibility_DynamicQueue:
		interruptible = THREAD_INTERRUPTIBLE;
		policy = ESYNC_POLICY_USER;
		break;

	default:
		panic("Unknown exclaves interruptibility: %llu",
		    wait->interruptible);
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_START, id, epoch, owner,
	    wait->interruptible);
	const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
	    exclaves_get_queue_counter(id), owner, policy, interruptible);
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_END, wr);

	switch (wr) {
	case THREAD_INTERRUPTED:
		return KERN_ABORTED;

	case THREAD_NOT_WAITING:
	case THREAD_AWAKENED:
		return KERN_SUCCESS;

	default:
		panic("Unexpected wait result from esync_wait: %d", wr);
	}
}

static kern_return_t
handle_response_wake(const XrtHosted_Wake_t *wake)
{
	Exclaves_L4_Word_t responding_scid = wake->waker;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Wake: "
	    "scid 0x%lx wake of queue id 0x%llx, "
	    "epoch 0x%llx, all 0x%llx\n", responding_scid,
	    wake->queueId, wake->epoch, wake->all);
	assert3u(wake->wakerHostId, ==, ctid);

	const XrtHosted_Word_t id = wake->queueId;
	const uint64_t epoch = wake->epoch;
	const esync_wake_mode_t mode = wake->all != 0 ?
	    ESYNC_WAKE_ALL : ESYNC_WAKE_ONE;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, 0, mode);

	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
	    exclaves_get_queue_counter(id), mode, 0);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

	return KERN_SUCCESS;
}

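/*
 * Like a Wake, but wakes a single waiter and passes the new owner through
 * to esync_wake (ESYNC_WAKE_ONE_WITH_OWNER).
 */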
static kern_return_t
handle_response_wake_with_owner(const XrtHosted_WakeWithOwner_t *wake)
{
	Exclaves_L4_Word_t responding_scid = wake->waker;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: WakeWithOwner: "
	    "scid 0x%lx wake of queue id 0x%llx, "
	    "epoch 0x%llx, owner 0x%llx\n", responding_scid,
	    wake->queueId, wake->epoch,
	    wake->owner);

	assert3u(wake->wakerHostId, ==, ctid);

	const ctid_t owner = (ctid_t)wake->ownerHostId;
	const XrtHosted_Word_t id = wake->queueId;
	const uint64_t epoch = wake->epoch;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, owner,
	    ESYNC_WAKE_ONE_WITH_OWNER);

	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
	    exclaves_get_queue_counter(id), ESYNC_WAKE_ONE_WITH_OWNER, owner);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

	return KERN_SUCCESS;
}

static kern_return_t
handle_response_panic_wait(const XrtHosted_PanicWait_t *panic_wait)
{
	Exclaves_L4_Word_t panic_thread_scid = panic_wait->handler;
	__assert_only thread_t thread = current_thread();

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: PanicWait: "
	    "Panic thread SCID %lx\n",
	    panic_thread_scid);

	assert3u(panic_thread_scid, ==, thread->th_exclaves_ipc_ctx.scid);

	exclaves_panic_thread_wait();

	/* Not reached. */
	return KERN_SUCCESS;
}

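/*
 * Suspended/Resumed form a pair over the thread esync space
 * (ESYNC_SPACE_EXCLAVES_T): a Suspended response blocks the current thread
 * on the suspended thread's ID until a later Resumed response, handled on
 * some other thread, wakes it.
 */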
static kern_return_t
handle_response_suspended(const XrtHosted_Suspended_t *suspended)
{
	Exclaves_L4_Word_t responding_scid = suspended->suspended;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Suspended: "
	    "scid 0x%lx epoch 0x%llx\n", responding_scid, suspended->epoch);
	assert3u(suspended->suspendedHostId, ==, ctid);

	const uint64_t id = suspended->suspended;
	const uint64_t epoch = suspended->epoch;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_START, id, epoch);

	const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_T, id, epoch,
	    exclaves_get_thread_counter(id), 0, ESYNC_POLICY_KERNEL, THREAD_UNINT);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_END, wr);

	switch (wr) {
	case THREAD_INTERRUPTED:
		return KERN_ABORTED;

	case THREAD_NOT_WAITING:
	case THREAD_AWAKENED:
		return KERN_SUCCESS;

	default:
		panic("Unexpected wait result from esync_wait: %d", wr);
	}
}

static kern_return_t
handle_response_resumed(const XrtHosted_Resumed_t *resumed)
{
	Exclaves_L4_Word_t responding_scid = resumed->thread;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Resumed: scid 0x%lx resume of scid 0x%llx "
	    "(ctid: 0x%llx), epoch 0x%llx\n", responding_scid, resumed->resumed,
	    resumed->resumedHostId, resumed->epoch);
	assert3u(resumed->threadHostId, ==, ctid);

	const ctid_t target = (ctid_t)resumed->resumedHostId;
	const XrtHosted_Word_t id = resumed->resumed;
	const uint64_t epoch = resumed->epoch;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_START, id, epoch,
	    target);

	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_T, id, epoch,
	    exclaves_get_thread_counter(id), ESYNC_WAKE_THREAD, target);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_END,
	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

	return KERN_SUCCESS;
}

static kern_return_t
handle_response_interrupted(const XrtHosted_Interrupted_t *interrupted)
{
	Exclaves_L4_Word_t responding_scid = interrupted->thread;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Interrupted: "
	    "scid 0x%lx interrupt on queue id 0x%llx, "
	    "epoch 0x%llx, target 0x%llx\n", responding_scid,
	    interrupted->queueId, interrupted->epoch,
	    interrupted->interruptedHostId);
	assert3u(interrupted->threadHostId, ==, ctid);

	const ctid_t target = (ctid_t)interrupted->interruptedHostId;
	const XrtHosted_Word_t id = interrupted->queueId;
	const uint64_t epoch = interrupted->epoch;

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_START, id, epoch,
	    target);

	kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
	    exclaves_get_queue_counter(id), ESYNC_WAKE_THREAD, target);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_END,
	    kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

	return KERN_SUCCESS;
}

static kern_return_t
handle_response_nothing_scheduled(
	__unused const XrtHosted_NothingScheduled_t *nothing_scheduled)
{
	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: nothing scheduled\n");

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_NOTHING_SCHEDULED));

	return KERN_SUCCESS;
}

static kern_return_t
handle_response_all_exclaves_booted(
	__unused const XrtHosted_AllExclavesBooted_t *all_exclaves_booted)
{
	exclaves_debug_printf(show_progress,
	    "exclaves: scheduler: all exclaves booted\n");

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_ALL_EXCLAVES_BOOTED));

	return KERN_SUCCESS;
}

/*
 * The Early Alloc response asks for npages to be allocated. The list of
 * allocated pages is written into the first allocated page in the form of
 * 32-bit page numbers. The physical address of the first page is passed back
 * to the exclaves scheduler as part of the next request.
 */
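/*
 * Sketch of the page list layout (one 32-bit physical page number per
 * allocated page, written into the first allocated page):
 *
 *   first_page_pa -> [ page[0] | page[1] | ... | page[npages - 1] ]
 */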
static kern_return_t
handle_response_pmm_early_alloc(const XrtHosted_PmmEarlyAlloc_t *pmm_early_alloc,
    uint64_t *pagelist_pa)
{
	const uint32_t npages = (uint32_t)pmm_early_alloc->a;
	const uint64_t flags = pmm_early_alloc->b;

	exclaves_memory_pagekind_t kind = EXCLAVES_MEMORY_PAGEKIND_ROOTDOMAIN;
	exclaves_memory_page_flags_t alloc_flags = EXCLAVES_MEMORY_PAGE_FLAGS_NONE;

	exclaves_debug_printf(show_progress,
	    "exclaves: scheduler: pmm early alloc, npages: %u, flags: %llu\n",
	    npages, flags);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_EARLY_ALLOC), npages, flags);

	if (npages == 0) {
		return KERN_SUCCESS;
	}

	if (npages > EXCLAVES_MEMORY_MAX_REQUEST) {
		exclaves_debug_printf(show_errors,
		    "exclaves: request to allocate too many pages: %u\n",
		    npages);
		return KERN_NO_SPACE;
	}

#if HAS_MTE
	if (flags & XNUUPCALLS_PAGEALLOCFLAGS_SEC_TRANSITION) {
		alloc_flags |= EXCLAVES_MEMORY_PAGE_FLAGS_MTE_TAGGED;
	}
#endif /* HAS_MTE */

	/*
	 * As npages must be relatively small (<= EXCLAVES_MEMORY_MAX_REQUEST),
	 * stack allocation is sufficient and fast. If
	 * EXCLAVES_MEMORY_MAX_REQUEST gets large, this should probably be
	 * moved to the heap.
	 */
	uint32_t page[EXCLAVES_MEMORY_MAX_REQUEST];
	exclaves_memory_alloc(npages, page, kind, alloc_flags);

	/*
	 * Copy the list of pages into the first page. The copy is currently
	 * disabled: it needs to move to before the SPTM retype.
	 */
	uint64_t first_page_pa = ptoa(page[0]);
#if 0
	// move to before sptm retype
	uint32_t *first_page = (uint32_t *)phystokv(first_page_pa);
	for (int i = 0; i < npages; i++) {
		first_page[i] = page[i];
	}
#endif

	*pagelist_pa = first_page_pa;
	return KERN_SUCCESS;
}

static void
handle_response_watchdog_panic_complete(
	__unused const XrtHosted_WatchdogPanicComplete_t *panic_complete)
{
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_WATCHDOG_PANIC_COMPLETE));
}

OS_NORETURN
static void
handle_response_panicking(
	__unused const XrtHosted_Panicking_t *panicking)
{
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_PANICKING));

	exclaves_wait_for_panic();

	/* Not reached. */
}

static inline bool
exclaves_clocks_need_update(void)
{
	const exclaves_clock_type_t clocks[] = {
		EXCLAVES_CLOCK_ABSOLUTE,
		EXCLAVES_CLOCK_CONTINUOUS
	};

	for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
		const exclaves_clock_t *clock = &exclaves_clock[i];
		exclaves_clock_t local = {
			.u128 = os_atomic_load(&clock->a_u128, relaxed),
		};

		if (local.u64.sent_offset != local.u64.latest_offset) {
			return true;
		}
	}

	return false;
}

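/*
 * Push pending clock offset updates to the exclaves scheduler. The
 * sent/latest offsets are read as a single 128-bit atomic; after each
 * successful request the sent offset is advanced with a cmpxchg (so a racing
 * update is never lost) and the latest offset is re-read in case it moved
 * while the request was in flight.
 */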
OS_NOINLINE
static kern_return_t
exclaves_clocks_update(void)
{
	const exclaves_clock_type_t clocks[] = {
		EXCLAVES_CLOCK_ABSOLUTE,
		EXCLAVES_CLOCK_CONTINUOUS
	};

	for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
		exclaves_clock_t local;
		exclaves_clock_t *clock = &exclaves_clock[i];

		local.u128 = os_atomic_load(&clock->a_u128, relaxed);
		while (local.u64.sent_offset != local.u64.latest_offset) {
			XrtHosted_Timer_t timer = i == EXCLAVES_CLOCK_ABSOLUTE ?
			    XrtHosted_Timer_Absolute :
			    XrtHosted_Timer_Continuous;

			kern_return_t kr =
			    exclaves_scheduler_request_update_timer(timer,
			    local.u64.latest_offset);
			if (kr != KERN_SUCCESS) {
				return kr;
			}

			/*
			 * Swap the sent offset with the local latest offset.
			 * If it fails, the sent offset will be reloaded.
			 */
			os_atomic_cmpxchgv(&clock->a_u64.sent_offset,
			    local.u64.sent_offset, local.u64.latest_offset,
			    &local.u64.sent_offset, relaxed);

			/*
			 * Fetch the latest offset again, in case we are stale.
			 */
			local.u64.latest_offset = os_atomic_load(
				&clock->a_u64.latest_offset, relaxed);
		}
	}

	return KERN_SUCCESS;
}

static kern_return_t
exclaves_scheduler_boot(void)
{
	/* This must happen on the boot CPU - bind the thread. */
	bind_to_boot_core();

	/*
	 * Set the request/response buffers. These may be overridden later
	 * when doing multicore setup.
	 */
	*PERCPU_GET(exclaves_request) =
	    exclaves_callbacks->v1.Core.request(XrtHosted_Core_bootIndex);
	*PERCPU_GET(exclaves_response) =
	    exclaves_callbacks->v1.Core.response(XrtHosted_Core_bootIndex);

	kern_return_t kr = exclaves_scheduler_request_boot();

	unbind_from_boot_core();

	return kr;
}

static kern_return_t
exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
    uint64_t offset)
{
	thread_t thread = current_thread();

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to update timer\n");

	XrtHosted_Response_t response = {
		.tag = XrtHosted_Response_NothingScheduled,
	};

	const XrtHosted_Request_t request = XrtHosted_Request_UpdateTimerOffsetMsg(
		.timer = timer,
		.offset = offset,
		);

	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
	kern_return_t kr = exclaves_scheduler_request(&request, &response);
	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * POLICY_LIMIT should only happen if a timer update was
		 * pending (and thus should never happen when trying to update
		 * a timer).
		 */
		panic("exclaves: timer update requested when updating timer");

	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: scheduler request failed\n");
		return kr;
	}

	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	switch (response.tag) {
	case XrtHosted_Response_NothingScheduled:
		kr = handle_response_nothing_scheduled(&response.NothingScheduled);
		break;

	default:
		exclaves_debug_printf(show_errors, "exclaves: "
		    "unexpected scheduler response when updating timer\n");
		kr = KERN_FAILURE;
		break;
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

	return kr;
}

static kern_return_t
exclaves_scheduler_request_boot(void)
{
	kern_return_t kr = KERN_FAILURE;
	thread_t thread = current_thread();

	assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to boot exclave\n");

	XrtHosted_Response_t response = {
		.tag = XrtHosted_Response_Invalid,
	};
	uint64_t pagelist_pa = 0;

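	/*
	 * Keep issuing boot requests until the scheduler reports that all
	 * exclaves have booted. A PmmEarlyAlloc response is answered on the
	 * next iteration by passing back the physical address of the page
	 * list.
	 */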
	while (response.tag != XrtHosted_Response_AllExclavesBooted) {
		const XrtHosted_Request_t request = pagelist_pa != 0 ?
		    XrtHosted_Request_PmmEarlyAllocResponseMsg(.a = pagelist_pa) :
		    XrtHosted_Request_BootExclavesMsg();
		pagelist_pa = 0;

		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
		kr = exclaves_scheduler_request(&request, &response);
		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

		switch (kr) {
		case KERN_SUCCESS:
			break;

		case KERN_POLICY_LIMIT:
			kr = exclaves_clocks_update();
			if (kr != KERN_SUCCESS) {
				return kr;
			}
			/*
			 * Don't try to process the response - we just updated
			 * the clock so continue with the boot request.
			 */
			continue;

		default:
			exclaves_debug_printf(show_errors,
			    "exclaves: scheduler request failed\n");
			return KERN_FAILURE;
		}

		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

		switch (response.tag) {
		case XrtHosted_Response_Yield:
			kr = handle_response_yield(true, 0, &response.Yield);
			break;

		case XrtHosted_Response_NothingScheduled:
			kr = handle_response_nothing_scheduled(&response.NothingScheduled);
			break;

		case XrtHosted_Response_AllExclavesBooted:
			kr = handle_response_all_exclaves_booted(&response.AllExclavesBooted);
			break;

		case XrtHosted_Response_PmmEarlyAlloc:
			kr = handle_response_pmm_early_alloc(&response.PmmEarlyAlloc, &pagelist_pa);
			break;

		case XrtHosted_Response_PanicBufferAddress:
			handle_response_panic_buffer_address(response.PanicBufferAddress.physical);
			break;

		case XrtHosted_Response_Panicking:
			handle_response_panicking(&response.Panicking);
			/* Not reached. */

		default:
			exclaves_debug_printf(show_errors,
			    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
			    response.tag);
			kr = KERN_FAILURE;
			break;
		}

		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

		if (kr != KERN_SUCCESS) {
			break;
		}
	}

	return kr;
}

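/*
 * Enter the exclaves scheduler to resume (or interrupt) the thread described
 * by ctx and dispatch on the single response that comes back. The inline asm
 * labels below bracket the response handling, presumably backing the
 * scheduler-request range used by exclaves_scheduler_request_in_range().
 */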
OS_INLINE
kern_return_t
exclaves_scheduler_request_resume(const exclaves_ctx_t *ctx, bool interrupted)
{
	thread_t thread = current_thread();
	const ctid_t ctid = thread_get_ctid(thread);

	assert3u(thread->th_exclaves_state &
	    (TH_EXCLAVES_RESUME_PANIC_THREAD | TH_EXCLAVES_RPC), !=, 0);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Request to resume scid 0x%lx\n", ctx->scid);

	XrtHosted_Response_t response = {};
	const XrtHosted_Request_t request = interrupted ?
	    XrtHosted_Request_InterruptWithHostIdMsg(
		.thread = ctx->scid,
		.hostId = ctid,
		) :
	    XrtHosted_Request_ResumeWithHostIdMsg(
		.thread = ctx->scid,
		.hostId = ctid,
		);

	kern_return_t kr = exclaves_scheduler_request(&request, &response);

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * Don't try to handle any response (as there isn't one), just
		 * return to the caller which will check MSG STATUS and
		 * re-enter if necessary.
		 */
		return exclaves_clocks_update();

	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: scheduler request failed\n");
		break;
	}

	if (kr != KERN_SUCCESS) {
		return kr;
	}

	__asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_START:\n\t");
	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	switch (response.tag) {
	case XrtHosted_Response_Wait:
		kr = handle_response_wait(&response.Wait);
		break;

	case XrtHosted_Response_Wake:
		kr = handle_response_wake(&response.Wake);
		break;

	case XrtHosted_Response_Yield:
		kr = handle_response_yield(false, ctx->scid, &response.Yield);
		break;

	case XrtHosted_Response_Spawned:
		kr = handle_response_spawned(ctx->scid, &response.Spawned);
		break;

	case XrtHosted_Response_Terminated:
		kr = handle_response_terminated(&response.Terminated);
		break;

	case XrtHosted_Response_WakeWithOwner:
		kr = handle_response_wake_with_owner(&response.WakeWithOwner);
		break;

	case XrtHosted_Response_PanicWait:
		kr = handle_response_panic_wait(&response.PanicWait);
		break;

	case XrtHosted_Response_Suspended:
		kr = handle_response_suspended(&response.Suspended);
		break;

	case XrtHosted_Response_Resumed:
		kr = handle_response_resumed(&response.Resumed);
		break;

	case XrtHosted_Response_Interrupted:
		kr = handle_response_interrupted(&response.Interrupted);
		break;

	case XrtHosted_Response_Panicking:
		handle_response_panicking(&response.Panicking);
		/* Not reached. */

	case XrtHosted_Response_Invalid:
	case XrtHosted_Response_Failure:
	case XrtHosted_Response_Pong:
	case XrtHosted_Response_SleepUntil:
	case XrtHosted_Response_Awaken:
	default:
		exclaves_debug_printf(show_errors,
		    "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
		    response.tag);
		kr = KERN_FAILURE;
		break;
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
	__asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_END:\n\t");

	return kr;
}

/* A friendly name to show up in backtraces. */
OS_NOINLINE
kern_return_t
exclaves_run(thread_t thread, bool interrupted)
{
	return exclaves_scheduler_request_resume(&thread->th_exclaves_ipc_ctx,
	           interrupted);
}

/*
 * Note: this is called from a thread with RT priority which is on the way to
 * panicking and thus doesn't log.
 */
kern_return_t
exclaves_scheduler_request_watchdog_panic(void)
{
	thread_t thread = current_thread();

	XrtHosted_Response_t response = {};
	const XrtHosted_Request_t request = XrtHosted_Request_WatchdogPanicMsg();

	/*
	 * Check for consistent exclaves thread state to make sure we don't
	 * accidentally block. This should normally never happen but if it
	 * does, just return and allow the caller to panic without gathering
	 * an exclaves stackshot.
	 */
	if (os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) != 0 ||
	    thread->th_exclaves_state != 0) {
		return KERN_FAILURE;
	}

	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
	kern_return_t kr = exclaves_scheduler_request(&request, &response);
	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

	switch (kr) {
	case KERN_SUCCESS:
		break;

	case KERN_POLICY_LIMIT:
		/*
		 * POLICY_LIMIT should only happen if a timer update was
		 * pending (and thus should never happen when trying to send a
		 * watchdog panic message).
		 */
		panic("exclaves: "
		    "timer update requested when calling watchdog panic");

	default:
		return kr;
	}

	thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

	switch (response.tag) {
	case XrtHosted_Response_WatchdogPanicComplete:
		handle_response_watchdog_panic_complete(&response.WatchdogPanicComplete);
		break;

	case XrtHosted_Response_Panicking:
		handle_response_panicking(&response.Panicking);
		/* Not reached. */

	default:
		panic("exclaves: unexpected scheduler response "
		    "when sending watchdog panic request: %d", response.tag);
	}

	thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

	return kr;
}

/* -------------------------------------------------------------------------- */

#pragma mark exclaves xnu proxy communication

static kern_return_t
exclaves_hosted_error(bool success, XrtHosted_Error_t *error)
{
	if (success) {
		return KERN_SUCCESS;
	} else {
		exclaves_debug_printf(show_errors,
		    "exclaves: XrtHosted: %s[%d] (%s): %s\n",
		    error->file,
		    error->line,
		    error->function,
		    error->expression
		    );
		return KERN_FAILURE;
	}
}

#pragma mark exclaves privilege management

/*
 * All entitlement checking is enabled by default.
 */
#define DEFAULT_ENTITLEMENT_FLAGS (~0)

/*
 * boot-arg to control the use of entitlements.
 * Eventually this should be removed and entitlement checking should be gated
 * on the EXCLAVES_R_ENTITLEMENTS requirement.
 * This will be addressed with rdar://125153460.
 */
TUNABLE(unsigned int, exclaves_entitlement_flags,
    "exclaves_entitlement_flags", DEFAULT_ENTITLEMENT_FLAGS);

static bool
has_entitlement(task_t task, const exclaves_priv_t priv,
    const char *entitlement)
{
	/* Skip the entitlement check if not enabled. */
	if ((exclaves_entitlement_flags & priv) == 0) {
		return true;
	}

	return IOTaskHasEntitlement(task, entitlement);
}

static bool
has_entitlement_vnode(void *vnode, const int64_t off,
    const exclaves_priv_t priv, const char *entitlement)
{
	/* Skip the entitlement check if not enabled. */
	if ((exclaves_entitlement_flags & priv) == 0) {
		return true;
	}

	return IOVnodeHasEntitlement(vnode, off, entitlement);
}

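/*
 * Example (a hypothetical caller, for illustration only): gate conclave
 * spawning on the calling task's privilege:
 *
 *	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
 *		return KERN_DENIED;
 *	}
 */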
bool
exclaves_has_priv(task_t task, exclaves_priv_t priv)
{
	const bool is_kernel = task == kernel_task;
	const bool is_launchd = task_pid(task) == 1;

	switch (priv) {
	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
		/* Both launchd and entitled tasks can spawn new conclaves. */
		if (is_launchd) {
			return true;
		}
		return has_entitlement(task, priv,
		           "com.apple.private.exclaves.conclave-spawn");

	case EXCLAVES_PRIV_KERNEL_DOMAIN:
		/*
		 * Both the kernel itself and user tasks with the right
		 * privilege can access exclaves resources in the kernel
		 * domain.
		 */
		if (is_kernel) {
			return true;
		}

		/*
		 * If the task was entitled and has been through this path
		 * before, it will have set the TFRO_HAS_KD_ACCESS flag.
		 */
		if ((task_ro_flags_get(task) & TFRO_HAS_KD_ACCESS) != 0) {
			return true;
		}

		if (has_entitlement(task, priv,
		    "com.apple.private.exclaves.kernel-domain")) {
			task_ro_flags_set(task, TFRO_HAS_KD_ACCESS);
			return true;
		}

		return false;

	case EXCLAVES_PRIV_BOOT:
		/* Both launchd and entitled tasks can boot exclaves. */
		if (is_launchd) {
			return true;
		}
		/* BEGIN IGNORE CODESTYLE */
		return has_entitlement(task, priv,
		    "com.apple.private.exclaves.boot");
		/* END IGNORE CODESTYLE */

	case EXCLAVES_PRIV_INDICATOR_MIN_ON_TIME:
		/*
		 * If the task was entitled and has been through this path
		 * before, it will have set the
		 * TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS flag.
		 */
		if ((task_ro_flags_get(task) & TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS) != 0) {
			return true;
		}

		if (has_entitlement(task, priv,
		    "com.apple.private.exclaves.indicator_min_on_time")) {
			task_ro_flags_set(task, TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS);
			return true;
		}

		return false;

	/* The CONCLAVE HOST priv is always checked by vnode. */
	case EXCLAVES_PRIV_CONCLAVE_HOST:
	default:
		panic("bad exclaves privilege (%u)", priv);
	}
}

bool
exclaves_has_priv_vnode(void *vnode, int64_t off, exclaves_priv_t priv)
{
	switch (priv) {
	case EXCLAVES_PRIV_CONCLAVE_HOST: {
		const bool has_conclave_host = has_entitlement_vnode(vnode,
		    off, priv, "com.apple.private.exclaves.conclave-host");

		/*
		 * Tasks should never have both EXCLAVES_PRIV_CONCLAVE_HOST
		 * *and* EXCLAVES_PRIV_KERNEL_DOMAIN.
		 */

		/* Don't check if neither entitlement is being enforced. */
		if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0 ||
		    (exclaves_entitlement_flags & EXCLAVES_PRIV_KERNEL_DOMAIN) == 0) {
			return has_conclave_host;
		}

		const bool has_domain_kernel = has_entitlement_vnode(vnode, off,
		    EXCLAVES_PRIV_KERNEL_DOMAIN,
		    "com.apple.private.exclaves.kernel-domain");

		/* See if it has both. */
		if (has_conclave_host && has_domain_kernel) {
			exclaves_debug_printf(show_errors,
			    "exclaves: task has both conclave-host and "
			    "kernel-domain entitlements which is forbidden\n");
			return false;
		}

		return has_conclave_host;
	}

	case EXCLAVES_PRIV_CONCLAVE_SPAWN:
		return has_entitlement_vnode(vnode, off, priv,
		           "com.apple.private.exclaves.conclave-spawn");

	default:
		panic("bad exclaves privilege (%u)", priv);
	}
}


#pragma mark exclaves stackshot range

/*
 * Unslid pointers defining the range of code which switches threads into the
 * secure world.
 */
uintptr_t exclaves_enter_range_start;
uintptr_t exclaves_enter_range_end;

/*
 * Unslid pointers defining the range of code which handles exclaves scheduler
 * requests.
 */
uintptr_t exclaves_scheduler_request_range_start;
uintptr_t exclaves_scheduler_request_range_end;


__startup_func
static void
initialize_exclaves_ranges(void)
{
	exclaves_enter_range_start = VM_KERNEL_UNSLIDE(&exclaves_enter_start_label);
	assert3u(exclaves_enter_range_start, !=, 0);
	exclaves_enter_range_end = VM_KERNEL_UNSLIDE(&exclaves_enter_end_label);
	assert3u(exclaves_enter_range_end, !=, 0);

	exclaves_scheduler_request_range_start = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_start_label);
	assert3u(exclaves_scheduler_request_range_start, !=, 0);
	exclaves_scheduler_request_range_end = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_end_label);
	assert3u(exclaves_scheduler_request_range_end, !=, 0);
}
STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, initialize_exclaves_ranges);

/*
 * Return true if the specified address is in exclaves_enter.
 */
static bool
exclaves_enter_in_range(uintptr_t addr, bool slid)
{
	return slid ?
	       exclaves_in_range(addr, (uintptr_t)&exclaves_enter_start_label, (uintptr_t)&exclaves_enter_end_label) :
	       exclaves_in_range(addr, exclaves_enter_range_start, exclaves_enter_range_end);
}

/*
 * Return true if the specified address is in the scheduler request handlers.
 */
static bool
exclaves_scheduler_request_in_range(uintptr_t addr, bool slid)
{
	return slid ?
	       exclaves_in_range(addr, (uintptr_t)&exclaves_scheduler_request_start_label, (uintptr_t)&exclaves_scheduler_request_end_label) :
	       exclaves_in_range(addr, exclaves_scheduler_request_range_start, exclaves_scheduler_request_range_end);
}

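/*
 * Given an array of (possibly unslid) kernel return addresses, return the
 * offset at which exclaves stack frames should be spliced into a stackshot:
 * just past the first scheduler-request frame, else just past the first
 * upcall frame, else at the first exclaves-enter frame, else at the top of
 * the stack.
 */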
uint32_t
exclaves_stack_offset(const uintptr_t *addr, size_t nframes, bool slid)
{
	size_t i = 0;

	/* Check for a frame matching the scheduler request range. */
	for (i = 0; i < nframes; i++) {
		if (exclaves_scheduler_request_in_range(addr[i], slid)) {
			break;
		}
	}

	/* Insert exclaves stacks before the scheduler request frame. */
	if (i < nframes) {
		return (uint32_t)(i + 1);
	}

	/* Check for a frame matching the upcall code range. */
	for (i = 0; i < nframes; i++) {
		if (exclaves_upcall_in_range(addr[i], slid)) {
			break;
		}
	}

	/* Insert exclaves stacks before the upcall frame when found. */
	if (i < nframes) {
		return (uint32_t)(i + 1);
	}

	/* Check for a frame matching the exclaves enter range. */
	for (i = 0; i < nframes; i++) {
		if (exclaves_enter_in_range(addr[i], slid)) {
			break;
		}
	}

	/* Put exclaves stacks on top of kernel stacks by default. */
	if (i == nframes) {
		i = 0;
	}
	return (uint32_t)i;
}

#if DEVELOPMENT || DEBUG

/* Tweak the set of relaxed requirements on startup. */
__startup_func
static void
exclaves_requirement_startup(void)
{
	/*
	 * The medium-term plan is that the boot-arg controlling entitlements
	 * goes away entirely and is replaced with EXCLAVES_R_ENTITLEMENTS.
	 * Until that happens, for historical reasons, if the entitlement
	 * boot-arg has disabled EXCLAVES_PRIV_CONCLAVE_HOST, then relax
	 * EXCLAVES_R_CONCLAVE_RESOURCES here too.
	 */
	if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0) {
		exclaves_requirement_relax(EXCLAVES_R_CONCLAVE_RESOURCES);
	}
}
STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, exclaves_requirement_startup);

#endif /* DEVELOPMENT || DEBUG */

#endif /* CONFIG_EXCLAVES */

#if __has_include(<Tightbeam/tightbeam.h>)

#include <Tightbeam/tightbeam.h>

/*
 * Tightbeam needs to initialize for kernel transports (xnu and AFK).
 * Only the XNU transport is specific to exclaves - AFK is not.
 */
__startup_func
static void
tightbeam_startup(void)
{
	tb_transport_startup();
}
STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, tightbeam_startup);

#endif /* __has_include(<Tightbeam/tightbeam.h>) */

#ifndef CONFIG_EXCLAVES
/*
 * Stubs for sensor functions which are not compiled in from exclaves.c when
 * CONFIG_EXCLAVES is disabled.
 */

kern_return_t
exclaves_sensor_start(exclaves_sensor_type_t sensor_type, uint64_t flags,
    exclaves_sensor_status_t *status)
{
#pragma unused(sensor_type, flags, status)
	return KERN_NOT_SUPPORTED;
}

kern_return_t
exclaves_sensor_stop(exclaves_sensor_type_t sensor_type, uint64_t flags,
    exclaves_sensor_status_t *status)
{
#pragma unused(sensor_type, flags, status)
	return KERN_NOT_SUPPORTED;
}

kern_return_t
exclaves_sensor_status(exclaves_sensor_type_t sensor_type, uint64_t flags,
    exclaves_sensor_status_t *status)
{
#pragma unused(sensor_type, flags, status)
	return KERN_NOT_SUPPORTED;
}

#endif /* ! CONFIG_EXCLAVES */