/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/exclaves.h>
#include <mach/mach_traps.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <kern/recount.h>
#include <kern/startup.h>

#if CONFIG_EXCLAVES

#if CONFIG_SPTM
#include <arm64/sptm/sptm.h>
#else
#error Invalid configuration
#endif /* CONFIG_SPTM */

#include <arm/cpu_data_internal.h>
#include <arm/misc_protos.h>
#include <kern/epoch_sync.h>
#include <kern/ipc_kobject.h>
#include <kern/kalloc.h>
#include <kern/locks.h>
#include <kern/percpu.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/exclaves_stackshot.h>
#include <kern/exclaves_test_stackshot.h>
#include <vm/pmap.h>
#include <pexpert/pexpert.h>

#include <mach/exclaves_l4.h>
#include <mach/mach_port.h>

#include <Exclaves/Exclaves.h>

#include <IOKit/IOBSD.h>

#include <xnuproxy/messages.h>

#include "exclaves_debug.h"
#include "exclaves_panic.h"
#include "exclaves_xnuproxy.h"

/* External & generated headers */
#include <xrt_hosted_types/types.h>

#if __has_include(<Tightbeam/tightbeam.h>)
#include <Tightbeam/tightbeam.h>
#include <Tightbeam/tightbeam_private.h>
#endif

#include "exclaves_resource.h"
#include "exclaves_upcalls.h"
#include "exclaves_boot.h"
#include "exclaves_inspection.h"
#include "exclaves_memory.h"
#include "exclaves_internal.h"
#include "exclaves_aoe.h"
#include "exclaves_sensor.h"

LCK_GRP_DECLARE(exclaves_lck_grp, "exclaves");

/* Boot lock - only used here for assertions. */
extern lck_mtx_t exclaves_boot_lock;

/*
 * Sent/latest offset for updating exclaves clocks
 */
typedef struct {
	union {
		/* atomic fields are used via atomic primitives */
		struct { _Atomic uint64_t sent_offset, latest_offset; } a_u64;
		_Atomic unsigned __int128 a_u128;
		/*
		 * Non-atomic fields are accessed via a local variable. This is
		 * needed to avoid undefined behaviour with an atomic struct or
		 * accessing atomic fields non-atomically.
		 */
		struct { uint64_t sent_offset, latest_offset; } u64;
		unsigned __int128 u128;
	};
} exclaves_clock_t;
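
/*
 * Rough sketch of how the two offsets interact (the update side is in
 * exclaves_update_timebase() below; the consuming side is
 * exclaves_clocks_need_update(), declared above but defined elsewhere):
 * new offsets are published into latest_offset, and a clock is
 * considered stale while the pair, snapshotted as a single 128-bit
 * atomic load, disagrees, e.g.:
 *
 *	exclaves_clock_t snap;
 *	snap.u128 = os_atomic_load(&clock->a_u128, relaxed);
 *	bool stale = snap.u64.sent_offset != snap.u64.latest_offset;
 *
 * sent_offset is presumably brought back into sync once the new offset
 * has been delivered to the exclaves scheduler via
 * exclaves_scheduler_request_update_timer().
 */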


/*
 * Two clocks indexed by their type.
 * This makes them easy to look up.
 */
static exclaves_clock_t exclaves_clock[] = {
	[EXCLAVES_CLOCK_ABSOLUTE] = {},
	[EXCLAVES_CLOCK_CONTINUOUS] = {},
};

static kern_return_t
exclaves_endpoint_call_internal(ipc_port_t port, exclaves_id_t endpoint_id);

static kern_return_t
exclaves_enter(void);
static kern_return_t
exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter);

static kern_return_t
exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info);
OS_NORETURN OS_NOINLINE
static void
exclaves_wait_for_panic(void);

static inline bool
exclaves_clocks_need_update(void);

static kern_return_t
exclaves_scheduler_boot(void);

static kern_return_t
exclaves_hosted_error(bool success, XrtHosted_Error_t *error);

static kern_return_t
exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
    uint64_t offset);

static kern_return_t
exclaves_scheduler_request_boot(void);


/*
 * A static set of exclave epoch counters.
 */
static os_atomic(uint64_t) epoch_counter[XrtHosted_Counter_limit] = {};

static inline os_atomic(uint64_t) *
exclaves_get_queue_counter(const uint64_t id)
{
	return &epoch_counter[XrtHosted_Counter_fromQueueId(id)];
}

static inline os_atomic(uint64_t) *
exclaves_get_thread_counter(const uint64_t id)
{
	return &epoch_counter[XrtHosted_Counter_fromThreadId(id)];
}
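
/*
 * These counters are handed to esync_wait()/esync_wake() when handling
 * the scheduler's Wait/Wake responses below, so that a wait or wake
 * carrying a stale epoch can be detected.
 */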


/* -------------------------------------------------------------------------- */
#pragma mark exclaves debug configuration

#if DEVELOPMENT || DEBUG
TUNABLE_WRITEABLE(unsigned int, exclaves_debug, "exclaves_debug",
    exclaves_debug_show_errors);

TUNABLE_DT_WRITEABLE(exclaves_requirement_t, exclaves_relaxed_requirements,
    "/defaults", "kern.exclaves_relaxed_reqs", "exclaves_relaxed_requirements",
    0, TUNABLE_DT_NONE);
#else
const exclaves_requirement_t exclaves_relaxed_requirements = 0;
#endif

#endif /* CONFIG_EXCLAVES */

/* -------------------------------------------------------------------------- */
#pragma mark userspace entry point

#if CONFIG_EXCLAVES
static kern_return_t
operation_boot(mach_port_name_t name, exclaves_boot_stage_t stage)
{
	if (name != MACH_PORT_NULL) {
		/* Only accept MACH_PORT_NULL for now */
		return KERN_INVALID_CAPABILITY;
	}

	/*
	 * As the boot operation itself happens outside the context of any
	 * conclave, it requires special privilege.
	 */
	if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_BOOT)) {
		return KERN_DENIED;
	}

	return exclaves_boot(stage);
}
#endif /* CONFIG_EXCLAVES */

kern_return_t
_exclaves_ctl_trap(struct exclaves_ctl_trap_args *uap)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	int error = 0;

	mach_port_name_t name = uap->name;
	exclaves_id_t identifier = uap->identifier;
	mach_vm_address_t ubuffer = uap->buffer;
	mach_vm_size_t usize = uap->size;
	mach_vm_size_t uoffset = (mach_vm_size_t)uap->identifier;
	mach_vm_size_t usize2 = uap->size2;
	mach_vm_size_t uoffset2 = uap->offset;
	mach_vm_address_t ustatus = uap->status;

	task_t task = current_task();

	/*
	 * EXCLAVES_XNU_PROXY_CR_RETVAL comes from ExclavePlatform and is shared
	 * with xnu. That header is not shared with userspace. Make sure that
	 * the retval userspace picks up is the same as the one
	 * xnu/ExclavePlatform thinks it is.
	 */
	assert3p(&EXCLAVES_XNU_PROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0), ==,
	    &XNUPROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0));

	uint8_t operation = EXCLAVES_CTL_OP(uap->operation_and_flags);
	uint32_t flags = EXCLAVES_CTL_FLAGS(uap->operation_and_flags);
	if (flags != 0) {
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Deal with OP_BOOT up-front as it has slightly different restrictions
	 * than the other operations.
	 */
	if (operation == EXCLAVES_CTL_OP_BOOT) {
		return operation_boot(name, (uint32_t)identifier);
	}

	/*
	 * All other operations are restricted to properly entitled tasks which
	 * can operate in the kernel domain, or those which have joined
	 * conclaves (which has its own entitlement check).
	 * If requirements are relaxed during development, tasks with no
	 * conclaves are also allowed.
	 */
	if (operation == EXCLAVES_CTL_OP_SENSOR_MIN_ON_TIME) {
		if (!exclaves_has_priv(task, EXCLAVES_PRIV_INDICATOR_MIN_ON_TIME)) {
			return KERN_DENIED;
		}
	} else if (task_get_conclave(task) == NULL &&
	    !exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN) &&
	    !exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES)) {
		return KERN_DENIED;
	}

	/*
	 * Wait for EXCLAVECORE boot to complete. If exclaves are unsupported,
	 * return immediately.
	 */
	kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVECORE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (task_get_conclave(task) != NULL) {
		/*
		 * For calls from tasks that have joined conclaves, now wait until
		 * booted up to EXCLAVEKIT. If EXCLAVEKIT boot fails for some reason,
		 * KERN_NOT_SUPPORTED will be returned (on RELEASE this would
		 * panic). This is a separate call to the one above because we
		 * need to distinguish EXCLAVECORE being not supported and
		 * still wait for EXCLAVEKIT to boot if it *is* supported.
		 */
		kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVEKIT);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	switch (operation) {
	case EXCLAVES_CTL_OP_ENDPOINT_CALL: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		if (ubuffer == USER_ADDR_NULL || usize == 0 ||
		    usize != Exclaves_L4_IpcBuffer_Size) {
			return KERN_INVALID_ARGUMENT;
		}


		Exclaves_L4_IpcBuffer_t *ipcb = exclaves_get_ipc_buffer();
		/* TODO (rdar://123728529) - IPC buffer isn't freed until thread exit */
		if (!ipcb && (error = exclaves_allocate_ipc_buffer((void**)&ipcb))) {
			return error;
		}
		assert(ipcb != NULL);
		if ((error = copyin(ubuffer, ipcb, usize))) {
			return error;
		}

		if (identifier >= CONCLAVE_SERVICE_MAX) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Verify that the service actually exists in the current
		 * domain.
		 */
		if (!exclaves_conclave_has_service(task_get_conclave(task),
		    identifier)) {
			return KERN_INVALID_ARGUMENT;
		}

		kr = exclaves_endpoint_call_internal(IPC_PORT_NULL, identifier);
		error = copyout(ipcb, ubuffer, usize);
		/*
		 * The endpoint call to the conclave may have triggered a stop
		 * upcall; check whether the stop upcall completion handler
		 * needs to run.
		 */
		task_stop_conclave_upcall_complete();
		if (error) {
			return error;
		}
		break;
	}

	case EXCLAVES_CTL_OP_NAMED_BUFFER_CREATE: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}

		size_t len = 0;
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX,
		    &len) != 0 || id_name[0] == '\0') {
			return KERN_INVALID_ARGUMENT;
		}

		exclaves_buffer_perm_t perm = (exclaves_buffer_perm_t)usize2;
		const exclaves_buffer_perm_t supported =
		    EXCLAVES_BUFFER_PERM_READ | EXCLAVES_BUFFER_PERM_WRITE;
		if ((perm & supported) == 0 || (perm & ~supported) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_shared_memory_map(domain, id_name, usize,
		    perm, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource,
		    current_space(), &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYIN: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyin(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYOUT: {
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_from_port_name(current_space(), name,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_shared_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
		break;
	}

	case EXCLAVES_CTL_OP_LAUNCH_CONCLAVE:
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		kr = task_launch_conclave(name);

		/*
		 * The conclave launch call may have triggered a stop upcall;
		 * check whether the stop upcall completion handler needs to
		 * run.
		 */
		task_stop_conclave_upcall_complete();
		break;

	case EXCLAVES_CTL_OP_LOOKUP_SERVICES: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}
		struct exclaves_resource_user uresource = {};

		if (usize > (MAX_CONCLAVE_RESOURCE_NUM * sizeof(struct exclaves_resource_user)) ||
		    (usize % sizeof(struct exclaves_resource_user) != 0)) {
			return KERN_INVALID_ARGUMENT;
		}

		if ((ubuffer == USER_ADDR_NULL && usize != 0) ||
		    (usize == 0 && ubuffer != USER_ADDR_NULL)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		/* For the moment we only ever have to deal with one request. */
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}
		error = copyin(ubuffer, &uresource, usize);
		if (error) {
			return KERN_INVALID_ARGUMENT;
		}

		const size_t name_buf_len = sizeof(uresource.r_name);
		if (strnlen(uresource.r_name, name_buf_len) == name_buf_len) {
			return KERN_INVALID_ARGUMENT;
		}

		/*
		 * Do the regular lookup first. If that fails, fall back to the
		 * DARWIN domain, and finally to the KERNEL domain.
		 */
		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		uint64_t id = exclaves_service_lookup(domain, uresource.r_name);

		if (exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES) ||
		    exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN)) {
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_DARWIN,
				    uresource.r_name);
			}
			if (id == EXCLAVES_INVALID_ID) {
				id = exclaves_service_lookup(EXCLAVES_DOMAIN_KERNEL,
				    uresource.r_name);
			}
		}

		if (id == EXCLAVES_INVALID_ID) {
			return KERN_NOT_FOUND;
		}

		/*
		 * Looking up a forwarding service verifies its existence, but
		 * doesn't return the id, since communication with it is not
		 * possible.
		 */
		if (id > EXCLAVES_FORWARDING_RESOURCE_ID_BASE) {
			return KERN_NAME_EXISTS;
		}

		uresource.r_id = id;
		uresource.r_port = MACH_PORT_NULL;

		error = copyout(&uresource, ubuffer, usize);
		if (error) {
			return KERN_INVALID_ADDRESS;
		}

		kr = KERN_SUCCESS;
		break;
	}

	case EXCLAVES_CTL_OP_AUDIO_BUFFER_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_audio_memory_map(domain, id_name, usize,
		    &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_AUDIO_BUFFER_COPYOUT: {
		exclaves_resource_t *resource;

		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type !=
		    XNUPROXY_RESOURCETYPE_ARBITRATEDAUDIOMEMORY) {
			exclaves_resource_release(resource);
			return KERN_INVALID_CAPABILITY;
		}

		kr = exclaves_resource_audio_memory_copyout(resource,
		    ubuffer, usize, uoffset, usize2, uoffset2, ustatus);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_SENSOR_CREATE: {
		if (identifier == 0) {
			return KERN_INVALID_ARGUMENT;
		}

		/* copy in string name */
		char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
		size_t done = 0;
		if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
			return KERN_INVALID_ARGUMENT;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		exclaves_resource_t *resource = NULL;
		kr = exclaves_resource_sensor_open(domain, id_name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = exclaves_resource_create_port_name(resource, current_space(),
		    &name);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
		if (kr != KERN_SUCCESS) {
			/* No senders drops the reference. */
			mach_port_deallocate(current_space(), name);
			return kr;
		}

		break;
	}

	case EXCLAVES_CTL_OP_SENSOR_START: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_start(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	case EXCLAVES_CTL_OP_SENSOR_STOP: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}

		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_stop(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

		break;
	}
	case EXCLAVES_CTL_OP_SENSOR_STATUS: {
		exclaves_resource_t *resource;
		kr = exclaves_resource_from_port_name(current_space(), name, &resource);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
			exclaves_resource_release(resource);
			return KERN_FAILURE;
		}


		exclaves_sensor_status_t status;
		kr = exclaves_resource_sensor_status(resource, identifier, &status);

		exclaves_resource_release(resource);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));
		break;
	}
	case EXCLAVES_CTL_OP_NOTIFICATION_RESOURCE_LOOKUP: {
		exclaves_resource_t *notification_resource = NULL;
		mach_port_name_t port_name = MACH_PORT_NULL;

		struct exclaves_resource_user *notification_resource_user = NULL;
		if (usize != sizeof(struct exclaves_resource_user)) {
			return KERN_INVALID_ARGUMENT;
		}

		if (ubuffer == USER_ADDR_NULL) {
			return KERN_INVALID_ARGUMENT;
		}

		notification_resource_user = (struct exclaves_resource_user *)
		    kalloc_data(usize, Z_WAITOK | Z_ZERO | Z_NOFAIL);

		error = copyin(ubuffer, notification_resource_user, usize);
		if (error) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		const size_t name_buf_len = sizeof(notification_resource_user->r_name);
		if (strnlen(notification_resource_user->r_name, name_buf_len)
		    == name_buf_len) {
			kr = KERN_INVALID_ARGUMENT;
			goto notification_resource_lookup_out;
		}

		const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
		kr = exclaves_notification_create(domain,
		    notification_resource_user->r_name, &notification_resource);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}

		kr = exclaves_resource_create_port_name(notification_resource,
		    current_space(), &port_name);
		if (kr != KERN_SUCCESS) {
			goto notification_resource_lookup_out;
		}
		notification_resource_user->r_type = notification_resource->r_type;
		notification_resource_user->r_id = notification_resource->r_id;
		notification_resource_user->r_port = port_name;
		error = copyout(notification_resource_user, ubuffer, usize);
		if (error) {
			kr = KERN_INVALID_ADDRESS;
			goto notification_resource_lookup_out;
		}

notification_resource_lookup_out:
		if (notification_resource_user != NULL) {
			kfree_data(notification_resource_user, usize);
		}
		if (kr != KERN_SUCCESS && port_name != MACH_PORT_NULL) {
			mach_port_deallocate(current_space(), port_name);
		}
		break;
	}

	case EXCLAVES_CTL_OP_AOE_SETUP: {
		uint8_t num_message = 0;
		uint8_t num_worker = 0;

		if (task_get_conclave(task) == NULL) {
			kr = KERN_FAILURE;
			break;
		}

		kr = exclaves_aoe_setup(&num_message, &num_worker);
		if (kr != KERN_SUCCESS) {
			break;
		}

		error = copyout(&num_message, ubuffer, sizeof(num_message));
		if (error != 0) {
			kr = KERN_INVALID_ADDRESS;
			break;
		}

		error = copyout(&num_worker, ustatus, sizeof(num_worker));
		if (error != 0) {
			kr = KERN_INVALID_ADDRESS;
			break;
		}

		break;
	}

	case EXCLAVES_CTL_OP_AOE_MESSAGE_LOOP: {
		if (task_get_conclave(task) == NULL) {
			kr = KERN_FAILURE;
			break;
		}

		kr = exclaves_aoe_message_loop();
		break;
	}

	case EXCLAVES_CTL_OP_AOE_WORK_LOOP: {
		if (task_get_conclave(task) == NULL) {
			kr = KERN_FAILURE;
			break;
		}

		kr = exclaves_aoe_work_loop();
		break;
	}

	case EXCLAVES_CTL_OP_SENSOR_MIN_ON_TIME: {
		if (name != MACH_PORT_NULL) {
			/* Only accept MACH_PORT_NULL for now */
			return KERN_INVALID_CAPABILITY;
		}

		if (ubuffer == USER_ADDR_NULL || usize == 0 ||
		    usize != sizeof(struct exclaves_indicator_deadlines)) {
			return KERN_INVALID_ARGUMENT;
		}

		struct exclaves_indicator_deadlines udurations;
		error = copyin(ubuffer, &udurations, usize);
		if (error) {
			return KERN_INVALID_ARGUMENT;
		}

		kr = exclaves_indicator_min_on_time_deadlines(&udurations);
		if (kr != KERN_SUCCESS) {
			return kr;
		}

		error = copyout(&udurations, ubuffer, usize);
		if (error) {
			return KERN_INVALID_ADDRESS;
		}

		break;
	}

	default:
		kr = KERN_INVALID_ARGUMENT;
		break;
	}

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(uap)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}

/* -------------------------------------------------------------------------- */
#pragma mark kernel entry points

kern_return_t
exclaves_endpoint_call(ipc_port_t port, exclaves_id_t endpoint_id,
    exclaves_tag_t *tag, exclaves_error_t *error)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	assert(port == IPC_PORT_NULL);

	Exclaves_L4_IpcBuffer_t *ipcb = Exclaves_L4_IpcBuffer();
	assert(ipcb != NULL);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call:\tendpoint id %lld tag 0x%llx\n",
	    endpoint_id, *tag);

	ipcb->mr[Exclaves_L4_Ipc_Mr_Tag] = *tag;
	kr = exclaves_endpoint_call_internal(port, endpoint_id);
	*tag = ipcb->mr[Exclaves_L4_Ipc_Mr_Tag];
	*error = XNUPROXY_CR_RETVAL(ipcb);

	exclaves_debug_printf(show_progress,
	    "exclaves: endpoint call return:\tendpoint id %lld tag 0x%llx "
	    "error 0x%llx\n", endpoint_id, *tag, *error);

	return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(port, endpoint_id, tag, error)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}

kern_return_t
exclaves_allocate_ipc_buffer(void **out_ipc_buffer)
{
#if CONFIG_EXCLAVES
	kern_return_t kr = KERN_SUCCESS;
	thread_t thread = current_thread();

	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
		kr = exclaves_xnuproxy_ctx_alloc(&thread->th_exclaves_ipc_ctx);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
		assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
	}
	thread->th_exclaves_ipc_ctx.usecnt++;

	if (out_ipc_buffer != NULL) {
		*out_ipc_buffer = thread->th_exclaves_ipc_ctx.ipcb;
	}
	return KERN_SUCCESS;
#else /* CONFIG_EXCLAVES */
#pragma unused(out_ipc_buffer)
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
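
/*
 * Note: the IPC buffer and its context are per-thread and reference
 * counted via usecnt. exclaves_free_ipc_buffer() below drops one
 * reference and only releases the context once the count reaches zero,
 * while exclaves_thread_terminate() force-frees it regardless of the
 * count.
 */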

kern_return_t
exclaves_free_ipc_buffer(void)
{
#if CONFIG_EXCLAVES

	thread_t thread = current_thread();

	/* Don't try to free unallocated contexts. */
	if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
		return KERN_SUCCESS;
	}

	/* The inspection thread's cached buffer should never be freed. */
	const thread_exclaves_inspection_flags_t iflags =
	    os_atomic_load(&thread->th_exclaves_inspection_state, relaxed);
	if ((iflags & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		return KERN_SUCCESS;
	}

	assert(thread->th_exclaves_ipc_ctx.usecnt > 0);
	if (--thread->th_exclaves_ipc_ctx.usecnt > 0) {
		return KERN_SUCCESS;
	}

	return exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
#else /* CONFIG_EXCLAVES */
	return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}

kern_return_t
exclaves_thread_terminate(__unused thread_t thread)
{
	kern_return_t kr = KERN_SUCCESS;

#if CONFIG_EXCLAVES
	assert(thread == current_thread());
	assert(thread->th_exclaves_intstate == 0);
	assert(thread->th_exclaves_state == 0);
	if (thread->th_exclaves_ipc_ctx.ipcb != NULL) {
		exclaves_debug_printf(show_progress,
		    "exclaves: thread_terminate freeing abandoned exclaves "
		    "ipc buffer\n");
		/* Unconditionally free context irrespective of usecount */
		thread->th_exclaves_ipc_ctx.usecnt = 0;
		kr = exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
		assert(kr == KERN_SUCCESS);
	}
#else
#pragma unused(thread)
#endif /* CONFIG_EXCLAVES */

	return kr;
}

OS_CONST
void*
exclaves_get_ipc_buffer(void)
{
#if CONFIG_EXCLAVES
	thread_t thread = current_thread();
	Exclaves_L4_IpcBuffer_t *ipcb = thread->th_exclaves_ipc_ctx.ipcb;

	return ipcb;
#else /* CONFIG_EXCLAVES */
	return NULL;
#endif /* CONFIG_EXCLAVES */
}

#if CONFIG_EXCLAVES

static void
bind_to_boot_core(void)
{
	/*
	 * First ensure the boot cluster isn't powered down preventing the
	 * thread from running at all.
	 */
	suspend_cluster_powerdown();
	const int cpu = ml_get_boot_cpu_number();
	processor_t processor = cpu_to_processor(cpu);
	assert3p(processor, !=, NULL);
	__assert_only processor_t old = thread_bind(processor);
	assert3p(old, ==, PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);
}

static void
unbind_from_boot_core(void)
{
	/* Unbind the thread from the boot CPU. */
	thread_bind(PROCESSOR_NULL);
	thread_block(THREAD_CONTINUE_NULL);
	resume_cluster_powerdown();
}

extern kern_return_t exclaves_boot_early(void);
kern_return_t
exclaves_boot_early(void)
{
	kern_return_t kr = KERN_FAILURE;
	uint64_t boot_info = 0;
	bool early_enter = false;

	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);

	kr = exclaves_bootinfo(&boot_info, &early_enter);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Get bootinfo failed\n");
		return kr;
	}

	if (early_enter) {
		thread_t thread = current_thread();
		assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

		bind_to_boot_core();

		disable_preemption_without_measurements();
		thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;

		kr = exclaves_enter();

		thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
		enable_preemption();

		unbind_from_boot_core();

		if (kr != KERN_SUCCESS) {
			exclaves_debug_printf(show_errors,
			    "exclaves: early exclaves enter failed\n");
			if (kr == KERN_ABORTED) {
				panic("Unexpected ringgate panic status");
			}
			return kr;
		}
	}

	uint64_t xnuproxy_boot_info = 0;
	kr = exclaves_scheduler_init(boot_info, &xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Init scheduler failed\n");
		return kr;
	}

	kr = exclaves_xnuproxy_init(xnuproxy_boot_info);
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy setup failed\n");
		return KERN_FAILURE;
	}

	kr = exclaves_resource_init();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "exclaves: failed to initialize resources\n");
		return kr;
	}

	kr = exclaves_panic_thread_setup();
	if (kr != KERN_SUCCESS) {
		exclaves_debug_printf(show_errors,
		    "XNU proxy panic thread setup failed\n");
		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
#endif /* CONFIG_EXCLAVES */

#if CONFIG_EXCLAVES
static struct XrtHosted_Callbacks *exclaves_callbacks = NULL;
#endif /* CONFIG_EXCLAVES */

void
exclaves_register_xrt_hosted_callbacks(struct XrtHosted_Callbacks *callbacks)
{
#if CONFIG_EXCLAVES
	if (exclaves_callbacks == NULL) {
		exclaves_callbacks = callbacks;
	}
#else /* CONFIG_EXCLAVES */
#pragma unused(callbacks)
#endif /* CONFIG_EXCLAVES */
}

void
exclaves_update_timebase(exclaves_clock_type_t type, uint64_t offset)
{
	assert(
		type == EXCLAVES_CLOCK_CONTINUOUS ||
		type == EXCLAVES_CLOCK_ABSOLUTE);
#if CONFIG_EXCLAVES
	exclaves_clock_t *clock = &exclaves_clock[type];
	uint64_t latest_offset = os_atomic_load(&clock->a_u64.latest_offset, relaxed);
	while (latest_offset != offset) {
		/*
		 * Update the latest offset with the new offset. If this fails,
		 * then a concurrent update occurred and our offset may be
		 * stale.
		 */
		if (os_atomic_cmpxchgv(&clock->a_u64.latest_offset, latest_offset,
		    offset, &latest_offset, relaxed)) {
			break;
		}
	}
#else
#pragma unused(type, offset)
#endif /* CONFIG_EXCLAVES */
}

/* -------------------------------------------------------------------------- */

#pragma mark exclaves ipc internals

#if CONFIG_EXCLAVES

static kern_return_t
exclaves_endpoint_call_internal(__unused ipc_port_t port,
    exclaves_id_t endpoint_id)
{
	kern_return_t kr = KERN_SUCCESS;

	assert(port == IPC_PORT_NULL);

	kr = exclaves_xnuproxy_endpoint_call(endpoint_id);

	return kr;
}

/* -------------------------------------------------------------------------- */
#pragma mark secure kernel communication

/** save SME state before entering exclaves */
static bool
exclaves_save_matrix_state(void)
{
	bool saved = false;
#if HAS_ARM_FEAT_SME
	/* Save only the ZA/ZT0 state. SPTM will save/restore TPIDR2. */
	if (arm_sme_version() > 0 && !!(__builtin_arm_rsr64("SVCR") & SVCR_ZA)) {
		arm_sme_saved_state_t *sme_state = machine_thread_get_sme_state(current_thread());
		arm_save_sme_za_zt0(&sme_state->context, sme_state->svl_b);
		asm volatile ("smstop za");
		saved = true;
	}
#endif /* HAS_ARM_FEAT_SME */
	return saved;
}

static void
exclaves_restore_matrix_state(bool did_save_sme __unused)
{
#if HAS_ARM_FEAT_SME
	if (did_save_sme) {
		arm_sme_saved_state_t *sme_state = machine_thread_get_sme_state(current_thread());
		asm volatile ("smstart za");
		arm_load_sme_za_zt0(&sme_state->context, sme_state->svl_b);
	}
#endif /* HAS_ARM_FEAT_SME */
}

/* ringgate entry endpoints */
enum {
	RINGGATE_EP_ENTER,
	RINGGATE_EP_INFO
};

/* ringgate entry status codes */
enum {
	RINGGATE_STATUS_SUCCESS,
	RINGGATE_STATUS_ERROR,
	RINGGATE_STATUS_PANIC, /* RINGGATE_EP_ENTER: Another core panicked */
};
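
/*
 * exclaves_enter() maps these statuses onto kern_return_t values:
 * SUCCESS becomes KERN_SUCCESS, ERROR becomes KERN_FAILURE and PANIC
 * becomes KERN_ABORTED (callers of the scheduler then park in
 * exclaves_wait_for_panic()).
 */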

OS_NOINLINE
static kern_return_t
exclaves_enter(void)
{
	uint32_t endpoint = RINGGATE_EP_ENTER;
	uint64_t result = RINGGATE_STATUS_ERROR;

	sptm_call_regs_t regs = { };

	thread_t thread = current_thread();

	/*
	 * Should never re-enter exclaves.
	 */
	if ((thread->th_exclaves_state & TH_EXCLAVES_UPCALL) != 0 ||
	    (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) != 0) {
		panic("attempt to re-enter exclaves");
	}

	/*
	 * Must have one (and only one) of the flags set to enter exclaves.
	 */
	__assert_only const thread_exclaves_state_flags_t mask = (
		TH_EXCLAVES_RPC |
		TH_EXCLAVES_XNUPROXY |
		TH_EXCLAVES_SCHEDULER_CALL |
		TH_EXCLAVES_RESUME_PANIC_THREAD);
	assert3u(thread->th_exclaves_state & mask, !=, 0);
	assert3u(thread->th_exclaves_intstate & TH_EXCLAVES_EXECUTION, ==, 0);

	/*
	 * Save any SME matrix state before entering exclaves.
	 */
	bool did_save_sme = exclaves_save_matrix_state();

#if MACH_ASSERT
	/*
	 * Set the ast to check that the thread doesn't return to userspace
	 * while in an RPC or XNUPROXY call.
	 */
	act_set_debug_assert();
#endif /* MACH_ASSERT */

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
	    | DBG_FUNC_START);

	recount_enter_secure();

	/*
	 * xnu_return_to_gl2 relies on this flag being present to correctly
	 * return to SK from interrupts xnu handles on behalf of SK.
	 */
	thread->th_exclaves_intstate |= TH_EXCLAVES_EXECUTION;

	/*
	 * Bracket with labels so stackshot can determine where exclaves are
	 * entered from xnu.
	 */
	__asm__ volatile (
	    "EXCLAVES_ENTRY_START:\n\t"
	    );
	result = sk_enter(endpoint, &regs);
	__asm__ volatile (
	    "EXCLAVES_ENTRY_END:\n\t"
	    );

	thread->th_exclaves_intstate &= ~TH_EXCLAVES_EXECUTION;

	recount_leave_secure();

#if CONFIG_SPTM
	/**
	 * SPTM will return here with debug exceptions disabled (MDSCR_{KDE,MDE} == {0,0})
	 * but SK might have clobbered individual breakpoints, etc. Invalidate the current CPU
	 * debug state forcing a reload on the next return to user mode.
	 */
	if (__improbable(getCpuDatap()->cpu_user_debug != NULL)) {
		arm_debug_set(NULL);
	}
#endif /* CONFIG_SPTM */

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
	    | DBG_FUNC_END);

	/*
	 * Restore SME matrix state, if it existed.
	 */
	exclaves_restore_matrix_state(did_save_sme);

	switch (result) {
	case RINGGATE_STATUS_SUCCESS:
		return KERN_SUCCESS;
	case RINGGATE_STATUS_ERROR:
		return KERN_FAILURE;
	case RINGGATE_STATUS_PANIC:
		return KERN_ABORTED;
	default:
		assertf(false, "Unknown ringgate status %llu", result);
		__builtin_trap();
	}
}


/*
 * A bit in the lower byte of the value returned by RINGGATE_EP_INFO. If set,
 * it indicates that we should immediately enter the ringgate once in order
 * to allow the scheduler to perform early boot initialisation.
 */
#define EARLY_RINGGATE_ENTER 2
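
/*
 * The remaining bits of the RINGGATE_EP_INFO result carry the physical
 * address of the scheduler's boot info (presumably aligned well enough
 * that the low bits are free to carry flags); exclaves_bootinfo() below
 * splits the flag and the address apart.
 */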

OS_NOINLINE
static kern_return_t
exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter)
{
	uint32_t endpoint = RINGGATE_EP_INFO;
	uint64_t result = RINGGATE_STATUS_ERROR;

	sptm_call_regs_t regs = { };

	recount_enter_secure();
	result = sk_enter(endpoint, &regs);
	recount_leave_secure();
	if (result == RINGGATE_STATUS_ERROR) {
		return KERN_FAILURE;
	}

	*early_enter = (result & EARLY_RINGGATE_ENTER) != 0;
	*out_boot_info = result & ~EARLY_RINGGATE_ENTER;

	return KERN_SUCCESS;
}

/* -------------------------------------------------------------------------- */

#pragma mark exclaves scheduler communication

static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_request);
static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_response);
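
/*
 * These per-CPU pointers are populated by exclaves_init_multicore()
 * below, which matches each exclave scheduler core ID (MPIDR) against
 * the CPU's physical ID and caches that core's request/response buffers
 * for quick access with preemption disabled.
 */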

static void
exclaves_init_multicore(void)
{
	XrtHosted_Buffer_t **req, **res;

	exclaves_wait_for_cpu_init();

	exclaves_debug_printf(show_progress,
	    "Using MPIDR for exclave scheduler core IDs\n");

	/*
	 * Match the hardwareID to the physical ID and stash the pointers to the
	 * request/response buffers in per-cpu data for quick access.
	 */
	size_t core_count = exclaves_callbacks->v1.cores();
	for (size_t i = 0; i < core_count; i++) {
		const XrtHosted_Core_t *core = exclaves_callbacks->v1.core(i);
		uint32_t dt_phys_id = (uint32_t)core->v2.hardwareId;

		percpu_foreach(cpu_data, cpu_data) {
			if (cpu_data->cpu_phys_id != dt_phys_id) {
				continue;
			}
			req = PERCPU_GET_RELATIVE(exclaves_request, cpu_data, cpu_data);
			*req = exclaves_callbacks->v1.Core.request(i);

			res = PERCPU_GET_RELATIVE(exclaves_response, cpu_data, cpu_data);
			*res = exclaves_callbacks->v1.Core.response(i);

			break;
		}
	}
}

static kern_return_t
exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info)
{
	kern_return_t kr = KERN_SUCCESS;
	XrtHosted_Error_t hosted_error;

	lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);

	if (!pmap_valid_address(boot_info)) {
		exclaves_debug_printf(show_errors,
		    "exclaves: %s: 0x%012llx\n",
		    "Invalid root physical address",
		    boot_info);
		return KERN_FAILURE;
	}

	if (exclaves_callbacks == NULL) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Callbacks not registered\n");
		return KERN_FAILURE;
	}

	/* Initialise XrtHostedXnu kext */
	kr = exclaves_hosted_error(
		exclaves_callbacks->v1.init(
			XrtHosted_Version_current,
			phystokv(boot_info),
			&hosted_error),
		&hosted_error);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* Record aperture addresses in buffer */
	size_t frames = exclaves_callbacks->v1.frames();
	XrtHosted_Mapped_t **pages = zalloc_permanent(
		frames * sizeof(XrtHosted_Mapped_t *),
		ZALIGN(XrtHosted_Mapped_t *));
	size_t index = 0;
	uint64_t phys = boot_info;
	while (index < frames) {
		if (!pmap_valid_address(phys)) {
			exclaves_debug_printf(show_errors,
			    "exclaves: %s: 0x%012llx\n",
			    "Invalid shared physical address",
			    phys);
			return KERN_FAILURE;
		}
		pages[index] = (XrtHosted_Mapped_t *)phystokv(phys);
		kr = exclaves_hosted_error(
			exclaves_callbacks->v1.nextPhys(
				pages[index],
				&index,
				&phys,
				&hosted_error),
			&hosted_error);
		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	/* Initialise the mapped region */
	exclaves_callbacks->v1.setMapping(
		XrtHosted_Region_scattered(frames, pages));

	/* Boot the scheduler. */
	kr = exclaves_scheduler_boot();
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	XrtHosted_Global_t *global = exclaves_callbacks->v1.global();

	/* Only support MPIDR multicore. */
	if (global->v2.smpStatus != XrtHosted_SmpStatus_MulticoreMpidr) {
		exclaves_debug_printf(show_errors,
		    "exclaves: exclaves scheduler doesn't support multicore");
		return KERN_FAILURE;
	}
	exclaves_init_multicore();

	/* Initialise the XNU proxy */
	if (!pmap_valid_address(global->v1.proxyInit)) {
		exclaves_debug_printf(show_errors,
		    "exclaves: %s: 0x%012llx\n",
		    "Invalid xnu proxy physical address",
		    phys);
		return KERN_FAILURE;
	}
	*xnuproxy_boot_info = global->v1.proxyInit;

	return kr;
}

#if EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
#define exclaves_scheduler_debug_save_buffer(_buf) \
	XrtHosted_Buffer_t _buf##_copy = *(_buf)
#define exclaves_scheduler_debug_show_request_response(_request_buf, \
    _response_buf) ({ \
	if (exclaves_debug_enabled(show_scheduler_request_response)) { \
		printf("exclaves: Scheduler request = %p\n", _request_buf); \
		printf("exclaves: Scheduler request.tag = 0x%04llx\n", \
		    _request_buf##_copy.tag); \
		for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
			printf("exclaves: Scheduler request.arguments[%02zu] = " \
			    "0x%04llx\n", arg, \
			    _request_buf##_copy.arguments[arg]); \
		} \
		printf("exclaves: Scheduler response = %p\n", _response_buf); \
		printf("exclaves: Scheduler response.tag = 0x%04llx\n", \
		    _response_buf##_copy.tag); \
		for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
			printf("exclaves: Scheduler response.arguments[%02zu] = " \
			    "0x%04llx\n", arg, \
			    _response_buf##_copy.arguments[arg]); \
		} \
	}})
#else // EXCLAVES_SHOW_SCHEDULER_REQUEST_RESPONSE
#define exclaves_scheduler_debug_save_buffer(_buf) ({ })
#define exclaves_scheduler_debug_show_request_response(_request_buf, \
    _response_buf) ({ })
#endif // EXCLAVES_SHOW_SCHEDULER_REQUEST_RESPONSE

static void
request_trace_start(const XrtHosted_Request_t *request)
{
	switch (request->tag) {
	case XrtHosted_Request_ResumeWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_START,
		    request->ResumeWithHostId.hostId, request->ResumeWithHostId.thread);
		break;

	case XrtHosted_Request_InterruptWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_START,
		    request->InterruptWithHostId.hostId, request->InterruptWithHostId.thread);
		break;

	case XrtHosted_Request_UpdateTimerOffset:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_START,
		    request->UpdateTimerOffset.timer, request->UpdateTimerOffset.offset);
		break;

	case XrtHosted_Request_BootExclaves:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_START);
		break;

	case XrtHosted_Request_PmmEarlyAllocResponse:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_START,
		    request->PmmEarlyAllocResponse.a);
		break;

	case XrtHosted_Request_WatchdogPanic:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_START);
		break;

	default:
		panic("Unsupported exclaves scheduler request: %d", request->tag);
	}
}

static void
request_trace_end(const XrtHosted_Request_t *request)
{
	switch (request->tag) {
	case XrtHosted_Request_ResumeWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_InterruptWithHostId:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_UpdateTimerOffset:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_BootExclaves:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_PmmEarlyAllocResponse:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_END);
		break;

	case XrtHosted_Request_WatchdogPanic:
		KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
		    MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_END);
		break;

	default:
		panic("Unsupported exclaves scheduler request: %d", request->tag);
	}
}

__attribute__((always_inline))
static kern_return_t
exclaves_scheduler_request(const XrtHosted_Request_t *request,
    XrtHosted_Response_t *response)
{
	assert3u(request->tag, >, XrtHosted_Request_Invalid);
	assert3u(request->tag, <, XrtHosted_Request_Limit);

	kern_return_t kr = KERN_SUCCESS;
	bool istate;

	/*
	 * Disable preemption and interrupts as the xrt hosted scheduler data
	 * structures are per-core.
	 * Preemption-disabled and interrupt-disabled timeouts are disabled
	 * for now, until the measurements can be co-ordinated with the
	 * exclaves side of things.
	 */
	istate = ml_set_interrupts_enabled_with_debug(false, false);

	/* Interrupts should have been enabled entering this function. */
	assert(istate);

	/*
	 * This needs to be done with interrupts disabled, otherwise stackshot
	 * could mark the thread blocked just after this function exits and a
	 * thread marked as AST blocked would go into exclaves.
	 */

	while ((os_atomic_load(&current_thread()->th_exclaves_inspection_state,
	    relaxed) & ~TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		/* Enable interrupts */
		(void) ml_set_interrupts_enabled_with_debug(true, false);

		/* Wait until the thread is collected on exclaves side */
		exclaves_inspection_check_ast();

		/* Disable interrupts and preemption before next AST check */
		ml_set_interrupts_enabled_with_debug(false, false);
	}
	/* Interrupts are disabled and exclaves_stackshot_ast is clean */

	disable_preemption_without_measurements();

	/*
	 * Don't enter with a stale clock (unless updating the clock or
	 * panicking).
	 */
	if (request->tag != XrtHosted_Request_UpdateTimerOffset &&
	    request->tag != XrtHosted_Request_WatchdogPanic &&
	    exclaves_clocks_need_update()) {
		enable_preemption();
		(void) ml_set_interrupts_enabled_with_debug(istate, false);
		return KERN_POLICY_LIMIT;
	}

	XrtHosted_Buffer_t *request_buf = *PERCPU_GET(exclaves_request);
	assert3p(request_buf, !=, NULL);

	request_trace_start(request);

	exclaves_callbacks->v1.Request.encode(request_buf, request);
	exclaves_scheduler_debug_save_buffer(request_buf);

	kr = exclaves_enter();

	/* The response may have come back on a different core. */
	XrtHosted_Buffer_t *response_buf = *PERCPU_GET(exclaves_response);
	assert3p(response_buf, !=, NULL);

	exclaves_scheduler_debug_save_buffer(response_buf);
	exclaves_callbacks->v1.Response.decode(response_buf, response);

	request_trace_end(request);

	enable_preemption();
	(void) ml_set_interrupts_enabled_with_debug(istate, false);

	exclaves_scheduler_debug_show_request_response(request_buf, response_buf);

	if (kr == KERN_ABORTED) {
		/*
		 * RINGGATE_EP_ENTER returned RINGGATE_STATUS_PANIC, indicating
		 * that another core has panicked in exclaves and is on the way
		 * to call xnu panic() via SPTM, so wait here for that to
		 * happen.
		 */
		exclaves_wait_for_panic();
	}

	return kr;
}

OS_NORETURN OS_NOINLINE
static void
exclaves_wait_for_panic(void)
{
	assert_wait_timeout((event_t)exclaves_wait_for_panic, THREAD_UNINT, 1,
	    NSEC_PER_SEC);
	wait_result_t wr = thread_block(THREAD_CONTINUE_NULL);
	panic("Unexpected wait for panic result: %d", wr);
}

static kern_return_t
handle_response_yield(bool early, __assert_only Exclaves_L4_Word_t scid,
    const XrtHosted_Yield_t *yield)
{
	Exclaves_L4_Word_t responding_scid = yield->thread;
	Exclaves_L4_Word_t yielded_to_scid = yield->yieldTo;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: %s scid 0x%lx yielded to scid 0x%lx\n",
	    early ? "(early yield)" : "", responding_scid, yielded_to_scid);
	/* TODO: 1. remember yielding scid if it isn't the xnu proxy's
	 * th_exclaves_scheduling_context_id so we know to resume it later
	 * 2. translate yield_to to thread_switch()-style handoff.
	 */
	if (!early) {
		assert3u(responding_scid, ==, scid);
		assert3u(yield->threadHostId, ==, ctid);
	}

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_YIELD), yielded_to_scid, early);

	return KERN_SUCCESS;
}

static kern_return_t
handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,
    const XrtHosted_Spawned_t *spawned)
{
	Exclaves_L4_Word_t responding_scid = spawned->thread;
	thread_t thread = current_thread();
	__assert_only ctid_t ctid = thread_get_ctid(thread);

	/*
	 * There are only a few places an exclaves thread is expected to be
	 * spawned. Any other cases are considered errors.
	 */
	if ((thread->th_exclaves_state & TH_EXCLAVES_SPAWN_EXPECTED) == 0) {
		exclaves_debug_printf(show_errors,
		    "exclaves: Scheduler: Unexpected thread spawn: "
		    "scid 0x%lx spawned scid 0x%llx\n",
		    responding_scid, spawned->spawned);
		return KERN_FAILURE;
	}

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: scid 0x%lx spawned scid 0x%lx\n",
	    responding_scid, (unsigned long)spawned->spawned);
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_SPAWNED), spawned->spawned);

	assert3u(responding_scid, ==, scid);
	assert3u(spawned->threadHostId, ==, ctid);

	return KERN_SUCCESS;
}

static kern_return_t
handle_response_terminated(const XrtHosted_Terminated_t *terminated)
{
	Exclaves_L4_Word_t responding_scid = terminated->thread;
	__assert_only ctid_t ctid = thread_get_ctid(current_thread());

	exclaves_debug_printf(show_errors,
	    "exclaves: Scheduler: Unexpected thread terminate: "
	    "scid 0x%lx terminated scid 0x%llx\n", responding_scid,
	    terminated->terminated);
	assert3u(terminated->threadHostId, ==, ctid);

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
	    MACH_EXCLAVES_SCHEDULER_TERMINATED),
	    terminated->terminated);

	return KERN_TERMINATED;
}

static kern_return_t
handle_response_wait(const XrtHosted_Wait_t *wait)
{
	Exclaves_L4_Word_t responding_scid = wait->waiter;
	thread_t thread = current_thread();
	__assert_only ctid_t ctid = thread_get_ctid(thread);

	exclaves_debug_printf(show_progress,
	    "exclaves: Scheduler: Wait: "
	    "scid 0x%lx wait on owner scid 0x%llx, queue id 0x%llx, "
	    "epoch 0x%llx\n", responding_scid, wait->owner,
	    wait->queueId, wait->epoch);
	assert3u(wait->waiterHostId, ==, ctid);

	/* The exclaves inspection thread should never wait. */
	if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
		panic("Exclaves inspection thread tried to wait\n");
	}

	/*
	 * Note, "owner" may not be safe to access directly; for example,
	 * the thread may have exited and been freed. esync_wait will
	 * only access it under a lock if the epoch is fresh, thus
	 * ensuring safety.
	 */
    const ctid_t owner = (ctid_t)wait->ownerHostId;
    const XrtHosted_Word_t id = wait->queueId;
    const uint64_t epoch = wait->epoch;

    wait_interrupt_t interruptible;
    esync_policy_t policy;

    switch (wait->interruptible) {
    case XrtHosted_Interruptibility_None:
        interruptible = THREAD_UNINT;
        policy = ESYNC_POLICY_KERNEL;
        break;

    case XrtHosted_Interruptibility_Voluntary:
        interruptible = THREAD_INTERRUPTIBLE;
        policy = ESYNC_POLICY_KERNEL;
        break;

    case XrtHosted_Interruptibility_DynamicQueue:
        interruptible = THREAD_INTERRUPTIBLE;
        policy = ESYNC_POLICY_USER;
        break;

    default:
        panic("Unknown exclaves interruptibility: %llu",
            wait->interruptible);
    }

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_START, id, epoch, owner,
        wait->interruptible);
    const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
        exclaves_get_queue_counter(id), owner, policy, interruptible);
    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_END, wr);

    switch (wr) {
    case THREAD_INTERRUPTED:
        return KERN_ABORTED;

    case THREAD_NOT_WAITING:
    case THREAD_AWAKENED:
        return KERN_SUCCESS;

    default:
        panic("Unexpected wait result from esync_wait: %d", wr);
    }
}

static kern_return_t
handle_response_wake(const XrtHosted_Wake_t *wake)
{
    Exclaves_L4_Word_t responding_scid = wake->waker;
    __assert_only ctid_t ctid = thread_get_ctid(current_thread());

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: Wake: "
        "scid 0x%lx wake of queue id 0x%llx, "
        "epoch 0x%llx, all 0x%llx\n", responding_scid,
        wake->queueId, wake->epoch, wake->all);
    assert3u(wake->wakerHostId, ==, ctid);

    const XrtHosted_Word_t id = wake->queueId;
    const uint64_t epoch = wake->epoch;
    const esync_wake_mode_t mode = wake->all != 0 ?
        ESYNC_WAKE_ALL : ESYNC_WAKE_ONE;

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, 0, mode);

    kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
        exclaves_get_queue_counter(id), mode, 0);

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
        kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

    return KERN_SUCCESS;
}

static kern_return_t
handle_response_wake_with_owner(const XrtHosted_WakeWithOwner_t *wake)
{
    Exclaves_L4_Word_t responding_scid = wake->waker;
    __assert_only ctid_t ctid = thread_get_ctid(current_thread());

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: WakeWithOwner: "
        "scid 0x%lx wake of queue id 0x%llx, "
        "epoch 0x%llx, owner 0x%llx\n", responding_scid,
        wake->queueId, wake->epoch,
        wake->owner);

    assert3u(wake->wakerHostId, ==, ctid);

    const ctid_t owner = (ctid_t)wake->ownerHostId;
    const XrtHosted_Word_t id = wake->queueId;
    const uint64_t epoch = wake->epoch;

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, owner,
        ESYNC_WAKE_ONE_WITH_OWNER);

    kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
        exclaves_get_queue_counter(id), ESYNC_WAKE_ONE_WITH_OWNER, owner);

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
        kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

    return KERN_SUCCESS;
}

static kern_return_t
handle_response_panic_wait(const XrtHosted_PanicWait_t *panic_wait)
{
    Exclaves_L4_Word_t panic_thread_scid = panic_wait->handler;
    __assert_only thread_t thread = current_thread();

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: PanicWait: "
        "Panic thread SCID %lx\n",
        panic_thread_scid);

    assert3u(panic_thread_scid, ==, thread->th_exclaves_ipc_ctx.scid);

    exclaves_panic_thread_wait();

    /* Not reached. */
    return KERN_SUCCESS;
}

static kern_return_t
handle_response_suspended(const XrtHosted_Suspended_t *suspended)
{
    Exclaves_L4_Word_t responding_scid = suspended->suspended;
    __assert_only ctid_t ctid = thread_get_ctid(current_thread());

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: Suspended: "
        "scid 0x%lx epoch 0x%llx\n", responding_scid, suspended->epoch);
    assert3u(suspended->suspendedHostId, ==, ctid);

    const uint64_t id = suspended->suspended;
    const uint64_t epoch = suspended->epoch;

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_START, id, epoch);

    const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_T, id, epoch,
        exclaves_get_thread_counter(id), 0, ESYNC_POLICY_KERNEL, THREAD_UNINT);

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_END, wr);

    switch (wr) {
    case THREAD_INTERRUPTED:
        return KERN_ABORTED;

    case THREAD_NOT_WAITING:
    case THREAD_AWAKENED:
        return KERN_SUCCESS;

    default:
        panic("Unexpected wait result from esync_wait: %d", wr);
    }
}

static kern_return_t
handle_response_resumed(const XrtHosted_Resumed_t *resumed)
{
    Exclaves_L4_Word_t responding_scid = resumed->thread;
    __assert_only ctid_t ctid = thread_get_ctid(current_thread());

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: Resumed: scid 0x%lx resume of scid 0x%llx "
        "(ctid: 0x%llx), epoch 0x%llx\n", responding_scid, resumed->resumed,
        resumed->resumedHostId, resumed->epoch);
    assert3u(resumed->threadHostId, ==, ctid);

    const ctid_t target = (ctid_t)resumed->resumedHostId;
    const XrtHosted_Word_t id = resumed->resumed;
    const uint64_t epoch = resumed->epoch;

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_START, id, epoch,
        target);

    kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_T, id, epoch,
        exclaves_get_thread_counter(id), ESYNC_WAKE_THREAD, target);

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_END,
        kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

    return KERN_SUCCESS;
}

static kern_return_t
handle_response_interrupted(const XrtHosted_Interrupted_t *interrupted)
{
    Exclaves_L4_Word_t responding_scid = interrupted->thread;
    __assert_only ctid_t ctid = thread_get_ctid(current_thread());

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: Interrupted: "
        "scid 0x%lx interrupt on queue id 0x%llx, "
        "epoch 0x%llx, target 0x%llx\n", responding_scid,
        interrupted->queueId, interrupted->epoch,
        interrupted->interruptedHostId);
    assert3u(interrupted->threadHostId, ==, ctid);

    const ctid_t target = (ctid_t)interrupted->interruptedHostId;
    const XrtHosted_Word_t id = interrupted->queueId;
    const uint64_t epoch = interrupted->epoch;

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_START, id, epoch,
        target);

    kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
        exclaves_get_queue_counter(id), ESYNC_WAKE_THREAD, target);

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_END,
        kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

    return KERN_SUCCESS;
}

static kern_return_t
handle_response_nothing_scheduled(
    __unused const XrtHosted_NothingScheduled_t *nothing_scheduled)
{
    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: nothing scheduled\n");

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_NOTHING_SCHEDULED));

    return KERN_SUCCESS;
}

static kern_return_t
handle_response_all_exclaves_booted(
    __unused const XrtHosted_AllExclavesBooted_t *all_exclaves_booted)
{
    exclaves_debug_printf(show_progress,
        "exclaves: scheduler: all exclaves booted\n");

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_ALL_EXCLAVES_BOOTED));

    return KERN_SUCCESS;
}

/*
 * The Early Alloc response asks for npages to be allocated. The list of
 * allocated pages is written into the first allocated page in the form of
 * 32-bit page numbers. The physical address of the first page is passed back
 * to the exclaves scheduler as part of the next request.
 */
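/*
 * Illustrative sketch only (values invented for the example): with
 * npages == 3 and allocated physical page numbers { 0x1200, 0x1201,
 * 0x9f00 }, the buffer at ptoa(0x1200) would begin with
 *
 *     uint32_t pagelist[] = { 0x1200, 0x1201, 0x9f00 };
 *
 * and ptoa(page[0]) is what gets handed back to the scheduler as
 * pagelist_pa in the next request.
 */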
static kern_return_t
handle_response_pmm_early_alloc(const XrtHosted_PmmEarlyAlloc_t *pmm_early_alloc,
    uint64_t *pagelist_pa)
{
    const uint32_t npages = (uint32_t)pmm_early_alloc->a;
    const uint64_t flags = pmm_early_alloc->b;

    exclaves_memory_pagekind_t kind = EXCLAVES_MEMORY_PAGEKIND_ROOTDOMAIN;
    exclaves_memory_page_flags_t alloc_flags = EXCLAVES_MEMORY_PAGE_FLAGS_NONE;

    exclaves_debug_printf(show_progress,
        "exclaves: scheduler: pmm early alloc, npages: %u, flags: %llu\n",
        npages, flags);

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_EARLY_ALLOC), npages, flags);

    if (npages == 0) {
        return KERN_SUCCESS;
    }

    if (npages > EXCLAVES_MEMORY_MAX_REQUEST) {
        exclaves_debug_printf(show_errors,
            "exclaves: request to allocate too many pages: %u\n",
            npages);
        return KERN_NO_SPACE;
    }

#if HAS_MTE
    if (flags & XNUUPCALLS_PAGEALLOCFLAGS_SEC_TRANSITION) {
        alloc_flags |= EXCLAVES_MEMORY_PAGE_FLAGS_MTE_TAGGED;
    }
#endif /* HAS_MTE */

    /*
     * As npages must be relatively small (<= EXCLAVES_MEMORY_MAX_REQUEST),
     * stack allocation is sufficient and fast. If
     * EXCLAVES_MEMORY_MAX_REQUEST gets large, this should probably be moved
     * to the heap.
     */
    uint32_t page[EXCLAVES_MEMORY_MAX_REQUEST];
    exclaves_memory_alloc(npages, page, kind, alloc_flags);
    /*
     * Copy the list of pages into the first page. The copy is currently
     * compiled out; it needs to move to before the SPTM retype.
     */
    uint64_t first_page_pa = ptoa(page[0]);
#if 0
    // move to before sptm retype
    uint32_t *first_page = (uint32_t *)phystokv(first_page_pa);
    for (int i = 0; i < npages; i++) {
        first_page[i] = page[i];
    }
#endif

    *pagelist_pa = first_page_pa;
    return KERN_SUCCESS;
}

static void
handle_response_watchdog_panic_complete(
    __unused const XrtHosted_WatchdogPanicComplete_t *panic_complete)
{
    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WATCHDOG_PANIC_COMPLETE));
}

OS_NORETURN
static void
handle_response_panicking(
    __unused const XrtHosted_Panicking_t *panicking)
{
    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_PANICKING));

    exclaves_wait_for_panic();

    /* Not reached. */
}

static inline bool
exclaves_clocks_need_update(void)
{
    const exclaves_clock_type_t clocks[] = {
        EXCLAVES_CLOCK_ABSOLUTE,
        EXCLAVES_CLOCK_CONTINUOUS
    };

    for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
        const exclaves_clock_t *clock = &exclaves_clock[i];
        exclaves_clock_t local = {
            .u128 = os_atomic_load(&clock->a_u128, relaxed),
        };

        if (local.u64.sent_offset != local.u64.latest_offset) {
            return true;
        }
    }

    return false;
}
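/*
 * Note on the 128-bit atomic loads above and below: reading both 64-bit
 * offsets in a single access yields a consistent snapshot without taking a
 * lock, so sent_offset and latest_offset can never appear torn relative to
 * each other; the snapshot is then examined through the non-atomic union
 * members of the local copy.
 */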

OS_NOINLINE
static kern_return_t
exclaves_clocks_update(void)
{
    const exclaves_clock_type_t clocks[] = {
        EXCLAVES_CLOCK_ABSOLUTE,
        EXCLAVES_CLOCK_CONTINUOUS
    };

    for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
        exclaves_clock_t local;
        exclaves_clock_t *clock = &exclaves_clock[i];

        local.u128 = os_atomic_load(&clock->a_u128, relaxed);
        while (local.u64.sent_offset != local.u64.latest_offset) {
            XrtHosted_Timer_t timer = i == EXCLAVES_CLOCK_ABSOLUTE ?
                XrtHosted_Timer_Absolute :
                XrtHosted_Timer_Continuous;

            kern_return_t kr =
                exclaves_scheduler_request_update_timer(timer,
                local.u64.latest_offset);
            if (kr != KERN_SUCCESS) {
                return kr;
            }

            /*
             * Swap the sent offset with the local latest offset. If
             * it fails, the sent offset will be reloaded.
             */
            os_atomic_cmpxchgv(&clock->a_u64.sent_offset,
                local.u64.sent_offset, local.u64.latest_offset,
                &local.u64.sent_offset, relaxed);

            /*
             * Fetch the latest offset again, in case we are stale.
             */
            local.u64.latest_offset = os_atomic_load(
                &clock->a_u64.latest_offset, relaxed);
        }
    }

    return KERN_SUCCESS;
}
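/*
 * Convergence note: each pass either observes sent_offset == latest_offset
 * and stops, or sends the latest offset and publishes it with the cmpxchg;
 * a failed cmpxchg reloads sent_offset, so the loop only repeats while
 * fresher offsets keep being published concurrently.
 */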

static kern_return_t
exclaves_scheduler_boot(void)
{
    /* This must happen on the boot CPU - bind the thread. */
    bind_to_boot_core();

    /*
     * Set the request/response buffers. These may be overridden later
     * when doing multicore setup.
     */
    *PERCPU_GET(exclaves_request) =
        exclaves_callbacks->v1.Core.request(XrtHosted_Core_bootIndex);
    *PERCPU_GET(exclaves_response) =
        exclaves_callbacks->v1.Core.response(XrtHosted_Core_bootIndex);

    kern_return_t kr = exclaves_scheduler_request_boot();

    unbind_from_boot_core();

    return kr;
}

static kern_return_t
exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
    uint64_t offset)
{
    thread_t thread = current_thread();

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: Request to update timer\n");

    XrtHosted_Response_t response = {
        .tag = XrtHosted_Response_NothingScheduled,
    };

    const XrtHosted_Request_t request = XrtHosted_Request_UpdateTimerOffsetMsg(
        .timer = timer,
        .offset = offset,
        );

    thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
    kern_return_t kr = exclaves_scheduler_request(&request, &response);
    thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

    switch (kr) {
    case KERN_SUCCESS:
        break;

    case KERN_POLICY_LIMIT:
        /*
         * POLICY_LIMIT should only happen if a timer update was pending
         * (and thus should never happen when trying to update a timer).
         */
        panic("exclaves: timer update requested when updating timer");

    default:
        exclaves_debug_printf(show_errors,
            "exclaves: scheduler request failed\n");
        return kr;
    }

    thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

    switch (response.tag) {
    case XrtHosted_Response_NothingScheduled:
        kr = handle_response_nothing_scheduled(&response.NothingScheduled);
        break;

    default:
        exclaves_debug_printf(show_errors, "exclaves: "
            "unexpected scheduler response when updating timer\n");
        kr = KERN_FAILURE;
        break;
    }

    thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

    return kr;
}

static kern_return_t
exclaves_scheduler_request_boot(void)
{
    kern_return_t kr = KERN_FAILURE;
    thread_t thread = current_thread();

    assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: Request to boot exclave\n");

    XrtHosted_Response_t response = {
        .tag = XrtHosted_Response_Invalid,
    };
    uint64_t pagelist_pa = 0;

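    /*
     * Keep driving the scheduler until it reports that all exclaves have
     * booted. When the previous response asked for early page allocation,
     * the next request carries the physical address of the resulting page
     * list back to the scheduler.
     */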
    while (response.tag != XrtHosted_Response_AllExclavesBooted) {
        const XrtHosted_Request_t request = pagelist_pa != 0 ?
            XrtHosted_Request_PmmEarlyAllocResponseMsg(.a = pagelist_pa) :
            XrtHosted_Request_BootExclavesMsg();
        pagelist_pa = 0;

        thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
        kr = exclaves_scheduler_request(&request, &response);
        thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

        switch (kr) {
        case KERN_SUCCESS:
            break;

        case KERN_POLICY_LIMIT:
            kr = exclaves_clocks_update();
            if (kr != KERN_SUCCESS) {
                return kr;
            }
            /*
             * Don't try to process the response - we just updated
             * the clock so continue with the boot request.
             */
            continue;

        default:
            exclaves_debug_printf(show_errors,
                "exclaves: scheduler request failed\n");
            return KERN_FAILURE;
        }

        thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

        switch (response.tag) {
        case XrtHosted_Response_Yield:
            kr = handle_response_yield(true, 0, &response.Yield);
            break;

        case XrtHosted_Response_NothingScheduled:
            kr = handle_response_nothing_scheduled(&response.NothingScheduled);
            break;

        case XrtHosted_Response_AllExclavesBooted:
            kr = handle_response_all_exclaves_booted(&response.AllExclavesBooted);
            break;

        case XrtHosted_Response_PmmEarlyAlloc:
            kr = handle_response_pmm_early_alloc(&response.PmmEarlyAlloc, &pagelist_pa);
            break;

        case XrtHosted_Response_PanicBufferAddress:
            handle_response_panic_buffer_address(response.PanicBufferAddress.physical);
            break;

        case XrtHosted_Response_Panicking:
            handle_response_panicking(&response.Panicking);
            /* Not reached. */

        default:
            exclaves_debug_printf(show_errors,
                "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
                response.tag);
            kr = KERN_FAILURE;
            break;
        }

        thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

        if (kr != KERN_SUCCESS) {
            break;
        }
    }

    return kr;
}

OS_INLINE
kern_return_t
exclaves_scheduler_request_resume(const exclaves_ctx_t *ctx, bool interrupted)
{
    thread_t thread = current_thread();
    const ctid_t ctid = thread_get_ctid(thread);

    assert3u(thread->th_exclaves_state &
        (TH_EXCLAVES_RESUME_PANIC_THREAD | TH_EXCLAVES_RPC), !=, 0);

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: Request to resume scid 0x%lx\n", ctx->scid);

    XrtHosted_Response_t response = {};
    const XrtHosted_Request_t request = interrupted ?
        XrtHosted_Request_InterruptWithHostIdMsg(
            .thread = ctx->scid,
            .hostId = ctid,
            ) :
        XrtHosted_Request_ResumeWithHostIdMsg(
            .thread = ctx->scid,
            .hostId = ctid,
            );

    kern_return_t kr = exclaves_scheduler_request(&request, &response);

    switch (kr) {
    case KERN_SUCCESS:
        break;

    case KERN_POLICY_LIMIT:
        /*
         * Don't try to handle any response (as there isn't one), just
         * return to the caller which will check MSG STATUS and re-enter
         * if necessary.
         */
        return exclaves_clocks_update();

    default:
        exclaves_debug_printf(show_errors,
            "exclaves: scheduler request failed\n");
        break;
    }

    if (kr != KERN_SUCCESS) {
        return kr;
    }

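    /*
     * These assembly labels bracket the response handling below so that
     * stackshot can classify frames in this range (presumably via the
     * exclaves_scheduler_request_*_label symbols referenced in
     * initialize_exclaves_ranges() later in this file); the asm statements
     * emit no instructions.
     */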
    __asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_START:\n\t");
    thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

    switch (response.tag) {
    case XrtHosted_Response_Wait:
        kr = handle_response_wait(&response.Wait);
        break;

    case XrtHosted_Response_Wake:
        kr = handle_response_wake(&response.Wake);
        break;

    case XrtHosted_Response_Yield:
        kr = handle_response_yield(false, ctx->scid, &response.Yield);
        break;

    case XrtHosted_Response_Spawned:
        kr = handle_response_spawned(ctx->scid, &response.Spawned);
        break;

    case XrtHosted_Response_Terminated:
        kr = handle_response_terminated(&response.Terminated);
        break;

    case XrtHosted_Response_WakeWithOwner:
        kr = handle_response_wake_with_owner(&response.WakeWithOwner);
        break;

    case XrtHosted_Response_PanicWait:
        kr = handle_response_panic_wait(&response.PanicWait);
        break;

    case XrtHosted_Response_Suspended:
        kr = handle_response_suspended(&response.Suspended);
        break;

    case XrtHosted_Response_Resumed:
        kr = handle_response_resumed(&response.Resumed);
        break;

    case XrtHosted_Response_Interrupted:
        kr = handle_response_interrupted(&response.Interrupted);
        break;

    case XrtHosted_Response_Panicking:
        handle_response_panicking(&response.Panicking);
        /* Not reached. */

    case XrtHosted_Response_Invalid:
    case XrtHosted_Response_Failure:
    case XrtHosted_Response_Pong:
    case XrtHosted_Response_SleepUntil:
    case XrtHosted_Response_Awaken:
    default:
        exclaves_debug_printf(show_errors,
            "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
            response.tag);
        kr = KERN_FAILURE;
        break;
    }

    thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
    __asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_END:\n\t");

    return kr;
}

/* A friendly name to show up in backtraces. */
OS_NOINLINE
kern_return_t
exclaves_run(thread_t thread, bool interrupted)
{
    return exclaves_scheduler_request_resume(&thread->th_exclaves_ipc_ctx,
        interrupted);
}

/*
 * Note: this is called from a thread with RT priority which is on the way to
 * panicking and thus doesn't log.
 */
kern_return_t
exclaves_scheduler_request_watchdog_panic(void)
{
    thread_t thread = current_thread();

    XrtHosted_Response_t response = {};
    const XrtHosted_Request_t request = XrtHosted_Request_WatchdogPanicMsg();

    /*
     * Check for consistent exclaves thread state to make sure we don't
     * accidentally block. This should normally never happen but if it does,
     * just return and allow the caller to panic without gathering an
     * exclaves stackshot.
     */
    if (os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) != 0 ||
        thread->th_exclaves_state != 0) {
        return KERN_FAILURE;
    }

    thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
    kern_return_t kr = exclaves_scheduler_request(&request, &response);
    thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;

    switch (kr) {
    case KERN_SUCCESS:
        break;

    case KERN_POLICY_LIMIT:
        /*
         * POLICY_LIMIT should only happen if a timer update was pending
         * (and thus should never happen when trying to send a watchdog
         * panic message).
         */
        panic("exclaves: "
            "timer update requested when calling watchdog panic");

    default:
        return kr;
    }

    thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;

    switch (response.tag) {
    case XrtHosted_Response_WatchdogPanicComplete:
        handle_response_watchdog_panic_complete(&response.WatchdogPanicComplete);
        break;

    case XrtHosted_Response_Panicking:
        handle_response_panicking(&response.Panicking);
        /* Not reached. */

    default:
        panic("exclaves: unexpected scheduler response "
            "when sending watchdog panic request: %d", response.tag);
    }

    thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;

    return kr;
}

/* -------------------------------------------------------------------------- */

#pragma mark exclaves xnu proxy communication

static kern_return_t
exclaves_hosted_error(bool success, XrtHosted_Error_t *error)
{
    if (success) {
        return KERN_SUCCESS;
    } else {
        exclaves_debug_printf(show_errors,
            "exclaves: XrtHosted: %s[%d] (%s): %s\n",
            error->file,
            error->line,
            error->function,
            error->expression
            );
        return KERN_FAILURE;
    }
}

#pragma mark exclaves privilege management

/*
 * All entitlement checking enabled by default.
 */
#define DEFAULT_ENTITLEMENT_FLAGS (~0)

/*
 * boot-arg to control the use of entitlements.
 * Eventually this should be removed and entitlement checking should be gated
 * on the EXCLAVES_R_ENTITLEMENTS requirement.
 * This will be addressed with rdar://125153460.
 */
TUNABLE(unsigned int, exclaves_entitlement_flags,
    "exclaves_entitlement_flags", DEFAULT_ENTITLEMENT_FLAGS);

static bool
has_entitlement(task_t task, const exclaves_priv_t priv,
    const char *entitlement)
{
    /* Skip the entitlement if not enabled. */
    if ((exclaves_entitlement_flags & priv) == 0) {
        return true;
    }

    return IOTaskHasEntitlement(task, entitlement);
}

static bool
has_entitlement_vnode(void *vnode, const int64_t off,
    const exclaves_priv_t priv, const char *entitlement)
{
    /* Skip the entitlement if not enabled. */
    if ((exclaves_entitlement_flags & priv) == 0) {
        return true;
    }

    return IOVnodeHasEntitlement(vnode, off, entitlement);
}

bool
exclaves_has_priv(task_t task, exclaves_priv_t priv)
{
    const bool is_kernel = task == kernel_task;
    const bool is_launchd = task_pid(task) == 1;

    switch (priv) {
    case EXCLAVES_PRIV_CONCLAVE_SPAWN:
        /* Both launchd and entitled tasks can spawn new conclaves. */
        if (is_launchd) {
            return true;
        }
        return has_entitlement(task, priv,
            "com.apple.private.exclaves.conclave-spawn");

    case EXCLAVES_PRIV_KERNEL_DOMAIN:
        /*
         * Both the kernel itself and user tasks with the right
         * privilege can access exclaves resources in the kernel domain.
         */
        if (is_kernel) {
            return true;
        }

        /*
         * If the task was entitled and has been through this path
         * before, it will have set the TFRO_HAS_KD_ACCESS flag.
         */
        if ((task_ro_flags_get(task) & TFRO_HAS_KD_ACCESS) != 0) {
            return true;
        }

        if (has_entitlement(task, priv,
            "com.apple.private.exclaves.kernel-domain")) {
            task_ro_flags_set(task, TFRO_HAS_KD_ACCESS);
            return true;
        }

        return false;

    case EXCLAVES_PRIV_BOOT:
        /* Both launchd and entitled tasks can boot exclaves. */
        if (is_launchd) {
            return true;
        }
        /* BEGIN IGNORE CODESTYLE */
        return has_entitlement(task, priv,
            "com.apple.private.exclaves.boot");
        /* END IGNORE CODESTYLE */

    case EXCLAVES_PRIV_INDICATOR_MIN_ON_TIME:
        /*
         * If the task was entitled and has been through this path
         * before, it will have set the
         * TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS flag.
         */
        if ((task_ro_flags_get(task) & TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS) != 0) {
            return true;
        }

        if (has_entitlement(task, priv,
            "com.apple.private.exclaves.indicator_min_on_time")) {
            task_ro_flags_set(task, TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS);
            return true;
        }

        return false;

    /* The CONCLAVE HOST priv is always checked by vnode. */
    case EXCLAVES_PRIV_CONCLAVE_HOST:
    default:
        panic("bad exclaves privilege (%u)", priv);
    }
}

bool
exclaves_has_priv_vnode(void *vnode, int64_t off, exclaves_priv_t priv)
{
    switch (priv) {
    case EXCLAVES_PRIV_CONCLAVE_HOST: {
        const bool has_conclave_host = has_entitlement_vnode(vnode,
            off, priv, "com.apple.private.exclaves.conclave-host");

        /*
         * Tasks should never have both EXCLAVES_PRIV_CONCLAVE_HOST
         * *and* EXCLAVES_PRIV_KERNEL_DOMAIN.
         */

        /* Don't check if neither entitlement is being enforced. */
        if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0 ||
            (exclaves_entitlement_flags & EXCLAVES_PRIV_KERNEL_DOMAIN) == 0) {
            return has_conclave_host;
        }

        const bool has_domain_kernel = has_entitlement_vnode(vnode, off,
            EXCLAVES_PRIV_KERNEL_DOMAIN,
            "com.apple.private.exclaves.kernel-domain");

        /* See if it has both. */
        if (has_conclave_host && has_domain_kernel) {
            exclaves_debug_printf(show_errors,
                "exclaves: task has both conclave-host and "
                "kernel-domain entitlements which is forbidden\n");
            return false;
        }

        return has_conclave_host;
    }

    case EXCLAVES_PRIV_CONCLAVE_SPAWN:
        return has_entitlement_vnode(vnode, off, priv,
            "com.apple.private.exclaves.conclave-spawn");

    default:
        panic("bad exclaves privilege (%u)", priv);
    }
}


#pragma mark exclaves stackshot range

/*
 * Unslid pointers defining the range of code which switches threads into
 * the secure world.
 */
uintptr_t exclaves_enter_range_start;
uintptr_t exclaves_enter_range_end;

/*
 * Unslid pointers defining the range of code which handles exclaves
 * scheduler requests.
 */
uintptr_t exclaves_scheduler_request_range_start;
uintptr_t exclaves_scheduler_request_range_end;


__startup_func
static void
initialize_exclaves_ranges(void)
{
    exclaves_enter_range_start = VM_KERNEL_UNSLIDE(&exclaves_enter_start_label);
    assert3u(exclaves_enter_range_start, !=, 0);
    exclaves_enter_range_end = VM_KERNEL_UNSLIDE(&exclaves_enter_end_label);
    assert3u(exclaves_enter_range_end, !=, 0);

    exclaves_scheduler_request_range_start = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_start_label);
    assert3u(exclaves_scheduler_request_range_start, !=, 0);
    exclaves_scheduler_request_range_end = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_end_label);
    assert3u(exclaves_scheduler_request_range_end, !=, 0);
}
STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, initialize_exclaves_ranges);

/*
 * Return true if the specified address is in exclaves_enter.
 */
static bool
exclaves_enter_in_range(uintptr_t addr, bool slid)
{
    return slid ?
        exclaves_in_range(addr, (uintptr_t)&exclaves_enter_start_label, (uintptr_t)&exclaves_enter_end_label) :
        exclaves_in_range(addr, exclaves_enter_range_start, exclaves_enter_range_end);
}

/*
 * Return true if the specified address is in the scheduler request handlers.
 */
static bool
exclaves_scheduler_request_in_range(uintptr_t addr, bool slid)
{
    return slid ?
        exclaves_in_range(addr, (uintptr_t)&exclaves_scheduler_request_start_label, (uintptr_t)&exclaves_scheduler_request_end_label) :
        exclaves_in_range(addr, exclaves_scheduler_request_range_start, exclaves_scheduler_request_range_end);
}

uint32_t
exclaves_stack_offset(const uintptr_t *addr, size_t nframes, bool slid)
{
    size_t i = 0;

    // Check for a frame matching the scheduler request range
    for (i = 0; i < nframes; i++) {
        if (exclaves_scheduler_request_in_range(addr[i], slid)) {
            break;
        }
    }

    // Insert exclaves stacks before the scheduler request frame
    if (i < nframes) {
        return (uint32_t)(i + 1);
    }

    // Check for a frame matching the upcall code range
    for (i = 0; i < nframes; i++) {
        if (exclaves_upcall_in_range(addr[i], slid)) {
            break;
        }
    }

    // Insert exclaves stacks before the upcall frame when found
    if (i < nframes) {
        return (uint32_t)(i + 1);
    }

    // Check for a frame matching the exclaves enter range
    for (i = 0; i < nframes; i++) {
        if (exclaves_enter_in_range(addr[i], slid)) {
            break;
        }
    }

    // Put exclaves stacks on top of kernel stacks by default
    if (i == nframes) {
        i = 0;
    }
    return (uint32_t)i;
}
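/*
 * Worked example (frame values invented for illustration): for a 5-frame
 * kernel backtrace where addr[2] lies in the scheduler request range, this
 * returns 3, splicing the exclaves stack in directly above that frame. If
 * no frame matches any of the three ranges, it returns 0 and the exclaves
 * stack simply sits on top of the kernel stack.
 */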

#if DEVELOPMENT || DEBUG

/* Tweak the set of relaxed requirements on startup. */
__startup_func
static void
exclaves_requirement_startup(void)
{
    /*
     * The medium-term plan is that the boot-arg controlling entitlements
     * goes away entirely and is replaced with EXCLAVES_R_ENTITLEMENTS.
     * Until that happens, for historical reasons, if the entitlement
     * boot-arg has disabled EXCLAVES_PRIV_CONCLAVE_HOST, then relax
     * EXCLAVES_R_CONCLAVE_RESOURCES here too.
     */
    if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0) {
        exclaves_requirement_relax(EXCLAVES_R_CONCLAVE_RESOURCES);
    }

    exclaves_requirement_relax(EXCLAVES_R_EIC);
}
STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, exclaves_requirement_startup);

#endif /* DEVELOPMENT || DEBUG */

#endif /* CONFIG_EXCLAVES */

#if __has_include(<Tightbeam/tightbeam.h>)

#include <Tightbeam/tightbeam.h>

/*
 * Tightbeam needs to initialize for kernel transports (xnu and AFK).
 * Only the XNU transport is specific to exclaves - AFK is not.
 */
__startup_func
static void
tightbeam_startup(void)
{
    tb_transport_startup();
}
STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, tightbeam_startup);
#endif /* __has_include(<Tightbeam/tightbeam.h>) */

#ifndef CONFIG_EXCLAVES
/*
 * Stubs for sensor functions which are not compiled in from exclaves.c when
 * CONFIG_EXCLAVES is disabled.
 */

kern_return_t
exclaves_sensor_start(exclaves_sensor_type_t sensor_type, uint64_t flags,
    exclaves_sensor_status_t *status)
{
#pragma unused(sensor_type, flags, status)
    return KERN_NOT_SUPPORTED;
}

kern_return_t
exclaves_sensor_stop(exclaves_sensor_type_t sensor_type, uint64_t flags,
    exclaves_sensor_status_t *status)
{
#pragma unused(sensor_type, flags, status)
    return KERN_NOT_SUPPORTED;
}

kern_return_t
exclaves_sensor_status(exclaves_sensor_type_t sensor_type, uint64_t flags,
    exclaves_sensor_status_t *status)
{
#pragma unused(sensor_type, flags, status)
    return KERN_NOT_SUPPORTED;
}

#endif /* ! CONFIG_EXCLAVES */