/*
 * Copyright (c) 2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach/exclaves.h>
#include <mach/mach_traps.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <kern/recount.h>
#include <kern/startup.h>

#if CONFIG_EXCLAVES

#if CONFIG_SPTM
#include <arm64/sptm/sptm.h>
#include <arm64/hv/hv_vm.h>
#include <arm64/hv/hv_vcpu.h>
#else
#error Invalid configuration
#endif /* CONFIG_SPTM */

#include <arm/cpu_data_internal.h>
#include <arm/misc_protos.h>
#include <kern/epoch_sync.h>
#include <kern/ipc_kobject.h>
#include <kern/kalloc.h>
#include <kern/locks.h>
#include <kern/percpu.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/exclaves_stackshot.h>
#include <kern/exclaves_test_stackshot.h>
#include <vm/pmap.h>
#include <pexpert/pexpert.h>

#include <mach/exclaves_l4.h>
#include <mach/mach_port.h>

#include <Exclaves/Exclaves.h>

#include <IOKit/IOBSD.h>

#include <xnuproxy/messages.h>

#include "exclaves_debug.h"
#include "exclaves_panic.h"
#include "exclaves_xnuproxy.h"

/* External & generated headers */
#include <xrt_hosted_types/types.h>

#if __has_include(<Tightbeam/tightbeam.h>)
#include <Tightbeam/tightbeam.h>
#include <Tightbeam/tightbeam_private.h>
#endif

#include "exclaves_resource.h"
#include "exclaves_upcalls.h"
#include "exclaves_boot.h"
#include "exclaves_inspection.h"
#include "exclaves_memory.h"
#include "exclaves_internal.h"
#include "exclaves_aoe.h"
#include "exclaves_sensor.h"

LCK_GRP_DECLARE(exclaves_lck_grp, "exclaves");

/* Boot lock - only used here for assertions. */
extern lck_mtx_t exclaves_boot_lock;
/*
 * Sent/latest offsets for updating the exclaves clocks.
 */
typedef struct {
    union {
        /* Atomic fields are used via atomic primitives. */
        struct { _Atomic uint64_t sent_offset, latest_offset; } a_u64;
        _Atomic unsigned __int128 a_u128;
        /*
         * Non-atomic fields are used via a local copy. This is needed
         * to avoid the undefined behavior of copying an atomic struct
         * or accessing atomic fields non-atomically.
         */
        struct { uint64_t sent_offset, latest_offset; } u64;
        unsigned __int128 u128;
    };
} exclaves_clock_t;
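
/*
 * Illustrative sketch (not part of the build): the 128-bit view lets both
 * offsets be snapshotted in a single atomic load, after which the
 * non-atomic fields of the local copy can be read safely. Assuming
 * "clock" points at one of the entries below:
 *
 *   exclaves_clock_t snap = {
 *       .u128 = os_atomic_load(&clock->a_u128, relaxed),
 *   };
 *   bool stale = snap.u64.sent_offset != snap.u64.latest_offset;
 */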


/*
 * Two clocks, indexed by their type, to make lookup easy.
 */
static exclaves_clock_t exclaves_clock[] = {
    [EXCLAVES_CLOCK_ABSOLUTE] = {},
    [EXCLAVES_CLOCK_CONTINUOUS] = {},
};

static kern_return_t
exclaves_endpoint_call_internal(ipc_port_t port, exclaves_id_t endpoint_id);

static kern_return_t
exclaves_enter(void);
static kern_return_t
exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter);

static kern_return_t
exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info);
OS_NORETURN OS_NOINLINE
static void
exclaves_wait_for_panic(void);

static inline bool
exclaves_clocks_need_update(void);

static kern_return_t
exclaves_scheduler_boot(void);

static kern_return_t
exclaves_hosted_error(bool success, XrtHosted_Error_t *error);

static kern_return_t
exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
    uint64_t offset);

static kern_return_t
exclaves_scheduler_request_boot(void);


/*
 * A static set of exclave epoch counters.
 */
static os_atomic(uint64_t) epoch_counter[XrtHosted_Counter_limit] = {};

static inline os_atomic(uint64_t) *
exclaves_get_queue_counter(const uint64_t id)
{
    return &epoch_counter[XrtHosted_Counter_fromQueueId(id)];
}

static inline os_atomic(uint64_t) *
exclaves_get_thread_counter(const uint64_t id)
{
    return &epoch_counter[XrtHosted_Counter_fromThreadId(id)];
}
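
/*
 * Illustrative use (a sketch mirroring handle_response_wake below): the
 * scheduler's queue id selects the epoch counter that the esync layer
 * uses to pair waits and wakes:
 *
 *   kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
 *       exclaves_get_queue_counter(id), ESYNC_WAKE_ALL, 0);
 */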


/* -------------------------------------------------------------------------- */
#pragma mark exclaves debug configuration

#if DEVELOPMENT || DEBUG
TUNABLE_WRITEABLE(unsigned int, exclaves_debug, "exclaves_debug",
    exclaves_debug_show_errors);

TUNABLE_DT_WRITEABLE(exclaves_requirement_t, exclaves_relaxed_requirements,
    "/defaults", "kern.exclaves_relaxed_reqs", "exclaves_relaxed_requirements",
    0, TUNABLE_DT_NONE);
#else
const exclaves_requirement_t exclaves_relaxed_requirements = 0;
#endif

#endif /* CONFIG_EXCLAVES */

/* -------------------------------------------------------------------------- */
#pragma mark userspace entry point

#if CONFIG_EXCLAVES
static kern_return_t
operation_boot(mach_port_name_t name, exclaves_boot_stage_t stage)
{
    if (name != MACH_PORT_NULL) {
        /* Only accept MACH_PORT_NULL for now */
        return KERN_INVALID_CAPABILITY;
    }

    /*
     * As the boot operation itself happens outside the context of any
     * conclave, it requires special privilege.
     */
    if (!exclaves_has_priv(current_task(), EXCLAVES_PRIV_BOOT)) {
        return KERN_DENIED;
    }

    return exclaves_boot(stage);
}
#endif /* CONFIG_EXCLAVES */

kern_return_t
_exclaves_ctl_trap(struct exclaves_ctl_trap_args *uap)
{
#if CONFIG_EXCLAVES
    kern_return_t kr = KERN_SUCCESS;
    int error = 0;

    mach_port_name_t name = uap->name;
    exclaves_id_t identifier = uap->identifier;
    mach_vm_address_t ubuffer = uap->buffer;
    mach_vm_size_t usize = uap->size;
    mach_vm_size_t uoffset = (mach_vm_size_t)uap->identifier;
    mach_vm_size_t usize2 = uap->size2;
    mach_vm_size_t uoffset2 = uap->offset;
    mach_vm_address_t ustatus = uap->status;

    task_t task = current_task();

    /*
     * EXCLAVES_XNU_PROXY_CR_RETVAL comes from ExclavePlatform and is shared
     * with xnu. That header is not shared with userspace. Make sure that
     * the retval userspace picks up is the same as the one
     * xnu/ExclavePlatform thinks it is.
     */
    assert3p(&EXCLAVES_XNU_PROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0), ==,
        &XNUPROXY_CR_RETVAL((Exclaves_L4_IpcBuffer_t *)0));

    uint8_t operation = EXCLAVES_CTL_OP(uap->operation_and_flags);
    uint32_t flags = EXCLAVES_CTL_FLAGS(uap->operation_and_flags);
    if (flags != 0) {
        return KERN_INVALID_ARGUMENT;
    }

    /*
     * Deal with OP_BOOT up-front as it has slightly different restrictions
     * than the other operations.
     */
    if (operation == EXCLAVES_CTL_OP_BOOT) {
        return operation_boot(name, (uint32_t)identifier);
    }

    /*
     * All other operations are restricted to properly entitled tasks which
     * can operate in the kernel domain, or those which have joined
     * conclaves (which has its own entitlement check).
     * If requirements are relaxed during development, tasks with no
     * conclaves are also allowed.
     */
    if (operation == EXCLAVES_CTL_OP_SENSOR_MIN_ON_TIME) {
        if (!exclaves_has_priv(task, EXCLAVES_PRIV_INDICATOR_MIN_ON_TIME)) {
            return KERN_DENIED;
        }
    } else if (task_get_conclave(task) == NULL &&
        !exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN) &&
        !exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES)) {
        return KERN_DENIED;
    }

    /*
     * Wait for EXCLAVECORE boot to complete. If exclaves are unsupported,
     * return immediately.
     */
    kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVECORE);
    if (kr != KERN_SUCCESS) {
        return kr;
    }

    if (task_get_conclave(task) != NULL) {
        /*
         * For calls from tasks that have joined conclaves, now wait until
         * booted up to EXCLAVEKIT. If EXCLAVEKIT boot fails for some reason,
         * KERN_NOT_SUPPORTED will be returned (on RELEASE this would
         * panic). This is a separate call from the one above because we
         * need to distinguish EXCLAVECORE not being supported, and
         * still wait for EXCLAVEKIT to boot if it *is* supported.
         */
        kr = exclaves_boot_wait(EXCLAVES_BOOT_STAGE_EXCLAVEKIT);
        if (kr != KERN_SUCCESS) {
            return kr;
        }
    }

    switch (operation) {
    case EXCLAVES_CTL_OP_ENDPOINT_CALL: {
        if (name != MACH_PORT_NULL) {
            /* Only accept MACH_PORT_NULL for now */
            return KERN_INVALID_CAPABILITY;
        }
        if (ubuffer == USER_ADDR_NULL || usize == 0 ||
            usize != Exclaves_L4_IpcBuffer_Size) {
            return KERN_INVALID_ARGUMENT;
        }


        Exclaves_L4_IpcBuffer_t *ipcb = exclaves_get_ipc_buffer();
        /* TODO (rdar://123728529) - IPC buffer isn't freed until thread exit */
        if (!ipcb && (error = exclaves_allocate_ipc_buffer((void**)&ipcb))) {
            return error;
        }
        assert(ipcb != NULL);
        if ((error = copyin(ubuffer, ipcb, usize))) {
            return error;
        }

        if (identifier >= CONCLAVE_SERVICE_MAX) {
            return KERN_INVALID_ARGUMENT;
        }

        /*
         * Verify that the service actually exists in the current
         * domain.
         */
        if (!exclaves_conclave_has_service(task_get_conclave(task),
            identifier)) {
            return KERN_INVALID_ARGUMENT;
        }

        kr = exclaves_endpoint_call_internal(IPC_PORT_NULL, identifier);
        error = copyout(ipcb, ubuffer, usize);
        /*
         * The endpoint call to the conclave may have triggered a stop
         * upcall; check if the stop upcall completion handler needs to run.
         */
        task_stop_conclave_upcall_complete();
        if (error) {
            return error;
        }
        break;
    }

    case EXCLAVES_CTL_OP_NAMED_BUFFER_CREATE: {
        if (name != MACH_PORT_NULL) {
            /* Only accept MACH_PORT_NULL for now */
            return KERN_INVALID_CAPABILITY;
        }

        size_t len = 0;
        char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
        if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX,
            &len) != 0 || id_name[0] == '\0') {
            return KERN_INVALID_ARGUMENT;
        }

        exclaves_buffer_perm_t perm = (exclaves_buffer_perm_t)usize2;
        const exclaves_buffer_perm_t supported =
            EXCLAVES_BUFFER_PERM_READ | EXCLAVES_BUFFER_PERM_WRITE;
        if ((perm & supported) == 0 || (perm & ~supported) != 0) {
            return KERN_INVALID_ARGUMENT;
        }

        const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
        exclaves_resource_t *resource = NULL;
        kr = exclaves_resource_shared_memory_map(domain, id_name, usize,
            perm, &resource);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        kr = exclaves_resource_create_port_name(resource,
            current_space(), &name);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
        if (kr != KERN_SUCCESS) {
            mach_port_deallocate(current_space(), name);
            return kr;
        }

        break;
    }

    case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYIN: {
        exclaves_resource_t *resource = NULL;
        kr = exclaves_resource_from_port_name(current_space(), name,
            &resource);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
            exclaves_resource_release(resource);
            return KERN_INVALID_CAPABILITY;
        }

        kr = exclaves_resource_shared_memory_copyin(resource,
            ubuffer, usize, uoffset, usize2, uoffset2);

        exclaves_resource_release(resource);

        if (kr != KERN_SUCCESS) {
            return kr;
        }
        break;
    }

    case EXCLAVES_CTL_OP_NAMED_BUFFER_COPYOUT: {
        exclaves_resource_t *resource = NULL;
        kr = exclaves_resource_from_port_name(current_space(), name,
            &resource);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        if (resource->r_type != XNUPROXY_RESOURCETYPE_SHAREDMEMORY) {
            exclaves_resource_release(resource);
            return KERN_INVALID_CAPABILITY;
        }

        kr = exclaves_resource_shared_memory_copyout(resource,
            ubuffer, usize, uoffset, usize2, uoffset2);

        exclaves_resource_release(resource);

        if (kr != KERN_SUCCESS) {
            return kr;
        }
        break;
    }
431
432 case EXCLAVES_CTL_OP_LAUNCH_CONCLAVE:
433 if (name != MACH_PORT_NULL) {
434 /* Only accept MACH_PORT_NULL for now */
435 return KERN_INVALID_CAPABILITY;
436 }
437 kr = task_launch_conclave(name);
438
439 /*
440 * Conclave launch call to may have trigger a stop upcall,
441 * check if stop upcall completion handler needs to run.
442 */
443 task_stop_conclave_upcall_complete();
444 break;
445
    case EXCLAVES_CTL_OP_LOOKUP_SERVICES: {
        if (name != MACH_PORT_NULL) {
            /* Only accept MACH_PORT_NULL for now */
            return KERN_INVALID_CAPABILITY;
        }
        struct exclaves_resource_user uresource = {};

        if (usize > (MAX_CONCLAVE_RESOURCE_NUM * sizeof(struct exclaves_resource_user)) ||
            (usize % sizeof(struct exclaves_resource_user) != 0)) {
            return KERN_INVALID_ARGUMENT;
        }

        if ((ubuffer == USER_ADDR_NULL && usize != 0) ||
            (usize == 0 && ubuffer != USER_ADDR_NULL)) {
            return KERN_INVALID_ARGUMENT;
        }

        if (ubuffer == USER_ADDR_NULL) {
            return KERN_INVALID_ARGUMENT;
        }

        /* For the moment we only ever have to deal with one request. */
        if (usize != sizeof(struct exclaves_resource_user)) {
            return KERN_INVALID_ARGUMENT;
        }
        error = copyin(ubuffer, &uresource, usize);
        if (error) {
            return KERN_INVALID_ARGUMENT;
        }

        const size_t name_buf_len = sizeof(uresource.r_name);
        if (strnlen(uresource.r_name, name_buf_len) == name_buf_len) {
            return KERN_INVALID_ARGUMENT;
        }

        /*
         * Do the regular lookup first. If that fails, fall back to the
         * DARWIN domain, and finally fall back to the KERNEL domain.
         */
        const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
        uint64_t id = exclaves_service_lookup(domain, uresource.r_name);

        if (exclaves_requirement_is_relaxed(EXCLAVES_R_CONCLAVE_RESOURCES) ||
            exclaves_has_priv(task, EXCLAVES_PRIV_KERNEL_DOMAIN)) {
            if (id == EXCLAVES_INVALID_ID) {
                id = exclaves_service_lookup(EXCLAVES_DOMAIN_DARWIN,
                    uresource.r_name);
            }
            if (id == EXCLAVES_INVALID_ID) {
                id = exclaves_service_lookup(EXCLAVES_DOMAIN_KERNEL,
                    uresource.r_name);
            }
        }

        if (id == EXCLAVES_INVALID_ID) {
            return KERN_NOT_FOUND;
        }

        /*
         * Looking up a forwarding service verifies its existence, but
         * doesn't return the id, since communication with it is not
         * possible.
         */
        if (id > EXCLAVES_FORWARDING_RESOURCE_ID_BASE) {
            return KERN_NAME_EXISTS;
        }

        uresource.r_id = id;
        uresource.r_port = MACH_PORT_NULL;

        error = copyout(&uresource, ubuffer, usize);
        if (error) {
            return KERN_INVALID_ADDRESS;
        }

        kr = KERN_SUCCESS;
        break;
    }

    case EXCLAVES_CTL_OP_AUDIO_BUFFER_CREATE: {
        if (identifier == 0) {
            return KERN_INVALID_ARGUMENT;
        }

        /* copy in string name */
        char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
        size_t done = 0;
        if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
            return KERN_INVALID_ARGUMENT;
        }

        const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
        exclaves_resource_t *resource = NULL;
        kr = exclaves_resource_audio_memory_map(domain, id_name, usize,
            &resource);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        kr = exclaves_resource_create_port_name(resource, current_space(),
            &name);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
        if (kr != KERN_SUCCESS) {
            mach_port_deallocate(current_space(), name);
            return kr;
        }

        break;
    }

    case EXCLAVES_CTL_OP_AUDIO_BUFFER_COPYOUT: {
        exclaves_resource_t *resource;

        kr = exclaves_resource_from_port_name(current_space(), name, &resource);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        if (resource->r_type !=
            XNUPROXY_RESOURCETYPE_ARBITRATEDAUDIOMEMORY) {
            exclaves_resource_release(resource);
            return KERN_INVALID_CAPABILITY;
        }

        kr = exclaves_resource_audio_memory_copyout(resource,
            ubuffer, usize, uoffset, usize2, uoffset2, ustatus);

        exclaves_resource_release(resource);

        if (kr != KERN_SUCCESS) {
            return kr;
        }

        break;
    }

    case EXCLAVES_CTL_OP_SENSOR_CREATE: {
        if (identifier == 0) {
            return KERN_INVALID_ARGUMENT;
        }

        /* copy in string name */
        char id_name[EXCLAVES_RESOURCE_NAME_MAX] = "";
        size_t done = 0;
        if (copyinstr(identifier, id_name, EXCLAVES_RESOURCE_NAME_MAX, &done) != 0) {
            return KERN_INVALID_ARGUMENT;
        }

        const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
        exclaves_resource_t *resource = NULL;
        kr = exclaves_resource_sensor_open(domain, id_name, &resource);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        kr = exclaves_resource_create_port_name(resource, current_space(),
            &name);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        kr = copyout(&name, ubuffer, sizeof(mach_port_name_t));
        if (kr != KERN_SUCCESS) {
            /* No senders drops the reference. */
            mach_port_deallocate(current_space(), name);
            return kr;
        }

        break;
    }

    case EXCLAVES_CTL_OP_SENSOR_START: {
        exclaves_resource_t *resource;
        kr = exclaves_resource_from_port_name(current_space(), name, &resource);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
            exclaves_resource_release(resource);
            return KERN_FAILURE;
        }

        exclaves_sensor_status_t status;
        kr = exclaves_resource_sensor_start(resource, identifier, &status);

        exclaves_resource_release(resource);

        if (kr != KERN_SUCCESS) {
            return kr;
        }

        kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

        break;
    }
    case EXCLAVES_CTL_OP_SENSOR_STOP: {
        exclaves_resource_t *resource;
        kr = exclaves_resource_from_port_name(current_space(), name, &resource);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
            exclaves_resource_release(resource);
            return KERN_FAILURE;
        }

        exclaves_sensor_status_t status;
        kr = exclaves_resource_sensor_stop(resource, identifier, &status);

        exclaves_resource_release(resource);

        if (kr != KERN_SUCCESS) {
            return kr;
        }

        kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));

        break;
    }
    case EXCLAVES_CTL_OP_SENSOR_STATUS: {
        exclaves_resource_t *resource;
        kr = exclaves_resource_from_port_name(current_space(), name, &resource);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        if (resource->r_type != XNUPROXY_RESOURCETYPE_SENSOR) {
            exclaves_resource_release(resource);
            return KERN_FAILURE;
        }


        exclaves_sensor_status_t status;
        kr = exclaves_resource_sensor_status(resource, identifier, &status);

        exclaves_resource_release(resource);

        if (kr != KERN_SUCCESS) {
            return kr;
        }

        kr = copyout(&status, ubuffer, sizeof(exclaves_sensor_status_t));
        break;
    }
    case EXCLAVES_CTL_OP_NOTIFICATION_RESOURCE_LOOKUP: {
        exclaves_resource_t *notification_resource = NULL;
        mach_port_name_t port_name = MACH_PORT_NULL;

        struct exclaves_resource_user *notification_resource_user = NULL;
        if (usize != sizeof(struct exclaves_resource_user)) {
            return KERN_INVALID_ARGUMENT;
        }

        if (ubuffer == USER_ADDR_NULL) {
            return KERN_INVALID_ARGUMENT;
        }

        notification_resource_user = (struct exclaves_resource_user *)
            kalloc_data(usize, Z_WAITOK | Z_ZERO | Z_NOFAIL);

        error = copyin(ubuffer, notification_resource_user, usize);
        if (error) {
            kr = KERN_INVALID_ARGUMENT;
            goto notification_resource_lookup_out;
        }

        const size_t name_buf_len = sizeof(notification_resource_user->r_name);
        if (strnlen(notification_resource_user->r_name, name_buf_len)
            == name_buf_len) {
            kr = KERN_INVALID_ARGUMENT;
            goto notification_resource_lookup_out;
        }

        const char *domain = exclaves_conclave_get_domain(task_get_conclave(task));
        kr = exclaves_notification_create(domain,
            notification_resource_user->r_name, &notification_resource);
        if (kr != KERN_SUCCESS) {
            goto notification_resource_lookup_out;
        }

        kr = exclaves_resource_create_port_name(notification_resource,
            current_space(), &port_name);
        if (kr != KERN_SUCCESS) {
            goto notification_resource_lookup_out;
        }
        notification_resource_user->r_type = notification_resource->r_type;
        notification_resource_user->r_id = notification_resource->r_id;
        notification_resource_user->r_port = port_name;
        error = copyout(notification_resource_user, ubuffer, usize);
        if (error) {
            kr = KERN_INVALID_ADDRESS;
            goto notification_resource_lookup_out;
        }

notification_resource_lookup_out:
        if (notification_resource_user != NULL) {
            kfree_data(notification_resource_user, usize);
        }
        if (kr != KERN_SUCCESS && port_name != MACH_PORT_NULL) {
            mach_port_deallocate(current_space(), port_name);
        }
        break;
    }

    case EXCLAVES_CTL_OP_AOE_SETUP: {
        uint8_t num_message = 0;
        uint8_t num_worker = 0;

        if (task_get_conclave(task) == NULL) {
            kr = KERN_FAILURE;
            break;
        }

        kr = exclaves_aoe_setup(&num_message, &num_worker);
        if (kr != KERN_SUCCESS) {
            break;
        }

        error = copyout(&num_message, ubuffer, sizeof(num_message));
        if (error != 0) {
            kr = KERN_INVALID_ADDRESS;
            break;
        }

        error = copyout(&num_worker, ustatus, sizeof(num_worker));
        if (error != 0) {
            kr = KERN_INVALID_ADDRESS;
            break;
        }

        break;
    }

    case EXCLAVES_CTL_OP_AOE_MESSAGE_LOOP: {
        if (task_get_conclave(task) == NULL) {
            kr = KERN_FAILURE;
            break;
        }

        kr = exclaves_aoe_message_loop();
        break;
    }

    case EXCLAVES_CTL_OP_AOE_WORK_LOOP: {
        if (task_get_conclave(task) == NULL) {
            kr = KERN_FAILURE;
            break;
        }

        kr = exclaves_aoe_work_loop();
        break;
    }

    case EXCLAVES_CTL_OP_SENSOR_MIN_ON_TIME: {
        if (name != MACH_PORT_NULL) {
            /* Only accept MACH_PORT_NULL for now */
            return KERN_INVALID_CAPABILITY;
        }

        if (ubuffer == USER_ADDR_NULL || usize == 0 ||
            usize != sizeof(struct exclaves_indicator_deadlines)) {
            return KERN_INVALID_ARGUMENT;
        }

        struct exclaves_indicator_deadlines udurations;
        error = copyin(ubuffer, &udurations, usize);
        if (error) {
            return KERN_INVALID_ARGUMENT;
        }

        kr = exclaves_indicator_min_on_time_deadlines(&udurations);
        if (kr != KERN_SUCCESS) {
            return kr;
        }

        error = copyout(&udurations, ubuffer, usize);
        if (error) {
            return KERN_INVALID_ADDRESS;
        }

        break;
    }

    default:
        kr = KERN_INVALID_ARGUMENT;
        break;
    }

    return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(uap)
    return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}

/* -------------------------------------------------------------------------- */
#pragma mark kernel entry points

kern_return_t
exclaves_endpoint_call(ipc_port_t port, exclaves_id_t endpoint_id,
    exclaves_tag_t *tag, exclaves_error_t *error)
{
#if CONFIG_EXCLAVES
    kern_return_t kr = KERN_SUCCESS;
    assert(port == IPC_PORT_NULL);

    Exclaves_L4_IpcBuffer_t *ipcb = Exclaves_L4_IpcBuffer();
    assert(ipcb != NULL);

    exclaves_debug_printf(show_progress,
        "exclaves: endpoint call:\tendpoint id %lld tag 0x%llx\n",
        endpoint_id, *tag);

    ipcb->mr[Exclaves_L4_Ipc_Mr_Tag] = *tag;
    kr = exclaves_endpoint_call_internal(port, endpoint_id);
    *tag = ipcb->mr[Exclaves_L4_Ipc_Mr_Tag];
    *error = XNUPROXY_CR_RETVAL(ipcb);

    exclaves_debug_printf(show_progress,
        "exclaves: endpoint call return:\tendpoint id %lld tag 0x%llx "
        "error 0x%llx\n", endpoint_id, *tag, *error);

    return kr;
#else /* CONFIG_EXCLAVES */
#pragma unused(port, endpoint_id, tag, error)
    return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
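
/*
 * Sketch of a kernel-side call (assumes the calling thread has already
 * allocated its IPC buffer and marshalled the message registers, and
 * that "endpoint_id" was resolved elsewhere, e.g. via a service lookup;
 * "initial_tag" is hypothetical):
 *
 *   exclaves_tag_t tag = initial_tag;
 *   exclaves_error_t error = 0;
 *   kr = exclaves_endpoint_call(IPC_PORT_NULL, endpoint_id, &tag, &error);
 */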

kern_return_t
exclaves_allocate_ipc_buffer(void **out_ipc_buffer)
{
#if CONFIG_EXCLAVES
    kern_return_t kr = KERN_SUCCESS;
    thread_t thread = current_thread();

    if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
        assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
        kr = exclaves_xnuproxy_ctx_alloc(&thread->th_exclaves_ipc_ctx);
        if (kr != KERN_SUCCESS) {
            return kr;
        }
        assert(thread->th_exclaves_ipc_ctx.usecnt == 0);
    }
    thread->th_exclaves_ipc_ctx.usecnt++;

    if (out_ipc_buffer != NULL) {
        *out_ipc_buffer = thread->th_exclaves_ipc_ctx.ipcb;
    }
    return KERN_SUCCESS;
#else /* CONFIG_EXCLAVES */
#pragma unused(out_ipc_buffer)
    return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}
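
/*
 * Typical pairing (a sketch mirroring the ENDPOINT_CALL path in
 * _exclaves_ctl_trap above): allocate on first use, reuse the per-thread
 * cached buffer, then free to drop the use count:
 *
 *   void *ipcb = exclaves_get_ipc_buffer();
 *   if (ipcb == NULL) {
 *       kr = exclaves_allocate_ipc_buffer(&ipcb);
 *   }
 *   // ... use the buffer ...
 *   kr = exclaves_free_ipc_buffer();
 */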

kern_return_t
exclaves_free_ipc_buffer(void)
{
#if CONFIG_EXCLAVES
    thread_t thread = current_thread();

    /* Don't try to free unallocated contexts. */
    if (thread->th_exclaves_ipc_ctx.ipcb == NULL) {
        return KERN_SUCCESS;
    }

    /* The inspection thread's cached buffer should never be freed. */
    const thread_exclaves_inspection_flags_t iflags =
        os_atomic_load(&thread->th_exclaves_inspection_state, relaxed);
    if ((iflags & TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
        return KERN_SUCCESS;
    }

    assert(thread->th_exclaves_ipc_ctx.usecnt > 0);
    if (--thread->th_exclaves_ipc_ctx.usecnt > 0) {
        return KERN_SUCCESS;
    }

    return exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
#else /* CONFIG_EXCLAVES */
    return KERN_NOT_SUPPORTED;
#endif /* CONFIG_EXCLAVES */
}

kern_return_t
exclaves_thread_terminate(__unused thread_t thread)
{
    kern_return_t kr = KERN_SUCCESS;

#if CONFIG_EXCLAVES
    assert(thread == current_thread());
    assert(thread->th_exclaves_intstate == 0);
    assert(thread->th_exclaves_state == 0);
    if (thread->th_exclaves_ipc_ctx.ipcb != NULL) {
        exclaves_debug_printf(show_progress,
            "exclaves: thread_terminate freeing abandoned exclaves "
            "ipc buffer\n");
        /* Unconditionally free context irrespective of usecount */
        thread->th_exclaves_ipc_ctx.usecnt = 0;
        kr = exclaves_xnuproxy_ctx_free(&thread->th_exclaves_ipc_ctx);
        assert(kr == KERN_SUCCESS);
    }
#else
#pragma unused(thread)
#endif /* CONFIG_EXCLAVES */

    return kr;
}

OS_CONST
void*
exclaves_get_ipc_buffer(void)
{
#if CONFIG_EXCLAVES
    thread_t thread = current_thread();
    Exclaves_L4_IpcBuffer_t *ipcb = thread->th_exclaves_ipc_ctx.ipcb;

    return ipcb;
#else /* CONFIG_EXCLAVES */
    return NULL;
#endif /* CONFIG_EXCLAVES */
}

#if CONFIG_EXCLAVES

static void
bind_to_boot_core(void)
{
    /*
     * First ensure the boot cluster isn't powered down, which would
     * prevent the thread from running at all.
     */
    suspend_cluster_powerdown();
    const int cpu = ml_get_boot_cpu_number();
    processor_t processor = cpu_to_processor(cpu);
    assert3p(processor, !=, NULL);
    __assert_only processor_t old = thread_bind(processor);
    assert3p(old, ==, PROCESSOR_NULL);
    thread_block(THREAD_CONTINUE_NULL);
}

static void
unbind_from_boot_core(void)
{
    /* Unbind the thread from the boot CPU. */
    thread_bind(PROCESSOR_NULL);
    thread_block(THREAD_CONTINUE_NULL);
    resume_cluster_powerdown();
}

extern kern_return_t exclaves_boot_early(void);
kern_return_t
exclaves_boot_early(void)
{
    kern_return_t kr = KERN_FAILURE;
    uint64_t boot_info = 0;
    bool early_enter = false;

    lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);

    kr = exclaves_bootinfo(&boot_info, &early_enter);
    if (kr != KERN_SUCCESS) {
        exclaves_debug_printf(show_errors,
            "exclaves: Get bootinfo failed\n");
        return kr;
    }

    if (early_enter) {
        thread_t thread = current_thread();
        assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);

        bind_to_boot_core();

        disable_preemption_without_measurements();
        thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;

        kr = exclaves_enter();

        thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
        enable_preemption();

        unbind_from_boot_core();

        if (kr != KERN_SUCCESS) {
            exclaves_debug_printf(show_errors,
                "exclaves: early exclaves enter failed\n");
            if (kr == KERN_ABORTED) {
                panic("Unexpected ringgate panic status");
            }
            return kr;
        }
    }

    uint64_t xnuproxy_boot_info = 0;
    kr = exclaves_scheduler_init(boot_info, &xnuproxy_boot_info);
    if (kr != KERN_SUCCESS) {
        exclaves_debug_printf(show_errors,
            "exclaves: Init scheduler failed\n");
        return kr;
    }

    kr = exclaves_xnuproxy_init(xnuproxy_boot_info);
    if (kr != KERN_SUCCESS) {
        exclaves_debug_printf(show_errors,
            "XNU proxy setup failed\n");
        return KERN_FAILURE;
    }

    kr = exclaves_resource_init();
    if (kr != KERN_SUCCESS) {
        exclaves_debug_printf(show_errors,
            "exclaves: failed to initialize resources\n");
        return kr;
    }

    kr = exclaves_panic_thread_setup();
    if (kr != KERN_SUCCESS) {
        exclaves_debug_printf(show_errors,
            "XNU proxy panic thread setup failed\n");
        return KERN_FAILURE;
    }

    return KERN_SUCCESS;
}
#endif /* CONFIG_EXCLAVES */

#if CONFIG_EXCLAVES
static struct XrtHosted_Callbacks *exclaves_callbacks = NULL;
#endif /* CONFIG_EXCLAVES */

void
exclaves_register_xrt_hosted_callbacks(struct XrtHosted_Callbacks *callbacks)
{
#if CONFIG_EXCLAVES
    if (exclaves_callbacks == NULL) {
        exclaves_callbacks = callbacks;
    }
#else /* CONFIG_EXCLAVES */
#pragma unused(callbacks)
#endif /* CONFIG_EXCLAVES */
}

void
exclaves_update_timebase(exclaves_clock_type_t type, uint64_t offset)
{
    assert(
        type == EXCLAVES_CLOCK_CONTINUOUS ||
        type == EXCLAVES_CLOCK_ABSOLUTE);
#if CONFIG_EXCLAVES
    exclaves_clock_t *clock = &exclaves_clock[type];
    uint64_t latest_offset = os_atomic_load(&clock->a_u64.latest_offset, relaxed);
    while (latest_offset != offset) {
        /*
         * Update the latest offset with the new offset. If this fails,
         * then a concurrent update occurred and our offset may be stale.
         */
        if (os_atomic_cmpxchgv(&clock->a_u64.latest_offset, latest_offset,
            offset, &latest_offset, relaxed)) {
            break;
        }
    }
#else
#pragma unused(type, offset)
#endif /* CONFIG_EXCLAVES */
}

/* -------------------------------------------------------------------------- */

#pragma mark exclaves ipc internals

#if CONFIG_EXCLAVES

static kern_return_t
exclaves_endpoint_call_internal(__unused ipc_port_t port,
    exclaves_id_t endpoint_id)
{
    kern_return_t kr = KERN_SUCCESS;

    assert(port == IPC_PORT_NULL);

    kr = exclaves_xnuproxy_endpoint_call(endpoint_id);

    return kr;
}

/* -------------------------------------------------------------------------- */
#pragma mark secure kernel communication

/** Save SME state before entering exclaves. */
static bool
exclaves_save_matrix_state(void)
{
    bool saved = false;
#if HAS_ARM_FEAT_SME
    /* Save only the ZA/ZT0 state. SPTM will save/restore TPIDR2. */
    if (arm_sme_version() > 0 && !!(__builtin_arm_rsr64("SVCR") & SVCR_ZA)) {
        arm_sme_saved_state_t *sme_state = machine_thread_get_sme_state(current_thread());
        arm_save_sme_za_zt0(&sme_state->context, sme_state->svl_b);
        asm volatile ("smstop za");
        saved = true;
    }
#endif /* HAS_ARM_FEAT_SME */
    return saved;
}

static void
exclaves_restore_matrix_state(bool did_save_sme __unused)
{
#if HAS_ARM_FEAT_SME
    if (did_save_sme) {
        arm_sme_saved_state_t *sme_state = machine_thread_get_sme_state(current_thread());
        asm volatile ("smstart za");
        arm_load_sme_za_zt0(&sme_state->context, sme_state->svl_b);
    }
#endif /* HAS_ARM_FEAT_SME */
}

/* ringgate entry endpoints */
enum {
    RINGGATE_EP_ENTER,
    RINGGATE_EP_INFO
};

/* ringgate entry status codes */
enum {
    RINGGATE_STATUS_SUCCESS,
    RINGGATE_STATUS_ERROR,
    RINGGATE_STATUS_PANIC, /* RINGGATE_EP_ENTER: Another core panicked */
};

OS_NOINLINE
static kern_return_t
exclaves_enter(void)
{
    uint32_t endpoint = RINGGATE_EP_ENTER;
    uint64_t result = RINGGATE_STATUS_ERROR;

    sptm_call_regs_t regs = { };

    thread_t thread = current_thread();

    /*
     * Should never re-enter exclaves.
     */
    if ((thread->th_exclaves_state & TH_EXCLAVES_UPCALL) != 0 ||
        (thread->th_exclaves_state & TH_EXCLAVES_SCHEDULER_REQUEST) != 0) {
        panic("attempt to re-enter exclaves");
    }

    /*
     * Must have one (and only one) of the flags set to enter exclaves.
     */
    __assert_only const thread_exclaves_state_flags_t mask = (
        TH_EXCLAVES_RPC |
        TH_EXCLAVES_XNUPROXY |
        TH_EXCLAVES_SCHEDULER_CALL |
        TH_EXCLAVES_RESUME_PANIC_THREAD);
    assert3u(thread->th_exclaves_state & mask, !=, 0);
    assert3u(thread->th_exclaves_intstate & TH_EXCLAVES_EXECUTION, ==, 0);

    /*
     * Save any SME matrix state before entering exclaves.
     */
    bool did_save_sme = exclaves_save_matrix_state();

#if MACH_ASSERT
    /*
     * Set the ast to check that the thread doesn't return to userspace
     * while in an RPC or XNUPROXY call.
     */
    act_set_debug_assert();
#endif /* MACH_ASSERT */

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
        | DBG_FUNC_START);

    recount_enter_secure();

    /*
     * xnu_return_to_gl2 relies on this flag being present to correctly
     * return to SK from interrupts xnu handles on behalf of SK.
     */
    thread->th_exclaves_intstate |= TH_EXCLAVES_EXECUTION;

    /*
     * Bracket with labels so stackshot can determine where exclaves are
     * entered from xnu.
     */
    __asm__ volatile (
        "EXCLAVES_ENTRY_START:\n\t"
    );
    result = sk_enter(endpoint, &regs);
    __asm__ volatile (
        "EXCLAVES_ENTRY_END:\n\t"
    );

    thread->th_exclaves_intstate &= ~TH_EXCLAVES_EXECUTION;

    recount_leave_secure();

#if CONFIG_SPTM
    /**
     * SPTM will return here with debug exceptions disabled (MDSCR_{KDE,MDE} == {0,0})
     * but SK might have clobbered individual breakpoints, etc. Invalidate the current CPU
     * debug state, forcing a reload on the next return to user mode.
     */
    if (__improbable(getCpuDatap()->cpu_user_debug != NULL)) {
        arm_debug_set(NULL);
    }
#endif /* CONFIG_SPTM */

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES, MACH_EXCLAVES_SWITCH)
        | DBG_FUNC_END);

    /*
     * Restore SME matrix state, if it existed.
     */
    exclaves_restore_matrix_state(did_save_sme);

    switch (result) {
    case RINGGATE_STATUS_SUCCESS:
        return KERN_SUCCESS;
    case RINGGATE_STATUS_ERROR:
        return KERN_FAILURE;
    case RINGGATE_STATUS_PANIC:
        return KERN_ABORTED;
    default:
        assertf(false, "Unknown ringgate status %llu", result);
        __builtin_trap();
    }
}


/*
 * A bit in the lower byte of the value returned by RINGGATE_EP_INFO. If set,
 * it indicates that we should immediately enter the ringgate once in order
 * to allow the scheduler to perform early boot initialisation.
 */
#define EARLY_RINGGATE_ENTER 2

OS_NOINLINE
static kern_return_t
exclaves_bootinfo(uint64_t *out_boot_info, bool *early_enter)
{
    uint32_t endpoint = RINGGATE_EP_INFO;
    uint64_t result = RINGGATE_STATUS_ERROR;

    sptm_call_regs_t regs = { };

    recount_enter_secure();
    result = sk_enter(endpoint, &regs);
    recount_leave_secure();
    if (result == RINGGATE_STATUS_ERROR) {
        return KERN_FAILURE;
    }

    *early_enter = (result & EARLY_RINGGATE_ENTER) != 0;
    *out_boot_info = result & ~EARLY_RINGGATE_ENTER;

    return KERN_SUCCESS;
}

/* -------------------------------------------------------------------------- */

#pragma mark exclaves scheduler communication

static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_request);
static XrtHosted_Buffer_t * PERCPU_DATA(exclaves_response);

static void
exclaves_init_multicore(void)
{
    XrtHosted_Buffer_t **req, **res;

    exclaves_wait_for_cpu_init();

    exclaves_debug_printf(show_progress,
        "Using MPIDR for exclave scheduler core IDs\n");

    /*
     * Match the hardwareID to the physical ID and stash the pointers to the
     * request/response buffers in per-cpu data for quick access.
     */
    size_t core_count = exclaves_callbacks->v1.cores();
    for (size_t i = 0; i < core_count; i++) {
        const XrtHosted_Core_t *core = exclaves_callbacks->v1.core(i);
        uint32_t dt_phys_id = (uint32_t)core->v2.hardwareId;

        percpu_foreach(cpu_data, cpu_data) {
            if (cpu_data->cpu_phys_id != dt_phys_id) {
                continue;
            }
            req = PERCPU_GET_RELATIVE(exclaves_request, cpu_data, cpu_data);
            *req = exclaves_callbacks->v1.Core.request(i);

            res = PERCPU_GET_RELATIVE(exclaves_response, cpu_data, cpu_data);
            *res = exclaves_callbacks->v1.Core.response(i);

            break;
        }
    }
}

static kern_return_t
exclaves_scheduler_init(uint64_t boot_info, uint64_t *xnuproxy_boot_info)
{
    kern_return_t kr = KERN_SUCCESS;
    XrtHosted_Error_t hosted_error;

    lck_mtx_assert(&exclaves_boot_lock, LCK_MTX_ASSERT_OWNED);

    if (!pmap_valid_address(boot_info)) {
        exclaves_debug_printf(show_errors,
            "exclaves: %s: 0x%012llx\n",
            "Invalid root physical address",
            boot_info);
        return KERN_FAILURE;
    }

    if (exclaves_callbacks == NULL) {
        exclaves_debug_printf(show_errors,
            "exclaves: Callbacks not registered\n");
        return KERN_FAILURE;
    }

    /* Initialise XrtHostedXnu kext */
    kr = exclaves_hosted_error(
        exclaves_callbacks->v1.init(
            XrtHosted_Version_current,
            phystokv(boot_info),
            &hosted_error),
        &hosted_error);
    if (kr != KERN_SUCCESS) {
        return kr;
    }

    /* Record aperture addresses in buffer */
    size_t frames = exclaves_callbacks->v1.frames();
    XrtHosted_Mapped_t **pages = zalloc_permanent(
        frames * sizeof(XrtHosted_Mapped_t *),
        ZALIGN(XrtHosted_Mapped_t *));
    size_t index = 0;
    uint64_t phys = boot_info;
    while (index < frames) {
        if (!pmap_valid_address(phys)) {
            exclaves_debug_printf(show_errors,
                "exclaves: %s: 0x%012llx\n",
                "Invalid shared physical address",
                phys);
            return KERN_FAILURE;
        }
        pages[index] = (XrtHosted_Mapped_t *)phystokv(phys);
        kr = exclaves_hosted_error(
            exclaves_callbacks->v1.nextPhys(
                pages[index],
                &index,
                &phys,
                &hosted_error),
            &hosted_error);
        if (kr != KERN_SUCCESS) {
            return kr;
        }
    }

    /* Initialise the mapped region */
    exclaves_callbacks->v1.setMapping(
        XrtHosted_Region_scattered(frames, pages));

    /* Boot the scheduler. */
    kr = exclaves_scheduler_boot();
    if (kr != KERN_SUCCESS) {
        return kr;
    }

    XrtHosted_Global_t *global = exclaves_callbacks->v1.global();

    /* Only support MPIDR multicore. */
    if (global->v2.smpStatus != XrtHosted_SmpStatus_MulticoreMpidr) {
        exclaves_debug_printf(show_errors,
            "exclaves: exclaves scheduler doesn't support multicore\n");
        return KERN_FAILURE;
    }
    exclaves_init_multicore();

    /* Initialise the XNU proxy */
    if (!pmap_valid_address(global->v1.proxyInit)) {
        exclaves_debug_printf(show_errors,
            "exclaves: %s: 0x%012llx\n",
            "Invalid xnu proxy physical address",
            global->v1.proxyInit);
        return KERN_FAILURE;
    }
    *xnuproxy_boot_info = global->v1.proxyInit;

    return kr;
}

#if EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
#define exclaves_scheduler_debug_save_buffer(_buf) \
    XrtHosted_Buffer_t _buf##_copy = *(_buf)
#define exclaves_scheduler_debug_show_request_response(_request_buf, \
        _response_buf) ({ \
    if (exclaves_debug_enabled(show_scheduler_request_response)) { \
        printf("exclaves: Scheduler request = %p\n", _request_buf); \
        printf("exclaves: Scheduler request.tag = 0x%04llx\n", \
            _request_buf##_copy.tag); \
        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
            printf("exclaves: Scheduler request.arguments[%02zu] = " \
                "0x%04llx\n", arg, \
                _request_buf##_copy.arguments[arg]); \
        } \
        printf("exclaves: Scheduler response = %p\n", _response_buf); \
        printf("exclaves: Scheduler response.tag = 0x%04llx\n", \
            _response_buf##_copy.tag); \
        for (size_t arg = 0; arg < XrtHosted_Buffer_args; arg += 1) { \
            printf("exclaves: Scheduler response.arguments[%02zu] = " \
                "0x%04llx\n", arg, \
                _response_buf##_copy.arguments[arg]); \
        } \
    }})
#else // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE
#define exclaves_scheduler_debug_save_buffer(_buf) ({ })
#define exclaves_scheduler_debug_show_request_response(_request_buf, \
        _response_buf) ({ })
#endif // EXCLAVES_ENABLE_SHOW_SCHEDULER_REQUEST_RESPONSE

static void
request_trace_start(const XrtHosted_Request_t *request)
{
    switch (request->tag) {
    case XrtHosted_Request_ResumeWithHostId:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_START,
            request->ResumeWithHostId.hostId, request->ResumeWithHostId.thread);
        break;

    case XrtHosted_Request_InterruptWithHostId:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_START,
            request->InterruptWithHostId.hostId, request->InterruptWithHostId.thread);
        break;

    case XrtHosted_Request_UpdateTimerOffset:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_START,
            request->UpdateTimerOffset.timer, request->UpdateTimerOffset.offset);
        break;

    case XrtHosted_Request_BootExclaves:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_START);
        break;

    case XrtHosted_Request_PmmEarlyAllocResponse:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_START,
            request->PmmEarlyAllocResponse.a);
        break;

    case XrtHosted_Request_WatchdogPanic:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_START);
        break;

    default:
        panic("Unsupported exclaves scheduler request: %d", request->tag);
    }
}

static void
request_trace_end(const XrtHosted_Request_t *request)
{
    switch (request->tag) {
    case XrtHosted_Request_ResumeWithHostId:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_RESUME_WITH_HOSTID) | DBG_FUNC_END);
        break;

    case XrtHosted_Request_InterruptWithHostId:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_INTERRUPT_WITH_HOSTID) | DBG_FUNC_END);
        break;

    case XrtHosted_Request_UpdateTimerOffset:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_UPDATE_TIMER_OFFSET) | DBG_FUNC_END);
        break;

    case XrtHosted_Request_BootExclaves:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_BOOT_EXCLAVES) | DBG_FUNC_END);
        break;

    case XrtHosted_Request_PmmEarlyAllocResponse:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_PMM_EARLY_ALLOC_RESPONSE) | DBG_FUNC_END);
        break;

    case XrtHosted_Request_WatchdogPanic:
        KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
            MACH_EXCLAVES_SCHEDULER_REQ_WATCHDOG_PANIC) | DBG_FUNC_END);
        break;

    default:
        panic("Unsupported exclaves scheduler request: %d", request->tag);
    }
}

__attribute__((always_inline))
static kern_return_t
exclaves_scheduler_request(const XrtHosted_Request_t *request,
    XrtHosted_Response_t *response)
{
    assert3u(request->tag, >, XrtHosted_Request_Invalid);
    assert3u(request->tag, <, XrtHosted_Request_Limit);

    kern_return_t kr = KERN_SUCCESS;
    bool istate;

    /*
     * Disable preemption and interrupts as the xrt hosted scheduler data
     * structures are per-core.
     * Preemption disabled and interrupt disabled timeouts are disabled for
     * now until we can co-ordinate the measurements with the exclaves side
     * of things.
     */
    istate = ml_set_interrupts_enabled_with_debug(false, false);

    /* Interrupts should have been enabled entering this function. */
    assert(istate);

    /*
     * This needs to be done with interrupts disabled, otherwise stackshot
     * could mark the thread blocked just after this function exits and a
     * thread marked as AST blocked would go into exclaves.
     */

    while ((os_atomic_load(&current_thread()->th_exclaves_inspection_state,
        relaxed) & ~TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
        /* Enable interrupts */
        (void) ml_set_interrupts_enabled_with_debug(true, false);

        /* Wait until the thread is collected on exclaves side */
        exclaves_inspection_check_ast();

        /* Disable interrupts and preemption before next AST check */
        ml_set_interrupts_enabled_with_debug(false, false);
    }
    /* Interrupts are disabled and exclaves_stackshot_ast is clean */

    disable_preemption_without_measurements();

    /*
     * Don't enter with a stale clock (unless updating the clock or
     * panicking).
     */
    if (request->tag != XrtHosted_Request_UpdateTimerOffset &&
        request->tag != XrtHosted_Request_WatchdogPanic &&
        exclaves_clocks_need_update()) {
        enable_preemption();
        (void) ml_set_interrupts_enabled_with_debug(istate, false);
        return KERN_POLICY_LIMIT;
    }

    XrtHosted_Buffer_t *request_buf = *PERCPU_GET(exclaves_request);
    assert3p(request_buf, !=, NULL);

    request_trace_start(request);

    exclaves_callbacks->v1.Request.encode(request_buf, request);
    exclaves_scheduler_debug_save_buffer(request_buf);

    kr = exclaves_enter();

    /* The response may have come back on a different core. */
    XrtHosted_Buffer_t *response_buf = *PERCPU_GET(exclaves_response);
    assert3p(response_buf, !=, NULL);

    exclaves_scheduler_debug_save_buffer(response_buf);
    exclaves_callbacks->v1.Response.decode(response_buf, response);

    request_trace_end(request);

    enable_preemption();
    (void) ml_set_interrupts_enabled_with_debug(istate, false);

    exclaves_scheduler_debug_show_request_response(request_buf, response_buf);

    if (kr == KERN_ABORTED) {
        /*
         * RINGGATE_EP_ENTER returned RINGGATE_STATUS_PANIC, indicating
         * that another core has panicked in exclaves and is on the way
         * to calling xnu panic() via SPTM, so wait here for that to
         * happen.
         */
        exclaves_wait_for_panic();
    }

    return kr;
}

OS_NORETURN OS_NOINLINE
static void
exclaves_wait_for_panic(void)
{
    assert_wait_timeout((event_t)exclaves_wait_for_panic, THREAD_UNINT, 1,
        NSEC_PER_SEC);
    wait_result_t wr = thread_block(THREAD_CONTINUE_NULL);
    panic("Unexpected wait for panic result: %d", wr);
}

static kern_return_t
handle_response_yield(bool early, __assert_only Exclaves_L4_Word_t scid,
    const XrtHosted_Yield_t *yield)
{
    Exclaves_L4_Word_t responding_scid = yield->thread;
    Exclaves_L4_Word_t yielded_to_scid = yield->yieldTo;
    __assert_only ctid_t ctid = thread_get_ctid(current_thread());

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: %s scid 0x%lx yielded to scid 0x%lx\n",
        early ? "(early yield)" : "", responding_scid, yielded_to_scid);
    /*
     * TODO: 1. remember yielding scid if it isn't the xnu proxy's
     * th_exclaves_scheduling_context_id so we know to resume it later
     * 2. translate yield_to to thread_switch()-style handoff.
     */
    if (!early) {
        assert3u(responding_scid, ==, scid);
        assert3u(yield->threadHostId, ==, ctid);
    }

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_YIELD), yielded_to_scid, early);

    return KERN_SUCCESS;
}

static kern_return_t
handle_response_spawned(__assert_only Exclaves_L4_Word_t scid,
    const XrtHosted_Spawned_t *spawned)
{
    Exclaves_L4_Word_t responding_scid = spawned->thread;
    thread_t thread = current_thread();
    __assert_only ctid_t ctid = thread_get_ctid(thread);

    /*
     * There are only a few places an exclaves thread is expected to be
     * spawned. Any other cases are considered errors.
     */
    if ((thread->th_exclaves_state & TH_EXCLAVES_SPAWN_EXPECTED) == 0) {
        exclaves_debug_printf(show_errors,
            "exclaves: Scheduler: Unexpected thread spawn: "
            "scid 0x%lx spawned scid 0x%llx\n",
            responding_scid, spawned->spawned);
        return KERN_FAILURE;
    }

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: scid 0x%lx spawned scid 0x%lx\n",
        responding_scid, (unsigned long)spawned->spawned);
    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_SPAWNED), spawned->spawned);

    assert3u(responding_scid, ==, scid);
    assert3u(spawned->threadHostId, ==, ctid);

    return KERN_SUCCESS;
}

static kern_return_t
handle_response_terminated(const XrtHosted_Terminated_t *terminated)
{
    Exclaves_L4_Word_t responding_scid = terminated->thread;
    __assert_only ctid_t ctid = thread_get_ctid(current_thread());

    exclaves_debug_printf(show_errors,
        "exclaves: Scheduler: Unexpected thread terminate: "
        "scid 0x%lx terminated scid 0x%llx\n", responding_scid,
        terminated->terminated);
    assert3u(terminated->threadHostId, ==, ctid);

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_TERMINATED),
        terminated->terminated);

    return KERN_TERMINATED;
}

static kern_return_t
handle_response_wait(const XrtHosted_Wait_t *wait)
{
    Exclaves_L4_Word_t responding_scid = wait->waiter;
    thread_t thread = current_thread();
    __assert_only ctid_t ctid = thread_get_ctid(thread);

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: Wait: "
        "scid 0x%lx wait on owner scid 0x%llx, queue id 0x%llx, "
        "epoch 0x%llx\n", responding_scid, wait->owner,
        wait->queueId, wait->epoch);
    assert3u(wait->waiterHostId, ==, ctid);

    /* The exclaves inspection thread should never wait. */
    if ((os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) &
        TH_EXCLAVES_INSPECTION_NOINSPECT) != 0) {
        panic("Exclaves inspection thread tried to wait");
    }

    /*
     * Note, "owner" may not be safe to access directly; for example,
     * the thread may have exited and been freed. esync_wait will
     * only access it under a lock if the epoch is fresh, thus
     * ensuring safety.
     */
    const ctid_t owner = (ctid_t)wait->ownerHostId;
    const XrtHosted_Word_t id = wait->queueId;
    const uint64_t epoch = wait->epoch;

    wait_interrupt_t interruptible;
    esync_policy_t policy;

    switch (wait->interruptible) {
    case XrtHosted_Interruptibility_None:
        interruptible = THREAD_UNINT;
        policy = ESYNC_POLICY_KERNEL;
        break;

    case XrtHosted_Interruptibility_Voluntary:
        interruptible = THREAD_INTERRUPTIBLE;
        policy = ESYNC_POLICY_KERNEL;
        break;

    case XrtHosted_Interruptibility_DynamicQueue:
        interruptible = THREAD_INTERRUPTIBLE;
        policy = ESYNC_POLICY_USER;
        break;

    default:
        panic("Unknown exclaves interruptibility: %llu",
            wait->interruptible);
    }

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_START, id, epoch, owner,
        wait->interruptible);
    const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
        exclaves_get_queue_counter(id), owner, policy, interruptible);
    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAIT) | DBG_FUNC_END, wr);

    switch (wr) {
    case THREAD_INTERRUPTED:
        return KERN_ABORTED;

    case THREAD_NOT_WAITING:
    case THREAD_AWAKENED:
        return KERN_SUCCESS;

    default:
        panic("Unexpected wait result from esync_wait: %d", wr);
    }
}

static kern_return_t
handle_response_wake(const XrtHosted_Wake_t *wake)
{
    Exclaves_L4_Word_t responding_scid = wake->waker;
    __assert_only ctid_t ctid = thread_get_ctid(current_thread());

    exclaves_debug_printf(show_progress,
        "exclaves: Scheduler: Wake: "
        "scid 0x%lx wake of queue id 0x%llx, "
        "epoch 0x%llx, all 0x%llx\n", responding_scid,
        wake->queueId, wake->epoch, wake->all);
    assert3u(wake->wakerHostId, ==, ctid);

    const XrtHosted_Word_t id = wake->queueId;
    const uint64_t epoch = wake->epoch;
    const esync_wake_mode_t mode = wake->all != 0 ?
        ESYNC_WAKE_ALL : ESYNC_WAKE_ONE;

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, 0, mode);

    kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
        exclaves_get_queue_counter(id), mode, 0);

    KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
        MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
        kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);

    return KERN_SUCCESS;
}
1838
1839 static kern_return_t
1840 handle_response_wake_with_owner(const XrtHosted_WakeWithOwner_t *wake)
1841 {
1842 Exclaves_L4_Word_t responding_scid = wake->waker;
1843 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1844
1845 exclaves_debug_printf(show_progress,
1846 "exclaves: Scheduler: WakeWithOwner: "
1847 "scid 0x%lx wake of queue id 0x%llx, "
1848 "epoch 0x%llx, owner 0x%llx\n", responding_scid,
1849 wake->queueId, wake->epoch,
1850 wake->owner);
1851
1852 assert3u(wake->wakerHostId, ==, ctid);
1853
1854 const ctid_t owner = (ctid_t)wake->ownerHostId;
1855 const XrtHosted_Word_t id = wake->queueId;
1856 const uint64_t epoch = wake->epoch;
1857
1858 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1859 MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_START, id, epoch, owner,
1860 ESYNC_WAKE_ONE_WITH_OWNER);
1861
1862 kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1863 exclaves_get_queue_counter(id), ESYNC_WAKE_ONE_WITH_OWNER, owner);
1864
1865 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1866 MACH_EXCLAVES_SCHEDULER_WAKE) | DBG_FUNC_END,
1867 kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1868
1869 return KERN_SUCCESS;
1870 }
1871
1872 static kern_return_t
1873 handle_response_panic_wait(const XrtHosted_PanicWait_t *panic_wait)
1874 {
1875 Exclaves_L4_Word_t panic_thread_scid = panic_wait->handler;
1876 __assert_only thread_t thread = current_thread();
1877
1878 exclaves_debug_printf(show_progress,
1879 "exclaves: Scheduler: PanicWait: "
1880 "Panic thread SCID %lx\n",
1881 panic_thread_scid);
1882
1883 assert3u(panic_thread_scid, ==, thread->th_exclaves_ipc_ctx.scid);
1884
1885 exclaves_panic_thread_wait();
1886
1887 /* Not reached. */
1888 return KERN_SUCCESS;
1889 }
1890
1891 static kern_return_t
1892 handle_response_suspended(const XrtHosted_Suspended_t *suspended)
1893 {
1894 Exclaves_L4_Word_t responding_scid = suspended->suspended;
1895 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1896
1897 exclaves_debug_printf(show_progress,
1898 "exclaves: Scheduler: Suspended: "
1899 "scid 0x%lx epoch 0x%llx\n", responding_scid, suspended->epoch);
1900 assert3u(suspended->suspendedHostId, ==, ctid);
1901
1902 const uint64_t id = suspended->suspended;
1903 const uint64_t epoch = suspended->epoch;
1904
1905 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1906 MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_START, id, epoch);
1907
1908 const wait_result_t wr = esync_wait(ESYNC_SPACE_EXCLAVES_T, id, epoch,
1909 exclaves_get_thread_counter(id), 0, ESYNC_POLICY_KERNEL, THREAD_UNINT);
1910
1911 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1912 MACH_EXCLAVES_SCHEDULER_SUSPENDED) | DBG_FUNC_END, wr);
1913
1914 switch (wr) {
1915 case THREAD_INTERRUPTED:
1916 return KERN_ABORTED;
1917
1918 case THREAD_NOT_WAITING:
1919 case THREAD_AWAKENED:
1920 return KERN_SUCCESS;
1921
1922 default:
1923 panic("Unexpected wait result from esync_wait: %d", wr);
1924 }
1925 }
1926
1927 static kern_return_t
1928 handle_response_resumed(const XrtHosted_Resumed_t *resumed)
1929 {
1930 Exclaves_L4_Word_t responding_scid = resumed->thread;
1931 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1932
1933 exclaves_debug_printf(show_progress,
1934 "exclaves: Scheduler: Resumed: scid 0x%lx resume of scid 0x%llx "
1935 "(ctid: 0x%llx), epoch 0x%llx\n", responding_scid, resumed->resumed,
1936 resumed->resumedHostId, resumed->epoch);
1937 assert3u(resumed->threadHostId, ==, ctid);
1938
1939 const ctid_t target = (ctid_t)resumed->resumedHostId;
1940 const XrtHosted_Word_t id = resumed->resumed;
1941 const uint64_t epoch = resumed->epoch;
1942
1943 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1944 MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_START, id, epoch,
1945 target);
1946
1947 kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_T, id, epoch,
1948 exclaves_get_thread_counter(id), ESYNC_WAKE_THREAD, target);
1949
1950 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1951 MACH_EXCLAVES_SCHEDULER_RESUMED) | DBG_FUNC_END,
1952 kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1953
1954 return KERN_SUCCESS;
1955 }
1956
1957 static kern_return_t
1958 handle_response_interrupted(const XrtHosted_Interrupted_t *interrupted)
1959 {
1960 Exclaves_L4_Word_t responding_scid = interrupted->thread;
1961 __assert_only ctid_t ctid = thread_get_ctid(current_thread());
1962
1963 exclaves_debug_printf(show_progress,
1964 "exclaves: Scheduler: Interrupted: "
1965 "scid 0x%lx interrupt on queue id 0x%llx, "
1966 "epoch 0x%llx, target 0x%llx\n", responding_scid,
1967 interrupted->queueId, interrupted->epoch,
1968 interrupted->interruptedHostId);
1969 assert3u(interrupted->threadHostId, ==, ctid);
1970
1971 const ctid_t target = (ctid_t)interrupted->interruptedHostId;
1972 const XrtHosted_Word_t id = interrupted->queueId;
1973 const uint64_t epoch = interrupted->epoch;
1974
1975 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1976 MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_START, id, epoch,
1977 target);
1978
1979 kern_return_t kr = esync_wake(ESYNC_SPACE_EXCLAVES_Q, id, epoch,
1980 exclaves_get_queue_counter(id), ESYNC_WAKE_THREAD, target);
1981
1982 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1983 MACH_EXCLAVES_SCHEDULER_INTERRUPTED) | DBG_FUNC_END,
1984 kr == KERN_SUCCESS ? THREAD_AWAKENED : THREAD_NOT_WAITING);
1985
1986 return KERN_SUCCESS;
1987 }
1988
1989 static kern_return_t
1990 handle_response_nothing_scheduled(
1991 __unused const XrtHosted_NothingScheduled_t *nothing_scheduled)
1992 {
1993 exclaves_debug_printf(show_progress,
1994 "exclaves: Scheduler: nothing scheduled\n");
1995
1996 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
1997 MACH_EXCLAVES_SCHEDULER_NOTHING_SCHEDULED));
1998
1999 return KERN_SUCCESS;
2000 }
2001
2002 static kern_return_t
2003 handle_response_all_exclaves_booted(
2004 __unused const XrtHosted_AllExclavesBooted_t *all_exclaves_booted)
2005 {
2006 exclaves_debug_printf(show_progress,
2007 "exclaves: scheduler: all exclaves booted\n");
2008
2009 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
2010 MACH_EXCLAVES_SCHEDULER_ALL_EXCLAVES_BOOTED));
2011
2012 return KERN_SUCCESS;
2013 }
2014
2015 /*
2016 * The Early Alloc response asks for npages to be allocated. The list of
2017 * allocated pages is written into the first allocated page in the form of 32-bit
2018 * page numbers. The physical address of the first page is passed back to the
2019 * exclaves scheduler as part of the next request.
2020 */
2021 static kern_return_t
2022 handle_response_pmm_early_alloc(const XrtHosted_PmmEarlyAlloc_t *pmm_early_alloc,
2023 uint64_t *pagelist_pa)
2024 {
2025 const uint32_t npages = (uint32_t)pmm_early_alloc->a;
2026 const uint64_t flags = pmm_early_alloc->b;
2027
2028 exclaves_memory_pagekind_t kind = EXCLAVES_MEMORY_PAGEKIND_ROOTDOMAIN;
2029 exclaves_memory_page_flags_t alloc_flags = EXCLAVES_MEMORY_PAGE_FLAGS_NONE;
2030
2031 exclaves_debug_printf(show_progress,
2032 "exclaves: scheduler: pmm early alloc, npages: %u, flags: %llu\n",
2033 npages, flags);
2034
2035 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
2036 MACH_EXCLAVES_SCHEDULER_EARLY_ALLOC), npages, flags);
2037
2038 if (npages == 0) {
2039 return KERN_SUCCESS;
2040 }
2041
2042 if (npages > EXCLAVES_MEMORY_MAX_REQUEST) {
2043 exclaves_debug_printf(show_errors,
2044 "exclaves: request to allocate too many pages: %u\n",
2045 npages);
2046 return KERN_NO_SPACE;
2047 }
2048
2049
2050 /*
2051 * As npages must be relatively small (<= EXCLAVES_MEMORY_MAX_REQUEST),
2052 * stack allocation is sufficient and fast. If
2053 * EXCLAVES_MEMORY_MAX_REQUEST gets large, this should probably be moved
2054 * to the heap.
2055 */
2056 uint32_t page[EXCLAVES_MEMORY_MAX_REQUEST];
2057 exclaves_memory_alloc(npages, page, kind, alloc_flags);
2058
2059 /* Now copy the list of pages into the first page. */
2060 uint64_t first_page_pa = ptoa(page[0]);
2061 #if 0
2062 // TODO: move this copy to before the SPTM retype.
2063 uint32_t *first_page = (uint32_t *)phystokv(first_page_pa);
2064 for (int i = 0; i < npages; i++) {
2065 first_page[i] = page[i];
2066 }
2067 #endif
2068
2069 *pagelist_pa = first_page_pa;
2070 return KERN_SUCCESS;
2071 }
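
/*
 * Illustrative sketch (not compiled): the page-list encoding described
 * above. The first allocated page doubles as the list itself: entry i holds
 * the 32-bit page number of allocation i (entry 0 names the list page), and
 * only the physical address of that first page is handed back to the
 * scheduler. This mirrors the disabled copy loop in
 * handle_response_pmm_early_alloc(); the helper name is hypothetical.
 */
#if 0
static void
example_write_pagelist(const uint32_t *page, uint32_t npages)
{
	uint32_t *first_page = (uint32_t *)phystokv(ptoa(page[0]));
	for (uint32_t i = 0; i < npages; i++) {
		first_page[i] = page[i];
	}
}
#endif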
2072
2073 static void
2074 handle_response_watchdog_panic_complete(
2075 __unused const XrtHosted_WatchdogPanicComplete_t *panic_complete)
2076 {
2077 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
2078 MACH_EXCLAVES_SCHEDULER_WATCHDOG_PANIC_COMPLETE));
2079 }
2080
2081 OS_NORETURN
2082 static void
2083 handle_response_panicking(
2084 __unused const XrtHosted_Panicking_t *panicking)
2085 {
2086 KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCLAVES_SCHEDULER,
2087 MACH_EXCLAVES_SCHEDULER_PANICKING));
2088
2089 exclaves_wait_for_panic();
2090
2091 /* Not reached. */
2092 }
2093
2094 static inline bool
2095 exclaves_clocks_need_update(void)
2096 {
2097 const exclaves_clock_type_t clocks[] = {
2098 EXCLAVES_CLOCK_ABSOLUTE,
2099 EXCLAVES_CLOCK_CONTINUOUS
2100 };
2101
2102 for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
2103 const exclaves_clock_t *clock = &exclaves_clock[i];
2104 exclaves_clock_t local = {
2105 .u128 = os_atomic_load(&clock->a_u128, relaxed),
2106 };
2107
2108 if (local.u64.sent_offset != local.u64.latest_offset) {
2109 return true;
2110 }
2111 }
2112
2113 return false;
2114 }
2115
2116 OS_NOINLINE
2117 static kern_return_t
2118 exclaves_clocks_update(void)
2119 {
2120 const exclaves_clock_type_t clocks[] = {
2121 EXCLAVES_CLOCK_ABSOLUTE,
2122 EXCLAVES_CLOCK_CONTINUOUS
2123 };
2124
2125 for (int i = 0; i < ARRAY_COUNT(clocks); i++) {
2126 exclaves_clock_t local;
2127 exclaves_clock_t *clock = &exclaves_clock[i];
2128
2129 local.u128 = os_atomic_load(&clock->a_u128, relaxed);
2130 while (local.u64.sent_offset != local.u64.latest_offset) {
2131 XrtHosted_Timer_t timer = i == EXCLAVES_CLOCK_ABSOLUTE ?
2132 XrtHosted_Timer_Absolute :
2133 XrtHosted_Timer_Continuous;
2134
2135 kern_return_t kr =
2136 exclaves_scheduler_request_update_timer(timer,
2137 local.u64.latest_offset);
2138 if (kr != KERN_SUCCESS) {
2139 return kr;
2140 }
2141
2142 /*
2143 * Swap the sent offset with the local latest offset. If
2144 * it fails, the sent offset will be reloaded.
2145 */
2146 os_atomic_cmpxchgv(&clock->a_u64.sent_offset,
2147 local.u64.sent_offset, local.u64.latest_offset,
2148 &local.u64.sent_offset, relaxed);
2149
2150 /*
2151 * Fetch the latest offset again, in case we are stale.
2152 */
2153 local.u64.latest_offset = os_atomic_load(
2154 &clock->a_u64.latest_offset, relaxed);
2155 }
2156 }
2157
2158 return KERN_SUCCESS;
2159 }
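
/*
 * Illustrative sketch (not compiled): the producer side of the offset
 * handshake above. A timekeeping path would publish a new offset by storing
 * to latest_offset alone; exclaves_clocks_need_update() then sees
 * sent_offset != latest_offset in a single 128-bit snapshot, and
 * exclaves_clocks_update() forwards the value and advances sent_offset via
 * cmpxchg. The helper name is hypothetical.
 */
#if 0
static void
example_publish_clock_offset(exclaves_clock_t *clock, uint64_t new_offset)
{
	/*
	 * Relaxed ordering suffices: the consumer re-reads latest_offset
	 * after every cmpxchg on sent_offset, so a racing store costs at
	 * most one extra loop iteration, never a missed update.
	 */
	os_atomic_store(&clock->a_u64.latest_offset, new_offset, relaxed);
}
#endif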
2160
2161 static kern_return_t
2162 exclaves_scheduler_boot(void)
2163 {
2164 /* This must happen on the boot CPU - bind the thread. */
2165 bind_to_boot_core();
2166
2167 /*
2168 * Set the request/response buffers. These may be overridden later when
2169 * doing multicore setup.
2170 */
2171 *PERCPU_GET(exclaves_request) =
2172 exclaves_callbacks->v1.Core.request(XrtHosted_Core_bootIndex);
2173 *PERCPU_GET(exclaves_response) =
2174 exclaves_callbacks->v1.Core.response(XrtHosted_Core_bootIndex);
2175
2176 kern_return_t kr = exclaves_scheduler_request_boot();
2177
2178 unbind_from_boot_core();
2179
2180 return kr;
2181 }
2182
2183 static kern_return_t
2184 exclaves_scheduler_request_update_timer(XrtHosted_Timer_t timer,
2185 uint64_t offset)
2186 {
2187 thread_t thread = current_thread();
2188
2189 exclaves_debug_printf(show_progress,
2190 "exclaves: Scheduler: Request to update timer\n");
2191
2192 XrtHosted_Response_t response = {
2193 .tag = XrtHosted_Response_NothingScheduled,
2194 };
2195
2196 const XrtHosted_Request_t request = XrtHosted_Request_UpdateTimerOffsetMsg(
2197 .timer = timer,
2198 .offset = offset,
2199 );
2200
2201 thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
2202 kern_return_t kr = exclaves_scheduler_request(&request, &response);
2203 thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
2204
2205 switch (kr) {
2206 case KERN_SUCCESS:
2207 break;
2208
2209 case KERN_POLICY_LIMIT:
2210 /*
2211 * POLICY_LIMIT should only happen if a timer update was pending
2212 * (and thus should never happen when trying to update a timer).
2213 */
2214 panic("exclaves: timer update requested when updating timer");
2215
2216 default:
2217 exclaves_debug_printf(show_errors,
2218 "exclaves: scheduler request failed\n");
2219 return kr;
2220 }
2221
2222 thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;
2223
2224 switch (response.tag) {
2225 case XrtHosted_Response_NothingScheduled:
2226 kr = handle_response_nothing_scheduled(&response.NothingScheduled);
2227 break;
2228
2229 default:
2230 exclaves_debug_printf(show_errors, "exclaves: "
2231 "unexpected scheduler response when updating timer\n");
2232 kr = KERN_FAILURE;
2233 break;
2234 }
2235
2236 thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
2237
2238 return kr;
2239 }
2240
2241 static kern_return_t
2242 exclaves_scheduler_request_boot(void)
2243 {
2244 kern_return_t kr = KERN_FAILURE;
2245 thread_t thread = current_thread();
2246
2247 assert3u(thread->th_exclaves_state & TH_EXCLAVES_STATE_ANY, ==, 0);
2248
2249 exclaves_debug_printf(show_progress,
2250 "exclaves: Scheduler: Request to boot exclave\n");
2251
2252 XrtHosted_Response_t response = {
2253 .tag = XrtHosted_Response_Invalid,
2254 };
2255 uint64_t pagelist_pa = 0;
2256
2257 while (response.tag != XrtHosted_Response_AllExclavesBooted) {
2258 const XrtHosted_Request_t request = pagelist_pa != 0 ?
2259 XrtHosted_Request_PmmEarlyAllocResponseMsg(.a = pagelist_pa) :
2260 XrtHosted_Request_BootExclavesMsg();
2261 pagelist_pa = 0;
2262
2263 thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
2264 kr = exclaves_scheduler_request(&request, &response);
2265 thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
2266
2267 switch (kr) {
2268 case KERN_SUCCESS:
2269 break;
2270
2271 case KERN_POLICY_LIMIT:
2272 kr = exclaves_clocks_update();
2273 if (kr != KERN_SUCCESS) {
2274 return kr;
2275 }
2276 /*
2277 * Don't try to process the response - we just updated
2278 * the clock, so continue with the boot request.
2279 */
2280 continue;
2281
2282 default:
2283 exclaves_debug_printf(show_errors,
2284 "exclaves: scheduler request failed\n");
2285 return KERN_FAILURE;
2286 }
2287
2288 thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;
2289
2290 switch (response.tag) {
2291 case XrtHosted_Response_Yield:
2292 kr = handle_response_yield(true, 0, &response.Yield);
2293 break;
2294
2295 case XrtHosted_Response_NothingScheduled:
2296 kr = handle_response_nothing_scheduled(&response.NothingScheduled);
2297 break;
2298
2299 case XrtHosted_Response_AllExclavesBooted:
2300 kr = handle_response_all_exclaves_booted(&response.AllExclavesBooted);
2301 break;
2302
2303 case XrtHosted_Response_PmmEarlyAlloc:
2304 kr = handle_response_pmm_early_alloc(&response.PmmEarlyAlloc, &pagelist_pa);
2305 break;
2306
2307 case XrtHosted_Response_PanicBufferAddress:
2308 handle_response_panic_buffer_address(response.PanicBufferAddress.physical);
2309 break;
2310
2311 case XrtHosted_Response_Panicking:
2312 handle_response_panicking(&response.Panicking);
2313 /* Not reached. */
2314
2315 default:
2316 exclaves_debug_printf(show_errors,
2317 "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
2318 response.tag);
2319 kr = KERN_FAILURE;
2320 break;
2321 }
2322
2323 thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
2324
2325 if (kr != KERN_SUCCESS) {
2326 break;
2327 }
2328 }
2329
2330 return kr;
2331 }
2332
2333 OS_INLINE
2334 kern_return_t
2335 exclaves_scheduler_request_resume(const exclaves_ctx_t *ctx, bool interrupted)
2336 {
2337 thread_t thread = current_thread();
2338 const ctid_t ctid = thread_get_ctid(thread);
2339
2340 assert3u(thread->th_exclaves_state &
2341 (TH_EXCLAVES_RESUME_PANIC_THREAD | TH_EXCLAVES_RPC), !=, 0);
2342
2343 exclaves_debug_printf(show_progress,
2344 "exclaves: Scheduler: Request to resume scid 0x%lx\n", ctx->scid);
2345
2346 XrtHosted_Response_t response = {};
2347 const XrtHosted_Request_t request = interrupted ?
2348 XrtHosted_Request_InterruptWithHostIdMsg(
2349 .thread = ctx->scid,
2350 .hostId = ctid,
2351 ) :
2352 XrtHosted_Request_ResumeWithHostIdMsg(
2353 .thread = ctx->scid,
2354 .hostId = ctid,
2355 );
2356
2357 kern_return_t kr = exclaves_scheduler_request(&request, &response);
2358
2359 switch (kr) {
2360 case KERN_SUCCESS:
2361 break;
2362
2363 case KERN_POLICY_LIMIT:
2364 /*
2365 * Don't try to handle any response (as there isn't one), just
2366 * return to the caller, which will check MSG STATUS and re-enter
2367 * if necessary.
2368 */
2369 return exclaves_clocks_update();
2370
2371 default:
2372 exclaves_debug_printf(show_errors,
2373 "exclaves: scheduler request failed\n");
2374 break;
2375 }
2376
2377 if (kr != KERN_SUCCESS) {
2378 return kr;
2379 }
2380
2381 __asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_START:\n\t");
2382 thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;
2383
2384 switch (response.tag) {
2385 case XrtHosted_Response_Wait:
2386 kr = handle_response_wait(&response.Wait);
2387 break;
2388
2389 case XrtHosted_Response_Wake:
2390 kr = handle_response_wake(&response.Wake);
2391 break;
2392
2393 case XrtHosted_Response_Yield:
2394 kr = handle_response_yield(false, ctx->scid, &response.Yield);
2395 break;
2396
2397 case XrtHosted_Response_Spawned:
2398 kr = handle_response_spawned(ctx->scid, &response.Spawned);
2399 break;
2400
2401 case XrtHosted_Response_Terminated:
2402 kr = handle_response_terminated(&response.Terminated);
2403 break;
2404
2405 case XrtHosted_Response_WakeWithOwner:
2406 kr = handle_response_wake_with_owner(&response.WakeWithOwner);
2407 break;
2408
2409 case XrtHosted_Response_PanicWait:
2410 kr = handle_response_panic_wait(&response.PanicWait);
2411 break;
2412
2413 case XrtHosted_Response_Suspended:
2414 kr = handle_response_suspended(&response.Suspended);
2415 break;
2416
2417 case XrtHosted_Response_Resumed:
2418 kr = handle_response_resumed(&response.Resumed);
2419 break;
2420
2421 case XrtHosted_Response_Interrupted:
2422 kr = handle_response_interrupted(&response.Interrupted);
2423 break;
2424
2425 case XrtHosted_Response_Panicking:
2426 handle_response_panicking(&response.Panicking);
2427 /* Not reached. */
2428
2429 case XrtHosted_Response_Invalid:
2430 case XrtHosted_Response_Failure:
2431 case XrtHosted_Response_Pong:
2432 case XrtHosted_Response_SleepUntil:
2433 case XrtHosted_Response_Awaken:
2434 default:
2435 exclaves_debug_printf(show_errors,
2436 "exclaves: Scheduler: Unexpected response: tag 0x%x\n",
2437 response.tag);
2438 kr = KERN_FAILURE;
2439 break;
2440 }
2441
2442 thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
2443 __asm__ volatile ( "EXCLAVES_SCHEDULER_REQUEST_END:\n\t");
2444
2445 return kr;
2446 }
2447
2448 /* A friendly name to show up in backtraces. */
2449 OS_NOINLINE
2450 kern_return_t
2451 exclaves_run(thread_t thread, bool interrupted)
2452 {
2453 return exclaves_scheduler_request_resume(&thread->th_exclaves_ipc_ctx,
2454 interrupted);
2455 }
2456
2457 /*
2458 * Note: this is called from a thread with RT priority that is on the way to
2459 * panicking, and thus doesn't log.
2460 */
2461 kern_return_t
2462 exclaves_scheduler_request_watchdog_panic(void)
2463 {
2464 thread_t thread = current_thread();
2465
2466 XrtHosted_Response_t response = {};
2467 const XrtHosted_Request_t request = XrtHosted_Request_WatchdogPanicMsg();
2468
2469 /*
2470 * Check for consistent exclaves thread state to make sure we don't
2471 * accidentally block. This should normally never happen but if it does,
2472 * just return and allow the caller to panic without gathering an
2473 * exclaves stackshot.
2474 */
2475 if (os_atomic_load(&thread->th_exclaves_inspection_state, relaxed) != 0 ||
2476 thread->th_exclaves_state != 0) {
2477 return KERN_FAILURE;
2478 }
2479
2480 thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_CALL;
2481 kern_return_t kr = exclaves_scheduler_request(&request, &response);
2482 thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_CALL;
2483
2484 switch (kr) {
2485 case KERN_SUCCESS:
2486 break;
2487
2488 case KERN_POLICY_LIMIT:
2489 /*
2490 * POLICY_LIMIT should only happen if a timer update was pending
2491 * (and thus should never happen when trying to send a watchdog
2492 * panic message).
2493 */
2494 panic("exclaves: "
2495 "timer update requested when calling watchdog panic");
2496
2497 default:
2498 return kr;
2499 }
2500
2501 thread->th_exclaves_state |= TH_EXCLAVES_SCHEDULER_REQUEST;
2502
2503 switch (response.tag) {
2504 case XrtHosted_Response_WatchdogPanicComplete:
2505 handle_response_watchdog_panic_complete(&response.WatchdogPanicComplete);
2506 break;
2507
2508 case XrtHosted_Response_Panicking:
2509 handle_response_panicking(&response.Panicking);
2510 /* Not reached. */
2511
2512 default:
2513 panic("exclaves: unexpected scheduler response "
2514 "when sending watchdog panic request: %d", response.tag);
2515 }
2516
2517 thread->th_exclaves_state &= ~TH_EXCLAVES_SCHEDULER_REQUEST;
2518
2519 return kr;
2520 }
2521
2522 /* -------------------------------------------------------------------------- */
2523
2524 #pragma mark exclaves xnu proxy communication
2525
2526 static kern_return_t
2527 exclaves_hosted_error(bool success, XrtHosted_Error_t *error)
2528 {
2529 if (success) {
2530 return KERN_SUCCESS;
2531 } else {
2532 exclaves_debug_printf(show_errors,
2533 "exclaves: XrtHosted: %s[%d] (%s): %s\n",
2534 error->file,
2535 error->line,
2536 error->function,
2537 error->expression
2538 );
2539 return KERN_FAILURE;
2540 }
2541 }
2542
2543 #pragma mark exclaves privilege management
2544
2545 /*
2546 * All entitlement checking enabled by default.
2547 * All entitlement checking is enabled by default.
2548 #define DEFAULT_ENTITLEMENT_FLAGS (~0)
2549
2550 /*
2551 * boot-arg to control the use of entitlements.
2552 * Eventually this should be removed and entitlement checking should be gated on
2553 * the EXCLAVES_R_ENTITLEMENTS requirement.
2554 * This will be addressed with rdar://125153460.
2555 */
2556 TUNABLE(unsigned int, exclaves_entitlement_flags,
2557 "exclaves_entitlement_flags", DEFAULT_ENTITLEMENT_FLAGS);
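
/*
 * Illustrative note: has_entitlement() below tests
 * (exclaves_entitlement_flags & priv), so each exclaves_priv_t value acts
 * as a bit in this mask. For example, booting with
 *
 *     exclaves_entitlement_flags=0
 *
 * disables every entitlement check, while clearing a single privilege bit
 * relaxes only that check.
 */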
2558
2559 static bool
2560 has_entitlement(task_t task, const exclaves_priv_t priv,
2561 const char *entitlement)
2562 {
2563 /* Skip the entitlement if not enabled. */
2564 if ((exclaves_entitlement_flags & priv) == 0) {
2565 return true;
2566 }
2567
2568 return IOTaskHasEntitlement(task, entitlement);
2569 }
2570
2571 static bool
2572 has_entitlement_vnode(void *vnode, const int64_t off,
2573 const exclaves_priv_t priv, const char *entitlement)
2574 {
2575 /* Skip the entitlement if not enabled. */
2576 if ((exclaves_entitlement_flags & priv) == 0) {
2577 return true;
2578 }
2579
2580 return IOVnodeHasEntitlement(vnode, off, entitlement);
2581 }
2582
2583 bool
2584 exclaves_has_priv(task_t task, exclaves_priv_t priv)
2585 {
2586 const bool is_kernel = task == kernel_task;
2587 const bool is_launchd = task_pid(task) == 1;
2588
2589 switch (priv) {
2590 case EXCLAVES_PRIV_CONCLAVE_SPAWN:
2591 /* Both launchd and entitled tasks can spawn new conclaves. */
2592 if (is_launchd) {
2593 return true;
2594 }
2595 return has_entitlement(task, priv,
2596 "com.apple.private.exclaves.conclave-spawn");
2597
2598 case EXCLAVES_PRIV_KERNEL_DOMAIN:
2599 /*
2600 * Both the kernel itself and user tasks with the right
2601 * privilege can access exclaves resources in the kernel domain.
2602 */
2603 if (is_kernel) {
2604 return true;
2605 }
2606
2607 /*
2608 * If the task was entitled and has been through this path
2609 * before, it will have set the TFRO_HAS_KD_ACCESS flag.
2610 */
2611 if ((task_ro_flags_get(task) & TFRO_HAS_KD_ACCESS) != 0) {
2612 return true;
2613 }
2614
2615 if (has_entitlement(task, priv,
2616 "com.apple.private.exclaves.kernel-domain")) {
2617 task_ro_flags_set(task, TFRO_HAS_KD_ACCESS);
2618 return true;
2619 }
2620
2621 return false;
2622
2623 case EXCLAVES_PRIV_BOOT:
2624 /* Both launchd and entitled tasks can boot exclaves. */
2625 if (is_launchd) {
2626 return true;
2627 }
2628 /* BEGIN IGNORE CODESTYLE */
2629 return has_entitlement(task, priv,
2630 "com.apple.private.exclaves.boot");
2631 /* END IGNORE CODESTYLE */
2632
2633 case EXCLAVES_PRIV_INDICATOR_MIN_ON_TIME:
2634 /*
2635 * If the task was entitled and has been through this path
2636 * before, it will have set the TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS flag.
2637 */
2638 if ((task_ro_flags_get(task) & TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS) != 0) {
2639 return true;
2640 }
2641
2642 if (has_entitlement(task, priv,
2643 "com.apple.private.exclaves.indicator_min_on_time")) {
2644 task_ro_flags_set(task, TFRO_HAS_SENSOR_MIN_ON_TIME_ACCESS);
2645 return true;
2646 }
2647
2648 return false;
2649
2650 /* The CONCLAVE HOST priv is always checked by vnode. */
2651 case EXCLAVES_PRIV_CONCLAVE_HOST:
2652 default:
2653 panic("bad exclaves privilege (%u)", priv);
2654 }
2655 }
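
/*
 * Illustrative sketch (not compiled): a typical caller gates a conclave
 * operation on the requesting task's privilege. The function below is
 * hypothetical; real call sites live in the conclave spawn path.
 */
#if 0
static kern_return_t
example_conclave_spawn_gate(task_t task)
{
	/* launchd or a task with the conclave-spawn entitlement passes. */
	if (!exclaves_has_priv(task, EXCLAVES_PRIV_CONCLAVE_SPAWN)) {
		return KERN_DENIED;
	}
	return KERN_SUCCESS;
}
#endif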
2656
2657 bool
2658 exclaves_has_priv_vnode(void *vnode, int64_t off, exclaves_priv_t priv)
2659 {
2660 switch (priv) {
2661 case EXCLAVES_PRIV_CONCLAVE_HOST: {
2662 const bool has_conclave_host = has_entitlement_vnode(vnode,
2663 off, priv, "com.apple.private.exclaves.conclave-host");
2664
2665 /*
2666 * Tasks should never have both EXCLAVES_PRIV_CONCLAVE_HOST
2667 * *and* EXCLAVES_PRIV_KERNEL_DOMAIN.
2668 */
2669
2670 /* Don't check if neither entitlement is being enforced. */
2671 if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0 ||
2672 (exclaves_entitlement_flags & EXCLAVES_PRIV_KERNEL_DOMAIN) == 0) {
2673 return has_conclave_host;
2674 }
2675
2676 const bool has_domain_kernel = has_entitlement_vnode(vnode, off,
2677 EXCLAVES_PRIV_KERNEL_DOMAIN,
2678 "com.apple.private.exclaves.kernel-domain");
2679
2680 /* See if it has both. */
2681 if (has_conclave_host && has_domain_kernel) {
2682 exclaves_debug_printf(show_errors,
2683 "exclaves: task has both conclave-host and "
2684 "kernel-domain entitlements which is forbidden\n");
2685 return false;
2686 }
2687
2688 return has_conclave_host;
2689 }
2690
2691 case EXCLAVES_PRIV_CONCLAVE_SPAWN:
2692 return has_entitlement_vnode(vnode, off, priv,
2693 "com.apple.private.exclaves.conclave-spawn");
2694
2695 default:
2696 panic("bad exclaves privilege (%u)", priv);
2697 }
2698 }
2699
2700
2701 #pragma mark exclaves stackshot range
2702
2703 /* Unslid pointers defining the range of code which switches threads into
2704 * the secure world. */
2705 uintptr_t exclaves_enter_range_start;
2706 uintptr_t exclaves_enter_range_end;
2707
2708 /* Unslid pointers defining the range of code which handles exclaves scheduler request */
2709 uintptr_t exclaves_scheduler_request_range_start;
2710 uintptr_t exclaves_scheduler_request_range_end;
2711
2712
2713 __startup_func
2714 static void
2715 initialize_exclaves_ranges(void)
2716 {
2717 exclaves_enter_range_start = VM_KERNEL_UNSLIDE(&exclaves_enter_start_label);
2718 assert3u(exclaves_enter_range_start, !=, 0);
2719 exclaves_enter_range_end = VM_KERNEL_UNSLIDE(&exclaves_enter_end_label);
2720 assert3u(exclaves_enter_range_end, !=, 0);
2721
2722 exclaves_scheduler_request_range_start = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_start_label);
2723 assert3u(exclaves_scheduler_request_range_start, !=, 0);
2724 exclaves_scheduler_request_range_end = VM_KERNEL_UNSLIDE(&exclaves_scheduler_request_end_label);
2725 assert3u(exclaves_scheduler_request_range_end, !=, 0);
2726 }
2727 STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, initialize_exclaves_ranges);
2728
2729 /*
2730 * Return true if the specified address is in exclaves_enter.
2731 */
2732 static bool
2733 exclaves_enter_in_range(uintptr_t addr, bool slid)
2734 {
2735 return slid ?
2736 exclaves_in_range(addr, (uintptr_t)&exclaves_enter_start_label, (uintptr_t)&exclaves_enter_end_label) :
2737 exclaves_in_range(addr, exclaves_enter_range_start, exclaves_enter_range_end);
2738 }
2739
2740 /*
2741 * Return true if the specified address is in scheduler request handlers.
2742 */
2743 static bool
2744 exclaves_scheduler_request_in_range(uintptr_t addr, bool slid)
2745 {
2746 return slid ?
2747 exclaves_in_range(addr, (uintptr_t)&exclaves_scheduler_request_start_label, (uintptr_t)&exclaves_scheduler_request_end_label) :
2748 exclaves_in_range(addr, exclaves_scheduler_request_range_start, exclaves_scheduler_request_range_end);
2749 }
2750
2751 uint32_t
2752 exclaves_stack_offset(const uintptr_t *addr, size_t nframes, bool slid)
2753 {
2754 size_t i = 0;
2755
2756 // Check for a frame matching scheduler request range
2757 for (i = 0; i < nframes; i++) {
2758 if (exclaves_scheduler_request_in_range(addr[i], slid)) {
2759 break;
2760 }
2761 }
2762
2763 // Insert exclaves stacks before the scheduler request frame
2764 if (i < nframes) {
2765 return (uint32_t)(i + 1);
2766 }
2767
2768 // Check for a frame matching upcall code range
2769 for (i = 0; i < nframes; i++) {
2770 if (exclaves_upcall_in_range(addr[i], slid)) {
2771 break;
2772 }
2773 }
2774
2775 // Insert exclaves stacks before the upcall frame when found
2776 if (i < nframes) {
2777 return (uint32_t)(i + 1);
2778 }
2779
2780 // Check for a frame matching exclaves enter range
2781 for (i = 0; i < nframes; i++) {
2782 if (exclaves_enter_in_range(addr[i], slid)) {
2783 break;
2784 }
2785 }
2786
2787 // Put exclaves stacks on top of kernel stacks by default
2788 if (i == nframes) {
2789 i = 0;
2790 }
2791 return (uint32_t)i;
2792 }
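
/*
 * Worked example (illustrative): for a collected backtrace
 *
 *     addr[0]  interrupt delivery
 *     addr[1]  scheduler-request handling (inside the labelled range)
 *     addr[2]  exclaves_run
 *
 * the first loop matches at i == 1 and the function returns 2, placing the
 * exclave frames at offset 2, directly beneath the matching frame. If no
 * labelled range matches at all, 0 is returned and the exclave stack sits
 * on top of the whole kernel stack.
 */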
2793
2794 #if DEVELOPMENT || DEBUG
2795
2796 /* Tweak the set of relaxed requirements on startup. */
2797 __startup_func
2798 static void
2799 exclaves_requirement_startup(void)
2800 {
2801 /*
2802 * The medium-term plan is that the boot-arg controlling entitlements
2803 * goes away entirely and is replaced with EXCLAVES_R_ENTITLEMENTS.
2804 * Until that happens, for historical reasons, if the entitlement
2805 * boot-arg has disabled EXCLAVES_PRIV_CONCLAVE_HOST, then relax
2806 * EXCLAVES_R_CONCLAVE_RESOURCES here too.
2807 */
2808 if ((exclaves_entitlement_flags & EXCLAVES_PRIV_CONCLAVE_HOST) == 0) {
2809 exclaves_requirement_relax(EXCLAVES_R_CONCLAVE_RESOURCES);
2810 }
2811
2812 exclaves_requirement_relax(EXCLAVES_R_EIC);
2813 }
2814 STARTUP(TUNABLES, STARTUP_RANK_MIDDLE, exclaves_requirement_startup);
2815
2816 #endif /* DEVELOPMENT || DEBUG */
2817
2818 #endif /* CONFIG_EXCLAVES */
2819
2820
2821 #ifndef CONFIG_EXCLAVES
2822 /* Stubs for sensor functions which are not compiled in from exclaves.c when
2823 * CONFIG_EXCLAVES is disabled. */
2824
2825 kern_return_t
2826 exclaves_sensor_start(exclaves_sensor_type_t sensor_type, uint64_t flags,
2827 exclaves_sensor_status_t *status)
2828 {
2829 #pragma unused(sensor_type, flags, status)
2830 return KERN_NOT_SUPPORTED;
2831 }
2832
2833 kern_return_t
2834 exclaves_sensor_stop(exclaves_sensor_type_t sensor_type, uint64_t flags,
2835 exclaves_sensor_status_t *status)
2836 {
2837 #pragma unused(sensor_type, flags, status)
2838 return KERN_NOT_SUPPORTED;
2839 }
2840
2841 kern_return_t
2842 exclaves_sensor_status(exclaves_sensor_type_t sensor_type, uint64_t flags,
2843 exclaves_sensor_status_t *status)
2844 {
2845 #pragma unused(sensor_type, flags, status)
2846 return KERN_NOT_SUPPORTED;
2847 }
2848
2849 #endif /* ! CONFIG_EXCLAVES */
2850