1 /*
2 * Copyright (c) 2000-2025 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: kern/machine.c
60 * Author: Avadis Tevanian, Jr.
61 * Date: 1987
62 *
63 * Support for machine independent machine abstraction.
64 */
65
66 #include <string.h>
67
68 #include <mach/mach_types.h>
69 #include <mach/boolean.h>
70 #include <mach/kern_return.h>
71 #include <mach/machine.h>
72 #include <mach/host_info.h>
73 #include <mach/host_reboot.h>
74 #include <mach/host_priv_server.h>
75 #include <mach/processor_server.h>
76 #include <mach/sdt.h>
77
78 #include <kern/kern_types.h>
79 #include <kern/cpu_data.h>
80 #include <kern/ipc_host.h>
81 #include <kern/host.h>
82 #include <kern/machine.h>
83 #include <kern/misc_protos.h>
84 #include <kern/percpu.h>
85 #include <kern/processor.h>
86 #include <kern/queue.h>
87 #include <kern/sched.h>
88 #include <kern/startup.h>
89 #include <kern/task.h>
90 #include <kern/thread.h>
91 #include <kern/timeout.h>
92 #include <kern/iotrace.h>
93 #include <kern/smr.h>
94
95 #include <libkern/OSDebug.h>
96 #if ML_IO_TIMEOUTS_ENABLED
97 #include <libkern/tree.h>
98 #endif
99
100 #include <pexpert/device_tree.h>
101
102 #include <machine/commpage.h>
103 #include <machine/machine_routines.h>
104
105 #if HIBERNATION
106 #include <IOKit/IOHibernatePrivate.h>
107 #endif
108 #include <IOKit/IOPlatformExpert.h>
109
110 #if CONFIG_DTRACE
111 extern void (*dtrace_cpu_state_changed_hook)(int, boolean_t);
112 #endif
113
114 #if defined(__arm64__)
115 extern void wait_while_mp_kdp_trap(bool check_SIGPdebug);
116 #if CONFIG_SPTM
117 #include <arm64/sptm/pmap/pmap_data.h>
118 #else
119 #include <arm/pmap/pmap_data.h>
120 #endif /* CONFIG_SPTM */
121 #endif /* defined(__arm64__) */
122
123 #if defined(__x86_64__)
124 #include <i386/panic_notify.h>
125 #endif
126
/*
 * Exported variables:
 */

/* "wdt" boot-arg (presumably watchdog configuration — confirm against users); 0 by default. */
TUNABLE(long, wdt, "wdt", 0);

/* Machine-dependent info record; host_info.h is included above, so presumably surfaced via host_info() — confirm. */
struct machine_info machine_info;


/* Forwards */
static void
processor_offline(void * parameter, __unused wait_result_t result);

static void
processor_offline_intstack(processor_t processor) __dead2;
142
143
/*
 * processor_up:
 *
 * Flag processor as up and running, and available
 * for scheduling.
 *
 * Runs on the CPU that is coming online, both at first boot and on each
 * resume from processor offline, before the CPU starts scheduling threads.
 */
void
processor_up(
	processor_t processor)
{
	/* Interrupts stay masked while the processor is marked online. */
	spl_t s = splsched();
	init_ast_check(processor);

#if defined(__arm64__)
	/*
	 * A processor coming online won't have received a SIGPdebug signal
	 * to cause it to spin while a stackshot or panic is taking place,
	 * so spin here on mp_kdp_trap.
	 *
	 * However, since cpu_signal() is not yet enabled for this processor,
	 * there is a race if we have just passed this when a cpu_signal()
	 * is attempted. The sender will assume the cpu is offline, so it will
	 * not end up spinning anywhere. See processor_cpu_reinit() for the fix
	 * for this race.
	 */
	wait_while_mp_kdp_trap(false);
#endif

	/* Boot CPU coming online for the first time, either at boot or after sleep */
	__assert_only bool is_first_online_processor;

	is_first_online_processor = sched_mark_processor_online(processor,
	    processor->last_startup_reason);

	/*
	 * Sanity-check the startup handshake: every CPU except the first
	 * online one must still be flagged as in-startup at this point.
	 */
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	assert(processor->processor_instartup == true || is_first_online_processor);
	simple_unlock(&processor_start_state_lock);

	splx(s);

#if defined(__x86_64__)
	ml_cpu_up();
#endif /* defined(__x86_64__) */

#if CONFIG_DTRACE
	/* Tell DTrace this CPU is now available. */
	if (dtrace_cpu_state_changed_hook) {
		(*dtrace_cpu_state_changed_hook)(processor->cpu_id, TRUE);
	}
#endif
}
194
195 #include <atm/atm_internal.h>
196
197 kern_return_t
host_reboot(host_priv_t host_priv,int options)198 host_reboot(
199 host_priv_t host_priv,
200 int options)
201 {
202 if (host_priv == HOST_PRIV_NULL) {
203 return KERN_INVALID_HOST;
204 }
205
206 #if DEVELOPMENT || DEBUG
207 if (options & HOST_REBOOT_DEBUGGER) {
208 Debugger("Debugger");
209 return KERN_SUCCESS;
210 }
211 #endif
212
213 if (options & HOST_REBOOT_UPSDELAY) {
214 // UPS power cutoff path
215 PEHaltRestart( kPEUPSDelayHaltCPU );
216 } else {
217 halt_all_cpus(!(options & HOST_REBOOT_HALT));
218 }
219
220 return KERN_SUCCESS;
221 }
222
/*
 * processor_assign:
 *
 * Reassign a processor to a different processor set.  Unsupported in
 * this implementation; unconditionally fails.
 */
kern_return_t
processor_assign(
	__unused processor_t processor,
	__unused processor_set_t new_pset,
	__unused boolean_t wait)
{
	return KERN_FAILURE;
}
231
/*
 * processor_doshutdown:
 *
 * Take a booted processor offline (or put the boot processor down for
 * final system sleep when is_final_system_sleep is true).  Runs the
 * shutdown on the target processor's idle thread via processor_offline()
 * and, for the non-sleep case, waits for the CPU to fully exit before
 * powering it down.
 *
 * Caller must hold both cluster_powerdown_lock and processor_updown_lock.
 */
void
processor_doshutdown(
	processor_t processor,
	bool is_final_system_sleep)
{
	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&processor_updown_lock, LCK_MTX_ASSERT_OWNED);

	if (!processor->processor_booted) {
		panic("processor %d not booted", processor->cpu_id);
	}

	/* Final sleep is only taken on the boot CPU once it's the last one up. */
	if (is_final_system_sleep) {
		assert(processor == current_processor());
		assert(processor == master_processor);
		assert(processor_avail_count == 1);
	}

	processor_set_t pset = processor->processor_set;

	ml_cpu_begin_state_transition(processor->cpu_id);

	ml_broadcast_cpu_event(CPU_EXIT_REQUESTED, processor->cpu_id);

#if HIBERNATION
	if (is_final_system_sleep) {
		/*
		 * Ensure the page queues are in a state where the hibernation
		 * code can manipulate them without requiring other threads
		 * to be scheduled.
		 *
		 * This operation can block,
		 * and unlock must be done from the same thread.
		 */
		assert(processor_avail_count < 2);
		hibernate_vm_lock();
	}
#endif

	/* Mark the processor as entering shutdown under the scheduler locks. */
	spl_t s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
	pset_lock(pset);

	assert(processor->state != PROCESSOR_START);
	assert(processor->state != PROCESSOR_PENDING_OFFLINE);
	assert(processor->state != PROCESSOR_OFF_LINE);

	assert(!processor->processor_inshutdown);
	processor->processor_inshutdown = true;

	assert(processor->processor_offline_state == PROCESSOR_OFFLINE_RUNNING);
	processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_BEGIN_SHUTDOWN);

	if (!is_final_system_sleep) {
		sched_assert_not_last_online_cpu(processor->cpu_id);
	}

	pset_unlock(pset);
	simple_unlock(&sched_available_cores_lock);

	if (is_final_system_sleep) {
		assert(processor == current_processor());

#if HIBERNATION
		/*
		 * After this point, the system is now
		 * committed to hibernation and must
		 * not run any other thread that could take this lock.
		 */
		hibernate_vm_unlock();
#endif
	} else {
		/*
		 * Get onto the processor to shut down.
		 * The scheduler picks this thread naturally according to its
		 * priority.
		 * The processor can run any other thread if this one blocks.
		 * So, don't block.
		 */
		processor_t prev = thread_bind(processor);
		thread_block(THREAD_CONTINUE_NULL);

		/* interrupts still disabled */
		assert(ml_get_interrupts_enabled() == FALSE);

		assert(processor == current_processor());
		assert(processor->processor_inshutdown);

		thread_bind(prev);
		/* interrupts still disabled */
	}

	/*
	 * Continue processor shutdown on the processor's idle thread.
	 * The handoff won't fail because the idle thread has a reserved stack.
	 * Switching to the idle thread leaves interrupts disabled,
	 * so we can't accidentally take an interrupt after the context switch.
	 */
	thread_t shutdown_thread = processor->idle_thread;
	shutdown_thread->continuation = processor_offline;
	shutdown_thread->parameter = (void*)is_final_system_sleep;

	thread_run(current_thread(), THREAD_CONTINUE_NULL, NULL, shutdown_thread);

	/*
	 * After this point, we are in regular scheduled context on a remaining
	 * available CPU. Interrupts are still disabled.
	 */

	if (is_final_system_sleep) {
		/*
		 * We are coming out of system sleep here, so there won't be a
		 * corresponding processor_startup for this processor, so we
		 * need to put it back in the correct running state.
		 *
		 * There's nowhere to execute a call to CPU_EXITED during system
		 * sleep for the boot processor, and it's already been CPU_BOOTED
		 * by this point anyways, so skip the call.
		 */
		assert(current_processor() == master_processor);
		assert(processor->state == PROCESSOR_RUNNING);
		assert(processor->processor_inshutdown);
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
		processor->processor_inshutdown = false;
		processor_update_offline_state(processor, PROCESSOR_OFFLINE_RUNNING);

		splx(s);
	} else {
		splx(s);

		/* Wait for the target CPU to finish running and halt. */
		cpu_exit_wait(processor->cpu_id);

		/* Publish the final OFF_LINE state under the scheduler locks. */
		s = splsched();
		simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
		pset_lock(pset);
		assert(processor->processor_inshutdown);
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_PENDING_OFFLINE);
		assert(processor->state == PROCESSOR_PENDING_OFFLINE);
		pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_CPU_OFFLINE);
		pset_unlock(pset);
		simple_unlock(&sched_available_cores_lock);
		splx(s);

		ml_broadcast_cpu_event(CPU_EXITED, processor->cpu_id);
		ml_cpu_power_disable(processor->cpu_id);

		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_CPU_OFFLINE);
		processor_update_offline_state(processor, PROCESSOR_OFFLINE_FULLY_OFFLINE);
	}

	ml_cpu_end_state_transition(processor->cpu_id);
}
385
/*
 * Called in the context of the idle thread to shut down the processor
 *
 * A shut-down processor looks like it's 'running' the idle thread parked
 * in this routine, but it's actually been powered off and has no hardware state.
 *
 * parameter carries the is_final_system_sleep flag stashed by
 * processor_doshutdown() when it installed this routine as the idle
 * thread's continuation.
 */
static void
processor_offline(
	void * parameter,
	__unused wait_result_t result)
{
	bool is_final_system_sleep = (bool) parameter;
	processor_t processor = current_processor();
	thread_t self = current_thread();
	__assert_only thread_t old_thread = THREAD_NULL;

	/* We must be the idle thread of this CPU, interrupts off. */
	assert(self->state & TH_IDLE);
	assert(processor->idle_thread == self);
	assert(ml_get_interrupts_enabled() == FALSE);
	assert(self->continuation == NULL);
	assert(processor->processor_online == true);
	assert(processor->running_timers_active == false);

	if (is_final_system_sleep) {
		assert(processor == current_processor());
		assert(processor == master_processor);
		assert(processor_avail_count == 1);
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROCESSOR_SHUTDOWN) | DBG_FUNC_START, processor->cpu_id);

	bool enforce_quiesce_safety = gEnforcePlatformActionSafety;

	/*
	 * Scheduling is now disabled for this processor.
	 * Ensure that primitives that need scheduling (like mutexes) know this.
	 */
	if (enforce_quiesce_safety) {
		disable_preemption_without_measurements();
	}

#if CONFIG_DTRACE
	/* Tell DTrace this CPU is going away. */
	if (dtrace_cpu_state_changed_hook) {
		(*dtrace_cpu_state_changed_hook)(processor->cpu_id, FALSE);
	}
#endif

	smr_cpu_down(processor, SMR_CPU_REASON_OFFLINE);

	/* Drain pending IPIs for the last time here. */
	ml_cpu_down();

	sched_mark_processor_offline(processor, is_final_system_sleep);

	/*
	 * Switch to the interrupt stack and shut down the processor.
	 *
	 * When the processor comes back, it will eventually call load_context which
	 * restores the context saved by machine_processor_shutdown, returning here.
	 */
	old_thread = machine_processor_shutdown(self, processor_offline_intstack, processor);

	/*
	 * The processor is back. sched_mark_processor_online and
	 * friends have already run via processor_up.
	 */

	/* old_thread should be NULL because we got here through Load_context */
	assert(old_thread == THREAD_NULL);

	assert(processor == current_processor());
	assert(processor->idle_thread == current_thread());
	assert(processor->processor_online == true);

	assert(ml_get_interrupts_enabled() == FALSE);
	assert(self->continuation == NULL);

	/* Extract the machine_param value stashed by secondary_cpu_main */
	void * machine_param = self->parameter;
	self->parameter = NULL;

	processor_cpu_reinit(machine_param, true, is_final_system_sleep);

	if (enforce_quiesce_safety) {
		enable_preemption();
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROCESSOR_SHUTDOWN) | DBG_FUNC_END, processor->cpu_id);

	/*
	 * Now that the processor is back, invoke the idle thread to find out what to do next.
	 * idle_thread will enable interrupts.
	 */
	thread_block(idle_thread);
	/*NOTREACHED*/
}
482
/*
 * Complete the shutdown and place the processor offline.
 *
 * Called at splsched in the shutdown context
 * (i.e. on the idle thread, on the interrupt stack)
 *
 * The onlining half of this is done in load_context().
 */
static void
processor_offline_intstack(
	processor_t processor)
{
	assert(processor == current_processor());
	assert(processor->active_thread == current_thread());

	/* Account the final idle interval for this CPU before it powers off. */
	struct recount_snap snap = { 0 };
	recount_snapshot(&snap);
	recount_processor_idle(&processor->pr_recount, &snap);

	smr_cpu_leave(processor, processor->last_dispatch);

	PMAP_DEACTIVATE_KERNEL(processor->cpu_id);

	/* Power down; cpu_sleep() must not return. */
	cpu_sleep();
	panic("zombie processor");
	/*NOTREACHED*/
}
510
/*
 * Called on the idle thread with interrupts disabled to initialize a
 * secondary processor on boot or to reinitialize any processor on resume
 * from processor offline.
 *
 * machine_param:         machine-dependent argument stashed by
 *                        secondary_cpu_main (see processor_offline()).
 * wait_for_cpu_signal:   on arm64, wait until cpu_signal() is enabled
 *                        before publishing the CPU as fully online.
 * is_final_system_sleep: assertion-only; relaxes the in-startup check
 *                        for the boot CPU resuming from system sleep.
 */
void
processor_cpu_reinit(void* machine_param,
    __unused bool wait_for_cpu_signal,
    __assert_only bool is_final_system_sleep)
{
	/* Re-initialize the processor */
	machine_cpu_reinit(machine_param);

#if defined(__arm64__)
	/*
	 * See the comments for wait_while_mp_kdp_trap in processor_up().
	 *
	 * SIGPdisabled is cleared (to enable cpu_signal() to succeed with this processor)
	 * the first time we take an IPI. This is triggered by machine_cpu_reinit(), above,
	 * which calls cpu_machine_init()->PE_cpu_machine_init()->PE_cpu_signal() which sends
	 * a self-IPI to ensure that happens when we enable interrupts. So enable interrupts
	 * here so that cpu_signal() can succeed before we spin on mp_kdp_trap.
	 */
	assert_ml_cpu_signal_is_enabled(false);

	ml_set_interrupts_enabled(TRUE);

	if (wait_for_cpu_signal) {
		ml_wait_for_cpu_signal_to_enable();
	}

	ml_set_interrupts_enabled(FALSE);

	wait_while_mp_kdp_trap(true);

	/*
	 * At this point,
	 * if a stackshot or panic is in progress, we either spin on mp_kdp_trap
	 * or we successfully received a SIGPdebug signal which will cause us to
	 * break out of the spin on mp_kdp_trap and instead
	 * spin next time interrupts are enabled in idle_thread().
	 */
	if (wait_for_cpu_signal) {
		assert_ml_cpu_signal_is_enabled(true);
	}

	/*
	 * Now that we know SIGPdisabled is cleared, we can publish that
	 * this CPU has fully come out of offline state.
	 *
	 * Without wait_for_cpu_signal, we'll publish this earlier than
	 * cpu_signal is actually ready, but as long as it's ready by next S2R,
	 * it will be good enough.
	 */
	ml_cpu_up();
#endif

	/*
	 * Interrupts must be disabled while processor_start_state_lock is
	 * held to prevent a deadlock with CPU startup of other CPUs that
	 * may be proceeding in parallel to this CPU's reinitialization.
	 */
	spl_t s = splsched();
	processor_t processor = current_processor();

	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	assert(processor->processor_instartup == true || is_final_system_sleep);
	processor->processor_instartup = false;
	simple_unlock(&processor_start_state_lock);

	splx(s);

	/* Wake any thread waiting on this processor's startup completion. */
	thread_wakeup((event_t)&processor->processor_instartup);
}
585
586 kern_return_t
host_get_boot_info(host_priv_t host_priv,kernel_boot_info_t boot_info)587 host_get_boot_info(
588 host_priv_t host_priv,
589 kernel_boot_info_t boot_info)
590 {
591 const char *src = "";
592 if (host_priv == HOST_PRIV_NULL) {
593 return KERN_INVALID_HOST;
594 }
595
596 /*
597 * Copy first operator string terminated by '\0' followed by
598 * standardized strings generated from boot string.
599 */
600 src = machine_boot_info(boot_info, KERNEL_BOOT_INFO_MAX);
601 if (src != boot_info) {
602 (void) strncpy(boot_info, src, KERNEL_BOOT_INFO_MAX);
603 }
604
605 return KERN_SUCCESS;
606 }
607
// These are configured through sysctls.
#if DEVELOPMENT || DEBUG
/* Non-zero: panic (rather than only report) on stalled physical reads/writes. */
uint32_t phy_read_panic = 1;
uint32_t phy_write_panic = 1;
/* Test knob to artificially stretch I/O durations; 0 disables. */
uint64_t simulate_stretched_io = 0;
#else
uint32_t phy_read_panic = 0;
uint32_t phy_write_panic = 0;
#endif

#if ML_IO_TIMEOUTS_ENABLED
/* Per-CPU tracker of the MMIO access currently in flight, if any. */
mmio_track_t PERCPU_DATA(mmio_tracker);
#endif

#if !defined(__x86_64__)

#if DEVELOPMENT || DEBUG
/* NOTE(review): assumes a 24MHz timebase — confirm this holds for all ARM targets. */
static const uint64_t TIMEBASE_TICKS_PER_USEC = 24000000ULL / USEC_PER_SEC;
/* Default tracing threshold: 100us of timebase ticks on DEV/DEBUG, disabled otherwise. */
static const uint64_t DEFAULT_TRACE_PHY_TIMEOUT = 100 * TIMEBASE_TICKS_PER_USEC;
#else
static const uint64_t DEFAULT_TRACE_PHY_TIMEOUT = 0;
#endif

// The MACHINE_TIMEOUT facility only exists on ARM.
MACHINE_TIMEOUT_DEV_WRITEABLE(report_phy_read_delay_to, "report-phy-read-delay", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
MACHINE_TIMEOUT_DEV_WRITEABLE(report_phy_write_delay_to, "report-phy-write-delay", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
MACHINE_TIMEOUT_DEV_WRITEABLE(trace_phy_read_delay_to, "trace-phy-read-delay", DEFAULT_TRACE_PHY_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
MACHINE_TIMEOUT_DEV_WRITEABLE(trace_phy_write_delay_to, "trace-phy-write-delay", DEFAULT_TRACE_PHY_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
636
637 #if SCHED_HYGIENE_DEBUG
638 /*
639 * Note: The interrupt-masked timeout goes through two initializations - one
640 * early in boot and one later. Thus this function is also called twice and
641 * can't be marked '__startup_func'.
642 */
643 static void
ml_io_init_timeouts(void)644 ml_io_init_timeouts(void)
645 {
646 /*
647 * The timeouts may be completely disabled via an override.
648 */
649 if (kern_feature_override(KF_IO_TIMEOUT_OVRD)) {
650 os_atomic_store(&report_phy_write_delay_to, 0, relaxed);
651 os_atomic_store(&report_phy_read_delay_to, 0, relaxed);
652 return;
653 }
654
655 /*
656 * There may be no interrupt masked timeout set.
657 */
658 const uint64_t interrupt_masked_to = os_atomic_load(&interrupt_masked_timeout, relaxed);
659 if (interrupt_masked_timeout == 0) {
660 return;
661 }
662
663 /*
664 * Inherit from the interrupt masked timeout if smaller and the timeout
665 * hasn't been explicitly set via boot-arg.
666 */
667 uint64_t arg = 0;
668
669 if (!PE_parse_boot_argn("ml-timeout-report-phy-read-delay", &arg, sizeof(arg))) {
670 uint64_t report_phy_read_delay = os_atomic_load(&report_phy_read_delay_to, relaxed);
671 report_phy_read_delay = report_phy_read_delay == 0 ?
672 interrupt_masked_to :
673 MIN(report_phy_read_delay, interrupt_masked_to);
674 os_atomic_store(&report_phy_read_delay_to, report_phy_read_delay, relaxed);
675 }
676
677 if (!PE_parse_boot_argn("ml-timeout-report-phy-write-delay", &arg, sizeof(arg))) {
678 uint64_t report_phy_write_delay = os_atomic_load(&report_phy_write_delay_to, relaxed);
679 report_phy_write_delay = report_phy_write_delay == 0 ?
680 interrupt_masked_to :
681 MIN(report_phy_write_delay, interrupt_masked_to);
682 os_atomic_store(&report_phy_write_delay_to, report_phy_write_delay, relaxed);
683 }
684 }
685
686 /*
687 * It's important that this happens after machine timeouts have initialized so
688 * the correct timeouts can be inherited.
689 */
690 STARTUP(TIMEOUTS, STARTUP_RANK_SECOND, ml_io_init_timeouts);
691 #endif /* SCHED_HYGIENE_DEBUG */
692
693 extern pmap_paddr_t kvtophys(vm_offset_t va);
694 #endif /* !defined(__x86_64__) */
695
#if ML_IO_TIMEOUTS_ENABLED

/* Spinlock protecting both override trees; see io_increase_timeouts() for the interrupt discipline. */
static LCK_GRP_DECLARE(io_timeout_override_lock_grp, "io_timeout_override");
static LCK_SPIN_DECLARE(io_timeout_override_lock, &io_timeout_override_lock_grp);

/* One registered timeout override covering [ioaddr_base, ioaddr_base + size). */
struct io_timeout_override_entry {
	RB_ENTRY(io_timeout_override_entry) tree;   /* red-black tree linkage */

	uintptr_t ioaddr_base;   /* first address covered by the override */
	unsigned int size;       /* length of the range in bytes; always > 0 */
	uint32_t read_timeout;   /* read timeout, absolute-time units */
	uint32_t write_timeout;  /* write timeout, absolute-time units */
};
709
710 static inline int
io_timeout_override_cmp(const struct io_timeout_override_entry * a,const struct io_timeout_override_entry * b)711 io_timeout_override_cmp(const struct io_timeout_override_entry *a, const struct io_timeout_override_entry *b)
712 {
713 if (a->ioaddr_base < b->ioaddr_base) {
714 return -1;
715 } else if (a->ioaddr_base > b->ioaddr_base) {
716 return 1;
717 } else {
718 return 0;
719 }
720 }
721
/* Two override trees: one keyed by physical address, one by virtual address. */
static RB_HEAD(io_timeout_override, io_timeout_override_entry)
io_timeout_override_root_pa, io_timeout_override_root_va;

RB_PROTOTYPE_PREV(io_timeout_override, io_timeout_override_entry, tree, io_timeout_override_cmp);
RB_GENERATE_PREV(io_timeout_override, io_timeout_override_entry, tree, io_timeout_override_cmp);
727
728 static int
io_increase_timeouts(struct io_timeout_override * root,uintptr_t ioaddr_base,unsigned int size,uint32_t read_timeout_us,uint32_t write_timeout_us)729 io_increase_timeouts(struct io_timeout_override *root, uintptr_t ioaddr_base,
730 unsigned int size, uint32_t read_timeout_us, uint32_t write_timeout_us)
731 {
732 const uint64_t MAX_TIMEOUT_ABS = UINT32_MAX;
733
734 assert(preemption_enabled());
735
736 int ret = KERN_SUCCESS;
737
738 if (size == 0) {
739 return KERN_INVALID_ARGUMENT;
740 }
741
742 uintptr_t ioaddr_end;
743 if (os_add_overflow(ioaddr_base, size - 1, &ioaddr_end)) {
744 return KERN_INVALID_ARGUMENT;
745 }
746
747 uint64_t read_timeout_abs, write_timeout_abs;
748 nanoseconds_to_absolutetime(NSEC_PER_USEC * read_timeout_us, &read_timeout_abs);
749 nanoseconds_to_absolutetime(NSEC_PER_USEC * write_timeout_us, &write_timeout_abs);
750 if (read_timeout_abs > MAX_TIMEOUT_ABS || write_timeout_abs > MAX_TIMEOUT_ABS) {
751 return KERN_INVALID_ARGUMENT;
752 }
753
754 struct io_timeout_override_entry *node = kalloc_type(struct io_timeout_override_entry, Z_WAITOK | Z_ZERO | Z_NOFAIL);
755 node->ioaddr_base = ioaddr_base;
756 node->size = size;
757 node->read_timeout = (uint32_t)read_timeout_abs;
758 node->write_timeout = (uint32_t)write_timeout_abs;
759
760 /*
761 * Interrupt handlers are allowed to call ml_io_{read,write}*, so
762 * interrupts must be disabled any time io_timeout_override_lock is
763 * held. Otherwise the CPU could take an interrupt while holding the
764 * lock, invoke an ISR that calls ml_io_{read,write}*, and deadlock
765 * trying to acquire the lock again.
766 */
767 boolean_t istate = ml_set_interrupts_enabled(FALSE);
768 lck_spin_lock(&io_timeout_override_lock);
769 if (RB_INSERT(io_timeout_override, root, node)) {
770 ret = KERN_INVALID_ARGUMENT;
771 goto out;
772 }
773
774 /* Check that this didn't create any new overlaps */
775 struct io_timeout_override_entry *prev = RB_PREV(io_timeout_override, root, node);
776 if (prev && (prev->ioaddr_base + prev->size) > node->ioaddr_base) {
777 RB_REMOVE(io_timeout_override, root, node);
778 ret = KERN_INVALID_ARGUMENT;
779 goto out;
780 }
781 struct io_timeout_override_entry *next = RB_NEXT(io_timeout_override, root, node);
782 if (next && (node->ioaddr_base + node->size) > next->ioaddr_base) {
783 RB_REMOVE(io_timeout_override, root, node);
784 ret = KERN_INVALID_ARGUMENT;
785 goto out;
786 }
787
788 out:
789 lck_spin_unlock(&io_timeout_override_lock);
790 ml_set_interrupts_enabled(istate);
791 if (ret != KERN_SUCCESS) {
792 kfree_type(struct io_timeout_override_entry, node);
793 }
794 return ret;
795 }
796
797 static int
io_reset_timeouts(struct io_timeout_override * root,uintptr_t ioaddr_base,unsigned int size)798 io_reset_timeouts(struct io_timeout_override *root, uintptr_t ioaddr_base, unsigned int size)
799 {
800 assert(preemption_enabled());
801
802 struct io_timeout_override_entry key = { .ioaddr_base = ioaddr_base };
803
804 boolean_t istate = ml_set_interrupts_enabled(FALSE);
805 lck_spin_lock(&io_timeout_override_lock);
806 struct io_timeout_override_entry *node = RB_FIND(io_timeout_override, root, &key);
807 if (node) {
808 if (node->size == size) {
809 RB_REMOVE(io_timeout_override, root, node);
810 } else {
811 node = NULL;
812 }
813 }
814 lck_spin_unlock(&io_timeout_override_lock);
815 ml_set_interrupts_enabled(istate);
816
817 if (!node) {
818 return KERN_NOT_FOUND;
819 }
820
821 kfree_type(struct io_timeout_override_entry, node);
822 return KERN_SUCCESS;
823 }
824
825 static bool
io_override_timeout(struct io_timeout_override * root,uintptr_t addr,uint64_t * read_timeout,uint64_t * write_timeout)826 io_override_timeout(struct io_timeout_override *root, uintptr_t addr,
827 uint64_t *read_timeout, uint64_t *write_timeout)
828 {
829 assert(!ml_get_interrupts_enabled());
830 assert3p(read_timeout, !=, NULL);
831 assert3p(write_timeout, !=, NULL);
832
833 struct io_timeout_override_entry *node = RB_ROOT(root);
834
835 lck_spin_lock(&io_timeout_override_lock);
836 /* RB_FIND() doesn't support custom cmp functions, so we have to open-code our own */
837 while (node) {
838 if (node->ioaddr_base <= addr && addr < node->ioaddr_base + node->size) {
839 *read_timeout = node->read_timeout;
840 *write_timeout = node->write_timeout;
841 lck_spin_unlock(&io_timeout_override_lock);
842 return true;
843 } else if (addr < node->ioaddr_base) {
844 node = RB_LEFT(node, tree);
845 } else {
846 node = RB_RIGHT(node, tree);
847 }
848 }
849 lck_spin_unlock(&io_timeout_override_lock);
850
851 return false;
852 }
853
854 static bool
io_override_timeout_ss(uint64_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)855 io_override_timeout_ss(uint64_t paddr, uint64_t *read_timeout, uint64_t *write_timeout)
856 {
857 #if defined(__arm64__)
858
859 /*
860 * PCIe regions are marked with PMAP_IO_RANGE_STRONG_SYNC. Apply a
861 * timeout greater than two PCIe completion timeouts (90ms) as they can
862 * stack.
863 */
864 #define STRONG_SYNC_TIMEOUT 2160000 /* 90ms */
865
866 pmap_io_range_t *range = pmap_find_io_attr(paddr);
867 if (range != NULL && (range->wimg & PMAP_IO_RANGE_STRONG_SYNC) != 0) {
868 *read_timeout = STRONG_SYNC_TIMEOUT;
869 *write_timeout = STRONG_SYNC_TIMEOUT;
870 return true;
871 }
872 #else
873 (void)paddr;
874 (void)read_timeout;
875 (void)write_timeout;
876 #endif /* __arm64__ */
877 return false;
878 }
879
880 /*
881 * Return timeout override values for the read/write timeout for a given
882 * address.
883 * A virtual address (vaddr), physical address (paddr) or both may be passed.
884 * Up to three separate timeout overrides can be found
885 * - A virtual address override
886 * - A physical address override
887 * - A strong sync override
888 * The largest override found is returned.
889 */
890 void
override_io_timeouts(uintptr_t vaddr,uint64_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)891 override_io_timeouts(uintptr_t vaddr, uint64_t paddr, uint64_t *read_timeout,
892 uint64_t *write_timeout)
893 {
894 uint64_t rt_va = 0, wt_va = 0, rt_pa = 0, wt_pa = 0, rt_ss = 0, wt_ss = 0;
895
896 if (vaddr != 0) {
897 /* Override from virtual address. */
898 io_override_timeout(&io_timeout_override_root_va, vaddr, &rt_va, &wt_va);
899 }
900
901 if (paddr != 0) {
902 /* Override from physical address. */
903 io_override_timeout(&io_timeout_override_root_pa, paddr, &rt_pa, &wt_pa);
904
905 /* Override from strong sync range. */
906 io_override_timeout_ss(paddr, &rt_ss, &wt_ss);
907 }
908
909 if (read_timeout != NULL) {
910 *read_timeout = MAX(MAX(rt_va, rt_pa), rt_ss);
911 }
912
913 if (write_timeout != NULL) {
914 *write_timeout = MAX(MAX(wt_va, wt_pa), wt_ss);
915 }
916 }
917
918 #endif /* ML_IO_TIMEOUTS_ENABLED */
919
920 int
ml_io_increase_timeouts(uintptr_t ioaddr_base,unsigned int size,uint32_t read_timeout_us,uint32_t write_timeout_us)921 ml_io_increase_timeouts(uintptr_t ioaddr_base, unsigned int size,
922 uint32_t read_timeout_us, uint32_t write_timeout_us)
923 {
924 #if ML_IO_TIMEOUTS_ENABLED
925 const size_t MAX_SIZE = 4096;
926
927 if (size > MAX_SIZE) {
928 return KERN_INVALID_ARGUMENT;
929 }
930
931 return io_increase_timeouts(&io_timeout_override_root_va, ioaddr_base,
932 size, read_timeout_us, write_timeout_us);
933 #else
934 #pragma unused(ioaddr_base, size, read_timeout_us, write_timeout_us)
935 return KERN_SUCCESS;
936 #endif /* ML_IO_TIMEOUTS_ENABLED */
937 }
938
939 int
ml_io_increase_timeouts_phys(vm_offset_t ioaddr_base,unsigned int size,uint32_t read_timeout_us,uint32_t write_timeout_us)940 ml_io_increase_timeouts_phys(vm_offset_t ioaddr_base, unsigned int size,
941 uint32_t read_timeout_us, uint32_t write_timeout_us)
942 {
943 #if ML_IO_TIMEOUTS_ENABLED
944 return io_increase_timeouts(&io_timeout_override_root_pa, ioaddr_base,
945 size, read_timeout_us, write_timeout_us);
946 #else
947 #pragma unused(ioaddr_base, size, read_timeout_us, write_timeout_us)
948 return KERN_SUCCESS;
949 #endif /* ML_IO_TIMEOUTS_ENABLED */
950 }
951
952 int
ml_io_reset_timeouts(uintptr_t ioaddr_base,unsigned int size)953 ml_io_reset_timeouts(uintptr_t ioaddr_base, unsigned int size)
954 {
955 #if ML_IO_TIMEOUTS_ENABLED
956 return io_reset_timeouts(&io_timeout_override_root_va, ioaddr_base, size);
957 #else
958 #pragma unused(ioaddr_base, size)
959 return KERN_SUCCESS;
960 #endif /* ML_IO_TIMEOUTS_ENABLED */
961 }
962
963 int
ml_io_reset_timeouts_phys(vm_offset_t ioaddr_base,unsigned int size)964 ml_io_reset_timeouts_phys(vm_offset_t ioaddr_base, unsigned int size)
965 {
966 #if ML_IO_TIMEOUTS_ENABLED
967 return io_reset_timeouts(&io_timeout_override_root_pa, ioaddr_base, size);
968 #else
969 #pragma unused(ioaddr_base, size)
970 return KERN_SUCCESS;
971 #endif /* ML_IO_TIMEOUTS_ENABLED */
972 }
973
974 #if ML_IO_TIMEOUTS_ENABLED
975 boolean_t
ml_io_check_for_mmio_overrides(__unused uint64_t mt)976 ml_io_check_for_mmio_overrides(__unused uint64_t mt)
977 {
978 #if __arm64__
979 /* Issue a barrier before accessing the remote mmio trackers */
980 __builtin_arm_dmb(DMB_ISH);
981 #endif
982 boolean_t istate = ml_set_interrupts_enabled_with_debug(false, false);
983 percpu_foreach(mmiot, mmio_tracker) {
984 uint64_t read_timeout;
985 uint64_t write_timeout;
986
987 override_io_timeouts(mmiot->mmio_vaddr, mmiot->mmio_paddr, &read_timeout, &write_timeout);
988
989 if (read_timeout > 0 || write_timeout > 0) {
990 if (mt < (mmiot->mmio_start_mt + MAX(read_timeout, write_timeout))) {
991 ml_set_interrupts_enabled_with_debug(istate, false);
992 return true;
993 }
994 }
995 }
996 ml_set_interrupts_enabled_with_debug(istate, false);
997 return false;
998 }
999 #endif /* ML_IO_TIMEOUTS_ENABLED */
1000
1001 #if DEVELOPMENT || DEBUG
1002 static int ml_io_read_test_mode;
1003 #endif
1004
/*
 * ml_io_read: perform a volatile MMIO read of `size` bytes (1, 2, 4, or 8)
 * from `vaddr`, returning the value zero-extended to 64 bits.  Panics on any
 * other size.
 *
 * When ML_IO_TIMEOUTS_ENABLED and a report threshold is armed, the access is
 * timed with interrupts masked; slow reads are iotrace'd, fire a DTrace
 * probe, may have their threshold raised by a per-range override
 * (override_io_timeouts), and can panic via kern_timeout_try_panic.
 */
unsigned long long
ml_io_read(uintptr_t vaddr, int size)
{
	unsigned long long result = 0;
	unsigned char s1;
	unsigned short s2;

#if DEVELOPMENT || DEBUG
	/* For testing */
	extern void IODelay(int);
	if (__improbable(ml_io_read_test_mode)) {
		/* vaddr 1 simulates a stretched read; vaddr 2 returns fast. */
		if (vaddr == 1) {
			IODelay(100);
			return 0;
		} else if (vaddr == 2) {
			return 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

#ifdef ML_IO_VERIFY_UNCACHEABLE
	uintptr_t paddr = pmap_verify_noncacheable(vaddr);
#elif defined(ML_IO_TIMEOUTS_ENABLED)
	/* paddr is resolved lazily below, only if timing is armed. */
	uintptr_t paddr = 0;
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	kern_timeout_t timeout;
	/* istate is only initialized (and later restored) when use_timeout is set. */
	boolean_t istate, use_timeout = FALSE;
	uint64_t report_read_delay;
#if __x86_64__
	report_read_delay = report_phy_read_delay;
#else
	report_read_delay = os_atomic_load(&report_phy_read_delay_to, relaxed);
	uint64_t const trace_phy_read_delay = os_atomic_load(&trace_phy_read_delay_to, relaxed);
#endif /* __x86_64__ */

	if (__improbable(report_read_delay != 0)) {
		/* Arm the measurement: mask interrupts and record the start. */
		istate = ml_set_interrupts_enabled_with_debug(false, false);

		kern_timeout_start(&timeout, TF_NONSPEC_TIMEBASE | TF_SAMPLE_PMC);
		use_timeout = true;

		if (paddr == 0) {
			paddr = kvtophys(vaddr);
		}
		/* Publish the in-flight access so remote CPUs can see it
		 * (ml_io_check_for_mmio_overrides). */
		mmio_track_t *mmiot = PERCPU_GET(mmio_tracker);
		mmiot->mmio_start_mt = kern_timeout_start_time(&timeout);
		mmiot->mmio_paddr = paddr;
		mmiot->mmio_vaddr = vaddr;
	}

#ifdef ML_IO_SIMULATE_STRETCHED_ENABLED
	if (__improbable(use_timeout && simulate_stretched_io)) {
		kern_timeout_stretch(&timeout, simulate_stretched_io);
	}
#endif /* ML_IO_SIMULATE_STRETCHED_ENABLED */
#endif /* ML_IO_TIMEOUTS_ENABLED */

#if DEVELOPMENT || DEBUG
	/* Fence timebase reads around the access unless overridden for test. */
	boolean_t use_fences = !kern_feature_override(KF_IO_TIMEOUT_OVRD);
	if (use_fences) {
		ml_timebase_to_memory_fence();
	}
#endif

	switch (size) {
	case 1:
		s1 = *(volatile unsigned char *)vaddr;
		result = s1;
		break;
	case 2:
		s2 = *(volatile unsigned short *)vaddr;
		result = s2;
		break;
	case 4:
		result = *(volatile unsigned int *)vaddr;
		break;
	case 8:
		result = *(volatile unsigned long long *)vaddr;
		break;
	default:
		panic("Invalid size %d for ml_io_read(%p)", size, (void *)vaddr);
		break;
	}

#if DEVELOPMENT || DEBUG
	if (use_fences) {
		ml_memory_to_timebase_fence();
	}
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	if (__improbable(use_timeout == TRUE)) {
		kern_timeout_end(&timeout, TF_NONSPEC_TIMEBASE);
		uint64_t duration = kern_timeout_gross_duration(&timeout);

		/* Prevent the processor from calling iotrace during its
		 * initialization procedure. */
		if (current_processor()->state == PROCESSOR_RUNNING) {
			iotrace(IOTRACE_IO_READ, vaddr, paddr, size, result, kern_timeout_start_time(&timeout), duration);
		}

		if (__improbable(duration > report_read_delay)) {
			DTRACE_PHYSLAT5(physioread, uint64_t, duration,
			    uint64_t, vaddr, uint32_t, size, uint64_t, paddr, uint64_t, result);

			/* A per-range override may raise the reporting threshold. */
			uint64_t override = 0;
			override_io_timeouts(vaddr, paddr, &override, NULL);

			if (override != 0) {
#if SCHED_HYGIENE_DEBUG
				/*
				 * The IO timeout was overridden. If we were called in an
				 * interrupt handler context, that can lead to a timeout
				 * panic, so we need to abandon the measurement.
				 */
				if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
					ml_irq_debug_abandon();
				}
#endif
				report_read_delay = override;
			}
		}

		/* Re-check against the (possibly raised) threshold. */
		if (__improbable(duration > report_read_delay)) {
			if (phy_read_panic && (machine_timeout_suspended() == FALSE)) {
				char str[128];
#if defined(__x86_64__)
				panic_notify();
#endif /* defined(__x86_64__) */
				snprintf(str, sizeof(str),
				    "Read from IO vaddr 0x%lx paddr 0x%lx (result: 0x%llx) timed out:",
				    vaddr, paddr, result);
				kern_timeout_try_panic(KERN_TIMEOUT_MMIO, paddr, &timeout, str,
				    report_read_delay);
			}
		}

		if (__improbable(trace_phy_read_delay > 0 && duration > trace_phy_read_delay)) {
			KDBG(MACHDBG_CODE(DBG_MACH_IO, DBC_MACH_IO_MMIO_READ),
			    duration, VM_KERNEL_UNSLIDE_OR_PERM(vaddr), paddr, result);
		}

		(void)ml_set_interrupts_enabled_with_debug(istate, false);
	}
#endif /* ML_IO_TIMEOUTS_ENABLED */
	return result;
}
1154
1155 unsigned int
ml_io_read8(uintptr_t vaddr)1156 ml_io_read8(uintptr_t vaddr)
1157 {
1158 return (unsigned) ml_io_read(vaddr, 1);
1159 }
1160
1161 unsigned int
ml_io_read16(uintptr_t vaddr)1162 ml_io_read16(uintptr_t vaddr)
1163 {
1164 return (unsigned) ml_io_read(vaddr, 2);
1165 }
1166
1167 unsigned int
ml_io_read32(uintptr_t vaddr)1168 ml_io_read32(uintptr_t vaddr)
1169 {
1170 return (unsigned) ml_io_read(vaddr, 4);
1171 }
1172
1173 unsigned long long
ml_io_read64(uintptr_t vaddr)1174 ml_io_read64(uintptr_t vaddr)
1175 {
1176 return ml_io_read(vaddr, 8);
1177 }
1178
1179
1180 uint64_t
ml_io_read_cpu_reg(uintptr_t vaddr,int sz,__unused int logical_cpu)1181 ml_io_read_cpu_reg(uintptr_t vaddr, int sz, __unused int logical_cpu)
1182 {
1183 uint64_t val;
1184
1185
1186 val = ml_io_read(vaddr, sz);
1187
1188
1189 return val;
1190 }
1191
1192
1193 /* ml_io_write* */
1194
/*
 * ml_io_write: perform a volatile MMIO write of the low `size` bytes
 * (1, 2, 4, or 8) of `val` to `vaddr`.  Panics on any other size.
 *
 * Mirrors ml_io_read: when ML_IO_TIMEOUTS_ENABLED and a report threshold is
 * armed, the access is timed with interrupts masked; slow writes are
 * iotrace'd, fire a DTrace probe, may have their threshold raised by a
 * per-range override, and can panic via kern_timeout_try_panic.
 */
void
ml_io_write(uintptr_t vaddr, uint64_t val, int size)
{
#ifdef ML_IO_VERIFY_UNCACHEABLE
	uintptr_t paddr = pmap_verify_noncacheable(vaddr);
#elif defined(ML_IO_TIMEOUTS_ENABLED)
	/* paddr is resolved lazily below, only if timing is armed. */
	uintptr_t paddr = 0;
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	kern_timeout_t timeout;
	/* istate is only initialized (and later restored) when use_timeout is set. */
	boolean_t istate, use_timeout = FALSE;
	uint64_t report_write_delay;
#if __x86_64__
	report_write_delay = report_phy_write_delay;
#else
	report_write_delay = os_atomic_load(&report_phy_write_delay_to, relaxed);
	uint64_t trace_phy_write_delay = os_atomic_load(&trace_phy_write_delay_to, relaxed);
#endif /* !defined(__x86_64__) */
	if (__improbable(report_write_delay != 0)) {
		/* Arm the measurement: mask interrupts and record the start. */
		istate = ml_set_interrupts_enabled_with_debug(false, false);

		kern_timeout_start(&timeout, TF_NONSPEC_TIMEBASE | TF_SAMPLE_PMC);
		use_timeout = TRUE;

		if (paddr == 0) {
			paddr = kvtophys(vaddr);
		}
		/* Publish the in-flight access so remote CPUs can see it
		 * (ml_io_check_for_mmio_overrides). */
		mmio_track_t *mmiot = PERCPU_GET(mmio_tracker);
		mmiot->mmio_start_mt = kern_timeout_start_time(&timeout);
		mmiot->mmio_paddr = paddr;
		mmiot->mmio_vaddr = vaddr;
	}

#ifdef ML_IO_SIMULATE_STRETCHED_ENABLED
	if (__improbable(use_timeout && simulate_stretched_io)) {
		kern_timeout_stretch(&timeout, simulate_stretched_io);
	}
#endif /* ML_IO_SIMULATE_STRETCHED_ENABLED */
#endif /* ML_IO_TIMEOUTS_ENABLED */

#if DEVELOPMENT || DEBUG
	/* Fence timebase reads around the access unless overridden for test. */
	boolean_t use_fences = !kern_feature_override(KF_IO_TIMEOUT_OVRD);
	if (use_fences) {
		ml_timebase_to_memory_fence();
	}
#endif

	switch (size) {
	case 1:
		*(volatile uint8_t *)vaddr = (uint8_t)val;
		break;
	case 2:
		*(volatile uint16_t *)vaddr = (uint16_t)val;
		break;
	case 4:
		*(volatile uint32_t *)vaddr = (uint32_t)val;
		break;
	case 8:
		*(volatile uint64_t *)vaddr = (uint64_t)val;
		break;
	default:
		panic("Invalid size %d for ml_io_write(%p, 0x%llx)", size, (void *)vaddr, val);
		break;
	}

#if DEVELOPMENT || DEBUG
	if (use_fences) {
		ml_memory_to_timebase_fence();
	}
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	if (__improbable(use_timeout == TRUE)) {
		kern_timeout_end(&timeout, TF_NONSPEC_TIMEBASE);
		uint64_t duration = kern_timeout_gross_duration(&timeout);

		/* Prevent the processor from calling iotrace during its
		 * initialization procedure. */
		if (current_processor()->state == PROCESSOR_RUNNING) {
			iotrace(IOTRACE_IO_WRITE, vaddr, paddr, size, val, kern_timeout_start_time(&timeout), duration);
		}

		if (__improbable(duration > report_write_delay)) {
			DTRACE_PHYSLAT5(physiowrite, uint64_t, duration,
			    uint64_t, vaddr, uint32_t, size, uint64_t, paddr, uint64_t, val);

			/* A per-range override may raise the reporting threshold. */
			uint64_t override = 0;
			override_io_timeouts(vaddr, paddr, NULL, &override);

			if (override != 0) {
#if SCHED_HYGIENE_DEBUG
				/*
				 * The IO timeout was overridden. If we were called in an
				 * interrupt handler context, that can lead to a timeout
				 * panic, so we need to abandon the measurement.
				 */
				if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
					ml_irq_debug_abandon();
				}
#endif
				report_write_delay = override;
			}
		}

		/* Re-check against the (possibly raised) threshold. */
		if (__improbable(duration > report_write_delay)) {
			if (phy_write_panic && (machine_timeout_suspended() == FALSE)) {
				char str[128];
#if defined(__x86_64__)
				panic_notify();
#endif /* defined(__x86_64__) */
				snprintf(str, sizeof(str),
				    "Write to IO vaddr 0x%lx paddr 0x%lx (value: 0x%llx) timed out:",
				    vaddr, paddr, val);
				kern_timeout_try_panic(KERN_TIMEOUT_MMIO, paddr, &timeout, str,
				    report_write_delay);
			}
		}

		if (__improbable(trace_phy_write_delay > 0 && duration > trace_phy_write_delay)) {
			KDBG(MACHDBG_CODE(DBG_MACH_IO, DBC_MACH_IO_MMIO_WRITE),
			    duration, VM_KERNEL_UNSLIDE_OR_PERM(vaddr), paddr, val);
		}

		(void)ml_set_interrupts_enabled_with_debug(istate, false);
	}
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
1323
1324 void
ml_io_write8(uintptr_t vaddr,uint8_t val)1325 ml_io_write8(uintptr_t vaddr, uint8_t val)
1326 {
1327 ml_io_write(vaddr, val, 1);
1328 }
1329
1330 void
ml_io_write16(uintptr_t vaddr,uint16_t val)1331 ml_io_write16(uintptr_t vaddr, uint16_t val)
1332 {
1333 ml_io_write(vaddr, val, 2);
1334 }
1335
1336 void
ml_io_write32(uintptr_t vaddr,uint32_t val)1337 ml_io_write32(uintptr_t vaddr, uint32_t val)
1338 {
1339 ml_io_write(vaddr, val, 4);
1340 }
1341
1342 void
ml_io_write64(uintptr_t vaddr,uint64_t val)1343 ml_io_write64(uintptr_t vaddr, uint64_t val)
1344 {
1345 ml_io_write(vaddr, val, 8);
1346 }
1347
/* One registered CPU-event callback; nodes are prepended and never removed. */
struct cpu_callback_chain_elem {
	cpu_callback_t fn;
	void *param;
	struct cpu_callback_chain_elem *next;
};

/*
 * Head of the callback chain.  Readers (ml_broadcast_cpu_event) traverse it
 * lock-free; writers publish under cpu_callback_chain_lock with a release
 * store so new nodes are fully initialized before they become visible.
 */
static struct cpu_callback_chain_elem *cpu_callback_chain;
static LCK_GRP_DECLARE(cpu_callback_chain_lock_grp, "cpu_callback_chain");
static LCK_SPIN_DECLARE(cpu_callback_chain_lock, &cpu_callback_chain_lock_grp);

/* One record in the CPU-event debug ring (see cpu_event_debug_log). */
struct cpu_event_log_entry {
	uint64_t abstime;
	enum cpu_event event;
	unsigned int cpu_or_cluster;
};
1363
#if DEVELOPMENT || DEBUG

#define CPU_EVENT_RING_SIZE 128
/* Ring buffer of recent CPU events; cpu_event_widx is the last-written slot. */
static struct cpu_event_log_entry cpu_event_ring[CPU_EVENT_RING_SIZE];
static _Atomic int cpu_event_widx;
/* Count of CLUSTER_EXIT_REQUESTED events (cluster power-down cycles). */
static _Atomic uint64_t cpd_cycles;
1370
/*
 * Record a CPU event in the debug ring.
 *
 * The slot index is claimed atomically, but the three field stores are not
 * atomic with the claim: a concurrent dump or a writer lapping the ring can
 * observe a torn entry.  Accepted for debug-only logging.
 */
void
cpu_event_debug_log(enum cpu_event event, unsigned int cpu_or_cluster)
{
	int oldidx, newidx;

	/* Atomically advance the write index to claim a slot. */
	os_atomic_rmw_loop(&cpu_event_widx, oldidx, newidx, relaxed, {
		newidx = (oldidx + 1) % CPU_EVENT_RING_SIZE;
	});
	cpu_event_ring[newidx].abstime = ml_get_timebase();
	cpu_event_ring[newidx].event = event;
	cpu_event_ring[newidx].cpu_or_cluster = cpu_or_cluster;

	/* Track cluster power-down cycles for the dump header. */
	if (event == CLUSTER_EXIT_REQUESTED) {
		os_atomic_inc(&cpd_cycles, relaxed);
	}
}
1387
1388 static const char *
cpu_event_log_string(enum cpu_event e)1389 cpu_event_log_string(enum cpu_event e)
1390 {
1391 const char *event_strings[] = {
1392 "CPU_BOOT_REQUESTED",
1393 "CPU_BOOTED",
1394 "CPU_ACTIVE",
1395 "CLUSTER_ACTIVE",
1396 "CPU_EXIT_REQUESTED",
1397 "CPU_DOWN",
1398 "CLUSTER_EXIT_REQUESTED",
1399 "CPU_EXITED",
1400 "PLATFORM_QUIESCE",
1401 "PLATFORM_ACTIVE",
1402 "PLATFORM_HALT_RESTART",
1403 "PLATFORM_PANIC",
1404 "PLATFORM_PANIC_SYNC",
1405 "PLATFORM_PRE_SLEEP",
1406 "PLATFORM_POST_RESUME",
1407 };
1408
1409 assert((unsigned)e < sizeof(event_strings) / sizeof(event_strings[0]));
1410 return event_strings[e];
1411 }
1412
1413 void
dump_cpu_event_log(int (* printf_func)(const char * fmt,...))1414 dump_cpu_event_log(int (*printf_func)(const char * fmt, ...))
1415 {
1416 printf_func("CPU event history @ %016llx: (CPD cycles: %lld)\n",
1417 ml_get_timebase(), os_atomic_load(&cpd_cycles, relaxed));
1418
1419 int idx = os_atomic_load(&cpu_event_widx, relaxed);
1420 for (int c = 0; c < CPU_EVENT_RING_SIZE; c++) {
1421 idx = (idx + 1) % CPU_EVENT_RING_SIZE;
1422
1423 struct cpu_event_log_entry *e = &cpu_event_ring[idx];
1424 if (e->abstime != 0) {
1425 printf_func(" %016llx: %s %d\n", e->abstime,
1426 cpu_event_log_string(e->event), e->cpu_or_cluster);
1427 }
1428 }
1429 }
1430
#else /* DEVELOPMENT || DEBUG */

/* Release builds: CPU-event logging compiles to a no-op. */
void
cpu_event_debug_log(__unused enum cpu_event event, __unused unsigned int cpu_or_cluster)
{
	/* no logging on production builds */
}

/* Release builds: nothing was recorded, so there is nothing to dump. */
void
dump_cpu_event_log(__unused int (*printf_func)(const char * fmt, ...))
{
}

#endif /* DEVELOPMENT || DEBUG */
1445
1446 void
cpu_event_register_callback(cpu_callback_t fn,void * param)1447 cpu_event_register_callback(cpu_callback_t fn, void *param)
1448 {
1449 struct cpu_callback_chain_elem *new_elem;
1450
1451 new_elem = zalloc_permanent_type(struct cpu_callback_chain_elem);
1452 if (!new_elem) {
1453 panic("can't allocate cpu_callback_chain_elem");
1454 }
1455
1456 lck_spin_lock(&cpu_callback_chain_lock);
1457 new_elem->next = cpu_callback_chain;
1458 new_elem->fn = fn;
1459 new_elem->param = param;
1460 os_atomic_store(&cpu_callback_chain, new_elem, release);
1461 lck_spin_unlock(&cpu_callback_chain_lock);
1462 }
1463
/*
 * Unregistration is not supported: chain nodes are permanently allocated and
 * the chain is prepend-only.  Always panics.
 */
__attribute__((noreturn))
void
cpu_event_unregister_callback(__unused cpu_callback_t fn)
{
	panic("Unfortunately, cpu_event_unregister_callback is unimplemented.");
}
1470
1471 void
ml_broadcast_cpu_event(enum cpu_event event,unsigned int cpu_or_cluster)1472 ml_broadcast_cpu_event(enum cpu_event event, unsigned int cpu_or_cluster)
1473 {
1474 struct cpu_callback_chain_elem *cursor;
1475
1476 cpu_event_debug_log(event, cpu_or_cluster);
1477
1478 cursor = os_atomic_load(&cpu_callback_chain, dependency);
1479 for (; cursor != NULL; cursor = cursor->next) {
1480 cursor->fn(cursor->param, event, cpu_or_cluster);
1481 }
1482 }
1483
1484 // Initialize Machine Timeouts (see the MACHINE_TIMEOUT macro
1485 // definition)
1486
1487 void
machine_timeout_init_with_suffix(const struct machine_timeout_spec * spec,char const * suffix,bool always_enabled)1488 machine_timeout_init_with_suffix(const struct machine_timeout_spec *spec, char const *suffix, bool always_enabled)
1489 {
1490 if (!always_enabled && (wdt == -1 || (spec->skip_predicate != NULL && spec->skip_predicate(spec)))) {
1491 // This timeout should be disabled.
1492 os_atomic_store_wide((uint64_t*)spec->ptr, 0, relaxed);
1493 return;
1494 }
1495
1496 assert(suffix != NULL);
1497 assert(strlen(spec->name) <= MACHINE_TIMEOUT_MAX_NAME_LEN);
1498
1499 size_t const suffix_len = strlen(suffix);
1500
1501 size_t const dt_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + suffix_len + 1;
1502 char dt_name[dt_name_size];
1503
1504 strlcpy(dt_name, spec->name, dt_name_size);
1505 strlcat(dt_name, suffix, dt_name_size);
1506
1507 size_t const scale_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + suffix_len + strlen("-scale") + 1;
1508 char scale_name[scale_name_size];
1509
1510 strlcpy(scale_name, spec->name, scale_name_size);
1511 strlcat(scale_name, suffix, scale_name_size);
1512 strlcat(scale_name, "-scale", scale_name_size);
1513
1514 size_t const boot_arg_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + strlen("ml-timeout-") + suffix_len + 1;
1515 char boot_arg_name[boot_arg_name_size];
1516
1517 strlcpy(boot_arg_name, "ml-timeout-", boot_arg_name_size);
1518 strlcat(boot_arg_name, spec->name, boot_arg_name_size);
1519 strlcat(boot_arg_name, suffix, boot_arg_name_size);
1520
1521 size_t const boot_arg_scale_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN +
1522 strlen("ml-timeout-") + strlen("-scale") + suffix_len + 1;
1523 char boot_arg_scale_name[boot_arg_scale_name_size];
1524
1525 strlcpy(boot_arg_scale_name, "ml-timeout-", boot_arg_scale_name_size);
1526 strlcat(boot_arg_scale_name, spec->name, boot_arg_scale_name_size);
1527 strlcat(boot_arg_scale_name, suffix, boot_arg_name_size);
1528 strlcat(boot_arg_scale_name, "-scale", boot_arg_scale_name_size);
1529
1530
1531 /*
1532 * Determine base value from DT and boot-args.
1533 */
1534
1535 DTEntry base, chosen;
1536
1537 if (SecureDTLookupEntry(NULL, "/machine-timeouts", &base) != kSuccess) {
1538 base = NULL;
1539 }
1540
1541 if (SecureDTLookupEntry(NULL, "/chosen/machine-timeouts", &chosen) != kSuccess) {
1542 chosen = NULL;
1543 }
1544
1545 uint64_t timeout = spec->default_value;
1546 bool found = false;
1547
1548 uint64_t const *data = NULL;
1549 unsigned int data_size = sizeof(*data);
1550
1551 /* First look in /machine-timeouts/<name> */
1552 if (base != NULL && SecureDTGetProperty(base, dt_name, (const void **)&data, &data_size) == kSuccess) {
1553 if (data_size != sizeof(*data)) {
1554 panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/%s", __func__, data_size, dt_name);
1555 }
1556
1557 timeout = *data;
1558 found = true;
1559 }
1560
1561 /* A value in /chosen/machine-timeouts/<name> overrides */
1562 if (chosen != NULL && SecureDTGetProperty(chosen, dt_name, (const void **)&data, &data_size) == kSuccess) {
1563 if (data_size != sizeof(*data)) {
1564 panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/%s", __func__, data_size, dt_name);
1565 }
1566
1567 timeout = *data;
1568 found = true;
1569 }
1570
1571 /* A boot-arg ml-timeout-<name> overrides */
1572 uint64_t boot_arg = 0;
1573
1574 if (PE_parse_boot_argn(boot_arg_name, &boot_arg, sizeof(boot_arg))) {
1575 timeout = boot_arg;
1576 found = true;
1577 }
1578
1579
1580 /*
1581 * Determine scale value from DT and boot-args.
1582 */
1583
1584 uint64_t scale = 1;
1585 uint32_t const *scale_data;
1586 unsigned int scale_size = sizeof(scale_data);
1587
1588 /* If there is a scale factor /machine-timeouts/<name>-scale, apply it. */
1589 if (base != NULL && SecureDTGetProperty(base, scale_name, (const void **)&scale_data, &scale_size) == kSuccess) {
1590 if (scale_size != sizeof(*scale_data)) {
1591 panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/%s-scale", __func__, scale_size, dt_name);
1592 }
1593
1594 scale = *scale_data;
1595 }
1596
1597 /* If there is a scale factor /chosen/machine-timeouts/<name>-scale, use that. */
1598 if (chosen != NULL && SecureDTGetProperty(chosen, scale_name, (const void **)&scale_data, &scale_size) == kSuccess) {
1599 if (scale_size != sizeof(*scale_data)) {
1600 panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/%s-scale", __func__,
1601 scale_size, dt_name);
1602 }
1603
1604 scale = *scale_data;
1605 }
1606
1607 /* Finally, a boot-arg ml-timeout-<name>-scale takes precedence. */
1608 if (PE_parse_boot_argn(boot_arg_scale_name, &boot_arg, sizeof(boot_arg))) {
1609 scale = boot_arg;
1610 }
1611
1612 static bool global_scale_set;
1613 static uint64_t global_scale;
1614
1615 if (!global_scale_set) {
1616 /* Apply /machine-timeouts/global-scale if present */
1617 if (SecureDTGetProperty(base, "global-scale", (const void **)&scale_data, &scale_size) == kSuccess) {
1618 if (scale_size != sizeof(*scale_data)) {
1619 panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/global-scale", __func__,
1620 scale_size);
1621 }
1622
1623 global_scale = *scale_data;
1624 global_scale_set = true;
1625 }
1626
1627 /* Use /chosen/machine-timeouts/global-scale if present */
1628 if (SecureDTGetProperty(chosen, "global-scale", (const void **)&scale_data, &scale_size) == kSuccess) {
1629 if (scale_size != sizeof(*scale_data)) {
1630 panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/global-scale", __func__,
1631 scale_size);
1632 }
1633
1634 global_scale = *scale_data;
1635 global_scale_set = true;
1636 }
1637
1638 /* Finally, the boot-arg ml-timeout-global-scale takes precedence. */
1639 if (PE_parse_boot_argn("ml-timeout-global-scale", &boot_arg, sizeof(boot_arg))) {
1640 global_scale = boot_arg;
1641 global_scale_set = true;
1642 }
1643 }
1644
1645 if (global_scale_set) {
1646 scale *= global_scale;
1647 }
1648
1649 /* Compute the final timeout, and done. */
1650 if (found && timeout > 0) {
1651 /* Only apply inherent unit scale if the value came in
1652 * externally. */
1653
1654 if (spec->unit_scale == MACHINE_TIMEOUT_UNIT_TIMEBASE) {
1655 uint64_t nanoseconds = timeout / 1000;
1656 nanoseconds_to_absolutetime(nanoseconds, &timeout);
1657 } else {
1658 timeout /= spec->unit_scale;
1659 }
1660
1661 if (timeout == 0) {
1662 /* Ensure unit scaling did not disable the timeout. */
1663 timeout = 1;
1664 }
1665 }
1666
1667 if (os_mul_overflow(timeout, scale, &timeout)) {
1668 timeout = UINT64_MAX; // clamp
1669 }
1670
1671 os_atomic_store_wide((uint64_t*)spec->ptr, timeout, relaxed);
1672 }
1673
1674 void
machine_timeout_init(const struct machine_timeout_spec * spec)1675 machine_timeout_init(const struct machine_timeout_spec *spec)
1676 {
1677 machine_timeout_init_with_suffix(spec, "", false);
1678 }
1679
1680 void
machine_timeout_init_always_enabled(const struct machine_timeout_spec * spec)1681 machine_timeout_init_always_enabled(const struct machine_timeout_spec *spec)
1682 {
1683 machine_timeout_init_with_suffix(spec, "", true);
1684 }
1685
#if DEVELOPMENT || DEBUG
/*
 * Late timeout (re-)initialization, at the end of bsd_init()
 */
void
machine_timeout_bsd_init(void)
{
	/* The "-b" suffix selects the late-boot variants of the DT/boot-arg
	 * override names (e.g. /machine-timeouts/<name>-b). */
	char const * const __unused mt_suffix = "-b";
#if SCHED_HYGIENE_DEBUG
	machine_timeout_init_with_suffix(MACHINE_TIMEOUT_SPEC_REF(interrupt_masked_timeout), mt_suffix, false);
	machine_timeout_init_with_suffix(MACHINE_TIMEOUT_SPEC_REF(sched_preemption_disable_threshold_mt), mt_suffix, false);

	/*
	 * The io timeouts can inherit from interrupt_masked_timeout.
	 * Re-initialize, as interrupt_masked_timeout may have changed.
	 */
	ml_io_init_timeouts();

	extern void preemption_disable_reset_max_durations(void);
	/*
	 * Reset the preemption disable stats, so that they are not
	 * polluted by long early boot code.
	 */
	preemption_disable_reset_max_durations();
#endif /* SCHED_HYGIENE_DEBUG */
}
#endif /* DEVELOPMENT || DEBUG */
1713
1714 #if ML_IO_TIMEOUTS_ENABLED && CONFIG_XNUPOST
1715 #include <tests/xnupost.h>
1716
1717 extern kern_return_t ml_io_timeout_test(void);
1718
1719 static inline void
ml_io_timeout_test_get_timeouts(uintptr_t vaddr,uint64_t * read_timeout,uint64_t * write_timeout)1720 ml_io_timeout_test_get_timeouts(uintptr_t vaddr, uint64_t *read_timeout, uint64_t *write_timeout)
1721 {
1722 *read_timeout = 0;
1723 *write_timeout = 0;
1724
1725 vm_offset_t paddr = kvtophys(vaddr);
1726
1727 boolean_t istate = ml_set_interrupts_enabled(FALSE);
1728 override_io_timeouts(vaddr, paddr, read_timeout, write_timeout);
1729 ml_set_interrupts_enabled(istate);
1730 }
1731
1732 static inline void
ml_io_timeout_test_get_timeouts_phys(vm_offset_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)1733 ml_io_timeout_test_get_timeouts_phys(vm_offset_t paddr, uint64_t *read_timeout, uint64_t *write_timeout)
1734 {
1735 *read_timeout = 0;
1736 *write_timeout = 0;
1737
1738 boolean_t istate = ml_set_interrupts_enabled(FALSE);
1739 override_io_timeouts(0, paddr, read_timeout, write_timeout);
1740 ml_set_interrupts_enabled(istate);
1741 }
1742
1743 kern_return_t
ml_io_timeout_test(void)1744 ml_io_timeout_test(void)
1745 {
1746 const size_t SIZE = 16;
1747 /*
1748 * Page align the base address to ensure that the regions are physically
1749 * contiguous.
1750 */
1751 const uintptr_t iovaddr_base1 = (uintptr_t)kernel_pmap & ~PAGE_MASK;
1752
1753 const uintptr_t iovaddr_base2 = iovaddr_base1 + SIZE;
1754 const uintptr_t vaddr1 = iovaddr_base1 + SIZE / 2;
1755 const uintptr_t vaddr2 = iovaddr_base2 + SIZE / 2;
1756
1757 const vm_offset_t iopaddr_base1 = kvtophys(iovaddr_base1);
1758 const vm_offset_t iopaddr_base2 = kvtophys(iovaddr_base2);
1759 const vm_offset_t paddr1 = iopaddr_base1 + SIZE / 2;
1760 const vm_offset_t paddr2 = iopaddr_base2 + SIZE / 2;
1761
1762 const uint64_t READ_TIMEOUT1_US = 50000, WRITE_TIMEOUT1_US = 50001;
1763 const uint64_t READ_TIMEOUT2_US = 50002, WRITE_TIMEOUT2_US = 50003;
1764 uint64_t read_timeout1_abs, write_timeout1_abs;
1765 uint64_t read_timeout2_abs, write_timeout2_abs;
1766 nanoseconds_to_absolutetime(NSEC_PER_USEC * READ_TIMEOUT1_US, &read_timeout1_abs);
1767 nanoseconds_to_absolutetime(NSEC_PER_USEC * WRITE_TIMEOUT1_US, &write_timeout1_abs);
1768 nanoseconds_to_absolutetime(NSEC_PER_USEC * READ_TIMEOUT2_US, &read_timeout2_abs);
1769 nanoseconds_to_absolutetime(NSEC_PER_USEC * WRITE_TIMEOUT2_US, &write_timeout2_abs);
1770
1771 int err = ml_io_increase_timeouts(iovaddr_base1, 0, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1772 T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for empty region");
1773
1774 err = ml_io_increase_timeouts(iovaddr_base1, 4097, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1775 T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for region > 4096 bytes");
1776
1777 err = ml_io_increase_timeouts(UINTPTR_MAX, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1778 T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for overflowed region");
1779
1780 err = ml_io_increase_timeouts(iovaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1781 T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for first VA region should succeed");
1782
1783 err = ml_io_increase_timeouts(iovaddr_base2, SIZE, READ_TIMEOUT2_US, WRITE_TIMEOUT2_US);
1784 T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for second VA region should succeed");
1785
1786 err = ml_io_increase_timeouts(iovaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1787 T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for same region twice");
1788
1789 err = ml_io_increase_timeouts(vaddr1, (uint32_t)(vaddr2 - vaddr1), READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1790 T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for overlapping regions");
1791
1792 uint64_t read_timeout, write_timeout;
1793 ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1794 T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1795 T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1796
1797 ml_io_timeout_test_get_timeouts(vaddr2, &read_timeout, &write_timeout);
1798 T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first region");
1799 T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first region");
1800
1801 ml_io_timeout_test_get_timeouts(iovaddr_base2 + SIZE, &read_timeout, &write_timeout);
1802 T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout without override");
1803 T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout without override");
1804
1805 err = ml_io_reset_timeouts(iovaddr_base1 + 1, SIZE - 1);
1806 T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for subregion");
1807
1808 err = ml_io_reset_timeouts(iovaddr_base2 + SIZE, SIZE);
1809 T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for non-existent region");
1810
1811 err = ml_io_reset_timeouts(iovaddr_base1, SIZE);
1812 T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for first VA region should succeed");
1813
1814 ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1815 T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout for reset region");
1816 T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout for reset region");
1817
1818 err = ml_io_reset_timeouts(iovaddr_base1, SIZE);
1819 T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for same region twice");
1820
1821 err = ml_io_reset_timeouts(iovaddr_base2, SIZE);
1822 T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for second VA region should succeed");
1823
1824 err = ml_io_increase_timeouts_phys(iopaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1825 T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for first PA region should succeed");
1826
1827 err = ml_io_increase_timeouts_phys(iopaddr_base2, SIZE, READ_TIMEOUT2_US, WRITE_TIMEOUT2_US);
1828 T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for second PA region should succeed");
1829
1830 ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1831 T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1832 T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1833
1834 ml_io_timeout_test_get_timeouts(vaddr2, &read_timeout, &write_timeout);
1835 T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first region");
1836 T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first region");
1837
1838 ml_io_timeout_test_get_timeouts_phys(paddr1, &read_timeout, &write_timeout);
1839 T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1840 T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1841
1842 ml_io_timeout_test_get_timeouts_phys(paddr2, &read_timeout, &write_timeout);
1843 T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first physical region");
1844 T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first physical region");
1845
1846 err = ml_io_reset_timeouts_phys(iopaddr_base1, SIZE);
1847 T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for first PA region should succeed");
1848
1849 err = ml_io_reset_timeouts_phys(iopaddr_base2, SIZE);
1850 T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for second PA region should succeed");
1851
1852 ml_io_timeout_test_get_timeouts_phys(paddr1, &read_timeout, &write_timeout);
1853 T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout for reset region");
1854 T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout for reset region");
1855
1856 return KERN_SUCCESS;
1857 }
#endif /* ML_IO_TIMEOUTS_ENABLED && CONFIG_XNUPOST */
1859
1860 #if DEVELOPMENT || DEBUG
1861 static int
ml_io_read_cpu_reg_test(__unused int64_t in,int64_t * out)1862 ml_io_read_cpu_reg_test(__unused int64_t in, int64_t *out)
1863 {
1864 printf("Testing ml_io_read_cpu_reg()...\n");
1865
1866 ml_io_read_test_mode = 1;
1867 boolean_t istate = ml_set_interrupts_enabled_with_debug(false, false);
1868 (void) ml_io_read_cpu_reg((uintptr_t)1, 8, 1);
1869 (void) ml_io_read_cpu_reg((uintptr_t)2, 8, 1);
1870 ml_set_interrupts_enabled_with_debug(istate, false);
1871 (void) ml_io_read_cpu_reg((uintptr_t)1, 8, 1);
1872 (void) ml_io_read_cpu_reg((uintptr_t)2, 8, 1);
1873 ml_io_read_test_mode = 0;
1874
1875 *out = 0;
1876 return 0;
1877 }
1878 SYSCTL_TEST_REGISTER(ml_io_read_cpu_reg, ml_io_read_cpu_reg_test);
1879 #endif /* DEVELOPMENT || DEBUG */
1880