xref: /xnu-11417.140.69/osfmk/kern/machine.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2000-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	kern/machine.c
60  *	Author:	Avadis Tevanian, Jr.
61  *	Date:	1987
62  *
63  *	Support for machine independent machine abstraction.
64  */
65 
66 #include <string.h>
67 
68 #include <mach/mach_types.h>
69 #include <mach/boolean.h>
70 #include <mach/kern_return.h>
71 #include <mach/machine.h>
72 #include <mach/host_info.h>
73 #include <mach/host_reboot.h>
74 #include <mach/host_priv_server.h>
75 #include <mach/processor_server.h>
76 #include <mach/sdt.h>
77 
78 #include <kern/kern_types.h>
79 #include <kern/cpu_data.h>
80 #include <kern/ipc_host.h>
81 #include <kern/host.h>
82 #include <kern/machine.h>
83 #include <kern/misc_protos.h>
84 #include <kern/percpu.h>
85 #include <kern/processor.h>
86 #include <kern/queue.h>
87 #include <kern/sched.h>
88 #include <kern/startup.h>
89 #include <kern/task.h>
90 #include <kern/thread.h>
91 #include <kern/iotrace.h>
92 
93 #include <libkern/OSDebug.h>
94 #if ML_IO_TIMEOUTS_ENABLED
95 #include <libkern/tree.h>
96 #endif
97 
98 #include <pexpert/device_tree.h>
99 
100 #include <machine/commpage.h>
101 #include <machine/machine_routines.h>
102 
103 #if HIBERNATION
104 #include <IOKit/IOHibernatePrivate.h>
105 #endif
106 #include <IOKit/IOPlatformExpert.h>
107 
108 #if CONFIG_DTRACE
109 extern void (*dtrace_cpu_state_changed_hook)(int, boolean_t);
110 #endif
111 
112 #if defined(__arm64__)
113 extern void wait_while_mp_kdp_trap(bool check_SIGPdebug);
114 #if CONFIG_SPTM
115 #include <arm64/sptm/pmap/pmap_data.h>
116 #else
117 #include <arm/pmap/pmap_data.h>
118 #endif /* CONFIG_SPTM */
119 #endif /* defined(__arm64__) */
120 
121 #if defined(__x86_64__)
122 #include <i386/panic_notify.h>
123 #endif
124 
125 #if ML_IO_TIMEOUTS_ENABLED
126 #if defined(__x86_64__)
127 #define ml_io_timestamp mach_absolute_time
128 #else
129 #define ml_io_timestamp ml_get_timebase
130 #endif /* __x86_64__ */
131 #endif /* ML_IO_TIMEOUTS_ENABLED */
132 
/*
 *	Exported variables:
 */

/* Backs the "wdt" boot-arg (default 0); consumed elsewhere -- TODO confirm consumer. */
TUNABLE(long, wdt, "wdt", 0);

/* Machine-wide attribute record exported to the rest of the kernel. */
struct machine_info     machine_info;


/* Forwards */

/* Idle-thread continuation that carries out the processor shutdown. */
static void
processor_offline(void * parameter, __unused wait_result_t result);

/* Final shutdown step, run on the interrupt stack; never returns. */
static void
processor_offline_intstack(processor_t processor) __dead2;
148 
149 
150 /*
151  *	processor_up:
152  *
153  *	Flag processor as up and running, and available
154  *	for scheduling.
155  */
void
processor_up(
	processor_t                     processor)
{
	/* Block scheduler-level preemption while publishing the CPU as online. */
	spl_t s = splsched();
	/* Reset the cross-CPU AST signalling state for this processor. */
	init_ast_check(processor);

#if defined(__arm64__)
	/*
	 * A processor coming online won't have received a SIGPdebug signal
	 * to cause it to spin while a stackshot or panic is taking place,
	 * so spin here on mp_kdp_trap.
	 *
	 * However, since cpu_signal() is not yet enabled for this processor,
	 * there is a race if we have just passed this when a cpu_signal()
	 * is attempted.  The sender will assume the cpu is offline, so it will
	 * not end up spinning anywhere.  See processor_cpu_reinit() for the fix
	 * for this race.
	 */
	wait_while_mp_kdp_trap(false);
#endif

	/* Boot CPU coming online for the first time, either at boot or after sleep */
	__assert_only bool is_first_online_processor;

	is_first_online_processor = sched_mark_processor_online(processor,
	    processor->last_startup_reason);

	/*
	 * Sanity check: unless this is the first processor to come online,
	 * a coordinated startup (processor_instartup) must be in progress.
	 */
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	assert(processor->processor_instartup == true || is_first_online_processor);
	simple_unlock(&processor_start_state_lock);

	splx(s);

#if defined(__x86_64__)
	/* On arm64 the corresponding ml_cpu_up() happens in processor_cpu_reinit(). */
	ml_cpu_up();
#endif /* defined(__x86_64__) */

#if CONFIG_DTRACE
	/* Notify DTrace that this CPU can take probes again. */
	if (dtrace_cpu_state_changed_hook) {
		(*dtrace_cpu_state_changed_hook)(processor->cpu_id, TRUE);
	}
#endif
}
200 
201 #include <atm/atm_internal.h>
202 
203 kern_return_t
host_reboot(host_priv_t host_priv,int options)204 host_reboot(
205 	host_priv_t             host_priv,
206 	int                             options)
207 {
208 	if (host_priv == HOST_PRIV_NULL) {
209 		return KERN_INVALID_HOST;
210 	}
211 
212 #if DEVELOPMENT || DEBUG
213 	if (options & HOST_REBOOT_DEBUGGER) {
214 		Debugger("Debugger");
215 		return KERN_SUCCESS;
216 	}
217 #endif
218 
219 	if (options & HOST_REBOOT_UPSDELAY) {
220 		// UPS power cutoff path
221 		PEHaltRestart( kPEUPSDelayHaltCPU );
222 	} else {
223 		halt_all_cpus(!(options & HOST_REBOOT_HALT));
224 	}
225 
226 	return KERN_SUCCESS;
227 }
228 
/*
 *	processor_assign:
 *
 *	Reassigning a processor to a different processor set is not
 *	supported; this MIG entry point unconditionally fails.
 */
kern_return_t
processor_assign(
	__unused processor_t            processor,
	__unused processor_set_t        new_pset,
	__unused boolean_t              wait)
{
	return KERN_FAILURE;
}
237 
/*
 *	processor_doshutdown:
 *
 *	Shut down the given processor, or park the boot processor for the
 *	final system sleep.  Runs in a schedulable context with both
 *	cluster_powerdown_lock and processor_updown_lock held.
 */
void
processor_doshutdown(
	processor_t     processor,
	bool            is_final_system_sleep)
{
	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&processor_updown_lock, LCK_MTX_ASSERT_OWNED);

	if (!processor->processor_booted) {
		panic("processor %d not booted", processor->cpu_id);
	}

	/* Final sleep is only legal on the boot CPU once it's the last one up. */
	if (is_final_system_sleep) {
		assert(processor == current_processor());
		assert(processor == master_processor);
		assert(processor_avail_count == 1);
	}

	processor_set_t pset = processor->processor_set;

	ml_cpu_begin_state_transition(processor->cpu_id);

	/* Tell interested subsystems this CPU is about to go away. */
	ml_broadcast_cpu_event(CPU_EXIT_REQUESTED, processor->cpu_id);

#if HIBERNATION
	if (is_final_system_sleep) {
		/*
		 * Ensure the page queues are in a state where the hibernation
		 * code can manipulate them without requiring other threads
		 * to be scheduled.
		 *
		 * This operation can block,
		 * and unlock must be done from the same thread.
		 */
		assert(processor_avail_count < 2);
		hibernate_vm_lock();
	}
#endif

	spl_t s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
	pset_lock(pset);

	/* The processor must currently be fully online. */
	assert(processor->state != PROCESSOR_START);
	assert(processor->state != PROCESSOR_PENDING_OFFLINE);
	assert(processor->state != PROCESSOR_OFF_LINE);

	assert(!processor->processor_inshutdown);
	processor->processor_inshutdown = true;

	assert(processor->processor_offline_state == PROCESSOR_OFFLINE_RUNNING);
	processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_BEGIN_SHUTDOWN);

	/* Except for final sleep, some other CPU must stay online to run threads. */
	if (!is_final_system_sleep) {
		sched_assert_not_last_online_cpu(processor->cpu_id);
	}

	pset_unlock(pset);
	simple_unlock(&sched_available_cores_lock);

	if (is_final_system_sleep) {
		assert(processor == current_processor());

#if HIBERNATION
		/*
		 * After this point, the system is now
		 * committed to hibernation and must
		 * not run any other thread that could take this lock.
		 */
		hibernate_vm_unlock();
#endif
	} else {
		/*
		 * Get onto the processor to shut down.
		 * The scheduler picks this thread naturally according to its
		 * priority.
		 * The processor can run any other thread if this one blocks.
		 * So, don't block.
		 */
		processor_t prev = thread_bind(processor);
		thread_block(THREAD_CONTINUE_NULL);

		/* interrupts still disabled */
		assert(ml_get_interrupts_enabled() == FALSE);

		assert(processor == current_processor());
		assert(processor->processor_inshutdown);

		thread_bind(prev);
		/* interrupts still disabled */
	}

	/*
	 * Continue processor shutdown on the processor's idle thread.
	 * The handoff won't fail because the idle thread has a reserved stack.
	 * Switching to the idle thread leaves interrupts disabled,
	 * so we can't accidentally take an interrupt after the context switch.
	 */
	thread_t shutdown_thread = processor->idle_thread;
	shutdown_thread->continuation = processor_offline;
	/* The bool rides in the continuation's parameter slot. */
	shutdown_thread->parameter = (void*)is_final_system_sleep;

	thread_run(current_thread(), THREAD_CONTINUE_NULL, NULL, shutdown_thread);

	/*
	 * After this point, we are in regular scheduled context on a remaining
	 * available CPU. Interrupts are still disabled.
	 */

	if (is_final_system_sleep) {
		/*
		 * We are coming out of system sleep here, so there won't be a
		 * corresponding processor_startup for this processor, so we
		 * need to put it back in the correct running state.
		 *
		 * There's nowhere to execute a call to CPU_EXITED during system
		 * sleep for the boot processor, and it's already been CPU_BOOTED
		 * by this point anyways, so skip the call.
		 */
		assert(current_processor() == master_processor);
		assert(processor->state == PROCESSOR_RUNNING);
		assert(processor->processor_inshutdown);
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
		processor->processor_inshutdown = false;
		processor_update_offline_state(processor, PROCESSOR_OFFLINE_RUNNING);

		splx(s);
	} else {
		splx(s);

		/* Wait until the target CPU has actually halted. */
		cpu_exit_wait(processor->cpu_id);

		s = splsched();
		simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
		pset_lock(pset);
		assert(processor->processor_inshutdown);
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_PENDING_OFFLINE);
		assert(processor->state == PROCESSOR_PENDING_OFFLINE);
		pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_CPU_OFFLINE);
		pset_unlock(pset);
		simple_unlock(&sched_available_cores_lock);
		splx(s);

		/* Notify subsystems, then cut power to the core. */
		ml_broadcast_cpu_event(CPU_EXITED, processor->cpu_id);
		ml_cpu_power_disable(processor->cpu_id);

		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_CPU_OFFLINE);
		processor_update_offline_state(processor, PROCESSOR_OFFLINE_FULLY_OFFLINE);
	}

	ml_cpu_end_state_transition(processor->cpu_id);
}
391 
392 /*
393  * Called in the context of the idle thread to shut down the processor
394  *
395  * A shut-down processor looks like it's 'running' the idle thread parked
396  * in this routine, but it's actually been powered off and has no hardware state.
397  */
static void
processor_offline(
	void * parameter,
	__unused wait_result_t result)
{
	/* The flag was smuggled through the continuation parameter slot. */
	bool is_final_system_sleep = (bool) parameter;
	processor_t processor = current_processor();
	thread_t self = current_thread();
	__assert_only thread_t old_thread = THREAD_NULL;

	/* Must be running as this processor's idle thread with interrupts off. */
	assert(self->state & TH_IDLE);
	assert(processor->idle_thread == self);
	assert(ml_get_interrupts_enabled() == FALSE);
	assert(self->continuation == NULL);
	assert(processor->processor_online == true);
	assert(processor->running_timers_active == false);

	if (is_final_system_sleep) {
		assert(processor == current_processor());
		assert(processor == master_processor);
		assert(processor_avail_count == 1);
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROCESSOR_SHUTDOWN) | DBG_FUNC_START, processor->cpu_id);

	bool enforce_quiesce_safety = gEnforcePlatformActionSafety;

	/*
	 * Scheduling is now disabled for this processor.
	 * Ensure that primitives that need scheduling (like mutexes) know this.
	 */
	if (enforce_quiesce_safety) {
		disable_preemption_without_measurements();
	}

#if CONFIG_DTRACE
	/* Tell DTrace this CPU is going away before it stops taking probes. */
	if (dtrace_cpu_state_changed_hook) {
		(*dtrace_cpu_state_changed_hook)(processor->cpu_id, FALSE);
	}
#endif

	smr_cpu_down(processor, SMR_CPU_REASON_OFFLINE);

	/* Drain pending IPIs for the last time here. */
	ml_cpu_down();

	sched_mark_processor_offline(processor, is_final_system_sleep);

	/*
	 * Switch to the interrupt stack and shut down the processor.
	 *
	 * When the processor comes back, it will eventually call load_context which
	 * restores the context saved by machine_processor_shutdown, returning here.
	 */
	old_thread = machine_processor_shutdown(self, processor_offline_intstack, processor);

	/*
	 * The processor is back. sched_mark_processor_online and
	 * friends have already run via processor_up.
	 */

	/* old_thread should be NULL because we got here through Load_context */
	assert(old_thread == THREAD_NULL);

	assert(processor == current_processor());
	assert(processor->idle_thread == current_thread());
	assert(processor->processor_online == true);

	assert(ml_get_interrupts_enabled() == FALSE);
	assert(self->continuation == NULL);

	/* Extract the machine_param value stashed by secondary_cpu_main */
	void * machine_param = self->parameter;
	self->parameter = NULL;

	processor_cpu_reinit(machine_param, true, is_final_system_sleep);

	if (enforce_quiesce_safety) {
		enable_preemption();
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROCESSOR_SHUTDOWN) | DBG_FUNC_END, processor->cpu_id);

	/*
	 * Now that the processor is back, invoke the idle thread to find out what to do next.
	 * idle_thread will enable interrupts.
	 */
	thread_block(idle_thread);
	/*NOTREACHED*/
}
488 
489 /*
490  * Complete the shutdown and place the processor offline.
491  *
492  * Called at splsched in the shutdown context
493  * (i.e. on the idle thread, on the interrupt stack)
494  *
495  * The onlining half of this is done in load_context().
496  */
static void
processor_offline_intstack(
	processor_t processor)
{
	assert(processor == current_processor());
	assert(processor->active_thread == current_thread());

	/* Charge the CPU's remaining time to idle before it disappears. */
	struct recount_snap snap = { 0 };
	recount_snapshot(&snap);
	recount_processor_idle(&processor->pr_recount, &snap);

	smr_cpu_leave(processor, processor->last_dispatch);

	PMAP_DEACTIVATE_KERNEL(processor->cpu_id);

	/* Power down the core; this call does not return on success. */
	cpu_sleep();
	panic("zombie processor");
	/*NOTREACHED*/
}
516 
517 /*
518  * Called on the idle thread with interrupts disabled to initialize a
519  * secondary processor on boot or to reinitialize any processor on resume
520  * from processor offline.
521  */
void
processor_cpu_reinit(void* machine_param,
    __unused bool wait_for_cpu_signal,
    __assert_only bool is_final_system_sleep)
{
	/* Re-initialize the processor */
	machine_cpu_reinit(machine_param);

#if defined(__arm64__)
	/*
	 * See the comments for wait_while_mp_kdp_trap in processor_up().
	 *
	 * SIGPdisabled is cleared (to enable cpu_signal() to succeed with this processor)
	 * the first time we take an IPI.  This is triggered by machine_cpu_reinit(), above,
	 * which calls cpu_machine_init()->PE_cpu_machine_init()->PE_cpu_signal() which sends
	 * a self-IPI to ensure that happens when we enable interrupts.  So enable interrupts
	 * here so that cpu_signal() can succeed before we spin on mp_kdp_trap.
	 */
	assert_ml_cpu_signal_is_enabled(false);

	ml_set_interrupts_enabled(TRUE);

	/* Optionally block until the self-IPI has re-enabled cpu_signal(). */
	if (wait_for_cpu_signal) {
		ml_wait_for_cpu_signal_to_enable();
	}

	ml_set_interrupts_enabled(FALSE);

	wait_while_mp_kdp_trap(true);

	/*
	 * At this point,
	 * if a stackshot or panic is in progress, we either spin on mp_kdp_trap
	 * or we successfully received a SIGPdebug signal which will cause us to
	 * break out of the spin on mp_kdp_trap and instead
	 * spin next time interrupts are enabled in idle_thread().
	 */
	if (wait_for_cpu_signal) {
		assert_ml_cpu_signal_is_enabled(true);
	}

	/*
	 * Now that we know SIGPdisabled is cleared, we can publish that
	 * this CPU has fully come out of offline state.
	 *
	 * Without wait_for_cpu_signal, we'll publish this earlier than
	 * cpu_signal is actually ready, but as long as it's ready by next S2R,
	 * it will be good enough.
	 */
	ml_cpu_up();
#endif

	/*
	 * Interrupts must be disabled while processor_start_state_lock is
	 * held to prevent a deadlock with CPU startup of other CPUs that
	 * may be proceeding in parallel to this CPU's reinitialization.
	 */
	spl_t s = splsched();
	processor_t processor = current_processor();

	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	assert(processor->processor_instartup == true || is_final_system_sleep);
	/* Startup is complete; clear the flag before waking waiters below. */
	processor->processor_instartup = false;
	simple_unlock(&processor_start_state_lock);

	splx(s);

	/* Wake any thread sleeping on this processor's startup flag. */
	thread_wakeup((event_t)&processor->processor_instartup);
}
591 
592 kern_return_t
host_get_boot_info(host_priv_t host_priv,kernel_boot_info_t boot_info)593 host_get_boot_info(
594 	host_priv_t         host_priv,
595 	kernel_boot_info_t  boot_info)
596 {
597 	const char *src = "";
598 	if (host_priv == HOST_PRIV_NULL) {
599 		return KERN_INVALID_HOST;
600 	}
601 
602 	/*
603 	 * Copy first operator string terminated by '\0' followed by
604 	 *	standardized strings generated from boot string.
605 	 */
606 	src = machine_boot_info(boot_info, KERNEL_BOOT_INFO_MAX);
607 	if (src != boot_info) {
608 		(void) strncpy(boot_info, src, KERNEL_BOOT_INFO_MAX);
609 	}
610 
611 	return KERN_SUCCESS;
612 }
613 
614 // These are configured through sysctls.
615 #if DEVELOPMENT || DEBUG
616 uint32_t phy_read_panic = 1;
617 uint32_t phy_write_panic = 1;
618 uint64_t simulate_stretched_io = 0;
619 #else
620 uint32_t phy_read_panic = 0;
621 uint32_t phy_write_panic = 0;
622 #endif
623 
624 #if !defined(__x86_64__)
625 
626 #if DEVELOPMENT || DEBUG
627 static const uint64_t TIMEBASE_TICKS_PER_USEC = 24000000ULL / USEC_PER_SEC;
628 static const uint64_t DEFAULT_TRACE_PHY_TIMEOUT = 100 * TIMEBASE_TICKS_PER_USEC;
629 #else
630 static const uint64_t DEFAULT_TRACE_PHY_TIMEOUT = 0;
631 #endif
632 
633 // The MACHINE_TIMEOUT facility only exists on ARM.
634 MACHINE_TIMEOUT_DEV_WRITEABLE(report_phy_read_delay_to, "report-phy-read-delay", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
635 MACHINE_TIMEOUT_DEV_WRITEABLE(report_phy_write_delay_to, "report-phy-write-delay", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
636 MACHINE_TIMEOUT_DEV_WRITEABLE(trace_phy_read_delay_to, "trace-phy-read-delay", DEFAULT_TRACE_PHY_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
637 MACHINE_TIMEOUT_DEV_WRITEABLE(trace_phy_write_delay_to, "trace-phy-write-delay", DEFAULT_TRACE_PHY_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
638 
#if SCHED_HYGIENE_DEBUG
/*
 * Note: The interrupt-masked timeout goes through two initializations - one
 * early in boot and one later. Thus this function is also called twice and
 * can't be marked '__startup_func'.
 */
static void
ml_io_init_timeouts(void)
{
	/*
	 * The timeouts may be completely disabled via an override.
	 */
	if (kern_feature_override(KF_IO_TIMEOUT_OVRD)) {
		os_atomic_store(&report_phy_write_delay_to, 0, relaxed);
		os_atomic_store(&report_phy_read_delay_to, 0, relaxed);
		return;
	}

	/*
	 * There may be no interrupt masked timeout set.
	 *
	 * Test the snapshot taken above rather than re-reading the atomic
	 * variable: the original re-read could observe a different value
	 * than the one used for the MIN() computations below.
	 */
	const uint64_t interrupt_masked_to = os_atomic_load(&interrupt_masked_timeout, relaxed);
	if (interrupt_masked_to == 0) {
		return;
	}

	/*
	 * Inherit from the interrupt masked timeout if smaller and the timeout
	 * hasn't been explicitly set via boot-arg.
	 */
	uint64_t arg = 0;

	if (!PE_parse_boot_argn("ml-timeout-report-phy-read-delay", &arg, sizeof(arg))) {
		uint64_t report_phy_read_delay = os_atomic_load(&report_phy_read_delay_to, relaxed);
		report_phy_read_delay = report_phy_read_delay == 0 ?
		    interrupt_masked_to :
		    MIN(report_phy_read_delay, interrupt_masked_to);
		os_atomic_store(&report_phy_read_delay_to, report_phy_read_delay, relaxed);
	}

	if (!PE_parse_boot_argn("ml-timeout-report-phy-write-delay", &arg, sizeof(arg))) {
		uint64_t report_phy_write_delay = os_atomic_load(&report_phy_write_delay_to, relaxed);
		report_phy_write_delay = report_phy_write_delay == 0 ?
		    interrupt_masked_to :
		    MIN(report_phy_write_delay, interrupt_masked_to);
		os_atomic_store(&report_phy_write_delay_to, report_phy_write_delay, relaxed);
	}
}

/*
 * It's important that this happens after machine timeouts have initialized so
 * the correct timeouts can be inherited.
 */
STARTUP(TIMEOUTS, STARTUP_RANK_SECOND, ml_io_init_timeouts);
#endif /* SCHED_HYGIENE_DEBUG */
694 
695 extern pmap_paddr_t kvtophys(vm_offset_t va);
696 #endif /* !defined(__x86_64__) */
697 
698 #if ML_IO_TIMEOUTS_ENABLED
699 
/* Lock group and spinlock guarding the override RB trees below. */
static LCK_GRP_DECLARE(io_timeout_override_lock_grp, "io_timeout_override");
static LCK_SPIN_DECLARE(io_timeout_override_lock, &io_timeout_override_lock_grp);

/* One I/O timeout override: a [base, base + size) range and its timeouts. */
struct io_timeout_override_entry {
	RB_ENTRY(io_timeout_override_entry) tree;	/* RB-tree linkage */

	uintptr_t ioaddr_base;		/* first address covered by this override */
	unsigned int size;		/* length of the range in bytes */
	uint32_t read_timeout;		/* read timeout, absolute-time units */
	uint32_t write_timeout;		/* write timeout, absolute-time units */
};
711 
712 static inline int
io_timeout_override_cmp(const struct io_timeout_override_entry * a,const struct io_timeout_override_entry * b)713 io_timeout_override_cmp(const struct io_timeout_override_entry *a, const struct io_timeout_override_entry *b)
714 {
715 	if (a->ioaddr_base < b->ioaddr_base) {
716 		return -1;
717 	} else if (a->ioaddr_base > b->ioaddr_base) {
718 		return 1;
719 	} else {
720 		return 0;
721 	}
722 }
723 
/*
 * Two override trees: one keyed by physical address, one by virtual
 * address.  Both are protected by io_timeout_override_lock.
 */
static RB_HEAD(io_timeout_override, io_timeout_override_entry)
io_timeout_override_root_pa, io_timeout_override_root_va;

/* Generate the red-black tree operations (including RB_PREV) for both trees. */
RB_PROTOTYPE_PREV(io_timeout_override, io_timeout_override_entry, tree, io_timeout_override_cmp);
RB_GENERATE_PREV(io_timeout_override, io_timeout_override_entry, tree, io_timeout_override_cmp);
729 
/*
 * Register a timeout override for the I/O range [ioaddr_base,
 * ioaddr_base + size).  Timeouts are supplied in microseconds and stored
 * in absolute-time units.  Returns KERN_INVALID_ARGUMENT for a
 * zero-sized, wrapping, duplicate, or overlapping range, or if a
 * converted timeout doesn't fit in 32 bits.
 */
static int
io_increase_timeouts(struct io_timeout_override *root, uintptr_t ioaddr_base,
    unsigned int size, uint32_t read_timeout_us, uint32_t write_timeout_us)
{
	/* Stored timeouts are 32-bit; reject conversions that would truncate. */
	const uint64_t MAX_TIMEOUT_ABS = UINT32_MAX;

	assert(preemption_enabled());

	int ret = KERN_SUCCESS;

	if (size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Reject ranges that wrap around the end of the address space. */
	uintptr_t ioaddr_end;
	if (os_add_overflow(ioaddr_base, size - 1, &ioaddr_end)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Convert and range-check the timeouts before taking the lock. */
	uint64_t read_timeout_abs, write_timeout_abs;
	nanoseconds_to_absolutetime(NSEC_PER_USEC * read_timeout_us, &read_timeout_abs);
	nanoseconds_to_absolutetime(NSEC_PER_USEC * write_timeout_us, &write_timeout_abs);
	if (read_timeout_abs > MAX_TIMEOUT_ABS || write_timeout_abs > MAX_TIMEOUT_ABS) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Allocate outside the lock; Z_NOFAIL means this cannot return NULL. */
	struct io_timeout_override_entry *node = kalloc_type(struct io_timeout_override_entry, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	node->ioaddr_base = ioaddr_base;
	node->size = size;
	node->read_timeout = (uint32_t)read_timeout_abs;
	node->write_timeout = (uint32_t)write_timeout_abs;

	/*
	 * Interrupt handlers are allowed to call ml_io_{read,write}*, so
	 * interrupts must be disabled any time io_timeout_override_lock is
	 * held.  Otherwise the CPU could take an interrupt while holding the
	 * lock, invoke an ISR that calls ml_io_{read,write}*, and deadlock
	 * trying to acquire the lock again.
	 */
	boolean_t istate = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock(&io_timeout_override_lock);
	/* Non-NULL return means an entry with the same base already exists. */
	if (RB_INSERT(io_timeout_override, root, node)) {
		ret = KERN_INVALID_ARGUMENT;
		goto out;
	}

	/* Check that this didn't create any new overlaps */
	struct io_timeout_override_entry *prev = RB_PREV(io_timeout_override, root, node);
	if (prev && (prev->ioaddr_base + prev->size) > node->ioaddr_base) {
		RB_REMOVE(io_timeout_override, root, node);
		ret = KERN_INVALID_ARGUMENT;
		goto out;
	}
	struct io_timeout_override_entry *next = RB_NEXT(io_timeout_override, root, node);
	if (next && (node->ioaddr_base + node->size) > next->ioaddr_base) {
		RB_REMOVE(io_timeout_override, root, node);
		ret = KERN_INVALID_ARGUMENT;
		goto out;
	}

out:
	lck_spin_unlock(&io_timeout_override_lock);
	ml_set_interrupts_enabled(istate);
	if (ret != KERN_SUCCESS) {
		/* On failure the node is not in the tree; free it. */
		kfree_type(struct io_timeout_override_entry, node);
	}
	return ret;
}
798 
799 static int
io_reset_timeouts(struct io_timeout_override * root,uintptr_t ioaddr_base,unsigned int size)800 io_reset_timeouts(struct io_timeout_override *root, uintptr_t ioaddr_base, unsigned int size)
801 {
802 	assert(preemption_enabled());
803 
804 	struct io_timeout_override_entry key = { .ioaddr_base = ioaddr_base };
805 
806 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
807 	lck_spin_lock(&io_timeout_override_lock);
808 	struct io_timeout_override_entry *node = RB_FIND(io_timeout_override, root, &key);
809 	if (node) {
810 		if (node->size == size) {
811 			RB_REMOVE(io_timeout_override, root, node);
812 		} else {
813 			node = NULL;
814 		}
815 	}
816 	lck_spin_unlock(&io_timeout_override_lock);
817 	ml_set_interrupts_enabled(istate);
818 
819 	if (!node) {
820 		return KERN_NOT_FOUND;
821 	}
822 
823 	kfree_type(struct io_timeout_override_entry, node);
824 	return KERN_SUCCESS;
825 }
826 
827 static bool
io_override_timeout(struct io_timeout_override * root,uintptr_t addr,uint64_t * read_timeout,uint64_t * write_timeout)828 io_override_timeout(struct io_timeout_override *root, uintptr_t addr,
829     uint64_t *read_timeout, uint64_t *write_timeout)
830 {
831 	assert(!ml_get_interrupts_enabled());
832 	assert3p(read_timeout, !=, NULL);
833 	assert3p(write_timeout, !=, NULL);
834 
835 	struct io_timeout_override_entry *node = RB_ROOT(root);
836 
837 	lck_spin_lock(&io_timeout_override_lock);
838 	/* RB_FIND() doesn't support custom cmp functions, so we have to open-code our own */
839 	while (node) {
840 		if (node->ioaddr_base <= addr && addr < node->ioaddr_base + node->size) {
841 			*read_timeout = node->read_timeout;
842 			*write_timeout = node->write_timeout;
843 			lck_spin_unlock(&io_timeout_override_lock);
844 			return true;
845 		} else if (addr < node->ioaddr_base) {
846 			node = RB_LEFT(node, tree);
847 		} else {
848 			node = RB_RIGHT(node, tree);
849 		}
850 	}
851 	lck_spin_unlock(&io_timeout_override_lock);
852 
853 	return false;
854 }
855 
856 static bool
io_override_timeout_ss(uint64_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)857 io_override_timeout_ss(uint64_t paddr, uint64_t *read_timeout, uint64_t *write_timeout)
858 {
859 #if defined(__arm64__)
860 
861 	/*
862 	 * PCIe regions are marked with PMAP_IO_RANGE_STRONG_SYNC. Apply a
863 	 * timeout greater than two PCIe completion timeouts (90ms) as they can
864 	 * stack.
865 	 */
866 	#define STRONG_SYNC_TIMEOUT 2160000 /* 90ms */
867 
868 	pmap_io_range_t *range = pmap_find_io_attr(paddr);
869 	if (range != NULL && (range->wimg & PMAP_IO_RANGE_STRONG_SYNC) != 0) {
870 		*read_timeout = STRONG_SYNC_TIMEOUT;
871 		*write_timeout = STRONG_SYNC_TIMEOUT;
872 		return true;
873 	}
874 #else
875 	(void)paddr;
876 	(void)read_timeout;
877 	(void)write_timeout;
878 #endif /* __arm64__ */
879 	return false;
880 }
881 
882 /*
883  * Return timeout override values for the read/write timeout for a given
884  * address.
885  * A virtual address (vaddr), physical address (paddr) or both may be passed.
886  * Up to three separate timeout overrides can be found
887  *  - A virtual address override
888  *  - A physical address override
889  *  - A strong sync override
890  *  The largest override found is returned.
891  */
892 void
override_io_timeouts(uintptr_t vaddr,uint64_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)893 override_io_timeouts(uintptr_t vaddr, uint64_t paddr, uint64_t *read_timeout,
894     uint64_t *write_timeout)
895 {
896 	uint64_t rt_va = 0, wt_va = 0, rt_pa = 0, wt_pa = 0, rt_ss = 0, wt_ss = 0;
897 
898 	if (vaddr != 0) {
899 		/* Override from virtual address. */
900 		io_override_timeout(&io_timeout_override_root_va, vaddr, &rt_va, &wt_va);
901 	}
902 
903 	if (paddr != 0) {
904 		/* Override from physical address. */
905 		io_override_timeout(&io_timeout_override_root_pa, paddr, &rt_pa, &wt_pa);
906 
907 		/* Override from strong sync range. */
908 		io_override_timeout_ss(paddr, &rt_ss, &wt_ss);
909 	}
910 
911 	if (read_timeout != NULL) {
912 		*read_timeout =  MAX(MAX(rt_va, rt_pa), rt_ss);
913 	}
914 
915 	if (write_timeout != NULL) {
916 		*write_timeout = MAX(MAX(wt_va, wt_pa), wt_ss);
917 	}
918 }
919 
920 #endif /* ML_IO_TIMEOUTS_ENABLED */
921 
922 int
ml_io_increase_timeouts(uintptr_t ioaddr_base,unsigned int size,uint32_t read_timeout_us,uint32_t write_timeout_us)923 ml_io_increase_timeouts(uintptr_t ioaddr_base, unsigned int size,
924     uint32_t read_timeout_us, uint32_t write_timeout_us)
925 {
926 #if ML_IO_TIMEOUTS_ENABLED
927 	const size_t MAX_SIZE = 4096;
928 
929 	if (size > MAX_SIZE) {
930 		return KERN_INVALID_ARGUMENT;
931 	}
932 
933 	return io_increase_timeouts(&io_timeout_override_root_va, ioaddr_base,
934 	           size, read_timeout_us, write_timeout_us);
935 #else
936 	#pragma unused(ioaddr_base, size, read_timeout_us, write_timeout_us)
937 	return KERN_SUCCESS;
938 #endif /* ML_IO_TIMEOUTS_ENABLED */
939 }
940 
/*
 * Raise the MMIO read/write timeouts for the physical-address region
 * [ioaddr_base, ioaddr_base + size).  Unlike ml_io_increase_timeouts(),
 * no size cap is enforced here; io_increase_timeouts() validates the
 * region.  Returns a kern_return_t-style status.
 */
int
ml_io_increase_timeouts_phys(vm_offset_t ioaddr_base, unsigned int size,
    uint32_t read_timeout_us, uint32_t write_timeout_us)
{
#if ML_IO_TIMEOUTS_ENABLED
	return io_increase_timeouts(&io_timeout_override_root_pa, ioaddr_base,
	           size, read_timeout_us, write_timeout_us);
#else
	#pragma unused(ioaddr_base, size, read_timeout_us, write_timeout_us)
	return KERN_SUCCESS;
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
953 
/*
 * Remove a previously registered virtual-address timeout override.
 * The (ioaddr_base, size) pair must exactly match the registration;
 * sub-regions are rejected (KERN_NOT_FOUND).
 */
int
ml_io_reset_timeouts(uintptr_t ioaddr_base, unsigned int size)
{
#if ML_IO_TIMEOUTS_ENABLED
	return io_reset_timeouts(&io_timeout_override_root_va, ioaddr_base, size);
#else
	#pragma unused(ioaddr_base, size)
	return KERN_SUCCESS;
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
964 
/*
 * Remove a previously registered physical-address timeout override.
 * The (ioaddr_base, size) pair must exactly match the registration.
 */
int
ml_io_reset_timeouts_phys(vm_offset_t ioaddr_base, unsigned int size)
{
#if ML_IO_TIMEOUTS_ENABLED
	return io_reset_timeouts(&io_timeout_override_root_pa, ioaddr_base, size);
#else
	#pragma unused(ioaddr_base, size)
	return KERN_SUCCESS;
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
975 
976 
#if DEVELOPMENT || DEBUG
/*
 * When non-zero, ml_io_read() intercepts the magic addresses 1 and 2
 * for the ml_io_read_cpu_reg sysctl test instead of dereferencing them.
 */
static int ml_io_read_test_mode;
#endif
980 
/*
 * Perform an MMIO read of `size` bytes (1, 2, 4 or 8) from `vaddr`,
 * optionally timing the access and reporting/panicking when it exceeds
 * the configured physical-read timeout.  Returns the value read,
 * zero-extended to 64 bits.  Panics on any other `size`.
 */
unsigned long long
ml_io_read(uintptr_t vaddr, int size)
{
	unsigned long long result = 0;
	unsigned char s1;
	unsigned short s2;

#if DEVELOPMENT || DEBUG
	/* For testing */
	extern void IODelay(int);
	if (__improbable(ml_io_read_test_mode)) {
		/* Magic test addresses: 1 => artificial delay, 2 => fast return. */
		if (vaddr == 1) {
			IODelay(100);
			return 0;
		} else if (vaddr == 2) {
			return 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

#ifdef ML_IO_VERIFY_UNCACHEABLE
	/* Verifies the mapping is uncacheable and yields the PA as a side effect. */
	uintptr_t paddr = pmap_verify_noncacheable(vaddr);
#elif defined(ML_IO_TIMEOUTS_ENABLED)
	/* PA resolved lazily (kvtophys) only if the access turns out slow. */
	uintptr_t paddr = 0;
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	uint64_t sabs, eabs;
	boolean_t istate, timeread = FALSE;
	uint64_t report_read_delay;
#if __x86_64__
	report_read_delay = report_phy_read_delay;
#else
	report_read_delay = os_atomic_load(&report_phy_read_delay_to, relaxed);
	uint64_t const trace_phy_read_delay = os_atomic_load(&trace_phy_read_delay_to, relaxed);
#endif /* __x86_64__ */

	/* Only time the access when reporting is enabled; interrupts are
	 * disabled across the timed window so the measurement is honest. */
	if (__improbable(report_read_delay != 0)) {
		istate = ml_set_interrupts_enabled_with_debug(false, false);
		sabs = ml_io_timestamp();
		timeread = TRUE;
	}

#ifdef ML_IO_SIMULATE_STRETCHED_ENABLED
	/* Test hook: back-date the start timestamp to fake a slow access. */
	if (__improbable(timeread && simulate_stretched_io)) {
		sabs -= simulate_stretched_io;
	}
#endif /* ML_IO_SIMULATE_STRETCHED_ENABLED */
#endif /* ML_IO_TIMEOUTS_ENABLED */

#if DEVELOPMENT || DEBUG
	/* Fences order the timebase reads against the MMIO access itself. */
	boolean_t use_fences = !kern_feature_override(KF_IO_TIMEOUT_OVRD);
	if (use_fences) {
		ml_timebase_to_memory_fence();
	}
#endif

	/* The actual device access. */
	switch (size) {
	case 1:
		s1 = *(volatile unsigned char *)vaddr;
		result = s1;
		break;
	case 2:
		s2 = *(volatile unsigned short *)vaddr;
		result = s2;
		break;
	case 4:
		result = *(volatile unsigned int *)vaddr;
		break;
	case 8:
		result = *(volatile unsigned long long *)vaddr;
		break;
	default:
		panic("Invalid size %d for ml_io_read(%p)", size, (void *)vaddr);
		break;
	}

#if DEVELOPMENT || DEBUG
	if (use_fences) {
		ml_memory_to_timebase_fence();
	}
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	if (__improbable(timeread == TRUE)) {
		eabs = ml_io_timestamp();


		/* Prevent the processor from calling iotrace during its
		 * initialization procedure. */
		if (current_processor()->state == PROCESSOR_RUNNING) {
			iotrace(IOTRACE_IO_READ, vaddr, paddr, size, result, sabs, eabs - sabs);
		}

		if (__improbable((eabs - sabs) > report_read_delay)) {
			if (paddr == 0) {
				paddr = kvtophys(vaddr);
			}

			DTRACE_PHYSLAT5(physioread, uint64_t, (eabs - sabs),
			    uint64_t, vaddr, uint32_t, size, uint64_t, paddr, uint64_t, result);

			/* A per-address override may raise the panic ceiling. */
			uint64_t override = 0;
			override_io_timeouts(vaddr, paddr, &override, NULL);

			if (override != 0) {
#if SCHED_HYGIENE_DEBUG
				/*
				 * The IO timeout was overridden. If we were called in an
				 * interrupt handler context, that can lead to a timeout
				 * panic, so we need to abandon the measurement.
				 */
				if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
					ml_irq_debug_abandon();
				}
#endif
				report_read_delay = override;
			}
		}

		/* Re-check against the (possibly raised) ceiling. */
		if (__improbable((eabs - sabs) > report_read_delay)) {
			if (phy_read_panic && (machine_timeout_suspended() == FALSE)) {
#if defined(__x86_64__)
				panic_notify();
#endif /* defined(__x86_64__) */
				uint64_t nsec = 0;
				absolutetime_to_nanoseconds(eabs - sabs, &nsec);
				panic("Read from IO vaddr 0x%lx paddr 0x%lx took %llu ns, "
				    "result: 0x%llx (start: %llu, end: %llu), ceiling: %llu",
				    vaddr, paddr, nsec, result, sabs, eabs,
				    report_read_delay);
			}
		}

		/* Separate (lower) threshold for kdebug tracing of slow reads. */
		if (__improbable(trace_phy_read_delay > 0 && (eabs - sabs) > trace_phy_read_delay)) {
			KDBG(MACHDBG_CODE(DBG_MACH_IO, DBC_MACH_IO_MMIO_READ),
			    (eabs - sabs), VM_KERNEL_UNSLIDE_OR_PERM(vaddr), paddr, result);
		}

		(void)ml_set_interrupts_enabled_with_debug(istate, false);
	}
#endif /*  ML_IO_TIMEOUTS_ENABLED */
	return result;
}
1125 
/* 8-bit MMIO read; see ml_io_read(). */
unsigned int
ml_io_read8(uintptr_t vaddr)
{
	return (unsigned) ml_io_read(vaddr, 1);
}
1131 
/* 16-bit MMIO read; see ml_io_read(). */
unsigned int
ml_io_read16(uintptr_t vaddr)
{
	return (unsigned) ml_io_read(vaddr, 2);
}
1137 
/* 32-bit MMIO read; see ml_io_read(). */
unsigned int
ml_io_read32(uintptr_t vaddr)
{
	return (unsigned) ml_io_read(vaddr, 4);
}
1143 
/* 64-bit MMIO read; see ml_io_read(). */
unsigned long long
ml_io_read64(uintptr_t vaddr)
{
	return ml_io_read(vaddr, 8);
}
1149 
1150 
1151 uint64_t
ml_io_read_cpu_reg(uintptr_t vaddr,int sz,__unused int logical_cpu)1152 ml_io_read_cpu_reg(uintptr_t vaddr, int sz, __unused int logical_cpu)
1153 {
1154 	uint64_t val;
1155 
1156 
1157 	val = ml_io_read(vaddr, sz);
1158 
1159 
1160 	return val;
1161 }
1162 
1163 
1164 /* ml_io_write* */
1165 
/*
 * Perform an MMIO write of `size` bytes (1, 2, 4 or 8) of `val` to
 * `vaddr`, optionally timing the access and reporting/panicking when it
 * exceeds the configured physical-write timeout.  Panics on any other
 * `size`.
 */
void
ml_io_write(uintptr_t vaddr, uint64_t val, int size)
{
#ifdef ML_IO_VERIFY_UNCACHEABLE
	/* Verifies the mapping is uncacheable and yields the PA as a side effect. */
	uintptr_t paddr = pmap_verify_noncacheable(vaddr);
#elif defined(ML_IO_TIMEOUTS_ENABLED)
	/* PA resolved lazily (kvtophys) only if the access turns out slow. */
	uintptr_t paddr = 0;
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	uint64_t sabs, eabs;
	boolean_t istate, timewrite = FALSE;
	uint64_t report_write_delay;
#if __x86_64__
	report_write_delay = report_phy_write_delay;
#else
	report_write_delay = os_atomic_load(&report_phy_write_delay_to, relaxed);
	uint64_t trace_phy_write_delay = os_atomic_load(&trace_phy_write_delay_to, relaxed);
#endif /* !defined(__x86_64__) */
	/* Only time the access when reporting is enabled; interrupts are
	 * disabled across the timed window so the measurement is honest. */
	if (__improbable(report_write_delay != 0)) {
		istate = ml_set_interrupts_enabled_with_debug(false, false);
		sabs = ml_io_timestamp();
		timewrite = TRUE;
	}

#ifdef ML_IO_SIMULATE_STRETCHED_ENABLED
	/* Test hook: back-date the start timestamp to fake a slow access. */
	if (__improbable(timewrite && simulate_stretched_io)) {
		sabs -= simulate_stretched_io;
	}
#endif /* ML_IO_SIMULATE_STRETCHED_ENABLED */
#endif /* ML_IO_TIMEOUTS_ENABLED */

#if DEVELOPMENT || DEBUG
	/* Fences order the timebase reads against the MMIO access itself. */
	boolean_t use_fences = !kern_feature_override(KF_IO_TIMEOUT_OVRD);
	if (use_fences) {
		ml_timebase_to_memory_fence();
	}
#endif

	/* The actual device access. */
	switch (size) {
	case 1:
		*(volatile uint8_t *)vaddr = (uint8_t)val;
		break;
	case 2:
		*(volatile uint16_t *)vaddr = (uint16_t)val;
		break;
	case 4:
		*(volatile uint32_t *)vaddr = (uint32_t)val;
		break;
	case 8:
		*(volatile uint64_t *)vaddr = (uint64_t)val;
		break;
	default:
		panic("Invalid size %d for ml_io_write(%p, 0x%llx)", size, (void *)vaddr, val);
		break;
	}

#if DEVELOPMENT || DEBUG
	if (use_fences) {
		ml_memory_to_timebase_fence();
	}
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	if (__improbable(timewrite == TRUE)) {
		eabs = ml_io_timestamp();

		/* Prevent the processor from calling iotrace during its
		 * initialization procedure. */
		if (current_processor()->state == PROCESSOR_RUNNING) {
			iotrace(IOTRACE_IO_WRITE, vaddr, paddr, size, val, sabs, eabs - sabs);
		}


		if (__improbable((eabs - sabs) > report_write_delay)) {
			if (paddr == 0) {
				paddr = kvtophys(vaddr);
			}

			DTRACE_PHYSLAT5(physiowrite, uint64_t, (eabs - sabs),
			    uint64_t, vaddr, uint32_t, size, uint64_t, paddr, uint64_t, val);

			/* A per-address override may raise the panic ceiling. */
			uint64_t override = 0;
			override_io_timeouts(vaddr, paddr, NULL, &override);

			if (override != 0) {
#if SCHED_HYGIENE_DEBUG
				/*
				 * The IO timeout was overridden. If we were called in an
				 * interrupt handler context, that can lead to a timeout
				 * panic, so we need to abandon the measurement.
				 */
				if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
					ml_irq_debug_abandon();
				}
#endif
				report_write_delay = override;
			}
		}

		/* Re-check against the (possibly raised) ceiling. */
		if (__improbable((eabs - sabs) > report_write_delay)) {
			if (phy_write_panic && (machine_timeout_suspended() == FALSE)) {
#if defined(__x86_64__)
				panic_notify();
#endif /*  defined(__x86_64__) */

				uint64_t nsec = 0;
				absolutetime_to_nanoseconds(eabs - sabs, &nsec);
				panic("Write to IO vaddr %p paddr %p val 0x%llx took %llu ns,"
				    " (start: %llu, end: %llu), ceiling: %llu",
				    (void *)vaddr, (void *)paddr, val, nsec, sabs, eabs,
				    report_write_delay);
			}
		}

		/* Separate (lower) threshold for kdebug tracing of slow writes. */
		if (__improbable(trace_phy_write_delay > 0 && (eabs - sabs) > trace_phy_write_delay)) {
			KDBG(MACHDBG_CODE(DBG_MACH_IO, DBC_MACH_IO_MMIO_WRITE),
			    (eabs - sabs), VM_KERNEL_UNSLIDE_OR_PERM(vaddr), paddr, val);
		}

		(void)ml_set_interrupts_enabled_with_debug(istate, false);
	}
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
1290 
/* 8-bit MMIO write; see ml_io_write(). */
void
ml_io_write8(uintptr_t vaddr, uint8_t val)
{
	ml_io_write(vaddr, val, 1);
}
1296 
/* 16-bit MMIO write; see ml_io_write(). */
void
ml_io_write16(uintptr_t vaddr, uint16_t val)
{
	ml_io_write(vaddr, val, 2);
}
1302 
/* 32-bit MMIO write; see ml_io_write(). */
void
ml_io_write32(uintptr_t vaddr, uint32_t val)
{
	ml_io_write(vaddr, val, 4);
}
1308 
/* 64-bit MMIO write; see ml_io_write(). */
void
ml_io_write64(uintptr_t vaddr, uint64_t val)
{
	ml_io_write(vaddr, val, 8);
}
1314 
/*
 * One registration for CPU state-change event broadcasts
 * (see ml_broadcast_cpu_event()).  Elements are allocated from
 * permanent zalloc memory and are never removed from the chain.
 */
struct cpu_callback_chain_elem {
	cpu_callback_t                  fn;     /* callback to invoke */
	void                            *param; /* opaque argument passed back to fn */
	struct cpu_callback_chain_elem  *next;  /* next registration, or NULL */
};

/* Head of the singly-linked callback chain; readers traverse lock-free. */
static struct cpu_callback_chain_elem *cpu_callback_chain;
static LCK_GRP_DECLARE(cpu_callback_chain_lock_grp, "cpu_callback_chain");
/* Serializes writers (registration) only. */
static LCK_SPIN_DECLARE(cpu_callback_chain_lock, &cpu_callback_chain_lock_grp);
1324 
/*
 * Register fn (with opaque param) to be invoked on every CPU state-change
 * event broadcast via ml_broadcast_cpu_event().  Registrations are
 * permanent; there is no working unregister.
 */
void
cpu_event_register_callback(cpu_callback_t fn, void *param)
{
	struct cpu_callback_chain_elem *new_elem;

	new_elem = zalloc_permanent_type(struct cpu_callback_chain_elem);
	if (!new_elem) {
		panic("can't allocate cpu_callback_chain_elem");
	}

	lck_spin_lock(&cpu_callback_chain_lock);
	/* Fully initialize the element before publishing it at the head. */
	new_elem->next = cpu_callback_chain;
	new_elem->fn = fn;
	new_elem->param = param;
	/* Release store pairs with the dependency load in ml_broadcast_cpu_event(). */
	os_atomic_store(&cpu_callback_chain, new_elem, release);
	lck_spin_unlock(&cpu_callback_chain_lock);
}
1342 
/* Unregistering is not supported: registrations are permanent. Always panics. */
__attribute__((noreturn))
void
cpu_event_unregister_callback(__unused cpu_callback_t fn)
{
	panic("Unfortunately, cpu_event_unregister_callback is unimplemented.");
}
1349 
1350 void
ml_broadcast_cpu_event(enum cpu_event event,unsigned int cpu_or_cluster)1351 ml_broadcast_cpu_event(enum cpu_event event, unsigned int cpu_or_cluster)
1352 {
1353 	struct cpu_callback_chain_elem *cursor;
1354 
1355 	cursor = os_atomic_load(&cpu_callback_chain, dependency);
1356 	for (; cursor != NULL; cursor = cursor->next) {
1357 		cursor->fn(cursor->param, event, cpu_or_cluster);
1358 	}
1359 }
1360 
1361 // Initialize Machine Timeouts (see the MACHINE_TIMEOUT macro
1362 // definition)
1363 
1364 void
machine_timeout_init_with_suffix(const struct machine_timeout_spec * spec,char const * suffix,bool always_enabled)1365 machine_timeout_init_with_suffix(const struct machine_timeout_spec *spec, char const *suffix, bool always_enabled)
1366 {
1367 	if (!always_enabled && (wdt == -1 || (spec->skip_predicate != NULL && spec->skip_predicate(spec)))) {
1368 		// This timeout should be disabled.
1369 		os_atomic_store_wide((uint64_t*)spec->ptr, 0, relaxed);
1370 		return;
1371 	}
1372 
1373 	assert(suffix != NULL);
1374 	assert(strlen(spec->name) <= MACHINE_TIMEOUT_MAX_NAME_LEN);
1375 
1376 	size_t const suffix_len = strlen(suffix);
1377 
1378 	size_t const dt_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + suffix_len + 1;
1379 	char dt_name[dt_name_size];
1380 
1381 	strlcpy(dt_name, spec->name, dt_name_size);
1382 	strlcat(dt_name, suffix, dt_name_size);
1383 
1384 	size_t const scale_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + suffix_len + strlen("-scale") + 1;
1385 	char scale_name[scale_name_size];
1386 
1387 	strlcpy(scale_name, spec->name, scale_name_size);
1388 	strlcat(scale_name, suffix, scale_name_size);
1389 	strlcat(scale_name, "-scale", scale_name_size);
1390 
1391 	size_t const boot_arg_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + strlen("ml-timeout-") + suffix_len + 1;
1392 	char boot_arg_name[boot_arg_name_size];
1393 
1394 	strlcpy(boot_arg_name, "ml-timeout-", boot_arg_name_size);
1395 	strlcat(boot_arg_name, spec->name, boot_arg_name_size);
1396 	strlcat(boot_arg_name, suffix, boot_arg_name_size);
1397 
1398 	size_t const boot_arg_scale_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN +
1399 	    strlen("ml-timeout-") + strlen("-scale") + suffix_len + 1;
1400 	char boot_arg_scale_name[boot_arg_scale_name_size];
1401 
1402 	strlcpy(boot_arg_scale_name, "ml-timeout-", boot_arg_scale_name_size);
1403 	strlcat(boot_arg_scale_name, spec->name, boot_arg_scale_name_size);
1404 	strlcat(boot_arg_scale_name, suffix, boot_arg_name_size);
1405 	strlcat(boot_arg_scale_name, "-scale", boot_arg_scale_name_size);
1406 
1407 
1408 	/*
1409 	 * Determine base value from DT and boot-args.
1410 	 */
1411 
1412 	DTEntry base, chosen;
1413 
1414 	if (SecureDTLookupEntry(NULL, "/machine-timeouts", &base) != kSuccess) {
1415 		base = NULL;
1416 	}
1417 
1418 	if (SecureDTLookupEntry(NULL, "/chosen/machine-timeouts", &chosen) != kSuccess) {
1419 		chosen = NULL;
1420 	}
1421 
1422 	uint64_t timeout = spec->default_value;
1423 	bool found = false;
1424 
1425 	uint64_t const *data = NULL;
1426 	unsigned int data_size = sizeof(*data);
1427 
1428 	/* First look in /machine-timeouts/<name> */
1429 	if (base != NULL && SecureDTGetProperty(base, dt_name, (const void **)&data, &data_size) == kSuccess) {
1430 		if (data_size != sizeof(*data)) {
1431 			panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/%s", __func__, data_size, dt_name);
1432 		}
1433 
1434 		timeout = *data;
1435 		found = true;
1436 	}
1437 
1438 	/* A value in /chosen/machine-timeouts/<name> overrides */
1439 	if (chosen != NULL && SecureDTGetProperty(chosen, dt_name, (const void **)&data, &data_size) == kSuccess) {
1440 		if (data_size != sizeof(*data)) {
1441 			panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/%s", __func__, data_size, dt_name);
1442 		}
1443 
1444 		timeout = *data;
1445 		found = true;
1446 	}
1447 
1448 	/* A boot-arg ml-timeout-<name> overrides */
1449 	uint64_t boot_arg = 0;
1450 
1451 	if (PE_parse_boot_argn(boot_arg_name, &boot_arg, sizeof(boot_arg))) {
1452 		timeout = boot_arg;
1453 		found = true;
1454 	}
1455 
1456 
1457 	/*
1458 	 * Determine scale value from DT and boot-args.
1459 	 */
1460 
1461 	uint64_t scale = 1;
1462 	uint32_t const *scale_data;
1463 	unsigned int scale_size = sizeof(scale_data);
1464 
1465 	/* If there is a scale factor /machine-timeouts/<name>-scale, apply it. */
1466 	if (base != NULL && SecureDTGetProperty(base, scale_name, (const void **)&scale_data, &scale_size) == kSuccess) {
1467 		if (scale_size != sizeof(*scale_data)) {
1468 			panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/%s-scale", __func__, scale_size, dt_name);
1469 		}
1470 
1471 		scale = *scale_data;
1472 	}
1473 
1474 	/* If there is a scale factor /chosen/machine-timeouts/<name>-scale, use that. */
1475 	if (chosen != NULL && SecureDTGetProperty(chosen, scale_name, (const void **)&scale_data, &scale_size) == kSuccess) {
1476 		if (scale_size != sizeof(*scale_data)) {
1477 			panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/%s-scale", __func__,
1478 			    scale_size, dt_name);
1479 		}
1480 
1481 		scale = *scale_data;
1482 	}
1483 
1484 	/* Finally, a boot-arg ml-timeout-<name>-scale takes precedence. */
1485 	if (PE_parse_boot_argn(boot_arg_scale_name, &boot_arg, sizeof(boot_arg))) {
1486 		scale = boot_arg;
1487 	}
1488 
1489 	static bool global_scale_set;
1490 	static uint64_t global_scale;
1491 
1492 	if (!global_scale_set) {
1493 		/* Apply /machine-timeouts/global-scale if present */
1494 		if (SecureDTGetProperty(base, "global-scale", (const void **)&scale_data, &scale_size) == kSuccess) {
1495 			if (scale_size != sizeof(*scale_data)) {
1496 				panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/global-scale", __func__,
1497 				    scale_size);
1498 			}
1499 
1500 			global_scale = *scale_data;
1501 			global_scale_set = true;
1502 		}
1503 
1504 		/* Use /chosen/machine-timeouts/global-scale if present */
1505 		if (SecureDTGetProperty(chosen, "global-scale", (const void **)&scale_data, &scale_size) == kSuccess) {
1506 			if (scale_size != sizeof(*scale_data)) {
1507 				panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/global-scale", __func__,
1508 				    scale_size);
1509 			}
1510 
1511 			global_scale = *scale_data;
1512 			global_scale_set = true;
1513 		}
1514 
1515 		/* Finally, the boot-arg ml-timeout-global-scale takes precedence. */
1516 		if (PE_parse_boot_argn("ml-timeout-global-scale", &boot_arg, sizeof(boot_arg))) {
1517 			global_scale = boot_arg;
1518 			global_scale_set = true;
1519 		}
1520 	}
1521 
1522 	if (global_scale_set) {
1523 		scale *= global_scale;
1524 	}
1525 
1526 	/* Compute the final timeout, and done. */
1527 	if (found && timeout > 0) {
1528 		/* Only apply inherent unit scale if the value came in
1529 		 * externally. */
1530 
1531 		if (spec->unit_scale == MACHINE_TIMEOUT_UNIT_TIMEBASE) {
1532 			uint64_t nanoseconds = timeout / 1000;
1533 			nanoseconds_to_absolutetime(nanoseconds, &timeout);
1534 		} else {
1535 			timeout /= spec->unit_scale;
1536 		}
1537 
1538 		if (timeout == 0) {
1539 			/* Ensure unit scaling did not disable the timeout. */
1540 			timeout = 1;
1541 		}
1542 	}
1543 
1544 	if (os_mul_overflow(timeout, scale, &timeout)) {
1545 		timeout = UINT64_MAX; // clamp
1546 	}
1547 
1548 	os_atomic_store_wide((uint64_t*)spec->ptr, timeout, relaxed);
1549 }
1550 
/* Initialize a machine timeout with no name suffix; honors wdt/skip predicates. */
void
machine_timeout_init(const struct machine_timeout_spec *spec)
{
	machine_timeout_init_with_suffix(spec, "", false);
}
1556 
/* Initialize a machine timeout, ignoring wdt=-1 and any skip predicate. */
void
machine_timeout_init_always_enabled(const struct machine_timeout_spec *spec)
{
	machine_timeout_init_with_suffix(spec, "", true);
}
1562 
1563 #if DEVELOPMENT || DEBUG
1564 /*
1565  * Late timeout (re-)initialization, at the end of bsd_init()
1566  */
void
machine_timeout_bsd_init(void)
{
	/* "-b" selects the late ("bsd_init-time") DT/boot-arg variants. */
	char const * const __unused mt_suffix = "-b";
#if SCHED_HYGIENE_DEBUG
	machine_timeout_init_with_suffix(MACHINE_TIMEOUT_SPEC_REF(interrupt_masked_timeout), mt_suffix, false);
	machine_timeout_init_with_suffix(MACHINE_TIMEOUT_SPEC_REF(sched_preemption_disable_threshold_mt), mt_suffix, false);

	/*
	 * The io timeouts can inherit from interrupt_masked_timeout.
	 * Re-initialize, as interrupt_masked_timeout may have changed.
	 */
	ml_io_init_timeouts();

	extern void preemption_disable_reset_max_durations(void);
	/*
	 * Reset the preemption disable stats, so that they are not
	 * polluted by long early boot code.
	 */
	preemption_disable_reset_max_durations();
#endif /* SCHED_HYGIENE_DEBUG */
}
1589 #endif /* DEVELOPMENT || DEBUG */
1590 
1591 #if ML_IO_TIMEOUTS_ENABLED && CONFIG_XNUPOST
1592 #include <tests/xnupost.h>
1593 
1594 extern kern_return_t ml_io_timeout_test(void);
1595 
1596 static inline void
ml_io_timeout_test_get_timeouts(uintptr_t vaddr,uint64_t * read_timeout,uint64_t * write_timeout)1597 ml_io_timeout_test_get_timeouts(uintptr_t vaddr, uint64_t *read_timeout, uint64_t *write_timeout)
1598 {
1599 	*read_timeout = 0;
1600 	*write_timeout = 0;
1601 
1602 	vm_offset_t paddr = kvtophys(vaddr);
1603 
1604 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
1605 	override_io_timeouts(vaddr, paddr, read_timeout, write_timeout);
1606 	ml_set_interrupts_enabled(istate);
1607 }
1608 
1609 static inline void
ml_io_timeout_test_get_timeouts_phys(vm_offset_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)1610 ml_io_timeout_test_get_timeouts_phys(vm_offset_t paddr, uint64_t *read_timeout, uint64_t *write_timeout)
1611 {
1612 	*read_timeout = 0;
1613 	*write_timeout = 0;
1614 
1615 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
1616 	override_io_timeouts(0, paddr, read_timeout, write_timeout);
1617 	ml_set_interrupts_enabled(istate);
1618 }
1619 
1620 kern_return_t
ml_io_timeout_test(void)1621 ml_io_timeout_test(void)
1622 {
1623 	const size_t SIZE = 16;
1624 	/*
1625 	 * Page align the base address to ensure that the regions are physically
1626 	 * contiguous.
1627 	 */
1628 	const uintptr_t iovaddr_base1 = (uintptr_t)kernel_pmap & ~PAGE_MASK;
1629 
1630 	const uintptr_t iovaddr_base2 = iovaddr_base1 + SIZE;
1631 	const uintptr_t vaddr1 = iovaddr_base1 + SIZE / 2;
1632 	const uintptr_t vaddr2 = iovaddr_base2 + SIZE / 2;
1633 
1634 	const vm_offset_t iopaddr_base1 = kvtophys(iovaddr_base1);
1635 	const vm_offset_t iopaddr_base2 = kvtophys(iovaddr_base2);
1636 	const vm_offset_t paddr1 = iopaddr_base1 + SIZE / 2;
1637 	const vm_offset_t paddr2 = iopaddr_base2 + SIZE / 2;
1638 
1639 	const uint64_t READ_TIMEOUT1_US = 50000, WRITE_TIMEOUT1_US = 50001;
1640 	const uint64_t READ_TIMEOUT2_US = 50002, WRITE_TIMEOUT2_US = 50003;
1641 	uint64_t read_timeout1_abs, write_timeout1_abs;
1642 	uint64_t read_timeout2_abs, write_timeout2_abs;
1643 	nanoseconds_to_absolutetime(NSEC_PER_USEC * READ_TIMEOUT1_US, &read_timeout1_abs);
1644 	nanoseconds_to_absolutetime(NSEC_PER_USEC * WRITE_TIMEOUT1_US, &write_timeout1_abs);
1645 	nanoseconds_to_absolutetime(NSEC_PER_USEC * READ_TIMEOUT2_US, &read_timeout2_abs);
1646 	nanoseconds_to_absolutetime(NSEC_PER_USEC * WRITE_TIMEOUT2_US, &write_timeout2_abs);
1647 
1648 	int err = ml_io_increase_timeouts(iovaddr_base1, 0, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1649 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for empty region");
1650 
1651 	err = ml_io_increase_timeouts(iovaddr_base1, 4097, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1652 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for region > 4096 bytes");
1653 
1654 	err = ml_io_increase_timeouts(UINTPTR_MAX, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1655 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for overflowed region");
1656 
1657 	err = ml_io_increase_timeouts(iovaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1658 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for first VA region should succeed");
1659 
1660 	err = ml_io_increase_timeouts(iovaddr_base2, SIZE, READ_TIMEOUT2_US, WRITE_TIMEOUT2_US);
1661 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for second VA region should succeed");
1662 
1663 	err = ml_io_increase_timeouts(iovaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1664 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for same region twice");
1665 
1666 	err = ml_io_increase_timeouts(vaddr1, (uint32_t)(vaddr2 - vaddr1), READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1667 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for overlapping regions");
1668 
1669 	uint64_t read_timeout, write_timeout;
1670 	ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1671 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1672 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1673 
1674 	ml_io_timeout_test_get_timeouts(vaddr2, &read_timeout, &write_timeout);
1675 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first region");
1676 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first region");
1677 
1678 	ml_io_timeout_test_get_timeouts(iovaddr_base2 + SIZE, &read_timeout, &write_timeout);
1679 	T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout without override");
1680 	T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout without override");
1681 
1682 	err = ml_io_reset_timeouts(iovaddr_base1 + 1, SIZE - 1);
1683 	T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for subregion");
1684 
1685 	err = ml_io_reset_timeouts(iovaddr_base2 + SIZE, SIZE);
1686 	T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for non-existent region");
1687 
1688 	err = ml_io_reset_timeouts(iovaddr_base1, SIZE);
1689 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for first VA region should succeed");
1690 
1691 	ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1692 	T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout for reset region");
1693 	T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout for reset region");
1694 
1695 	err = ml_io_reset_timeouts(iovaddr_base1, SIZE);
1696 	T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for same region twice");
1697 
1698 	err = ml_io_reset_timeouts(iovaddr_base2, SIZE);
1699 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for second VA region should succeed");
1700 
1701 	err = ml_io_increase_timeouts_phys(iopaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1702 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for first PA region should succeed");
1703 
1704 	err = ml_io_increase_timeouts_phys(iopaddr_base2, SIZE, READ_TIMEOUT2_US, WRITE_TIMEOUT2_US);
1705 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for second PA region should succeed");
1706 
1707 	ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1708 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1709 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1710 
1711 	ml_io_timeout_test_get_timeouts(vaddr2, &read_timeout, &write_timeout);
1712 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first region");
1713 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first region");
1714 
1715 	ml_io_timeout_test_get_timeouts_phys(paddr1, &read_timeout, &write_timeout);
1716 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1717 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1718 
1719 	ml_io_timeout_test_get_timeouts_phys(paddr2, &read_timeout, &write_timeout);
1720 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first physical region");
1721 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first physical region");
1722 
1723 	err = ml_io_reset_timeouts_phys(iopaddr_base1, SIZE);
1724 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for first PA region should succeed");
1725 
1726 	err = ml_io_reset_timeouts_phys(iopaddr_base2, SIZE);
1727 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for second PA region should succeed");
1728 
1729 	ml_io_timeout_test_get_timeouts_phys(paddr1, &read_timeout, &write_timeout);
1730 	T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout for reset region");
1731 	T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout for reset region");
1732 
1733 	return KERN_SUCCESS;
1734 }
#endif /* ML_IO_TIMEOUTS_ENABLED && CONFIG_XNUPOST */
1736 
1737 #if DEVELOPMENT || DEBUG
/*
 * sysctl test: drive ml_io_read_cpu_reg() through ml_io_read()'s
 * DEVELOPMENT-only magic test addresses (1 => IODelay(100) then return
 * 0, 2 => immediate return 0), once with interrupts disabled and once
 * enabled.  Always writes 0 to *out and returns 0.
 */
static int
ml_io_read_cpu_reg_test(__unused int64_t in, int64_t *out)
{
	printf("Testing ml_io_read_cpu_reg()...\n");

	/* Enable magic-address interception in ml_io_read(). */
	ml_io_read_test_mode = 1;
	boolean_t istate = ml_set_interrupts_enabled_with_debug(false, false);
	(void) ml_io_read_cpu_reg((uintptr_t)1, 8, 1);
	(void) ml_io_read_cpu_reg((uintptr_t)2, 8, 1);
	ml_set_interrupts_enabled_with_debug(istate, false);
	(void) ml_io_read_cpu_reg((uintptr_t)1, 8, 1);
	(void) ml_io_read_cpu_reg((uintptr_t)2, 8, 1);
	ml_io_read_test_mode = 0;

	*out = 0;
	return 0;
}
1755 SYSCTL_TEST_REGISTER(ml_io_read_cpu_reg, ml_io_read_cpu_reg_test);
1756 #endif /* DEVELOPMENT || DEBUG */
1757