xref: /xnu-11215.1.10/osfmk/kern/machine.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	kern/machine.c
60  *	Author:	Avadis Tevanian, Jr.
61  *	Date:	1987
62  *
63  *	Support for machine independent machine abstraction.
64  */
65 
66 #include <string.h>
67 
68 #include <mach/mach_types.h>
69 #include <mach/boolean.h>
70 #include <mach/kern_return.h>
71 #include <mach/machine.h>
72 #include <mach/host_info.h>
73 #include <mach/host_reboot.h>
74 #include <mach/host_priv_server.h>
75 #include <mach/processor_server.h>
76 #include <mach/sdt.h>
77 
78 #include <kern/kern_types.h>
79 #include <kern/cpu_data.h>
80 #include <kern/ipc_host.h>
81 #include <kern/host.h>
82 #include <kern/machine.h>
83 #include <kern/misc_protos.h>
84 #include <kern/percpu.h>
85 #include <kern/processor.h>
86 #include <kern/queue.h>
87 #include <kern/sched.h>
88 #include <kern/startup.h>
89 #include <kern/task.h>
90 #include <kern/thread.h>
91 #include <kern/iotrace.h>
92 
93 #include <libkern/OSDebug.h>
94 #if ML_IO_TIMEOUTS_ENABLED
95 #include <libkern/tree.h>
96 #endif
97 
98 #include <pexpert/device_tree.h>
99 
100 #include <machine/commpage.h>
101 #include <machine/machine_routines.h>
102 
103 #if HIBERNATION
104 #include <IOKit/IOHibernatePrivate.h>
105 #endif
106 #include <IOKit/IOPlatformExpert.h>
107 
108 #if CONFIG_DTRACE
109 extern void (*dtrace_cpu_state_changed_hook)(int, boolean_t);
110 #endif
111 
112 #if defined(__arm64__)
113 extern void wait_while_mp_kdp_trap(bool check_SIGPdebug);
114 #if CONFIG_SPTM
115 #include <arm64/sptm/pmap/pmap_data.h>
116 #else
117 #include <arm/pmap/pmap_data.h>
118 #endif /* CONFIG_SPTM */
119 #endif /* defined(__arm64__) */
120 
121 #if defined(__x86_64__)
122 #include <i386/panic_notify.h>
123 #endif
124 
125 #if ML_IO_TIMEOUTS_ENABLED
126 #if defined(__x86_64__)
127 #define ml_io_timestamp mach_absolute_time
128 #else
129 #define ml_io_timestamp ml_get_timebase
130 #endif /* __x86_64__ */
131 #endif /* ML_IO_TIMEOUTS_ENABLED */
132 
133 /*
134  *	Exported variables:
135  */
136 
/*
 * Machine-wide configuration/availability record (type declared in
 * <mach/machine.h>); one of the "Exported variables" noted above.
 */
137 struct machine_info     machine_info;
138 
139 /* Forwards */
140 static void
141 processor_offline(void * parameter, __unused wait_result_t result);
142 
/* __dead2: never returns — ends in cpu_sleep()/panic() (see below). */
143 static void
144 processor_offline_intstack(processor_t processor) __dead2;
145 
146 
147 /*
148  *	processor_up:
149  *
150  *	Flag processor as up and running, and available
151  *	for scheduling.
 *
 *	Called on a processor that is coming online (at boot, after sleep,
 *	or at the end of a CPU-offline cycle), before it runs user threads.
152  */
153 void
processor_up(processor_t processor)154 processor_up(
155 	processor_t                     processor)
156 {
	/* Interrupts off at splsched while scheduler state is updated. */
157 	spl_t s = splsched();
158 	init_ast_check(processor);
159 
160 #if defined(__arm64__)
161 	/*
162 	 * A processor coming online won't have received a SIGPdebug signal
163 	 * to cause it to spin while a stackshot or panic is taking place,
164 	 * so spin here on mp_kdp_trap.
165 	 *
166 	 * However, since cpu_signal() is not yet enabled for this processor,
167 	 * there is a race if we have just passed this when a cpu_signal()
168 	 * is attempted.  The sender will assume the cpu is offline, so it will
169 	 * not end up spinning anywhere.  See processor_cpu_reinit() for the fix
170 	 * for this race.
171 	 */
172 	wait_while_mp_kdp_trap(false);
173 #endif
174 
175 	/* Boot CPU coming online for the first time, either at boot or after sleep */
176 	__assert_only bool is_first_online_processor;
177 
	/* Make this CPU visible to the scheduler as online. */
178 	is_first_online_processor = sched_mark_processor_online(processor,
179 	    processor->last_startup_reason);
180 
	/*
	 * Sanity check: unless this is the very first CPU online, a startup
	 * sequence must be in flight for this processor.
	 */
181 	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
182 	assert(processor->processor_instartup == true || is_first_online_processor);
183 	simple_unlock(&processor_start_state_lock);
184 
185 	splx(s);
186 
187 #if defined(__x86_64__)
	/* x86 counterpart of the arm64 ml_cpu_up() call in processor_cpu_reinit(). */
188 	ml_cpu_up();
189 #endif /* defined(__x86_64__) */
190 
191 #if CONFIG_DTRACE
	/* Let DTrace know this CPU is now online. */
192 	if (dtrace_cpu_state_changed_hook) {
193 		(*dtrace_cpu_state_changed_hook)(processor->cpu_id, TRUE);
194 	}
195 #endif
196 }
197 
198 #include <atm/atm_internal.h>
199 
200 kern_return_t
host_reboot(host_priv_t host_priv,int options)201 host_reboot(
202 	host_priv_t             host_priv,
203 	int                             options)
204 {
205 	if (host_priv == HOST_PRIV_NULL) {
206 		return KERN_INVALID_HOST;
207 	}
208 
209 #if DEVELOPMENT || DEBUG
210 	if (options & HOST_REBOOT_DEBUGGER) {
211 		Debugger("Debugger");
212 		return KERN_SUCCESS;
213 	}
214 #endif
215 
216 	if (options & HOST_REBOOT_UPSDELAY) {
217 		// UPS power cutoff path
218 		PEHaltRestart( kPEUPSDelayHaltCPU );
219 	} else {
220 		halt_all_cpus(!(options & HOST_REBOOT_HALT));
221 	}
222 
223 	return KERN_SUCCESS;
224 }
225 
226 kern_return_t
processor_assign(__unused processor_t processor,__unused processor_set_t new_pset,__unused boolean_t wait)227 processor_assign(
228 	__unused processor_t            processor,
229 	__unused processor_set_t        new_pset,
230 	__unused boolean_t              wait)
231 {
232 	return KERN_FAILURE;
233 }
234 
/*
 * processor_doshutdown:
 *
 * Take a processor offline (or park the boot CPU for final system sleep).
 * Runs in regular thread context: binds to the target CPU, then hands off
 * to that CPU's idle thread (processor_offline) to do the actual power-down.
 *
 * Callers must hold cluster_powerdown_lock and processor_updown_lock.
 */
235 void
processor_doshutdown(processor_t processor,bool is_final_system_sleep)236 processor_doshutdown(
237 	processor_t     processor,
238 	bool            is_final_system_sleep)
239 {
240 	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
241 	lck_mtx_assert(&processor_updown_lock, LCK_MTX_ASSERT_OWNED);
242 
243 	if (!processor->processor_booted) {
244 		panic("processor %d not booted", processor->cpu_id);
245 	}
246 
	/* Final sleep only happens on the boot CPU, with all others offline. */
247 	if (is_final_system_sleep) {
248 		assert(processor == current_processor());
249 		assert(processor == master_processor);
250 		assert(processor_avail_count == 1);
251 	}
252 
253 	processor_set_t pset = processor->processor_set;
254 
255 	ml_cpu_begin_state_transition(processor->cpu_id);
256 
257 	ml_broadcast_cpu_event(CPU_EXIT_REQUESTED, processor->cpu_id);
258 
259 #if HIBERNATION
260 	if (is_final_system_sleep) {
261 		/*
262 		 * Ensure the page queues are in a state where the hibernation
263 		 * code can manipulate them without requiring other threads
264 		 * to be scheduled.
265 		 *
266 		 * This operation can block,
267 		 * and unlock must be done from the same thread.
268 		 */
269 		assert(processor_avail_count < 2);
270 		hibernate_vm_lock();
271 	}
272 #endif
273 
	/* Mark the processor as shutting down, under the scheduler locks. */
274 	spl_t s = splsched();
275 	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
276 	pset_lock(pset);
277 
278 	assert(processor->state != PROCESSOR_START);
279 	assert(processor->state != PROCESSOR_PENDING_OFFLINE);
280 	assert(processor->state != PROCESSOR_OFF_LINE);
281 
282 	assert(!processor->processor_inshutdown);
283 	processor->processor_inshutdown = true;
284 
285 	assert(processor->processor_offline_state == PROCESSOR_OFFLINE_RUNNING);
286 	processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_BEGIN_SHUTDOWN);
287 
288 	if (!is_final_system_sleep) {
		/* At least one other CPU must remain online to take over. */
289 		sched_assert_not_last_online_cpu(processor->cpu_id);
290 	}
291 
292 	pset_unlock(pset);
293 	simple_unlock(&sched_available_cores_lock);
294 
295 	if (is_final_system_sleep) {
296 		assert(processor == current_processor());
297 
298 #if HIBERNATION
299 		/*
300 		 * After this point, the system is now
301 		 * committed to hibernation and must
302 		 * not run any other thread that could take this lock.
303 		 */
304 		hibernate_vm_unlock();
305 #endif
306 	} else {
307 		/*
308 		 * Get onto the processor to shut down.
309 		 * The scheduler picks this thread naturally according to its
310 		 * priority.
311 		 * The processor can run any other thread if this one blocks.
312 		 * So, don't block.
313 		 */
314 		processor_t prev = thread_bind(processor);
315 		thread_block(THREAD_CONTINUE_NULL);
316 
317 		/* interrupts still disabled */
318 		assert(ml_get_interrupts_enabled() == FALSE);
319 
320 		assert(processor == current_processor());
321 		assert(processor->processor_inshutdown);
322 
		/* Restore the previous binding; we're now on the target CPU. */
323 		thread_bind(prev);
324 		/* interrupts still disabled */
325 	}
326 
327 	/*
328 	 * Continue processor shutdown on the processor's idle thread.
329 	 * The handoff won't fail because the idle thread has a reserved stack.
330 	 * Switching to the idle thread leaves interrupts disabled,
331 	 * so we can't accidentally take an interrupt after the context switch.
332 	 */
333 	thread_t shutdown_thread = processor->idle_thread;
334 	shutdown_thread->continuation = processor_offline;
335 	shutdown_thread->parameter = (void*)is_final_system_sleep;
336 
337 	thread_run(current_thread(), THREAD_CONTINUE_NULL, NULL, shutdown_thread);
338 
339 	/*
340 	 * After this point, we are in regular scheduled context on a remaining
341 	 * available CPU. Interrupts are still disabled.
342 	 */
343 
344 	if (is_final_system_sleep) {
345 		/*
346 		 * We are coming out of system sleep here, so there won't be a
347 		 * corresponding processor_startup for this processor, so we
348 		 * need to put it back in the correct running state.
349 		 *
350 		 * There's nowhere to execute a call to CPU_EXITED during system
351 		 * sleep for the boot processor, and it's already been CPU_BOOTED
352 		 * by this point anyways, so skip the call.
353 		 */
354 		assert(current_processor() == master_processor);
355 		assert(processor->state == PROCESSOR_RUNNING);
356 		assert(processor->processor_inshutdown);
357 		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
358 		processor->processor_inshutdown = false;
359 		processor_update_offline_state(processor, PROCESSOR_OFFLINE_RUNNING);
360 
361 		splx(s);
362 	} else {
363 		splx(s);
364 
		/* Wait for the target CPU to finish executing its shutdown code. */
365 		cpu_exit_wait(processor->cpu_id);
366 
		/* Finish the state machine: PENDING_OFFLINE -> OFF_LINE. */
367 		s = splsched();
368 		simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
369 		pset_lock(pset);
370 		assert(processor->processor_inshutdown);
371 		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_PENDING_OFFLINE);
372 		assert(processor->state == PROCESSOR_PENDING_OFFLINE);
373 		pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
374 		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_CPU_OFFLINE);
375 		pset_unlock(pset);
376 		simple_unlock(&sched_available_cores_lock);
377 		splx(s);
378 
		/* Notify listeners and cut power to the core. */
379 		ml_broadcast_cpu_event(CPU_EXITED, processor->cpu_id);
380 		ml_cpu_power_disable(processor->cpu_id);
381 
382 		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_CPU_OFFLINE);
383 		processor_update_offline_state(processor, PROCESSOR_OFFLINE_FULLY_OFFLINE);
384 	}
385 
386 	ml_cpu_end_state_transition(processor->cpu_id);
387 }
388 
389 /*
390  * Called in the context of the idle thread to shut down the processor
391  *
392  * A shut-down processor looks like it's 'running' the idle thread parked
393  * in this routine, but it's actually been powered off and has no hardware state.
 *
 * 'parameter' carries the is_final_system_sleep flag stashed by
 * processor_doshutdown().  Never returns to the caller: either the CPU
 * powers off in processor_offline_intstack(), or — once the CPU comes
 * back — execution resumes after the machine_processor_shutdown() call
 * below and re-enters the idle loop via thread_block(idle_thread).
394  */
395 static void
processor_offline(void * parameter,__unused wait_result_t result)396 processor_offline(
397 	void * parameter,
398 	__unused wait_result_t result)
399 {
400 	bool is_final_system_sleep = (bool) parameter;
401 	processor_t processor = current_processor();
402 	thread_t self = current_thread();
403 	__assert_only thread_t old_thread = THREAD_NULL;
404 
	/* Must be running as this CPU's idle thread, interrupts off. */
405 	assert(self->state & TH_IDLE);
406 	assert(processor->idle_thread == self);
407 	assert(ml_get_interrupts_enabled() == FALSE);
408 	assert(self->continuation == NULL);
409 	assert(processor->processor_online == true);
410 	assert(processor->running_timers_active == false);
411 
412 	if (is_final_system_sleep) {
413 		assert(processor == current_processor());
414 		assert(processor == master_processor);
415 		assert(processor_avail_count == 1);
416 	}
417 
418 	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROCESSOR_SHUTDOWN) | DBG_FUNC_START, processor->cpu_id);
419 
420 	bool enforce_quiesce_safety = gEnforcePlatformActionSafety;
421 
422 	/*
423 	 * Scheduling is now disabled for this processor.
424 	 * Ensure that primitives that need scheduling (like mutexes) know this.
425 	 */
426 	if (enforce_quiesce_safety) {
427 		disable_preemption_without_measurements();
428 	}
429 
430 #if CONFIG_DTRACE
	/* Tell DTrace this CPU is going away. */
431 	if (dtrace_cpu_state_changed_hook) {
432 		(*dtrace_cpu_state_changed_hook)(processor->cpu_id, FALSE);
433 	}
434 #endif
435 
436 	smr_cpu_down(processor, SMR_CPU_REASON_OFFLINE);
437 
438 	/* Drain pending IPIs for the last time here. */
439 	ml_cpu_down();
440 
441 	sched_mark_processor_offline(processor, is_final_system_sleep);
442 
443 	/*
444 	 * Switch to the interrupt stack and shut down the processor.
445 	 *
446 	 * When the processor comes back, it will eventually call load_context which
447 	 * restores the context saved by machine_processor_shutdown, returning here.
448 	 */
449 	old_thread = machine_processor_shutdown(self, processor_offline_intstack, processor);
450 
451 	/*
452 	 * The processor is back. sched_mark_processor_online and
453 	 * friends have already run via processor_up.
454 	 */
455 
456 	/* old_thread should be NULL because we got here through Load_context */
457 	assert(old_thread == THREAD_NULL);
458 
459 	assert(processor == current_processor());
460 	assert(processor->idle_thread == current_thread());
461 	assert(processor->processor_online == true);
462 
463 	assert(ml_get_interrupts_enabled() == FALSE);
464 	assert(self->continuation == NULL);
465 
466 	/* Extract the machine_param value stashed by secondary_cpu_main */
467 	void * machine_param = self->parameter;
468 	self->parameter = NULL;
469 
	/* Re-run machine-layer init and clear the in-startup flag. */
470 	processor_cpu_reinit(machine_param, true, is_final_system_sleep);
471 
472 	if (enforce_quiesce_safety) {
473 		enable_preemption();
474 	}
475 
476 	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROCESSOR_SHUTDOWN) | DBG_FUNC_END, processor->cpu_id);
477 
478 	/*
479 	 * Now that the processor is back, invoke the idle thread to find out what to do next.
480 	 * idle_thread will enable interrupts.
481 	 */
482 	thread_block(idle_thread);
483 	/*NOTREACHED*/
484 }
485 
486 /*
487  * Complete the shutdown and place the processor offline.
488  *
489  * Called at splsched in the shutdown context
490  * (i.e. on the idle thread, on the interrupt stack)
491  *
492  * The onlining half of this is done in load_context().
 *
 * Does not return: cpu_sleep() powers the core down; if execution ever
 * falls through, the panic below catches it.
493  */
494 static void
processor_offline_intstack(processor_t processor)495 processor_offline_intstack(
496 	processor_t processor)
497 {
498 	assert(processor == current_processor());
499 	assert(processor->active_thread == current_thread());
500 
	/* Snapshot usage counters so the offline period is accounted as idle. */
501 	struct recount_snap snap = { 0 };
502 	recount_snapshot(&snap);
503 	recount_processor_idle(&processor->pr_recount, &snap);
504 
505 	smr_cpu_leave(processor, processor->last_dispatch);
506 
	/* Drop the kernel pmap association before powering off. */
507 	PMAP_DEACTIVATE_KERNEL(processor->cpu_id);
508 
509 	cpu_sleep();
510 	panic("zombie processor");
511 	/*NOTREACHED*/
512 }
513 
514 /*
515  * Called on the idle thread with interrupts disabled to initialize a
516  * secondary processor on boot or to reinitialize any processor on resume
517  * from processor offline.
 *
 * machine_param:         opaque machine-layer argument stashed by
 *                        secondary_cpu_main (passed to machine_cpu_reinit).
 * wait_for_cpu_signal:   arm64 only — if true, spin until cpu_signal
 *                        delivery is confirmed enabled before publishing
 *                        the CPU as up.
 * is_final_system_sleep: assertion-only; relaxes the instartup check when
 *                        the boot CPU resumes from full system sleep.
518  */
519 void
processor_cpu_reinit(void * machine_param,__unused bool wait_for_cpu_signal,__assert_only bool is_final_system_sleep)520 processor_cpu_reinit(void* machine_param,
521     __unused bool wait_for_cpu_signal,
522     __assert_only bool is_final_system_sleep)
523 {
524 	/* Re-initialize the processor */
525 	machine_cpu_reinit(machine_param);
526 
527 #if defined(__arm64__)
528 	/*
529 	 * See the comments for wait_while_mp_kdp_trap in processor_up().
530 	 *
531 	 * SIGPdisabled is cleared (to enable cpu_signal() to succeed with this processor)
532 	 * the first time we take an IPI.  This is triggered by machine_cpu_reinit(), above,
533 	 * which calls cpu_machine_init()->PE_cpu_machine_init()->PE_cpu_signal() which sends
534 	 * a self-IPI to ensure that happens when we enable interrupts.  So enable interrupts
535 	 * here so that cpu_signal() can succeed before we spin on mp_kdp_trap.
536 	 */
537 	assert_ml_cpu_signal_is_enabled(false);
538 
539 	ml_set_interrupts_enabled(TRUE);
540 
541 	if (wait_for_cpu_signal) {
542 		ml_wait_for_cpu_signal_to_enable();
543 	}
544 
545 	ml_set_interrupts_enabled(FALSE);
546 
547 	wait_while_mp_kdp_trap(true);
548 
549 	/*
550 	 * At this point,
551 	 * if a stackshot or panic is in progress, we either spin on mp_kdp_trap
552 	 * or we successfully received a SIGPdebug signal which will cause us to
553 	 * break out of the spin on mp_kdp_trap and instead
554 	 * spin next time interrupts are enabled in idle_thread().
555 	 */
556 	if (wait_for_cpu_signal) {
557 		assert_ml_cpu_signal_is_enabled(true);
558 	}
559 
560 	/*
561 	 * Now that we know SIGPdisabled is cleared, we can publish that
562 	 * this CPU has fully come out of offline state.
563 	 *
564 	 * Without wait_for_cpu_signal, we'll publish this earlier than
565 	 * cpu_signal is actually ready, but as long as it's ready by next S2R,
566 	 * it will be good enough.
567 	 */
568 	ml_cpu_up();
569 #endif
570 
571 	processor_t processor = current_processor();
572 
	/* Startup is complete: clear the flag and wake any thread waiting on it. */
573 	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
574 	assert(processor->processor_instartup == true || is_final_system_sleep);
575 	processor->processor_instartup = false;
576 	simple_unlock(&processor_start_state_lock);
577 
578 	thread_wakeup((event_t)&processor->processor_instartup);
579 }
580 
581 kern_return_t
host_get_boot_info(host_priv_t host_priv,kernel_boot_info_t boot_info)582 host_get_boot_info(
583 	host_priv_t         host_priv,
584 	kernel_boot_info_t  boot_info)
585 {
586 	const char *src = "";
587 	if (host_priv == HOST_PRIV_NULL) {
588 		return KERN_INVALID_HOST;
589 	}
590 
591 	/*
592 	 * Copy first operator string terminated by '\0' followed by
593 	 *	standardized strings generated from boot string.
594 	 */
595 	src = machine_boot_info(boot_info, KERNEL_BOOT_INFO_MAX);
596 	if (src != boot_info) {
597 		(void) strncpy(boot_info, src, KERNEL_BOOT_INFO_MAX);
598 	}
599 
600 	return KERN_SUCCESS;
601 }
602 
603 // These are configured through sysctls.
// phy_{read,write}_panic: whether a timed-out physical I/O access should
// panic rather than just be reported — TODO confirm against the ml_io_read
// timeout handling.  Defaults on for DEVELOPMENT || DEBUG kernels only.
// simulate_stretched_io: dev/debug fault-injection knob for I/O timing.
604 #if DEVELOPMENT || DEBUG
605 uint32_t phy_read_panic = 1;
606 uint32_t phy_write_panic = 1;
607 uint64_t simulate_stretched_io = 0;
608 #else
609 uint32_t phy_read_panic = 0;
610 uint32_t phy_write_panic = 0;
611 #endif
612 
613 #if !defined(__x86_64__)
614 
// Default trace threshold: 100us expressed in timebase ticks (assumes a
// 24MHz timebase).  Zero on release kernels disables tracing.
615 #if DEVELOPMENT || DEBUG
616 static const uint64_t TIMEBASE_TICKS_PER_USEC = 24000000ULL / USEC_PER_SEC;
617 static const uint64_t DEFAULT_TRACE_PHY_TIMEOUT = 100 * TIMEBASE_TICKS_PER_USEC;
618 #else
619 static const uint64_t DEFAULT_TRACE_PHY_TIMEOUT = 0;
620 #endif
621 
622 // The MACHINE_TIMEOUT facility only exists on ARM.
// The report-* timeouts default to 0 (disabled) and may be inherited from
// the interrupt-masked timeout in ml_io_init_timeouts() below.
623 MACHINE_TIMEOUT_DEV_WRITEABLE(report_phy_read_delay_to, "report-phy-read-delay", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
624 MACHINE_TIMEOUT_DEV_WRITEABLE(report_phy_write_delay_to, "report-phy-write-delay", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
625 MACHINE_TIMEOUT_DEV_WRITEABLE(trace_phy_read_delay_to, "trace-phy-read-delay", DEFAULT_TRACE_PHY_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
626 MACHINE_TIMEOUT_DEV_WRITEABLE(trace_phy_write_delay_to, "trace-phy-write-delay", DEFAULT_TRACE_PHY_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
627 
628 #if SCHED_HYGIENE_DEBUG
629 /*
630  * Note: The interrupt-masked timeout goes through two initializations - one
631  * early in boot and one later. Thus this function is also called twice and
632  * can't be marked '__startup_func'.
633  */
634 static void
ml_io_init_timeouts(void)635 ml_io_init_timeouts(void)
636 {
637 	/*
638 	 * The timeouts may be completely disabled via an override.
639 	 */
640 	if (kern_feature_override(KF_IO_TIMEOUT_OVRD)) {
641 		os_atomic_store(&report_phy_write_delay_to, 0, relaxed);
642 		os_atomic_store(&report_phy_read_delay_to, 0, relaxed);
643 		return;
644 	}
645 
646 	/*
647 	 * There may be no interrupt masked timeout set.
648 	 */
649 	const uint64_t interrupt_masked_to = os_atomic_load(&interrupt_masked_timeout, relaxed);
650 	if (interrupt_masked_timeout == 0) {
651 		return;
652 	}
653 
654 	/*
655 	 * Inherit from the interrupt masked timeout if smaller and the timeout
656 	 * hasn't been explicitly set via boot-arg.
657 	 */
658 	uint64_t arg = 0;
659 
660 	if (!PE_parse_boot_argn("ml-timeout-report-phy-read-delay", &arg, sizeof(arg))) {
661 		uint64_t report_phy_read_delay = os_atomic_load(&report_phy_read_delay_to, relaxed);
662 		report_phy_read_delay = report_phy_read_delay == 0 ?
663 		    interrupt_masked_to :
664 		    MIN(report_phy_read_delay, interrupt_masked_to);
665 		os_atomic_store(&report_phy_read_delay_to, report_phy_read_delay, relaxed);
666 	}
667 
668 	if (!PE_parse_boot_argn("ml-timeout-report-phy-write-delay", &arg, sizeof(arg))) {
669 		uint64_t report_phy_write_delay = os_atomic_load(&report_phy_write_delay_to, relaxed);
670 		report_phy_write_delay = report_phy_write_delay == 0 ?
671 		    interrupt_masked_to :
672 		    MIN(report_phy_write_delay, interrupt_masked_to);
673 		os_atomic_store(&report_phy_write_delay_to, report_phy_write_delay, relaxed);
674 	}
675 }
676 
677 /*
678  * It's important that this happens after machine timeouts have initialized so
679  * the correct timeouts can be inherited.
680  */
681 STARTUP(TIMEOUTS, STARTUP_RANK_SECOND, ml_io_init_timeouts);
682 #endif /* SCHED_HYGIENE_DEBUG */
683 
684 extern pmap_paddr_t kvtophys(vm_offset_t va);
685 #endif /* !defined(__x86_64__) */
686 
687 #if ML_IO_TIMEOUTS_ENABLED
688 
/*
 * Spinlock guarding both override trees.  Taken with interrupts disabled
 * because ml_io_{read,write}* may consult the trees from interrupt context
 * (see the comment in io_increase_timeouts).
 */
689 static LCK_GRP_DECLARE(io_timeout_override_lock_grp, "io_timeout_override");
690 static LCK_SPIN_DECLARE(io_timeout_override_lock, &io_timeout_override_lock_grp);
691 
/* One registered timeout override, keyed by range base address. */
692 struct io_timeout_override_entry {
693 	RB_ENTRY(io_timeout_override_entry) tree;	/* red-black tree linkage */
694 
695 	uintptr_t ioaddr_base;	/* start of range (virtual or physical, per tree) */
696 	unsigned int size;	/* range length in bytes; always > 0 */
697 	uint32_t read_timeout;	/* read timeout, absolute-time units */
698 	uint32_t write_timeout;	/* write timeout, absolute-time units */
699 };
700 
701 static inline int
io_timeout_override_cmp(const struct io_timeout_override_entry * a,const struct io_timeout_override_entry * b)702 io_timeout_override_cmp(const struct io_timeout_override_entry *a, const struct io_timeout_override_entry *b)
703 {
704 	if (a->ioaddr_base < b->ioaddr_base) {
705 		return -1;
706 	} else if (a->ioaddr_base > b->ioaddr_base) {
707 		return 1;
708 	} else {
709 		return 0;
710 	}
711 }
712 
/*
 * Two independent override trees: one keyed by physical address, one by
 * virtual address.  See override_io_timeouts() for how they are combined.
 */
713 static RB_HEAD(io_timeout_override, io_timeout_override_entry)
714 io_timeout_override_root_pa, io_timeout_override_root_va;
715 
716 RB_PROTOTYPE_PREV(io_timeout_override, io_timeout_override_entry, tree, io_timeout_override_cmp);
717 RB_GENERATE_PREV(io_timeout_override, io_timeout_override_entry, tree, io_timeout_override_cmp);
718 
719 static int
io_increase_timeouts(struct io_timeout_override * root,uintptr_t ioaddr_base,unsigned int size,uint32_t read_timeout_us,uint32_t write_timeout_us)720 io_increase_timeouts(struct io_timeout_override *root, uintptr_t ioaddr_base,
721     unsigned int size, uint32_t read_timeout_us, uint32_t write_timeout_us)
722 {
723 	const uint64_t MAX_TIMEOUT_ABS = UINT32_MAX;
724 
725 	assert(preemption_enabled());
726 
727 	int ret = KERN_SUCCESS;
728 
729 	if (size == 0) {
730 		return KERN_INVALID_ARGUMENT;
731 	}
732 
733 	uintptr_t ioaddr_end;
734 	if (os_add_overflow(ioaddr_base, size - 1, &ioaddr_end)) {
735 		return KERN_INVALID_ARGUMENT;
736 	}
737 
738 	uint64_t read_timeout_abs, write_timeout_abs;
739 	nanoseconds_to_absolutetime(NSEC_PER_USEC * read_timeout_us, &read_timeout_abs);
740 	nanoseconds_to_absolutetime(NSEC_PER_USEC * write_timeout_us, &write_timeout_abs);
741 	if (read_timeout_abs > MAX_TIMEOUT_ABS || write_timeout_abs > MAX_TIMEOUT_ABS) {
742 		return KERN_INVALID_ARGUMENT;
743 	}
744 
745 	struct io_timeout_override_entry *node = kalloc_type(struct io_timeout_override_entry, Z_WAITOK | Z_ZERO | Z_NOFAIL);
746 	node->ioaddr_base = ioaddr_base;
747 	node->size = size;
748 	node->read_timeout = (uint32_t)read_timeout_abs;
749 	node->write_timeout = (uint32_t)write_timeout_abs;
750 
751 	/*
752 	 * Interrupt handlers are allowed to call ml_io_{read,write}*, so
753 	 * interrupts must be disabled any time io_timeout_override_lock is
754 	 * held.  Otherwise the CPU could take an interrupt while holding the
755 	 * lock, invoke an ISR that calls ml_io_{read,write}*, and deadlock
756 	 * trying to acquire the lock again.
757 	 */
758 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
759 	lck_spin_lock(&io_timeout_override_lock);
760 	if (RB_INSERT(io_timeout_override, root, node)) {
761 		ret = KERN_INVALID_ARGUMENT;
762 		goto out;
763 	}
764 
765 	/* Check that this didn't create any new overlaps */
766 	struct io_timeout_override_entry *prev = RB_PREV(io_timeout_override, root, node);
767 	if (prev && (prev->ioaddr_base + prev->size) > node->ioaddr_base) {
768 		RB_REMOVE(io_timeout_override, root, node);
769 		ret = KERN_INVALID_ARGUMENT;
770 		goto out;
771 	}
772 	struct io_timeout_override_entry *next = RB_NEXT(io_timeout_override, root, node);
773 	if (next && (node->ioaddr_base + node->size) > next->ioaddr_base) {
774 		RB_REMOVE(io_timeout_override, root, node);
775 		ret = KERN_INVALID_ARGUMENT;
776 		goto out;
777 	}
778 
779 out:
780 	lck_spin_unlock(&io_timeout_override_lock);
781 	ml_set_interrupts_enabled(istate);
782 	if (ret != KERN_SUCCESS) {
783 		kfree_type(struct io_timeout_override_entry, node);
784 	}
785 	return ret;
786 }
787 
788 static int
io_reset_timeouts(struct io_timeout_override * root,uintptr_t ioaddr_base,unsigned int size)789 io_reset_timeouts(struct io_timeout_override *root, uintptr_t ioaddr_base, unsigned int size)
790 {
791 	assert(preemption_enabled());
792 
793 	struct io_timeout_override_entry key = { .ioaddr_base = ioaddr_base };
794 
795 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
796 	lck_spin_lock(&io_timeout_override_lock);
797 	struct io_timeout_override_entry *node = RB_FIND(io_timeout_override, root, &key);
798 	if (node) {
799 		if (node->size == size) {
800 			RB_REMOVE(io_timeout_override, root, node);
801 		} else {
802 			node = NULL;
803 		}
804 	}
805 	lck_spin_unlock(&io_timeout_override_lock);
806 	ml_set_interrupts_enabled(istate);
807 
808 	if (!node) {
809 		return KERN_NOT_FOUND;
810 	}
811 
812 	kfree_type(struct io_timeout_override_entry, node);
813 	return KERN_SUCCESS;
814 }
815 
816 static bool
io_override_timeout(struct io_timeout_override * root,uintptr_t addr,uint64_t * read_timeout,uint64_t * write_timeout)817 io_override_timeout(struct io_timeout_override *root, uintptr_t addr,
818     uint64_t *read_timeout, uint64_t *write_timeout)
819 {
820 	assert(!ml_get_interrupts_enabled());
821 	assert3p(read_timeout, !=, NULL);
822 	assert3p(write_timeout, !=, NULL);
823 
824 	struct io_timeout_override_entry *node = RB_ROOT(root);
825 
826 	lck_spin_lock(&io_timeout_override_lock);
827 	/* RB_FIND() doesn't support custom cmp functions, so we have to open-code our own */
828 	while (node) {
829 		if (node->ioaddr_base <= addr && addr < node->ioaddr_base + node->size) {
830 			*read_timeout = node->read_timeout;
831 			*write_timeout = node->write_timeout;
832 			lck_spin_unlock(&io_timeout_override_lock);
833 			return true;
834 		} else if (addr < node->ioaddr_base) {
835 			node = RB_LEFT(node, tree);
836 		} else {
837 			node = RB_RIGHT(node, tree);
838 		}
839 	}
840 	lck_spin_unlock(&io_timeout_override_lock);
841 
842 	return false;
843 }
844 
/*
 * Apply the strong-sync timeout override for a physical address, if the
 * address lies in an I/O range tagged PMAP_IO_RANGE_STRONG_SYNC (arm64
 * only; always a miss elsewhere).
 *
 * Returns true iff the override was written to the out-parameters.
 */
static bool
io_override_timeout_ss(uint64_t paddr, uint64_t *read_timeout, uint64_t *write_timeout)
{
#if defined(__arm64__)

	/*
	 * PCIe regions are marked with PMAP_IO_RANGE_STRONG_SYNC. Apply a
	 * timeout greater than two PCIe completion timeouts (90ms) as they can
	 * stack.
	 */
	#define STRONG_SYNC_TIMEOUT 2160000 /* 90ms */

	pmap_io_range_t *range = pmap_find_io_attr(paddr);
	if (range == NULL || (range->wimg & PMAP_IO_RANGE_STRONG_SYNC) == 0) {
		return false;
	}

	*read_timeout = STRONG_SYNC_TIMEOUT;
	*write_timeout = STRONG_SYNC_TIMEOUT;
	return true;
#else
	(void)paddr;
	(void)read_timeout;
	(void)write_timeout;
	return false;
#endif /* __arm64__ */
}
870 
871 /*
872  * Return timeout override values for the read/write timeout for a given
873  * address.
874  * A virtual address (vaddr), physical address (paddr) or both may be passed.
875  * Up to three separate timeout overrides can be found
876  *  - A virtual address override
877  *  - A physical address override
878  *  - A strong sync override
879  *  The largest override found is returned.
880  */
881 void
override_io_timeouts(uintptr_t vaddr,uint64_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)882 override_io_timeouts(uintptr_t vaddr, uint64_t paddr, uint64_t *read_timeout,
883     uint64_t *write_timeout)
884 {
885 	uint64_t rt_va = 0, wt_va = 0, rt_pa = 0, wt_pa = 0, rt_ss = 0, wt_ss = 0;
886 
887 	if (vaddr != 0) {
888 		/* Override from virtual address. */
889 		io_override_timeout(&io_timeout_override_root_va, vaddr, &rt_va, &wt_va);
890 	}
891 
892 	if (paddr != 0) {
893 		/* Override from physical address. */
894 		io_override_timeout(&io_timeout_override_root_pa, paddr, &rt_pa, &wt_pa);
895 
896 		/* Override from strong sync range. */
897 		io_override_timeout_ss(paddr, &rt_ss, &wt_ss);
898 	}
899 
900 	if (read_timeout != NULL) {
901 		*read_timeout =  MAX(MAX(rt_va, rt_pa), rt_ss);
902 	}
903 
904 	if (write_timeout != NULL) {
905 		*write_timeout = MAX(MAX(wt_va, wt_pa), wt_ss);
906 	}
907 }
908 
909 #endif /* ML_IO_TIMEOUTS_ENABLED */
910 
911 int
ml_io_increase_timeouts(uintptr_t ioaddr_base,unsigned int size,uint32_t read_timeout_us,uint32_t write_timeout_us)912 ml_io_increase_timeouts(uintptr_t ioaddr_base, unsigned int size,
913     uint32_t read_timeout_us, uint32_t write_timeout_us)
914 {
915 #if ML_IO_TIMEOUTS_ENABLED
916 	const size_t MAX_SIZE = 4096;
917 
918 	if (size > MAX_SIZE) {
919 		return KERN_INVALID_ARGUMENT;
920 	}
921 
922 	return io_increase_timeouts(&io_timeout_override_root_va, ioaddr_base,
923 	           size, read_timeout_us, write_timeout_us);
924 #else
925 	#pragma unused(ioaddr_base, size, read_timeout_us, write_timeout_us)
926 	return KERN_SUCCESS;
927 #endif /* ML_IO_TIMEOUTS_ENABLED */
928 }
929 
/*
 * Register an increased MMIO read/write timeout (in microseconds) for the
 * physical address range [ioaddr_base, ioaddr_base + size).  Returns a
 * kern_return_t-style status; a no-op success when timeouts are compiled out.
 *
 * NOTE(review): unlike ml_io_increase_timeouts(), this variant does not
 * enforce the 4096-byte region size cap — confirm whether that asymmetry
 * is intentional.
 */
int
ml_io_increase_timeouts_phys(vm_offset_t ioaddr_base, unsigned int size,
    uint32_t read_timeout_us, uint32_t write_timeout_us)
{
#if ML_IO_TIMEOUTS_ENABLED
	return io_increase_timeouts(&io_timeout_override_root_pa, ioaddr_base,
	           size, read_timeout_us, write_timeout_us);
#else
	#pragma unused(ioaddr_base, size, read_timeout_us, write_timeout_us)
	return KERN_SUCCESS;
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
942 
/*
 * Remove a previously registered MMIO timeout override for the virtual
 * address range [ioaddr_base, ioaddr_base + size).  The range must match
 * a registered region exactly (see ml_io_timeout_test: resetting a
 * subregion or an unknown region yields KERN_NOT_FOUND).
 */
int
ml_io_reset_timeouts(uintptr_t ioaddr_base, unsigned int size)
{
#if ML_IO_TIMEOUTS_ENABLED
	return io_reset_timeouts(&io_timeout_override_root_va, ioaddr_base, size);
#else
	#pragma unused(ioaddr_base, size)
	return KERN_SUCCESS;
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
953 
/*
 * Remove a previously registered MMIO timeout override for the physical
 * address range [ioaddr_base, ioaddr_base + size).  Physical-address
 * counterpart of ml_io_reset_timeouts().
 */
int
ml_io_reset_timeouts_phys(vm_offset_t ioaddr_base, unsigned int size)
{
#if ML_IO_TIMEOUTS_ENABLED
	return io_reset_timeouts(&io_timeout_override_root_pa, ioaddr_base, size);
#else
	#pragma unused(ioaddr_base, size)
	return KERN_SUCCESS;
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
964 
/*
 * Perform a volatile MMIO read of `size` bytes (1, 2, 4 or 8; anything
 * else panics) from the mapped I/O address `vaddr`, returning the value
 * zero-extended to 64 bits.
 *
 * When ML_IO_TIMEOUTS_ENABLED and a reporting threshold is configured,
 * the access is timed with interrupts disabled; a read exceeding the
 * (possibly per-address overridden) threshold is reported via DTrace and
 * kdebug and may panic, depending on phy_read_panic.
 */
unsigned long long
ml_io_read(uintptr_t vaddr, int size)
{
	unsigned long long result = 0;
	unsigned char s1;
	unsigned short s2;

#ifdef ML_IO_VERIFY_UNCACHEABLE
	/* Verifies the mapping is uncacheable and yields the PA up front. */
	uintptr_t paddr = pmap_verify_noncacheable(vaddr);
#elif defined(ML_IO_TIMEOUTS_ENABLED)
	/* PA resolved lazily (kvtophys) only if a slow access is detected. */
	uintptr_t paddr = 0;
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	uint64_t sabs, eabs;
	boolean_t istate, timeread = FALSE;
	uint64_t report_read_delay;
#if __x86_64__
	report_read_delay = report_phy_read_delay;
#else
	report_read_delay = os_atomic_load(&report_phy_read_delay_to, relaxed);
	uint64_t const trace_phy_read_delay = os_atomic_load(&trace_phy_read_delay_to, relaxed);
#endif /* __x86_64__ */

	/* Only pay for timestamping when a report threshold is configured. */
	if (__improbable(report_read_delay != 0)) {
		istate = ml_set_interrupts_enabled(FALSE);
		sabs = ml_io_timestamp();
		timeread = TRUE;
	}

#ifdef ML_IO_SIMULATE_STRETCHED_ENABLED
	/* Test hook: back-date the start time to simulate a slow access. */
	if (__improbable(timeread && simulate_stretched_io)) {
		sabs -= simulate_stretched_io;
	}
#endif /* ML_IO_SIMULATE_STRETCHED_ENABLED */
#endif /* ML_IO_TIMEOUTS_ENABLED */

#if DEVELOPMENT || DEBUG
	/* Fences between the timestamp reads and the access itself; can be
	 * disabled via the KF_IO_TIMEOUT_OVRD feature override. */
	boolean_t use_fences = !kern_feature_override(KF_IO_TIMEOUT_OVRD);
	if (use_fences) {
		ml_timebase_to_memory_fence();
	}
#endif

	switch (size) {
	case 1:
		s1 = *(volatile unsigned char *)vaddr;
		result = s1;
		break;
	case 2:
		s2 = *(volatile unsigned short *)vaddr;
		result = s2;
		break;
	case 4:
		result = *(volatile unsigned int *)vaddr;
		break;
	case 8:
		result = *(volatile unsigned long long *)vaddr;
		break;
	default:
		panic("Invalid size %d for ml_io_read(%p)", size, (void *)vaddr);
		break;
	}

#if DEVELOPMENT || DEBUG
	if (use_fences) {
		ml_memory_to_timebase_fence();
	}
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	if (__improbable(timeread == TRUE)) {
		eabs = ml_io_timestamp();


		/* Prevent the processor from calling iotrace during its
		 * initialization procedure. */
		if (current_processor()->state == PROCESSOR_RUNNING) {
			iotrace(IOTRACE_IO_READ, vaddr, paddr, size, result, sabs, eabs - sabs);
		}

		if (__improbable((eabs - sabs) > report_read_delay)) {
			if (paddr == 0) {
				paddr = kvtophys(vaddr);
			}

			DTRACE_PHYSLAT5(physioread, uint64_t, (eabs - sabs),
			    uint64_t, vaddr, uint32_t, size, uint64_t, paddr, uint64_t, result);

			/* A per-address override may raise the reporting threshold. */
			uint64_t override = 0;
			override_io_timeouts(vaddr, paddr, &override, NULL);

			if (override != 0) {
#if SCHED_HYGIENE_DEBUG
				/*
				 * The IO timeout was overridden. As interrupts are disabled in
				 * order to accurately measure IO time this can cause the
				 * interrupt masked timeout threshold to be exceeded.  If the
				 * interrupt masked debug mode is set to panic, abandon the
				 * measurement. If in trace mode leave it as-is for
				 * observability.
				 */
				if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
					ml_spin_debug_clear(current_thread());
					ml_irq_debug_abandon();
				}
#endif
				report_read_delay = override;
			}
		}

		/* Re-check against the (possibly raised) threshold. */
		if (__improbable((eabs - sabs) > report_read_delay)) {
			if (phy_read_panic && (machine_timeout_suspended() == FALSE)) {
#if defined(__x86_64__)
				panic_notify();
#endif /* defined(__x86_64__) */
				uint64_t nsec = 0;
				absolutetime_to_nanoseconds(eabs - sabs, &nsec);
				panic("Read from IO vaddr 0x%lx paddr 0x%lx took %llu ns, "
				    "result: 0x%llx (start: %llu, end: %llu), ceiling: %llu",
				    vaddr, paddr, nsec, result, sabs, eabs,
				    report_read_delay);
			}
		}

		if (__improbable(trace_phy_read_delay > 0 && (eabs - sabs) > trace_phy_read_delay)) {
			KDBG(MACHDBG_CODE(DBG_MACH_IO, DBC_MACH_IO_MMIO_READ),
			    (eabs - sabs), VM_KERNEL_UNSLIDE_OR_PERM(vaddr), paddr, result);
		}

		(void)ml_set_interrupts_enabled(istate);
	}
#endif /*  ML_IO_TIMEOUTS_ENABLED */
	return result;
}
1100 
1101 unsigned int
ml_io_read8(uintptr_t vaddr)1102 ml_io_read8(uintptr_t vaddr)
1103 {
1104 	return (unsigned) ml_io_read(vaddr, 1);
1105 }
1106 
1107 unsigned int
ml_io_read16(uintptr_t vaddr)1108 ml_io_read16(uintptr_t vaddr)
1109 {
1110 	return (unsigned) ml_io_read(vaddr, 2);
1111 }
1112 
1113 unsigned int
ml_io_read32(uintptr_t vaddr)1114 ml_io_read32(uintptr_t vaddr)
1115 {
1116 	return (unsigned) ml_io_read(vaddr, 4);
1117 }
1118 
/* 8-byte MMIO read. */
unsigned long long
ml_io_read64(uintptr_t vaddr)
{
	return ml_io_read(vaddr, 8);
}
1124 
1125 /* ml_io_write* */
1126 
/*
 * Perform a volatile MMIO write of the low `size` bytes (1, 2, 4 or 8;
 * anything else panics) of `val` to the mapped I/O address `vaddr`.
 *
 * When ML_IO_TIMEOUTS_ENABLED and a reporting threshold is configured,
 * the access is timed with interrupts disabled; a write exceeding the
 * (possibly per-address overridden) threshold is reported via DTrace and
 * kdebug and may panic, depending on phy_write_panic.
 */
void
ml_io_write(uintptr_t vaddr, uint64_t val, int size)
{
#ifdef ML_IO_VERIFY_UNCACHEABLE
	/* Verifies the mapping is uncacheable and yields the PA up front. */
	uintptr_t paddr = pmap_verify_noncacheable(vaddr);
#elif defined(ML_IO_TIMEOUTS_ENABLED)
	/* PA resolved lazily (kvtophys) only if a slow access is detected. */
	uintptr_t paddr = 0;
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	uint64_t sabs, eabs;
	boolean_t istate, timewrite = FALSE;
	uint64_t report_write_delay;
#if __x86_64__
	report_write_delay = report_phy_write_delay;
#else
	report_write_delay = os_atomic_load(&report_phy_write_delay_to, relaxed);
	uint64_t trace_phy_write_delay = os_atomic_load(&trace_phy_write_delay_to, relaxed);
#endif /* !defined(__x86_64__) */
	/* Only pay for timestamping when a report threshold is configured. */
	if (__improbable(report_write_delay != 0)) {
		istate = ml_set_interrupts_enabled(FALSE);
		sabs = ml_io_timestamp();
		timewrite = TRUE;
	}

#ifdef ML_IO_SIMULATE_STRETCHED_ENABLED
	/* Test hook: back-date the start time to simulate a slow access. */
	if (__improbable(timewrite && simulate_stretched_io)) {
		sabs -= simulate_stretched_io;
	}
#endif /* ML_IO_SIMULATE_STRETCHED_ENABLED */
#endif /* ML_IO_TIMEOUTS_ENABLED */

#if DEVELOPMENT || DEBUG
	/* Fences between the timestamp reads and the access itself; can be
	 * disabled via the KF_IO_TIMEOUT_OVRD feature override. */
	boolean_t use_fences = !kern_feature_override(KF_IO_TIMEOUT_OVRD);
	if (use_fences) {
		ml_timebase_to_memory_fence();
	}
#endif

	switch (size) {
	case 1:
		*(volatile uint8_t *)vaddr = (uint8_t)val;
		break;
	case 2:
		*(volatile uint16_t *)vaddr = (uint16_t)val;
		break;
	case 4:
		*(volatile uint32_t *)vaddr = (uint32_t)val;
		break;
	case 8:
		*(volatile uint64_t *)vaddr = (uint64_t)val;
		break;
	default:
		panic("Invalid size %d for ml_io_write(%p, 0x%llx)", size, (void *)vaddr, val);
		break;
	}

#if DEVELOPMENT || DEBUG
	if (use_fences) {
		ml_memory_to_timebase_fence();
	}
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	if (__improbable(timewrite == TRUE)) {
		eabs = ml_io_timestamp();

		/* Prevent the processor from calling iotrace during its
		 * initialization procedure. */
		if (current_processor()->state == PROCESSOR_RUNNING) {
			iotrace(IOTRACE_IO_WRITE, vaddr, paddr, size, val, sabs, eabs - sabs);
		}


		if (__improbable((eabs - sabs) > report_write_delay)) {
			if (paddr == 0) {
				paddr = kvtophys(vaddr);
			}

			DTRACE_PHYSLAT5(physiowrite, uint64_t, (eabs - sabs),
			    uint64_t, vaddr, uint32_t, size, uint64_t, paddr, uint64_t, val);

			/* A per-address override may raise the reporting threshold. */
			uint64_t override = 0;
			override_io_timeouts(vaddr, paddr, NULL, &override);

			if (override != 0) {
#if SCHED_HYGIENE_DEBUG
				/*
				 * The IO timeout was overridden. As interrupts are disabled in
				 * order to accurately measure IO time this can cause the
				 * interrupt masked timeout threshold to be exceeded.  If the
				 * interrupt masked debug mode is set to panic, abandon the
				 * measurement. If in trace mode leave it as-is for
				 * observability.
				 */
				if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
					ml_spin_debug_clear(current_thread());
					ml_irq_debug_abandon();
				}
#endif
				report_write_delay = override;
			}
		}

		/* Re-check against the (possibly raised) threshold. */
		if (__improbable((eabs - sabs) > report_write_delay)) {
			if (phy_write_panic && (machine_timeout_suspended() == FALSE)) {
#if defined(__x86_64__)
				panic_notify();
#endif /*  defined(__x86_64__) */

				uint64_t nsec = 0;
				absolutetime_to_nanoseconds(eabs - sabs, &nsec);
				panic("Write to IO vaddr %p paddr %p val 0x%llx took %llu ns,"
				    " (start: %llu, end: %llu), ceiling: %llu",
				    (void *)vaddr, (void *)paddr, val, nsec, sabs, eabs,
				    report_write_delay);
			}
		}

		if (__improbable(trace_phy_write_delay > 0 && (eabs - sabs) > trace_phy_write_delay)) {
			KDBG(MACHDBG_CODE(DBG_MACH_IO, DBC_MACH_IO_MMIO_WRITE),
			    (eabs - sabs), VM_KERNEL_UNSLIDE_OR_PERM(vaddr), paddr, val);
		}

		(void)ml_set_interrupts_enabled(istate);
	}
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
1255 
/* 1-byte MMIO write via ml_io_write() (timed when timeouts are enabled). */
void
ml_io_write8(uintptr_t vaddr, uint8_t val)
{
	ml_io_write(vaddr, val, 1);
}
1261 
/* 2-byte MMIO write via ml_io_write(). */
void
ml_io_write16(uintptr_t vaddr, uint16_t val)
{
	ml_io_write(vaddr, val, 2);
}
1267 
/* 4-byte MMIO write via ml_io_write(). */
void
ml_io_write32(uintptr_t vaddr, uint32_t val)
{
	ml_io_write(vaddr, val, 4);
}
1273 
/* 8-byte MMIO write via ml_io_write(). */
void
ml_io_write64(uintptr_t vaddr, uint64_t val)
{
	ml_io_write(vaddr, val, 8);
}
1279 
/* One registered CPU-event callback in a singly-linked chain. */
struct cpu_callback_chain_elem {
	cpu_callback_t                  fn;    /* callback to invoke */
	void                            *param; /* opaque argument handed back to fn */
	struct cpu_callback_chain_elem  *next; /* next element, or NULL at the tail */
};

/* Head of the chain: traversed lock-free by ml_broadcast_cpu_event(),
 * updated only under cpu_callback_chain_lock. */
static struct cpu_callback_chain_elem *cpu_callback_chain;
static LCK_GRP_DECLARE(cpu_callback_chain_lock_grp, "cpu_callback_chain");
static LCK_SPIN_DECLARE(cpu_callback_chain_lock, &cpu_callback_chain_lock_grp);
1289 
/*
 * Register fn(param, event, cpu_or_cluster) to be invoked by
 * ml_broadcast_cpu_event() for every broadcast CPU event.  Registration
 * is effectively permanent: elements come from the permanent-zone
 * allocator and cpu_event_unregister_callback() panics.
 */
void
cpu_event_register_callback(cpu_callback_t fn, void *param)
{
	struct cpu_callback_chain_elem *new_elem;

	new_elem = zalloc_permanent_type(struct cpu_callback_chain_elem);
	if (!new_elem) {
		panic("can't allocate cpu_callback_chain_elem");
	}

	lck_spin_lock(&cpu_callback_chain_lock);
	new_elem->next = cpu_callback_chain;
	new_elem->fn = fn;
	new_elem->param = param;
	/* Release store publishes the fully-initialized element to the
	 * lock-free readers in ml_broadcast_cpu_event(). */
	os_atomic_store(&cpu_callback_chain, new_elem, release);
	lck_spin_unlock(&cpu_callback_chain_lock);
}
1307 
/*
 * Unregistering is not supported (callbacks are allocated permanently);
 * calling this always panics.
 */
__attribute__((noreturn))
void
cpu_event_unregister_callback(__unused cpu_callback_t fn)
{
	panic("Unfortunately, cpu_event_unregister_callback is unimplemented.");
}
1314 
1315 void
ml_broadcast_cpu_event(enum cpu_event event,unsigned int cpu_or_cluster)1316 ml_broadcast_cpu_event(enum cpu_event event, unsigned int cpu_or_cluster)
1317 {
1318 	struct cpu_callback_chain_elem *cursor;
1319 
1320 	cursor = os_atomic_load(&cpu_callback_chain, dependency);
1321 	for (; cursor != NULL; cursor = cursor->next) {
1322 		cursor->fn(cursor->param, event, cpu_or_cluster);
1323 	}
1324 }
1325 
1326 // Initialize Machine Timeouts (see the MACHINE_TIMEOUT macro
1327 // definition)
1328 
1329 void
machine_timeout_init_with_suffix(const struct machine_timeout_spec * spec,char const * suffix)1330 machine_timeout_init_with_suffix(const struct machine_timeout_spec *spec, char const *suffix)
1331 {
1332 	if (spec->skip_predicate != NULL && spec->skip_predicate(spec)) {
1333 		// This timeout should be disabled.
1334 		os_atomic_store_wide((uint64_t*)spec->ptr, 0, relaxed);
1335 		return;
1336 	}
1337 
1338 	assert(suffix != NULL);
1339 	assert(strlen(spec->name) <= MACHINE_TIMEOUT_MAX_NAME_LEN);
1340 
1341 	size_t const suffix_len = strlen(suffix);
1342 
1343 	size_t const dt_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + suffix_len + 1;
1344 	char dt_name[dt_name_size];
1345 
1346 	strlcpy(dt_name, spec->name, dt_name_size);
1347 	strlcat(dt_name, suffix, dt_name_size);
1348 
1349 	size_t const scale_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + suffix_len + strlen("-scale") + 1;
1350 	char scale_name[scale_name_size];
1351 
1352 	strlcpy(scale_name, spec->name, scale_name_size);
1353 	strlcat(scale_name, suffix, scale_name_size);
1354 	strlcat(scale_name, "-scale", scale_name_size);
1355 
1356 	size_t const boot_arg_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + strlen("ml-timeout-") + suffix_len + 1;
1357 	char boot_arg_name[boot_arg_name_size];
1358 
1359 	strlcpy(boot_arg_name, "ml-timeout-", boot_arg_name_size);
1360 	strlcat(boot_arg_name, spec->name, boot_arg_name_size);
1361 	strlcat(boot_arg_name, suffix, boot_arg_name_size);
1362 
1363 	size_t const boot_arg_scale_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN +
1364 	    strlen("ml-timeout-") + strlen("-scale") + suffix_len + 1;
1365 	char boot_arg_scale_name[boot_arg_scale_name_size];
1366 
1367 	strlcpy(boot_arg_scale_name, "ml-timeout-", boot_arg_scale_name_size);
1368 	strlcat(boot_arg_scale_name, spec->name, boot_arg_scale_name_size);
1369 	strlcat(boot_arg_scale_name, suffix, boot_arg_name_size);
1370 	strlcat(boot_arg_scale_name, "-scale", boot_arg_scale_name_size);
1371 
1372 
1373 	/*
1374 	 * Determine base value from DT and boot-args.
1375 	 */
1376 
1377 	DTEntry base, chosen;
1378 
1379 	if (SecureDTLookupEntry(NULL, "/machine-timeouts", &base) != kSuccess) {
1380 		base = NULL;
1381 	}
1382 
1383 	if (SecureDTLookupEntry(NULL, "/chosen/machine-timeouts", &chosen) != kSuccess) {
1384 		chosen = NULL;
1385 	}
1386 
1387 	uint64_t timeout = spec->default_value;
1388 	bool found = false;
1389 
1390 	uint64_t const *data = NULL;
1391 	unsigned int data_size = sizeof(*data);
1392 
1393 	/* First look in /machine-timeouts/<name> */
1394 	if (base != NULL && SecureDTGetProperty(base, dt_name, (const void **)&data, &data_size) == kSuccess) {
1395 		if (data_size != sizeof(*data)) {
1396 			panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/%s", __func__, data_size, dt_name);
1397 		}
1398 
1399 		timeout = *data;
1400 		found = true;
1401 	}
1402 
1403 	/* A value in /chosen/machine-timeouts/<name> overrides */
1404 	if (chosen != NULL && SecureDTGetProperty(chosen, dt_name, (const void **)&data, &data_size) == kSuccess) {
1405 		if (data_size != sizeof(*data)) {
1406 			panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/%s", __func__, data_size, dt_name);
1407 		}
1408 
1409 		timeout = *data;
1410 		found = true;
1411 	}
1412 
1413 	/* A boot-arg ml-timeout-<name> overrides */
1414 	uint64_t boot_arg = 0;
1415 
1416 	if (PE_parse_boot_argn(boot_arg_name, &boot_arg, sizeof(boot_arg))) {
1417 		timeout = boot_arg;
1418 		found = true;
1419 	}
1420 
1421 
1422 	/*
1423 	 * Determine scale value from DT and boot-args.
1424 	 */
1425 
1426 	uint64_t scale = 1;
1427 	uint32_t const *scale_data;
1428 	unsigned int scale_size = sizeof(scale_data);
1429 
1430 	/* If there is a scale factor /machine-timeouts/<name>-scale, apply it. */
1431 	if (base != NULL && SecureDTGetProperty(base, scale_name, (const void **)&scale_data, &scale_size) == kSuccess) {
1432 		if (scale_size != sizeof(*scale_data)) {
1433 			panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/%s-scale", __func__, scale_size, dt_name);
1434 		}
1435 
1436 		scale = *scale_data;
1437 	}
1438 
1439 	/* If there is a scale factor /chosen/machine-timeouts/<name>-scale, use that. */
1440 	if (chosen != NULL && SecureDTGetProperty(chosen, scale_name, (const void **)&scale_data, &scale_size) == kSuccess) {
1441 		if (scale_size != sizeof(*scale_data)) {
1442 			panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/%s-scale", __func__,
1443 			    scale_size, dt_name);
1444 		}
1445 
1446 		scale = *scale_data;
1447 	}
1448 
1449 	/* Finally, a boot-arg ml-timeout-<name>-scale takes precedence. */
1450 	if (PE_parse_boot_argn(boot_arg_scale_name, &boot_arg, sizeof(boot_arg))) {
1451 		scale = boot_arg;
1452 	}
1453 
1454 	static bool global_scale_set;
1455 	static uint64_t global_scale;
1456 
1457 	if (!global_scale_set) {
1458 		/* Apply /machine-timeouts/global-scale if present */
1459 		if (SecureDTGetProperty(base, "global-scale", (const void **)&scale_data, &scale_size) == kSuccess) {
1460 			if (scale_size != sizeof(*scale_data)) {
1461 				panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/global-scale", __func__,
1462 				    scale_size);
1463 			}
1464 
1465 			global_scale = *scale_data;
1466 			global_scale_set = true;
1467 		}
1468 
1469 		/* Use /chosen/machine-timeouts/global-scale if present */
1470 		if (SecureDTGetProperty(chosen, "global-scale", (const void **)&scale_data, &scale_size) == kSuccess) {
1471 			if (scale_size != sizeof(*scale_data)) {
1472 				panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/global-scale", __func__,
1473 				    scale_size);
1474 			}
1475 
1476 			global_scale = *scale_data;
1477 			global_scale_set = true;
1478 		}
1479 
1480 		/* Finally, the boot-arg ml-timeout-global-scale takes precedence. */
1481 		if (PE_parse_boot_argn("ml-timeout-global-scale", &boot_arg, sizeof(boot_arg))) {
1482 			global_scale = boot_arg;
1483 			global_scale_set = true;
1484 		}
1485 	}
1486 
1487 	if (global_scale_set) {
1488 		scale *= global_scale;
1489 	}
1490 
1491 	/* Compute the final timeout, and done. */
1492 	if (found && timeout > 0) {
1493 		/* Only apply inherent unit scale if the value came in
1494 		 * externally. */
1495 
1496 		if (spec->unit_scale == MACHINE_TIMEOUT_UNIT_TIMEBASE) {
1497 			uint64_t nanoseconds = timeout / 1000;
1498 			nanoseconds_to_absolutetime(nanoseconds, &timeout);
1499 		} else {
1500 			timeout /= spec->unit_scale;
1501 		}
1502 
1503 		if (timeout == 0) {
1504 			/* Ensure unit scaling did not disable the timeout. */
1505 			timeout = 1;
1506 		}
1507 	}
1508 
1509 	if (os_mul_overflow(timeout, scale, &timeout)) {
1510 		timeout = UINT64_MAX; // clamp
1511 	}
1512 
1513 	os_atomic_store_wide((uint64_t*)spec->ptr, timeout, relaxed);
1514 }
1515 
/*
 * Initialize a machine timeout from its spec with no name suffix
 * (the common boot path; see machine_timeout_init_with_suffix()).
 */
void
machine_timeout_init(const struct machine_timeout_spec *spec)
{
	machine_timeout_init_with_suffix(spec, "");
}
1521 
1522 #if DEVELOPMENT || DEBUG
/*
 * Late timeout (re-)initialization, at the end of bsd_init().
 * Re-reads selected timeouts using the "-b" suffixed DT/boot-arg names,
 * so late-boot values can differ from the early-boot ones.
 */
void
machine_timeout_bsd_init(void)
{
	char const * const __unused mt_suffix = "-b";
#if SCHED_HYGIENE_DEBUG
	machine_timeout_init_with_suffix(MACHINE_TIMEOUT_SPEC_REF(interrupt_masked_timeout), mt_suffix);
	machine_timeout_init_with_suffix(MACHINE_TIMEOUT_SPEC_REF(sched_preemption_disable_threshold_mt), mt_suffix);

	/*
	 * The io timeouts can inherit from interrupt_masked_timeout.
	 * Re-initialize, as interrupt_masked_timeout may have changed.
	 */
	ml_io_init_timeouts();

	extern void preemption_disable_reset_max_durations(void);
	/*
	 * Reset the preemption disable stats, so that they are not
	 * polluted by long early boot code.
	 */
	preemption_disable_reset_max_durations();
#endif /* SCHED_HYGIENE_DEBUG */
}
1548 #endif /* DEVELOPMENT || DEBUG */
1549 
1550 #if ML_IO_TIMEOUTS_ENABLED && CONFIG_XNUPOST
1551 #include <tests/xnupost.h>
1552 
1553 extern kern_return_t ml_io_timeout_test(void);
1554 
1555 static inline void
ml_io_timeout_test_get_timeouts(uintptr_t vaddr,uint64_t * read_timeout,uint64_t * write_timeout)1556 ml_io_timeout_test_get_timeouts(uintptr_t vaddr, uint64_t *read_timeout, uint64_t *write_timeout)
1557 {
1558 	*read_timeout = 0;
1559 	*write_timeout = 0;
1560 
1561 	vm_offset_t paddr = kvtophys(vaddr);
1562 
1563 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
1564 	override_io_timeouts(vaddr, paddr, read_timeout, write_timeout);
1565 	ml_set_interrupts_enabled(istate);
1566 }
1567 
1568 static inline void
ml_io_timeout_test_get_timeouts_phys(vm_offset_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)1569 ml_io_timeout_test_get_timeouts_phys(vm_offset_t paddr, uint64_t *read_timeout, uint64_t *write_timeout)
1570 {
1571 	*read_timeout = 0;
1572 	*write_timeout = 0;
1573 
1574 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
1575 	override_io_timeouts(0, paddr, read_timeout, write_timeout);
1576 	ml_set_interrupts_enabled(istate);
1577 }
1578 
1579 kern_return_t
ml_io_timeout_test(void)1580 ml_io_timeout_test(void)
1581 {
1582 	const size_t SIZE = 16;
1583 	/*
1584 	 * Page align the base address to ensure that the regions are physically
1585 	 * contiguous.
1586 	 */
1587 	const uintptr_t iovaddr_base1 = (uintptr_t)kernel_pmap & ~PAGE_MASK;
1588 
1589 	const uintptr_t iovaddr_base2 = iovaddr_base1 + SIZE;
1590 	const uintptr_t vaddr1 = iovaddr_base1 + SIZE / 2;
1591 	const uintptr_t vaddr2 = iovaddr_base2 + SIZE / 2;
1592 
1593 	const vm_offset_t iopaddr_base1 = kvtophys(iovaddr_base1);
1594 	const vm_offset_t iopaddr_base2 = kvtophys(iovaddr_base2);
1595 	const vm_offset_t paddr1 = iopaddr_base1 + SIZE / 2;
1596 	const vm_offset_t paddr2 = iopaddr_base2 + SIZE / 2;
1597 
1598 	const uint64_t READ_TIMEOUT1_US = 50000, WRITE_TIMEOUT1_US = 50001;
1599 	const uint64_t READ_TIMEOUT2_US = 50002, WRITE_TIMEOUT2_US = 50003;
1600 	uint64_t read_timeout1_abs, write_timeout1_abs;
1601 	uint64_t read_timeout2_abs, write_timeout2_abs;
1602 	nanoseconds_to_absolutetime(NSEC_PER_USEC * READ_TIMEOUT1_US, &read_timeout1_abs);
1603 	nanoseconds_to_absolutetime(NSEC_PER_USEC * WRITE_TIMEOUT1_US, &write_timeout1_abs);
1604 	nanoseconds_to_absolutetime(NSEC_PER_USEC * READ_TIMEOUT2_US, &read_timeout2_abs);
1605 	nanoseconds_to_absolutetime(NSEC_PER_USEC * WRITE_TIMEOUT2_US, &write_timeout2_abs);
1606 
1607 	int err = ml_io_increase_timeouts(iovaddr_base1, 0, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1608 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for empty region");
1609 
1610 	err = ml_io_increase_timeouts(iovaddr_base1, 4097, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1611 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for region > 4096 bytes");
1612 
1613 	err = ml_io_increase_timeouts(UINTPTR_MAX, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1614 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for overflowed region");
1615 
1616 	err = ml_io_increase_timeouts(iovaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1617 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for first VA region should succeed");
1618 
1619 	err = ml_io_increase_timeouts(iovaddr_base2, SIZE, READ_TIMEOUT2_US, WRITE_TIMEOUT2_US);
1620 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for second VA region should succeed");
1621 
1622 	err = ml_io_increase_timeouts(iovaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1623 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for same region twice");
1624 
1625 	err = ml_io_increase_timeouts(vaddr1, (uint32_t)(vaddr2 - vaddr1), READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1626 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for overlapping regions");
1627 
1628 	uint64_t read_timeout, write_timeout;
1629 	ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1630 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1631 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1632 
1633 	ml_io_timeout_test_get_timeouts(vaddr2, &read_timeout, &write_timeout);
1634 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first region");
1635 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first region");
1636 
1637 	ml_io_timeout_test_get_timeouts(iovaddr_base2 + SIZE, &read_timeout, &write_timeout);
1638 	T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout without override");
1639 	T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout without override");
1640 
1641 	err = ml_io_reset_timeouts(iovaddr_base1 + 1, SIZE - 1);
1642 	T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for subregion");
1643 
1644 	err = ml_io_reset_timeouts(iovaddr_base2 + SIZE, SIZE);
1645 	T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for non-existent region");
1646 
1647 	err = ml_io_reset_timeouts(iovaddr_base1, SIZE);
1648 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for first VA region should succeed");
1649 
1650 	ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1651 	T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout for reset region");
1652 	T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout for reset region");
1653 
1654 	err = ml_io_reset_timeouts(iovaddr_base1, SIZE);
1655 	T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for same region twice");
1656 
1657 	err = ml_io_reset_timeouts(iovaddr_base2, SIZE);
1658 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for second VA region should succeed");
1659 
1660 	err = ml_io_increase_timeouts_phys(iopaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1661 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for first PA region should succeed");
1662 
1663 	err = ml_io_increase_timeouts_phys(iopaddr_base2, SIZE, READ_TIMEOUT2_US, WRITE_TIMEOUT2_US);
1664 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for second PA region should succeed");
1665 
1666 	ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1667 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1668 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1669 
1670 	ml_io_timeout_test_get_timeouts(vaddr2, &read_timeout, &write_timeout);
1671 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first region");
1672 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first region");
1673 
1674 	ml_io_timeout_test_get_timeouts_phys(paddr1, &read_timeout, &write_timeout);
1675 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1676 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1677 
1678 	ml_io_timeout_test_get_timeouts_phys(paddr2, &read_timeout, &write_timeout);
1679 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first physical region");
1680 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first physical region");
1681 
1682 	err = ml_io_reset_timeouts_phys(iopaddr_base1, SIZE);
1683 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for first PA region should succeed");
1684 
1685 	err = ml_io_reset_timeouts_phys(iopaddr_base2, SIZE);
1686 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for second PA region should succeed");
1687 
1688 	ml_io_timeout_test_get_timeouts_phys(paddr1, &read_timeout, &write_timeout);
1689 	T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout for reset region");
1690 	T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout for reset region");
1691 
1692 	return KERN_SUCCESS;
1693 }
1694 #endif /* CONFIG_XNUPOST */
1695