xref: /xnu-12377.41.6/osfmk/kern/machine.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2000-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	kern/machine.c
60  *	Author:	Avadis Tevanian, Jr.
61  *	Date:	1987
62  *
63  *	Support for machine independent machine abstraction.
64  */
65 
66 #include <string.h>
67 
68 #include <mach/mach_types.h>
69 #include <mach/boolean.h>
70 #include <mach/kern_return.h>
71 #include <mach/machine.h>
72 #include <mach/host_info.h>
73 #include <mach/host_reboot.h>
74 #include <mach/host_priv_server.h>
75 #include <mach/processor_server.h>
76 #include <mach/sdt.h>
77 
78 #include <kern/kern_types.h>
79 #include <kern/cpu_data.h>
80 #include <kern/ipc_host.h>
81 #include <kern/host.h>
82 #include <kern/machine.h>
83 #include <kern/misc_protos.h>
84 #include <kern/percpu.h>
85 #include <kern/processor.h>
86 #include <kern/queue.h>
87 #include <kern/sched.h>
88 #include <kern/startup.h>
89 #include <kern/task.h>
90 #include <kern/thread.h>
91 #include <kern/timeout.h>
92 #include <kern/iotrace.h>
93 #include <kern/smr.h>
94 
95 #include <libkern/OSDebug.h>
96 #if ML_IO_TIMEOUTS_ENABLED
97 #include <libkern/tree.h>
98 #endif
99 
100 #include <pexpert/device_tree.h>
101 
102 #include <machine/commpage.h>
103 #include <machine/machine_routines.h>
104 
105 #if HIBERNATION
106 #include <IOKit/IOHibernatePrivate.h>
107 #endif
108 #include <IOKit/IOPlatformExpert.h>
109 
110 #if CONFIG_DTRACE
111 extern void (*dtrace_cpu_state_changed_hook)(int, boolean_t);
112 #endif
113 
114 #if defined(__arm64__)
115 extern void wait_while_mp_kdp_trap(bool check_SIGPdebug);
116 #if CONFIG_SPTM
117 #include <arm64/sptm/pmap/pmap_data.h>
118 #else
119 #include <arm/pmap/pmap_data.h>
120 #endif /* CONFIG_SPTM */
121 #endif /* defined(__arm64__) */
122 
123 #if defined(__x86_64__)
124 #include <i386/panic_notify.h>
125 #endif
126 
127 /*
128  *	Exported variables:
129  */
130 
131 TUNABLE(long, wdt, "wdt", 0);
132 
133 struct machine_info     machine_info;
134 
135 
136 /* Forwards */
137 static void
138 processor_offline(void * parameter, __unused wait_result_t result);
139 
140 static void
141 processor_offline_intstack(processor_t processor) __dead2;
142 
143 
144 /*
145  *	processor_up:
146  *
147  *	Flag processor as up and running, and available
148  *	for scheduling.
149  */
/*
 * Mark this processor online and available to the scheduler, then notify
 * interested subsystems (dtrace, the x86 ML layer).  Runs at splsched for
 * the duration of the scheduler-visible state update.
 */
void
processor_up(
	processor_t                     processor)
{
	spl_t s = splsched();
	init_ast_check(processor);

#if defined(__arm64__)
	/*
	 * A processor coming online won't have received a SIGPdebug signal
	 * to cause it to spin while a stackshot or panic is taking place,
	 * so spin here on mp_kdp_trap.
	 *
	 * However, since cpu_signal() is not yet enabled for this processor,
	 * there is a race if we have just passed this when a cpu_signal()
	 * is attempted.  The sender will assume the cpu is offline, so it will
	 * not end up spinning anywhere.  See processor_cpu_reinit() for the fix
	 * for this race.
	 */
	wait_while_mp_kdp_trap(false);
#endif

	/* Boot CPU coming online for the first time, either at boot or after sleep */
	__assert_only bool is_first_online_processor;

	is_first_online_processor = sched_mark_processor_online(processor,
	    processor->last_startup_reason);

	/*
	 * Unless this is the boot CPU's first time online, the processor
	 * must still be flagged as in-startup at this point; the flag is
	 * cleared later by processor_cpu_reinit().
	 */
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	assert(processor->processor_instartup == true || is_first_online_processor);
	simple_unlock(&processor_start_state_lock);

	splx(s);

#if defined(__x86_64__)
	ml_cpu_up();
#endif /* defined(__x86_64__) */

#if CONFIG_DTRACE
	/* Report the CPU's transition to online to dtrace, if hooked. */
	if (dtrace_cpu_state_changed_hook) {
		(*dtrace_cpu_state_changed_hook)(processor->cpu_id, TRUE);
	}
#endif
}
194 
195 #include <atm/atm_internal.h>
196 
197 kern_return_t
host_reboot(host_priv_t host_priv,int options)198 host_reboot(
199 	host_priv_t             host_priv,
200 	int                             options)
201 {
202 	if (host_priv == HOST_PRIV_NULL) {
203 		return KERN_INVALID_HOST;
204 	}
205 
206 #if DEVELOPMENT || DEBUG
207 	if (options & HOST_REBOOT_DEBUGGER) {
208 		Debugger("Debugger");
209 		return KERN_SUCCESS;
210 	}
211 #endif
212 
213 	if (options & HOST_REBOOT_UPSDELAY) {
214 		// UPS power cutoff path
215 		PEHaltRestart( kPEUPSDelayHaltCPU );
216 	} else {
217 		halt_all_cpus(!(options & HOST_REBOOT_HALT));
218 	}
219 
220 	return KERN_SUCCESS;
221 }
222 
223 kern_return_t
processor_assign(__unused processor_t processor,__unused processor_set_t new_pset,__unused boolean_t wait)224 processor_assign(
225 	__unused processor_t            processor,
226 	__unused processor_set_t        new_pset,
227 	__unused boolean_t              wait)
228 {
229 	return KERN_FAILURE;
230 }
231 
/*
 * Shut down one processor, either to take it offline or as part of the
 * final system sleep.  Migrates onto the target processor, hands off to
 * its idle thread (processor_offline) to do the actual power-down, and —
 * unless this is the final sleep — waits for the CPU to fully exit.
 *
 * Called with cluster_powerdown_lock and processor_updown_lock held.
 */
void
processor_doshutdown(
	processor_t     processor,
	bool            is_final_system_sleep)
{
	lck_mtx_assert(&cluster_powerdown_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&processor_updown_lock, LCK_MTX_ASSERT_OWNED);

	if (!processor->processor_booted) {
		panic("processor %d not booted", processor->cpu_id);
	}

	/* Final sleep runs on the boot CPU, which must be the last one available. */
	if (is_final_system_sleep) {
		assert(processor == current_processor());
		assert(processor == master_processor);
		assert(processor_avail_count == 1);
	}

	processor_set_t pset = processor->processor_set;

	ml_cpu_begin_state_transition(processor->cpu_id);

	/* Notify interested subsystems that this CPU is about to go away. */
	ml_broadcast_cpu_event(CPU_EXIT_REQUESTED, processor->cpu_id);

#if HIBERNATION
	if (is_final_system_sleep) {
		/*
		 * Ensure the page queues are in a state where the hibernation
		 * code can manipulate them without requiring other threads
		 * to be scheduled.
		 *
		 * This operation can block,
		 * and unlock must be done from the same thread.
		 */
		assert(processor_avail_count < 2);
		hibernate_vm_lock();
	}
#endif

	/* Lock order: splsched, then sched_available_cores_lock, then pset lock. */
	spl_t s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
	pset_lock(pset);

	assert(processor->state != PROCESSOR_START);
	assert(processor->state != PROCESSOR_PENDING_OFFLINE);
	assert(processor->state != PROCESSOR_OFF_LINE);

	assert(!processor->processor_inshutdown);
	processor->processor_inshutdown = true;

	assert(processor->processor_offline_state == PROCESSOR_OFFLINE_RUNNING);
	processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_BEGIN_SHUTDOWN);

	/* Except during final sleep, some other CPU must remain online. */
	if (!is_final_system_sleep) {
		sched_assert_not_last_online_cpu(processor->cpu_id);
	}

	pset_unlock(pset);
	simple_unlock(&sched_available_cores_lock);

	if (is_final_system_sleep) {
		assert(processor == current_processor());

#if HIBERNATION
		/*
		 * After this point, the system is now
		 * committed to hibernation and must
		 * not run any other thread that could take this lock.
		 */
		hibernate_vm_unlock();
#endif
	} else {
		/*
		 * Get onto the processor to shut down.
		 * The scheduler picks this thread naturally according to its
		 * priority.
		 * The processor can run any other thread if this one blocks.
		 * So, don't block.
		 */
		processor_t prev = thread_bind(processor);
		thread_block(THREAD_CONTINUE_NULL);

		/* interrupts still disabled */
		assert(ml_get_interrupts_enabled() == FALSE);

		assert(processor == current_processor());
		assert(processor->processor_inshutdown);

		thread_bind(prev);
		/* interrupts still disabled */
	}

	/*
	 * Continue processor shutdown on the processor's idle thread.
	 * The handoff won't fail because the idle thread has a reserved stack.
	 * Switching to the idle thread leaves interrupts disabled,
	 * so we can't accidentally take an interrupt after the context switch.
	 */
	thread_t shutdown_thread = processor->idle_thread;
	shutdown_thread->continuation = processor_offline;
	shutdown_thread->parameter = (void*)is_final_system_sleep;

	thread_run(current_thread(), THREAD_CONTINUE_NULL, NULL, shutdown_thread);

	/*
	 * After this point, we are in regular scheduled context on a remaining
	 * available CPU. Interrupts are still disabled.
	 */

	if (is_final_system_sleep) {
		/*
		 * We are coming out of system sleep here, so there won't be a
		 * corresponding processor_startup for this processor, so we
		 * need to put it back in the correct running state.
		 *
		 * There's nowhere to execute a call to CPU_EXITED during system
		 * sleep for the boot processor, and it's already been CPU_BOOTED
		 * by this point anyways, so skip the call.
		 */
		assert(current_processor() == master_processor);
		assert(processor->state == PROCESSOR_RUNNING);
		assert(processor->processor_inshutdown);
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_STARTED_NOT_WAITED);
		processor->processor_inshutdown = false;
		processor_update_offline_state(processor, PROCESSOR_OFFLINE_RUNNING);

		splx(s);
	} else {
		splx(s);

		/* Wait for the target CPU to finish running its shutdown path. */
		cpu_exit_wait(processor->cpu_id);

		/* Re-take the locks to publish the final OFF_LINE state. */
		s = splsched();
		simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
		pset_lock(pset);
		assert(processor->processor_inshutdown);
		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_PENDING_OFFLINE);
		assert(processor->state == PROCESSOR_PENDING_OFFLINE);
		pset_update_processor_state(pset, processor, PROCESSOR_OFF_LINE);
		processor_update_offline_state_locked(processor, PROCESSOR_OFFLINE_CPU_OFFLINE);
		pset_unlock(pset);
		simple_unlock(&sched_available_cores_lock);
		splx(s);

		/* Tell listeners the CPU is gone, then cut its power. */
		ml_broadcast_cpu_event(CPU_EXITED, processor->cpu_id);
		ml_cpu_power_disable(processor->cpu_id);

		assert(processor->processor_offline_state == PROCESSOR_OFFLINE_CPU_OFFLINE);
		processor_update_offline_state(processor, PROCESSOR_OFFLINE_FULLY_OFFLINE);
	}

	ml_cpu_end_state_transition(processor->cpu_id);
}
385 
386 /*
387  * Called in the context of the idle thread to shut down the processor
388  *
389  * A shut-down processor looks like it's 'running' the idle thread parked
390  * in this routine, but it's actually been powered off and has no hardware state.
391  */
/*
 * Idle-thread continuation that performs the processor shutdown and,
 * when the CPU later powers back on, its reinitialization.
 *
 * parameter carries the is_final_system_sleep flag stashed by
 * processor_doshutdown(); result is unused.
 */
static void
processor_offline(
	void * parameter,
	__unused wait_result_t result)
{
	bool is_final_system_sleep = (bool) parameter;
	processor_t processor = current_processor();
	thread_t self = current_thread();
	__assert_only thread_t old_thread = THREAD_NULL;

	/* Must be running on this processor's idle thread, interrupts off. */
	assert(self->state & TH_IDLE);
	assert(processor->idle_thread == self);
	assert(ml_get_interrupts_enabled() == FALSE);
	assert(self->continuation == NULL);
	assert(processor->processor_online == true);
	assert(processor->running_timers_active == false);

	if (is_final_system_sleep) {
		assert(processor == current_processor());
		assert(processor == master_processor);
		assert(processor_avail_count == 1);
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROCESSOR_SHUTDOWN) | DBG_FUNC_START, processor->cpu_id);

	bool enforce_quiesce_safety = gEnforcePlatformActionSafety;

	/*
	 * Scheduling is now disabled for this processor.
	 * Ensure that primitives that need scheduling (like mutexes) know this.
	 */
	if (enforce_quiesce_safety) {
		disable_preemption_without_measurements();
	}

#if CONFIG_DTRACE
	/* Report the CPU's transition to offline to dtrace, if hooked. */
	if (dtrace_cpu_state_changed_hook) {
		(*dtrace_cpu_state_changed_hook)(processor->cpu_id, FALSE);
	}
#endif

	smr_cpu_down(processor, SMR_CPU_REASON_OFFLINE);

	/* Drain pending IPIs for the last time here. */
	ml_cpu_down();

	sched_mark_processor_offline(processor, is_final_system_sleep);

	/*
	 * Switch to the interrupt stack and shut down the processor.
	 *
	 * When the processor comes back, it will eventually call load_context which
	 * restores the context saved by machine_processor_shutdown, returning here.
	 */
	old_thread = machine_processor_shutdown(self, processor_offline_intstack, processor);

	/*
	 * The processor is back. sched_mark_processor_online and
	 * friends have already run via processor_up.
	 */

	/* old_thread should be NULL because we got here through Load_context */
	assert(old_thread == THREAD_NULL);

	assert(processor == current_processor());
	assert(processor->idle_thread == current_thread());
	assert(processor->processor_online == true);

	assert(ml_get_interrupts_enabled() == FALSE);
	assert(self->continuation == NULL);

	/* Extract the machine_param value stashed by secondary_cpu_main */
	void * machine_param = self->parameter;
	self->parameter = NULL;

	processor_cpu_reinit(machine_param, true, is_final_system_sleep);

	if (enforce_quiesce_safety) {
		enable_preemption();
	}

	KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROCESSOR_SHUTDOWN) | DBG_FUNC_END, processor->cpu_id);

	/*
	 * Now that the processor is back, invoke the idle thread to find out what to do next.
	 * idle_thread will enable interrupts.
	 */
	thread_block(idle_thread);
	/*NOTREACHED*/
}
482 
483 /*
484  * Complete the shutdown and place the processor offline.
485  *
486  * Called at splsched in the shutdown context
487  * (i.e. on the idle thread, on the interrupt stack)
488  *
489  * The onlining half of this is done in load_context().
490  */
static void
processor_offline_intstack(
	processor_t processor)
{
	assert(processor == current_processor());
	assert(processor->active_thread == current_thread());

	/* Account the processor's time from here on as idle time. */
	struct recount_snap snap = { 0 };
	recount_snapshot(&snap);
	recount_processor_idle(&processor->pr_recount, &snap);

	smr_cpu_leave(processor, processor->last_dispatch);

	PMAP_DEACTIVATE_KERNEL(processor->cpu_id);

	/*
	 * Power the CPU down.  cpu_sleep() is not expected to return;
	 * the CPU resumes via load_context() instead.
	 */
	cpu_sleep();
	panic("zombie processor");
	/*NOTREACHED*/
}
510 
511 /*
512  * Called on the idle thread with interrupts disabled to initialize a
513  * secondary processor on boot or to reinitialize any processor on resume
514  * from processor offline.
515  */
/*
 * Re-initialize a processor that is coming (back) online.
 *
 * machine_param:          machine-layer context stashed by secondary_cpu_main.
 * wait_for_cpu_signal:    if true (arm64), wait until cpu_signal() delivery
 *                         is confirmed enabled before publishing the CPU up.
 * is_final_system_sleep:  assertion-only; relaxes the in-startup check when
 *                         resuming the boot CPU from system sleep.
 */
void
processor_cpu_reinit(void* machine_param,
    __unused bool wait_for_cpu_signal,
    __assert_only bool is_final_system_sleep)
{
	/* Re-initialize the processor */
	machine_cpu_reinit(machine_param);

#if defined(__arm64__)
	/*
	 * See the comments for wait_while_mp_kdp_trap in processor_up().
	 *
	 * SIGPdisabled is cleared (to enable cpu_signal() to succeed with this processor)
	 * the first time we take an IPI.  This is triggered by machine_cpu_reinit(), above,
	 * which calls cpu_machine_init()->PE_cpu_machine_init()->PE_cpu_signal() which sends
	 * a self-IPI to ensure that happens when we enable interrupts.  So enable interrupts
	 * here so that cpu_signal() can succeed before we spin on mp_kdp_trap.
	 */
	assert_ml_cpu_signal_is_enabled(false);

	ml_set_interrupts_enabled(TRUE);

	if (wait_for_cpu_signal) {
		ml_wait_for_cpu_signal_to_enable();
	}

	ml_set_interrupts_enabled(FALSE);

	wait_while_mp_kdp_trap(true);

	/*
	 * At this point,
	 * if a stackshot or panic is in progress, we either spin on mp_kdp_trap
	 * or we successfully received a SIGPdebug signal which will cause us to
	 * break out of the spin on mp_kdp_trap and instead
	 * spin next time interrupts are enabled in idle_thread().
	 */
	if (wait_for_cpu_signal) {
		assert_ml_cpu_signal_is_enabled(true);
	}

	/*
	 * Now that we know SIGPdisabled is cleared, we can publish that
	 * this CPU has fully come out of offline state.
	 *
	 * Without wait_for_cpu_signal, we'll publish this earlier than
	 * cpu_signal is actually ready, but as long as it's ready by next S2R,
	 * it will be good enough.
	 */
	ml_cpu_up();
#endif

	/*
	 * Interrupts must be disabled while processor_start_state_lock is
	 * held to prevent a deadlock with CPU startup of other CPUs that
	 * may be proceeding in parallel to this CPU's reinitialization.
	 */
	spl_t s = splsched();
	processor_t processor = current_processor();

	/* Clear the in-startup flag asserted by processor_up(). */
	simple_lock(&processor_start_state_lock, LCK_GRP_NULL);
	assert(processor->processor_instartup == true || is_final_system_sleep);
	processor->processor_instartup = false;
	simple_unlock(&processor_start_state_lock);

	splx(s);

	/* Wake any thread waiting for this processor's startup to complete. */
	thread_wakeup((event_t)&processor->processor_instartup);
}
585 
586 kern_return_t
host_get_boot_info(host_priv_t host_priv,kernel_boot_info_t boot_info)587 host_get_boot_info(
588 	host_priv_t         host_priv,
589 	kernel_boot_info_t  boot_info)
590 {
591 	const char *src = "";
592 	if (host_priv == HOST_PRIV_NULL) {
593 		return KERN_INVALID_HOST;
594 	}
595 
596 	/*
597 	 * Copy first operator string terminated by '\0' followed by
598 	 *	standardized strings generated from boot string.
599 	 */
600 	src = machine_boot_info(boot_info, KERNEL_BOOT_INFO_MAX);
601 	if (src != boot_info) {
602 		(void) strncpy(boot_info, src, KERNEL_BOOT_INFO_MAX);
603 	}
604 
605 	return KERN_SUCCESS;
606 }
607 
// These are configured through sysctls.
#if DEVELOPMENT || DEBUG
uint32_t phy_read_panic = 1;            /* panic on over-long physical reads */
uint32_t phy_write_panic = 1;           /* panic on over-long physical writes */
uint64_t simulate_stretched_io = 0;     /* test knob: artificially stretch I/O */
#else
uint32_t phy_read_panic = 0;
uint32_t phy_write_panic = 0;
#endif

#if ML_IO_TIMEOUTS_ENABLED
/* Per-CPU state tracking in-flight MMIO accesses. */
mmio_track_t PERCPU_DATA(mmio_tracker);
#endif

#if !defined(__x86_64__)

#if DEVELOPMENT || DEBUG
/* 24 MHz timebase assumed here — TODO confirm against the platform's clock. */
static const uint64_t TIMEBASE_TICKS_PER_USEC = 24000000ULL / USEC_PER_SEC;
static const uint64_t DEFAULT_TRACE_PHY_TIMEOUT = 100 * TIMEBASE_TICKS_PER_USEC;
#else
static const uint64_t DEFAULT_TRACE_PHY_TIMEOUT = 0;
#endif

// The MACHINE_TIMEOUT facility only exists on ARM.
MACHINE_TIMEOUT_DEV_WRITEABLE(report_phy_read_delay_to, "report-phy-read-delay", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
MACHINE_TIMEOUT_DEV_WRITEABLE(report_phy_write_delay_to, "report-phy-write-delay", 0, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
MACHINE_TIMEOUT_DEV_WRITEABLE(trace_phy_read_delay_to, "trace-phy-read-delay", DEFAULT_TRACE_PHY_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
MACHINE_TIMEOUT_DEV_WRITEABLE(trace_phy_write_delay_to, "trace-phy-write-delay", DEFAULT_TRACE_PHY_TIMEOUT, MACHINE_TIMEOUT_UNIT_TIMEBASE, NULL);
636 
637 #if SCHED_HYGIENE_DEBUG
638 /*
639  * Note: The interrupt-masked timeout goes through two initializations - one
640  * early in boot and one later. Thus this function is also called twice and
641  * can't be marked '__startup_func'.
642  */
643 static void
ml_io_init_timeouts(void)644 ml_io_init_timeouts(void)
645 {
646 	/*
647 	 * The timeouts may be completely disabled via an override.
648 	 */
649 	if (kern_feature_override(KF_IO_TIMEOUT_OVRD)) {
650 		os_atomic_store(&report_phy_write_delay_to, 0, relaxed);
651 		os_atomic_store(&report_phy_read_delay_to, 0, relaxed);
652 		return;
653 	}
654 
655 	/*
656 	 * There may be no interrupt masked timeout set.
657 	 */
658 	const uint64_t interrupt_masked_to = os_atomic_load(&interrupt_masked_timeout, relaxed);
659 	if (interrupt_masked_timeout == 0) {
660 		return;
661 	}
662 
663 	/*
664 	 * Inherit from the interrupt masked timeout if smaller and the timeout
665 	 * hasn't been explicitly set via boot-arg.
666 	 */
667 	uint64_t arg = 0;
668 
669 	if (!PE_parse_boot_argn("ml-timeout-report-phy-read-delay", &arg, sizeof(arg))) {
670 		uint64_t report_phy_read_delay = os_atomic_load(&report_phy_read_delay_to, relaxed);
671 		report_phy_read_delay = report_phy_read_delay == 0 ?
672 		    interrupt_masked_to :
673 		    MIN(report_phy_read_delay, interrupt_masked_to);
674 		os_atomic_store(&report_phy_read_delay_to, report_phy_read_delay, relaxed);
675 	}
676 
677 	if (!PE_parse_boot_argn("ml-timeout-report-phy-write-delay", &arg, sizeof(arg))) {
678 		uint64_t report_phy_write_delay = os_atomic_load(&report_phy_write_delay_to, relaxed);
679 		report_phy_write_delay = report_phy_write_delay == 0 ?
680 		    interrupt_masked_to :
681 		    MIN(report_phy_write_delay, interrupt_masked_to);
682 		os_atomic_store(&report_phy_write_delay_to, report_phy_write_delay, relaxed);
683 	}
684 }
685 
686 /*
687  * It's important that this happens after machine timeouts have initialized so
688  * the correct timeouts can be inherited.
689  */
690 STARTUP(TIMEOUTS, STARTUP_RANK_SECOND, ml_io_init_timeouts);
691 #endif /* SCHED_HYGIENE_DEBUG */
692 
693 extern pmap_paddr_t kvtophys(vm_offset_t va);
694 #endif /* !defined(__x86_64__) */
695 
696 #if ML_IO_TIMEOUTS_ENABLED
697 
/* Spinlock guarding both override trees; see io_increase_timeouts() for
 * why interrupts must be disabled whenever it is held. */
static LCK_GRP_DECLARE(io_timeout_override_lock_grp, "io_timeout_override");
static LCK_SPIN_DECLARE(io_timeout_override_lock, &io_timeout_override_lock_grp);

/*
 * One registered I/O timeout override, covering a contiguous,
 * non-overlapping address range.
 */
struct io_timeout_override_entry {
	RB_ENTRY(io_timeout_override_entry) tree;   /* red-black tree linkage */

	uintptr_t ioaddr_base;   /* first address covered by this override */
	unsigned int size;       /* length of the range in bytes */
	uint32_t read_timeout;   /* read timeout, absolute-time units */
	uint32_t write_timeout;  /* write timeout, absolute-time units */
};
709 
710 static inline int
io_timeout_override_cmp(const struct io_timeout_override_entry * a,const struct io_timeout_override_entry * b)711 io_timeout_override_cmp(const struct io_timeout_override_entry *a, const struct io_timeout_override_entry *b)
712 {
713 	if (a->ioaddr_base < b->ioaddr_base) {
714 		return -1;
715 	} else if (a->ioaddr_base > b->ioaddr_base) {
716 		return 1;
717 	} else {
718 		return 0;
719 	}
720 }
721 
/* Two override trees: one keyed by physical address, one by virtual. */
static RB_HEAD(io_timeout_override, io_timeout_override_entry)
io_timeout_override_root_pa, io_timeout_override_root_va;

/* Generate RB-tree operations (with RB_PREV support) for the override trees. */
RB_PROTOTYPE_PREV(io_timeout_override, io_timeout_override_entry, tree, io_timeout_override_cmp);
RB_GENERATE_PREV(io_timeout_override, io_timeout_override_entry, tree, io_timeout_override_cmp);
727 
/*
 * Register a timeout override for [ioaddr_base, ioaddr_base + size).
 * Timeouts are given in microseconds and stored in absolute-time units.
 *
 * Returns KERN_INVALID_ARGUMENT for an empty or wrapping range, a timeout
 * that overflows 32 bits after conversion, or a range that collides with
 * an existing entry; KERN_SUCCESS otherwise.
 */
static int
io_increase_timeouts(struct io_timeout_override *root, uintptr_t ioaddr_base,
    unsigned int size, uint32_t read_timeout_us, uint32_t write_timeout_us)
{
	/* Stored timeouts are 32-bit fields; reject anything wider. */
	const uint64_t MAX_TIMEOUT_ABS = UINT32_MAX;

	assert(preemption_enabled());

	int ret = KERN_SUCCESS;

	if (size == 0) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Reject ranges that wrap around the end of the address space. */
	uintptr_t ioaddr_end;
	if (os_add_overflow(ioaddr_base, size - 1, &ioaddr_end)) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Convert microseconds to absolute time before range-checking. */
	uint64_t read_timeout_abs, write_timeout_abs;
	nanoseconds_to_absolutetime(NSEC_PER_USEC * read_timeout_us, &read_timeout_abs);
	nanoseconds_to_absolutetime(NSEC_PER_USEC * write_timeout_us, &write_timeout_abs);
	if (read_timeout_abs > MAX_TIMEOUT_ABS || write_timeout_abs > MAX_TIMEOUT_ABS) {
		return KERN_INVALID_ARGUMENT;
	}

	/* Allocate outside the spinlock; Z_NOFAIL means no NULL check is needed. */
	struct io_timeout_override_entry *node = kalloc_type(struct io_timeout_override_entry, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	node->ioaddr_base = ioaddr_base;
	node->size = size;
	node->read_timeout = (uint32_t)read_timeout_abs;
	node->write_timeout = (uint32_t)write_timeout_abs;

	/*
	 * Interrupt handlers are allowed to call ml_io_{read,write}*, so
	 * interrupts must be disabled any time io_timeout_override_lock is
	 * held.  Otherwise the CPU could take an interrupt while holding the
	 * lock, invoke an ISR that calls ml_io_{read,write}*, and deadlock
	 * trying to acquire the lock again.
	 */
	boolean_t istate = ml_set_interrupts_enabled(FALSE);
	lck_spin_lock(&io_timeout_override_lock);
	/* RB_INSERT returns non-NULL if an entry with this base already exists. */
	if (RB_INSERT(io_timeout_override, root, node)) {
		ret = KERN_INVALID_ARGUMENT;
		goto out;
	}

	/* Check that this didn't create any new overlaps */
	struct io_timeout_override_entry *prev = RB_PREV(io_timeout_override, root, node);
	if (prev && (prev->ioaddr_base + prev->size) > node->ioaddr_base) {
		RB_REMOVE(io_timeout_override, root, node);
		ret = KERN_INVALID_ARGUMENT;
		goto out;
	}
	struct io_timeout_override_entry *next = RB_NEXT(io_timeout_override, root, node);
	if (next && (node->ioaddr_base + node->size) > next->ioaddr_base) {
		RB_REMOVE(io_timeout_override, root, node);
		ret = KERN_INVALID_ARGUMENT;
		goto out;
	}

out:
	lck_spin_unlock(&io_timeout_override_lock);
	ml_set_interrupts_enabled(istate);
	/* On failure the node was never published (or was removed): free it. */
	if (ret != KERN_SUCCESS) {
		kfree_type(struct io_timeout_override_entry, node);
	}
	return ret;
}
796 
797 static int
io_reset_timeouts(struct io_timeout_override * root,uintptr_t ioaddr_base,unsigned int size)798 io_reset_timeouts(struct io_timeout_override *root, uintptr_t ioaddr_base, unsigned int size)
799 {
800 	assert(preemption_enabled());
801 
802 	struct io_timeout_override_entry key = { .ioaddr_base = ioaddr_base };
803 
804 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
805 	lck_spin_lock(&io_timeout_override_lock);
806 	struct io_timeout_override_entry *node = RB_FIND(io_timeout_override, root, &key);
807 	if (node) {
808 		if (node->size == size) {
809 			RB_REMOVE(io_timeout_override, root, node);
810 		} else {
811 			node = NULL;
812 		}
813 	}
814 	lck_spin_unlock(&io_timeout_override_lock);
815 	ml_set_interrupts_enabled(istate);
816 
817 	if (!node) {
818 		return KERN_NOT_FOUND;
819 	}
820 
821 	kfree_type(struct io_timeout_override_entry, node);
822 	return KERN_SUCCESS;
823 }
824 
825 static bool
io_override_timeout(struct io_timeout_override * root,uintptr_t addr,uint64_t * read_timeout,uint64_t * write_timeout)826 io_override_timeout(struct io_timeout_override *root, uintptr_t addr,
827     uint64_t *read_timeout, uint64_t *write_timeout)
828 {
829 	assert(!ml_get_interrupts_enabled());
830 	assert3p(read_timeout, !=, NULL);
831 	assert3p(write_timeout, !=, NULL);
832 
833 	struct io_timeout_override_entry *node = RB_ROOT(root);
834 
835 	lck_spin_lock(&io_timeout_override_lock);
836 	/* RB_FIND() doesn't support custom cmp functions, so we have to open-code our own */
837 	while (node) {
838 		if (node->ioaddr_base <= addr && addr < node->ioaddr_base + node->size) {
839 			*read_timeout = node->read_timeout;
840 			*write_timeout = node->write_timeout;
841 			lck_spin_unlock(&io_timeout_override_lock);
842 			return true;
843 		} else if (addr < node->ioaddr_base) {
844 			node = RB_LEFT(node, tree);
845 		} else {
846 			node = RB_RIGHT(node, tree);
847 		}
848 	}
849 	lck_spin_unlock(&io_timeout_override_lock);
850 
851 	return false;
852 }
853 
/*
 * Strong-sync override: returns true and supplies an enlarged timeout for
 * physical addresses that fall inside a PMAP_IO_RANGE_STRONG_SYNC region
 * (arm64 only); false everywhere else.
 */
static bool
io_override_timeout_ss(uint64_t paddr, uint64_t *read_timeout, uint64_t *write_timeout)
{
#if defined(__arm64__)

	/*
	 * PCIe regions are marked with PMAP_IO_RANGE_STRONG_SYNC. Apply a
	 * timeout greater than two PCIe completion timeouts (90ms) as they can
	 * stack.
	 */
	#define STRONG_SYNC_TIMEOUT 2160000 /* 90ms */

	pmap_io_range_t *range = pmap_find_io_attr(paddr);
	if (range == NULL) {
		return false;
	}
	if ((range->wimg & PMAP_IO_RANGE_STRONG_SYNC) == 0) {
		return false;
	}

	*read_timeout = STRONG_SYNC_TIMEOUT;
	*write_timeout = STRONG_SYNC_TIMEOUT;
	return true;
#else
	(void)paddr;
	(void)read_timeout;
	(void)write_timeout;
	return false;
#endif /* __arm64__ */
}
879 
880 /*
881  * Return timeout override values for the read/write timeout for a given
882  * address.
883  * A virtual address (vaddr), physical address (paddr) or both may be passed.
884  * Up to three separate timeout overrides can be found
885  *  - A virtual address override
886  *  - A physical address override
887  *  - A strong sync override
888  *  The largest override found is returned.
889  */
890 void
override_io_timeouts(uintptr_t vaddr,uint64_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)891 override_io_timeouts(uintptr_t vaddr, uint64_t paddr, uint64_t *read_timeout,
892     uint64_t *write_timeout)
893 {
894 	uint64_t rt_va = 0, wt_va = 0, rt_pa = 0, wt_pa = 0, rt_ss = 0, wt_ss = 0;
895 
896 	if (vaddr != 0) {
897 		/* Override from virtual address. */
898 		io_override_timeout(&io_timeout_override_root_va, vaddr, &rt_va, &wt_va);
899 	}
900 
901 	if (paddr != 0) {
902 		/* Override from physical address. */
903 		io_override_timeout(&io_timeout_override_root_pa, paddr, &rt_pa, &wt_pa);
904 
905 		/* Override from strong sync range. */
906 		io_override_timeout_ss(paddr, &rt_ss, &wt_ss);
907 	}
908 
909 	if (read_timeout != NULL) {
910 		*read_timeout =  MAX(MAX(rt_va, rt_pa), rt_ss);
911 	}
912 
913 	if (write_timeout != NULL) {
914 		*write_timeout = MAX(MAX(wt_va, wt_pa), wt_ss);
915 	}
916 }
917 
918 #endif /* ML_IO_TIMEOUTS_ENABLED */
919 
920 int
ml_io_increase_timeouts(uintptr_t ioaddr_base,unsigned int size,uint32_t read_timeout_us,uint32_t write_timeout_us)921 ml_io_increase_timeouts(uintptr_t ioaddr_base, unsigned int size,
922     uint32_t read_timeout_us, uint32_t write_timeout_us)
923 {
924 #if ML_IO_TIMEOUTS_ENABLED
925 	const size_t MAX_SIZE = 4096;
926 
927 	if (size > MAX_SIZE) {
928 		return KERN_INVALID_ARGUMENT;
929 	}
930 
931 	return io_increase_timeouts(&io_timeout_override_root_va, ioaddr_base,
932 	           size, read_timeout_us, write_timeout_us);
933 #else
934 	#pragma unused(ioaddr_base, size, read_timeout_us, write_timeout_us)
935 	return KERN_SUCCESS;
936 #endif /* ML_IO_TIMEOUTS_ENABLED */
937 }
938 
int
ml_io_increase_timeouts_phys(vm_offset_t ioaddr_base, unsigned int size,
    uint32_t read_timeout_us, uint32_t write_timeout_us)
{
#if ML_IO_TIMEOUTS_ENABLED
	/* Physical-address overrides are tracked in the PA root; size
	 * validation is delegated to io_increase_timeouts(). */
	return io_increase_timeouts(&io_timeout_override_root_pa, ioaddr_base,
	           size, read_timeout_us, write_timeout_us);
#else
	#pragma unused(ioaddr_base, size, read_timeout_us, write_timeout_us)
	return KERN_SUCCESS;
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
951 
int
ml_io_reset_timeouts(uintptr_t ioaddr_base, unsigned int size)
{
#if ML_IO_TIMEOUTS_ENABLED
	/* Remove a previously registered virtual-address override; the
	 * (base, size) pair must match the original registration exactly. */
	return io_reset_timeouts(&io_timeout_override_root_va, ioaddr_base, size);
#else
	#pragma unused(ioaddr_base, size)
	return KERN_SUCCESS;
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
962 
int
ml_io_reset_timeouts_phys(vm_offset_t ioaddr_base, unsigned int size)
{
#if ML_IO_TIMEOUTS_ENABLED
	/* Remove a previously registered physical-address override. */
	return io_reset_timeouts(&io_timeout_override_root_pa, ioaddr_base, size);
#else
	#pragma unused(ioaddr_base, size)
	return KERN_SUCCESS;
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
973 
974 #if ML_IO_TIMEOUTS_ENABLED
boolean_t
ml_io_check_for_mmio_overrides(__unused uint64_t mt)
{
#if __arm64__
	/* Issue a barrier before accessing the remote mmio trackers */
	__builtin_arm_dmb(DMB_ISH);
#endif
	/*
	 * Walk every CPU's in-flight MMIO tracker (published by
	 * ml_io_read()/ml_io_write()) with interrupts masked, and report
	 * whether any in-flight access still has an active timeout override
	 * that extends past time `mt`.
	 */
	boolean_t istate = ml_set_interrupts_enabled_with_debug(false, false);
	percpu_foreach(mmiot, mmio_tracker) {
		uint64_t read_timeout;
		uint64_t write_timeout;

		override_io_timeouts(mmiot->mmio_vaddr, mmiot->mmio_paddr, &read_timeout, &write_timeout);

		if (read_timeout > 0 || write_timeout > 0) {
			/* The larger of the two overrides bounds the window in
			 * which the remote access is still considered covered. */
			if (mt < (mmiot->mmio_start_mt + MAX(read_timeout, write_timeout))) {
				ml_set_interrupts_enabled_with_debug(istate, false);
				return true;
			}
		}
	}
	ml_set_interrupts_enabled_with_debug(istate, false);
	return false;
}
999 #endif /* ML_IO_TIMEOUTS_ENABLED */
1000 
1001 #if DEVELOPMENT || DEBUG
1002 static int ml_io_read_test_mode;
1003 #endif
1004 
/*
 * Perform a volatile MMIO read of `size` bytes (1/2/4/8) from `vaddr`.
 * When the physical-read timeout machinery is enabled and armed, the access
 * latency is measured with interrupts masked; excessive latency is traced,
 * reported via DTrace, and may panic (subject to per-address overrides).
 */
unsigned long long
ml_io_read(uintptr_t vaddr, int size)
{
	unsigned long long result = 0;
	unsigned char s1;
	unsigned short s2;

#if DEVELOPMENT || DEBUG
	/* For testing: magic vaddrs 1 and 2 short-circuit the access;
	 * vaddr 1 additionally stalls via IODelay(100) (presumably
	 * microseconds — confirm) to exercise the timeout path. */
	extern void IODelay(int);
	if (__improbable(ml_io_read_test_mode)) {
		if (vaddr == 1) {
			IODelay(100);
			return 0;
		} else if (vaddr == 2) {
			return 0;
		}
	}
#endif /* DEVELOPMENT || DEBUG */

#ifdef ML_IO_VERIFY_UNCACHEABLE
	uintptr_t paddr = pmap_verify_noncacheable(vaddr);
#elif defined(ML_IO_TIMEOUTS_ENABLED)
	uintptr_t paddr = 0;	/* resolved lazily below, only if timing is armed */
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	kern_timeout_t timeout;
	boolean_t istate, use_timeout = FALSE;
	uint64_t report_read_delay;
#if __x86_64__
	report_read_delay = report_phy_read_delay;
#else
	report_read_delay = os_atomic_load(&report_phy_read_delay_to, relaxed);
	uint64_t const trace_phy_read_delay = os_atomic_load(&trace_phy_read_delay_to, relaxed);
#endif /* __x86_64__ */

	if (__improbable(report_read_delay != 0)) {
		/* Mask interrupts so the measurement is not inflated by
		 * interrupt handling; istate is only used when use_timeout
		 * is set, so it is always initialized before use. */
		istate = ml_set_interrupts_enabled_with_debug(false, false);

		kern_timeout_start(&timeout, TF_NONSPEC_TIMEBASE | TF_SAMPLE_PMC);
		use_timeout = true;

		if (paddr == 0) {
			paddr = kvtophys(vaddr);
		}
		/* Publish the in-flight access in the per-CPU tracker so it is
		 * visible to ml_io_check_for_mmio_overrides() on other CPUs. */
		mmio_track_t *mmiot = PERCPU_GET(mmio_tracker);
		mmiot->mmio_start_mt = kern_timeout_start_time(&timeout);
		mmiot->mmio_paddr = paddr;
		mmiot->mmio_vaddr = vaddr;
	}

#ifdef ML_IO_SIMULATE_STRETCHED_ENABLED
	/* Test hook: artificially stretch the measured duration. */
	if (__improbable(use_timeout && simulate_stretched_io)) {
		kern_timeout_stretch(&timeout, simulate_stretched_io);
	}
#endif /* ML_IO_SIMULATE_STRETCHED_ENABLED */
#endif /* ML_IO_TIMEOUTS_ENABLED */

#if DEVELOPMENT || DEBUG
	/* Fences order the timebase samples against the MMIO access itself,
	 * unless explicitly overridden for testing. */
	boolean_t use_fences = !kern_feature_override(KF_IO_TIMEOUT_OVRD);
	if (use_fences) {
		ml_timebase_to_memory_fence();
	}
#endif

	/* The actual device access: a single volatile load of the requested width. */
	switch (size) {
	case 1:
		s1 = *(volatile unsigned char *)vaddr;
		result = s1;
		break;
	case 2:
		s2 = *(volatile unsigned short *)vaddr;
		result = s2;
		break;
	case 4:
		result = *(volatile unsigned int *)vaddr;
		break;
	case 8:
		result = *(volatile unsigned long long *)vaddr;
		break;
	default:
		panic("Invalid size %d for ml_io_read(%p)", size, (void *)vaddr);
		break;
	}

#if DEVELOPMENT || DEBUG
	if (use_fences) {
		ml_memory_to_timebase_fence();
	}
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	if (__improbable(use_timeout == TRUE)) {
		kern_timeout_end(&timeout, TF_NONSPEC_TIMEBASE);
		uint64_t duration = kern_timeout_gross_duration(&timeout);

		/* Prevent the processor from calling iotrace during its
		 * initialization procedure. */
		if (current_processor()->state == PROCESSOR_RUNNING) {
			iotrace(IOTRACE_IO_READ, vaddr, paddr, size, result, kern_timeout_start_time(&timeout), duration);
		}

		if (__improbable(duration > report_read_delay)) {
			DTRACE_PHYSLAT5(physioread, uint64_t, duration,
			    uint64_t, vaddr, uint32_t, size, uint64_t, paddr, uint64_t, result);

			/* Re-check against any per-address override before
			 * deciding whether the access really timed out. */
			uint64_t override = 0;
			override_io_timeouts(vaddr, paddr, &override, NULL);

			if (override != 0) {
#if SCHED_HYGIENE_DEBUG
				/*
				 * The IO timeout was overridden. If we were called in an
				 * interrupt handler context, that can lead to a timeout
				 * panic, so we need to abandon the measurement.
				 */
				if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
					ml_irq_debug_abandon();
				}
#endif
				report_read_delay = override;
			}
		}

		/* Second check uses the (possibly raised) threshold. */
		if (__improbable(duration > report_read_delay)) {
			if (phy_read_panic && (machine_timeout_suspended() == FALSE)) {
				char str[128];
#if defined(__x86_64__)
				panic_notify();
#endif /* defined(__x86_64__) */
				snprintf(str, sizeof(str),
				    "Read from IO vaddr 0x%lx paddr 0x%lx (result: 0x%llx) timed out:",
				    vaddr, paddr, result);
				kern_timeout_try_panic(KERN_TIMEOUT_MMIO, paddr, &timeout, str,
				    report_read_delay);
			}
		}

		if (__improbable(trace_phy_read_delay > 0 && duration > trace_phy_read_delay)) {
			KDBG(MACHDBG_CODE(DBG_MACH_IO, DBC_MACH_IO_MMIO_READ),
			    duration, VM_KERNEL_UNSLIDE_OR_PERM(vaddr), paddr, result);
		}

		(void)ml_set_interrupts_enabled_with_debug(istate, false);
	}
#endif /*  ML_IO_TIMEOUTS_ENABLED */
	return result;
}
1154 
1155 unsigned int
ml_io_read8(uintptr_t vaddr)1156 ml_io_read8(uintptr_t vaddr)
1157 {
1158 	return (unsigned) ml_io_read(vaddr, 1);
1159 }
1160 
1161 unsigned int
ml_io_read16(uintptr_t vaddr)1162 ml_io_read16(uintptr_t vaddr)
1163 {
1164 	return (unsigned) ml_io_read(vaddr, 2);
1165 }
1166 
1167 unsigned int
ml_io_read32(uintptr_t vaddr)1168 ml_io_read32(uintptr_t vaddr)
1169 {
1170 	return (unsigned) ml_io_read(vaddr, 4);
1171 }
1172 
unsigned long long
ml_io_read64(uintptr_t vaddr)
{
	/* 64-bit MMIO read; timeout handling lives in ml_io_read(). */
	return ml_io_read(vaddr, 8);
}
1178 
1179 
1180 uint64_t
ml_io_read_cpu_reg(uintptr_t vaddr,int sz,__unused int logical_cpu)1181 ml_io_read_cpu_reg(uintptr_t vaddr, int sz, __unused int logical_cpu)
1182 {
1183 	uint64_t val;
1184 
1185 
1186 	val = ml_io_read(vaddr, sz);
1187 
1188 
1189 	return val;
1190 }
1191 
1192 
1193 /* ml_io_write* */
1194 
/*
 * Perform a volatile MMIO write of `size` bytes (1/2/4/8) of `val` to
 * `vaddr`. Mirrors ml_io_read(): when the physical-write timeout machinery
 * is armed, the access latency is measured with interrupts masked; excessive
 * latency is traced, reported via DTrace, and may panic (subject to
 * per-address overrides).
 */
void
ml_io_write(uintptr_t vaddr, uint64_t val, int size)
{
#ifdef ML_IO_VERIFY_UNCACHEABLE
	uintptr_t paddr = pmap_verify_noncacheable(vaddr);
#elif defined(ML_IO_TIMEOUTS_ENABLED)
	uintptr_t paddr = 0;	/* resolved lazily below, only if timing is armed */
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	kern_timeout_t timeout;
	boolean_t istate, use_timeout = FALSE;
	uint64_t report_write_delay;
#if __x86_64__
	report_write_delay = report_phy_write_delay;
#else
	report_write_delay = os_atomic_load(&report_phy_write_delay_to, relaxed);
	uint64_t trace_phy_write_delay = os_atomic_load(&trace_phy_write_delay_to, relaxed);
#endif /* !defined(__x86_64__) */
	if (__improbable(report_write_delay != 0)) {
		/* Mask interrupts so the measurement is not inflated by
		 * interrupt handling; istate is only used when use_timeout
		 * is set, so it is always initialized before use. */
		istate = ml_set_interrupts_enabled_with_debug(false, false);

		kern_timeout_start(&timeout, TF_NONSPEC_TIMEBASE | TF_SAMPLE_PMC);
		use_timeout = TRUE;

		if (paddr == 0) {
			paddr = kvtophys(vaddr);
		}
		/* Publish the in-flight access in the per-CPU tracker so it is
		 * visible to ml_io_check_for_mmio_overrides() on other CPUs. */
		mmio_track_t *mmiot = PERCPU_GET(mmio_tracker);
		mmiot->mmio_start_mt = kern_timeout_start_time(&timeout);
		mmiot->mmio_paddr = paddr;
		mmiot->mmio_vaddr = vaddr;
	}

#ifdef ML_IO_SIMULATE_STRETCHED_ENABLED
	/* Test hook: artificially stretch the measured duration. */
	if (__improbable(use_timeout && simulate_stretched_io)) {
		kern_timeout_stretch(&timeout, simulate_stretched_io);
	}
#endif /* ML_IO_SIMULATE_STRETCHED_ENABLED */
#endif /* ML_IO_TIMEOUTS_ENABLED */

#if DEVELOPMENT || DEBUG
	/* Fences order the timebase samples against the MMIO access itself,
	 * unless explicitly overridden for testing. */
	boolean_t use_fences = !kern_feature_override(KF_IO_TIMEOUT_OVRD);
	if (use_fences) {
		ml_timebase_to_memory_fence();
	}
#endif

	/* The actual device access: a single volatile store of the requested width. */
	switch (size) {
	case 1:
		*(volatile uint8_t *)vaddr = (uint8_t)val;
		break;
	case 2:
		*(volatile uint16_t *)vaddr = (uint16_t)val;
		break;
	case 4:
		*(volatile uint32_t *)vaddr = (uint32_t)val;
		break;
	case 8:
		*(volatile uint64_t *)vaddr = (uint64_t)val;
		break;
	default:
		panic("Invalid size %d for ml_io_write(%p, 0x%llx)", size, (void *)vaddr, val);
		break;
	}

#if DEVELOPMENT || DEBUG
	if (use_fences) {
		ml_memory_to_timebase_fence();
	}
#endif

#ifdef ML_IO_TIMEOUTS_ENABLED
	if (__improbable(use_timeout == TRUE)) {
		kern_timeout_end(&timeout, TF_NONSPEC_TIMEBASE);
		uint64_t duration = kern_timeout_gross_duration(&timeout);

		/* Prevent the processor from calling iotrace during its
		 * initialization procedure. */
		if (current_processor()->state == PROCESSOR_RUNNING) {
			iotrace(IOTRACE_IO_WRITE, vaddr, paddr, size, val, kern_timeout_start_time(&timeout), duration);
		}

		if (__improbable(duration > report_write_delay)) {
			DTRACE_PHYSLAT5(physiowrite, uint64_t, duration,
			    uint64_t, vaddr, uint32_t, size, uint64_t, paddr, uint64_t, val);

			/* Re-check against any per-address override before
			 * deciding whether the access really timed out. */
			uint64_t override = 0;
			override_io_timeouts(vaddr, paddr, NULL, &override);

			if (override != 0) {
#if SCHED_HYGIENE_DEBUG
				/*
				 * The IO timeout was overridden. If we were called in an
				 * interrupt handler context, that can lead to a timeout
				 * panic, so we need to abandon the measurement.
				 */
				if (interrupt_masked_debug_mode == SCHED_HYGIENE_MODE_PANIC) {
					ml_irq_debug_abandon();
				}
#endif
				report_write_delay = override;
			}
		}

		/* Second check uses the (possibly raised) threshold. */
		if (__improbable(duration > report_write_delay)) {
			if (phy_write_panic && (machine_timeout_suspended() == FALSE)) {
				char str[128];
#if defined(__x86_64__)
				panic_notify();
#endif /*  defined(__x86_64__) */
				snprintf(str, sizeof(str),
				    "Write to IO vaddr 0x%lx paddr 0x%lx (value: 0x%llx) timed out:",
				    vaddr, paddr, val);
				kern_timeout_try_panic(KERN_TIMEOUT_MMIO, paddr, &timeout, str,
				    report_write_delay);
			}
		}

		if (__improbable(trace_phy_write_delay > 0 && duration > trace_phy_write_delay)) {
			KDBG(MACHDBG_CODE(DBG_MACH_IO, DBC_MACH_IO_MMIO_WRITE),
			    duration, VM_KERNEL_UNSLIDE_OR_PERM(vaddr), paddr, val);
		}

		(void)ml_set_interrupts_enabled_with_debug(istate, false);
	}
#endif /* ML_IO_TIMEOUTS_ENABLED */
}
1323 
void
ml_io_write8(uintptr_t vaddr, uint8_t val)
{
	/* Byte-wide MMIO write; timeout handling lives in ml_io_write(). */
	ml_io_write(vaddr, val, 1);
}
1329 
void
ml_io_write16(uintptr_t vaddr, uint16_t val)
{
	/* 16-bit MMIO write; timeout handling lives in ml_io_write(). */
	ml_io_write(vaddr, val, 2);
}
1335 
void
ml_io_write32(uintptr_t vaddr, uint32_t val)
{
	/* 32-bit MMIO write; timeout handling lives in ml_io_write(). */
	ml_io_write(vaddr, val, 4);
}
1341 
void
ml_io_write64(uintptr_t vaddr, uint64_t val)
{
	/* 64-bit MMIO write; timeout handling lives in ml_io_write(). */
	ml_io_write(vaddr, val, 8);
}
1347 
/* Singly-linked list node for one registered CPU-event callback. */
struct cpu_callback_chain_elem {
	cpu_callback_t                  fn;	/* callback to invoke */
	void                            *param;	/* opaque argument passed through to fn */
	struct cpu_callback_chain_elem  *next;
};

/* Head of the callback chain. Writers publish with a release store under
 * the spin lock; readers traverse with a dependency-ordered load. */
static struct cpu_callback_chain_elem *cpu_callback_chain;
static LCK_GRP_DECLARE(cpu_callback_chain_lock_grp, "cpu_callback_chain");
static LCK_SPIN_DECLARE(cpu_callback_chain_lock, &cpu_callback_chain_lock_grp);
1357 
/* One record in the CPU event debug ring. */
struct cpu_event_log_entry {
	uint64_t       abstime;	/* timebase at logging time; 0 == slot never written */
	enum cpu_event event;
	unsigned int   cpu_or_cluster;	/* CPU or cluster id, depending on the event */
};
1363 
1364 #if DEVELOPMENT || DEBUG
1365 
#define CPU_EVENT_RING_SIZE 128
/* Circular CPU-event log; slots are claimed atomically via cpu_event_widx. */
static struct cpu_event_log_entry cpu_event_ring[CPU_EVENT_RING_SIZE];
static _Atomic int cpu_event_widx;	/* index of the most recently claimed slot */
static _Atomic uint64_t cpd_cycles;	/* count of CLUSTER_EXIT_REQUESTED events */
1370 
/* Append one event to the debug ring; safe to call from any CPU. */
void
cpu_event_debug_log(enum cpu_event event, unsigned int cpu_or_cluster)
{
	int oldidx, newidx;

	/* Atomically claim the next ring slot; concurrent callers each get a
	 * distinct newidx. */
	os_atomic_rmw_loop(&cpu_event_widx, oldidx, newidx, relaxed, {
		newidx = (oldidx + 1) % CPU_EVENT_RING_SIZE;
	});
	/* NOTE(review): the entry fields are filled after the slot is claimed,
	 * so a concurrent dump_cpu_event_log() could observe a partially
	 * written entry — presumably acceptable for a debug-only log. */
	cpu_event_ring[newidx].abstime = ml_get_timebase();
	cpu_event_ring[newidx].event = event;
	cpu_event_ring[newidx].cpu_or_cluster = cpu_or_cluster;

	if (event == CLUSTER_EXIT_REQUESTED) {
		os_atomic_inc(&cpd_cycles, relaxed);
	}
}
1387 
1388 static const char *
cpu_event_log_string(enum cpu_event e)1389 cpu_event_log_string(enum cpu_event e)
1390 {
1391 	const char *event_strings[] = {
1392 		"CPU_BOOT_REQUESTED",
1393 		"CPU_BOOTED",
1394 		"CPU_ACTIVE",
1395 		"CLUSTER_ACTIVE",
1396 		"CPU_EXIT_REQUESTED",
1397 		"CPU_DOWN",
1398 		"CLUSTER_EXIT_REQUESTED",
1399 		"CPU_EXITED",
1400 		"PLATFORM_QUIESCE",
1401 		"PLATFORM_ACTIVE",
1402 		"PLATFORM_HALT_RESTART",
1403 		"PLATFORM_PANIC",
1404 		"PLATFORM_PANIC_SYNC",
1405 		"PLATFORM_PRE_SLEEP",
1406 		"PLATFORM_POST_RESUME",
1407 	};
1408 
1409 	assert((unsigned)e < sizeof(event_strings) / sizeof(event_strings[0]));
1410 	return event_strings[e];
1411 }
1412 
1413 void
dump_cpu_event_log(int (* printf_func)(const char * fmt,...))1414 dump_cpu_event_log(int (*printf_func)(const char * fmt, ...))
1415 {
1416 	printf_func("CPU event history @ %016llx: (CPD cycles: %lld)\n",
1417 	    ml_get_timebase(), os_atomic_load(&cpd_cycles, relaxed));
1418 
1419 	int idx = os_atomic_load(&cpu_event_widx, relaxed);
1420 	for (int c = 0; c < CPU_EVENT_RING_SIZE; c++) {
1421 		idx = (idx + 1) % CPU_EVENT_RING_SIZE;
1422 
1423 		struct cpu_event_log_entry *e = &cpu_event_ring[idx];
1424 		if (e->abstime != 0) {
1425 			printf_func(" %016llx: %s %d\n", e->abstime,
1426 			    cpu_event_log_string(e->event), e->cpu_or_cluster);
1427 		}
1428 	}
1429 }
1430 
1431 #else /* DEVELOPMENT || DEBUG */
1432 
/* Stub: CPU-event logging is compiled out of production kernels. */
void
cpu_event_debug_log(__unused enum cpu_event event, __unused unsigned int cpu_or_cluster)
{
	/* no logging on production builds */
}
1438 
/* Stub: there is no event ring on production builds, so nothing to dump. */
void
dump_cpu_event_log(__unused int (*printf_func)(const char * fmt, ...))
{
}
1443 
1444 #endif /* DEVELOPMENT || DEBUG */
1445 
1446 void
cpu_event_register_callback(cpu_callback_t fn,void * param)1447 cpu_event_register_callback(cpu_callback_t fn, void *param)
1448 {
1449 	struct cpu_callback_chain_elem *new_elem;
1450 
1451 	new_elem = zalloc_permanent_type(struct cpu_callback_chain_elem);
1452 	if (!new_elem) {
1453 		panic("can't allocate cpu_callback_chain_elem");
1454 	}
1455 
1456 	lck_spin_lock(&cpu_callback_chain_lock);
1457 	new_elem->next = cpu_callback_chain;
1458 	new_elem->fn = fn;
1459 	new_elem->param = param;
1460 	os_atomic_store(&cpu_callback_chain, new_elem, release);
1461 	lck_spin_unlock(&cpu_callback_chain_lock);
1462 }
1463 
/* Unregistration is unsupported — presumably because chain nodes come from
 * permanent zalloc memory and are never reclaimed; always panics. */
__attribute__((noreturn))
void
cpu_event_unregister_callback(__unused cpu_callback_t fn)
{
	panic("Unfortunately, cpu_event_unregister_callback is unimplemented.");
}
1470 
1471 void
ml_broadcast_cpu_event(enum cpu_event event,unsigned int cpu_or_cluster)1472 ml_broadcast_cpu_event(enum cpu_event event, unsigned int cpu_or_cluster)
1473 {
1474 	struct cpu_callback_chain_elem *cursor;
1475 
1476 	cpu_event_debug_log(event, cpu_or_cluster);
1477 
1478 	cursor = os_atomic_load(&cpu_callback_chain, dependency);
1479 	for (; cursor != NULL; cursor = cursor->next) {
1480 		cursor->fn(cursor->param, event, cpu_or_cluster);
1481 	}
1482 }
1483 
1484 // Initialize Machine Timeouts (see the MACHINE_TIMEOUT macro
1485 // definition)
1486 
1487 void
machine_timeout_init_with_suffix(const struct machine_timeout_spec * spec,char const * suffix,bool always_enabled)1488 machine_timeout_init_with_suffix(const struct machine_timeout_spec *spec, char const *suffix, bool always_enabled)
1489 {
1490 	if (!always_enabled && (wdt == -1 || (spec->skip_predicate != NULL && spec->skip_predicate(spec)))) {
1491 		// This timeout should be disabled.
1492 		os_atomic_store_wide((uint64_t*)spec->ptr, 0, relaxed);
1493 		return;
1494 	}
1495 
1496 	assert(suffix != NULL);
1497 	assert(strlen(spec->name) <= MACHINE_TIMEOUT_MAX_NAME_LEN);
1498 
1499 	size_t const suffix_len = strlen(suffix);
1500 
1501 	size_t const dt_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + suffix_len + 1;
1502 	char dt_name[dt_name_size];
1503 
1504 	strlcpy(dt_name, spec->name, dt_name_size);
1505 	strlcat(dt_name, suffix, dt_name_size);
1506 
1507 	size_t const scale_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + suffix_len + strlen("-scale") + 1;
1508 	char scale_name[scale_name_size];
1509 
1510 	strlcpy(scale_name, spec->name, scale_name_size);
1511 	strlcat(scale_name, suffix, scale_name_size);
1512 	strlcat(scale_name, "-scale", scale_name_size);
1513 
1514 	size_t const boot_arg_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN + strlen("ml-timeout-") + suffix_len + 1;
1515 	char boot_arg_name[boot_arg_name_size];
1516 
1517 	strlcpy(boot_arg_name, "ml-timeout-", boot_arg_name_size);
1518 	strlcat(boot_arg_name, spec->name, boot_arg_name_size);
1519 	strlcat(boot_arg_name, suffix, boot_arg_name_size);
1520 
1521 	size_t const boot_arg_scale_name_size = MACHINE_TIMEOUT_MAX_NAME_LEN +
1522 	    strlen("ml-timeout-") + strlen("-scale") + suffix_len + 1;
1523 	char boot_arg_scale_name[boot_arg_scale_name_size];
1524 
1525 	strlcpy(boot_arg_scale_name, "ml-timeout-", boot_arg_scale_name_size);
1526 	strlcat(boot_arg_scale_name, spec->name, boot_arg_scale_name_size);
1527 	strlcat(boot_arg_scale_name, suffix, boot_arg_name_size);
1528 	strlcat(boot_arg_scale_name, "-scale", boot_arg_scale_name_size);
1529 
1530 
1531 	/*
1532 	 * Determine base value from DT and boot-args.
1533 	 */
1534 
1535 	DTEntry base, chosen;
1536 
1537 	if (SecureDTLookupEntry(NULL, "/machine-timeouts", &base) != kSuccess) {
1538 		base = NULL;
1539 	}
1540 
1541 	if (SecureDTLookupEntry(NULL, "/chosen/machine-timeouts", &chosen) != kSuccess) {
1542 		chosen = NULL;
1543 	}
1544 
1545 	uint64_t timeout = spec->default_value;
1546 	bool found = false;
1547 
1548 	uint64_t const *data = NULL;
1549 	unsigned int data_size = sizeof(*data);
1550 
1551 	/* First look in /machine-timeouts/<name> */
1552 	if (base != NULL && SecureDTGetProperty(base, dt_name, (const void **)&data, &data_size) == kSuccess) {
1553 		if (data_size != sizeof(*data)) {
1554 			panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/%s", __func__, data_size, dt_name);
1555 		}
1556 
1557 		timeout = *data;
1558 		found = true;
1559 	}
1560 
1561 	/* A value in /chosen/machine-timeouts/<name> overrides */
1562 	if (chosen != NULL && SecureDTGetProperty(chosen, dt_name, (const void **)&data, &data_size) == kSuccess) {
1563 		if (data_size != sizeof(*data)) {
1564 			panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/%s", __func__, data_size, dt_name);
1565 		}
1566 
1567 		timeout = *data;
1568 		found = true;
1569 	}
1570 
1571 	/* A boot-arg ml-timeout-<name> overrides */
1572 	uint64_t boot_arg = 0;
1573 
1574 	if (PE_parse_boot_argn(boot_arg_name, &boot_arg, sizeof(boot_arg))) {
1575 		timeout = boot_arg;
1576 		found = true;
1577 	}
1578 
1579 
1580 	/*
1581 	 * Determine scale value from DT and boot-args.
1582 	 */
1583 
1584 	uint64_t scale = 1;
1585 	uint32_t const *scale_data;
1586 	unsigned int scale_size = sizeof(scale_data);
1587 
1588 	/* If there is a scale factor /machine-timeouts/<name>-scale, apply it. */
1589 	if (base != NULL && SecureDTGetProperty(base, scale_name, (const void **)&scale_data, &scale_size) == kSuccess) {
1590 		if (scale_size != sizeof(*scale_data)) {
1591 			panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/%s-scale", __func__, scale_size, dt_name);
1592 		}
1593 
1594 		scale = *scale_data;
1595 	}
1596 
1597 	/* If there is a scale factor /chosen/machine-timeouts/<name>-scale, use that. */
1598 	if (chosen != NULL && SecureDTGetProperty(chosen, scale_name, (const void **)&scale_data, &scale_size) == kSuccess) {
1599 		if (scale_size != sizeof(*scale_data)) {
1600 			panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/%s-scale", __func__,
1601 			    scale_size, dt_name);
1602 		}
1603 
1604 		scale = *scale_data;
1605 	}
1606 
1607 	/* Finally, a boot-arg ml-timeout-<name>-scale takes precedence. */
1608 	if (PE_parse_boot_argn(boot_arg_scale_name, &boot_arg, sizeof(boot_arg))) {
1609 		scale = boot_arg;
1610 	}
1611 
1612 	static bool global_scale_set;
1613 	static uint64_t global_scale;
1614 
1615 	if (!global_scale_set) {
1616 		/* Apply /machine-timeouts/global-scale if present */
1617 		if (SecureDTGetProperty(base, "global-scale", (const void **)&scale_data, &scale_size) == kSuccess) {
1618 			if (scale_size != sizeof(*scale_data)) {
1619 				panic("%s: unexpected machine timeout data_size %u for /machine-timeouts/global-scale", __func__,
1620 				    scale_size);
1621 			}
1622 
1623 			global_scale = *scale_data;
1624 			global_scale_set = true;
1625 		}
1626 
1627 		/* Use /chosen/machine-timeouts/global-scale if present */
1628 		if (SecureDTGetProperty(chosen, "global-scale", (const void **)&scale_data, &scale_size) == kSuccess) {
1629 			if (scale_size != sizeof(*scale_data)) {
1630 				panic("%s: unexpected machine timeout data_size %u for /chosen/machine-timeouts/global-scale", __func__,
1631 				    scale_size);
1632 			}
1633 
1634 			global_scale = *scale_data;
1635 			global_scale_set = true;
1636 		}
1637 
1638 		/* Finally, the boot-arg ml-timeout-global-scale takes precedence. */
1639 		if (PE_parse_boot_argn("ml-timeout-global-scale", &boot_arg, sizeof(boot_arg))) {
1640 			global_scale = boot_arg;
1641 			global_scale_set = true;
1642 		}
1643 	}
1644 
1645 	if (global_scale_set) {
1646 		scale *= global_scale;
1647 	}
1648 
1649 	/* Compute the final timeout, and done. */
1650 	if (found && timeout > 0) {
1651 		/* Only apply inherent unit scale if the value came in
1652 		 * externally. */
1653 
1654 		if (spec->unit_scale == MACHINE_TIMEOUT_UNIT_TIMEBASE) {
1655 			uint64_t nanoseconds = timeout / 1000;
1656 			nanoseconds_to_absolutetime(nanoseconds, &timeout);
1657 		} else {
1658 			timeout /= spec->unit_scale;
1659 		}
1660 
1661 		if (timeout == 0) {
1662 			/* Ensure unit scaling did not disable the timeout. */
1663 			timeout = 1;
1664 		}
1665 	}
1666 
1667 	if (os_mul_overflow(timeout, scale, &timeout)) {
1668 		timeout = UINT64_MAX;         // clamp
1669 	}
1670 
1671 	os_atomic_store_wide((uint64_t*)spec->ptr, timeout, relaxed);
1672 }
1673 
/* Standard timeout init: empty name suffix, honors wdt/skip_predicate. */
void
machine_timeout_init(const struct machine_timeout_spec *spec)
{
	machine_timeout_init_with_suffix(spec, "", false);
}
1679 
/* Like machine_timeout_init(), but ignores wdt == -1 and skip_predicate. */
void
machine_timeout_init_always_enabled(const struct machine_timeout_spec *spec)
{
	machine_timeout_init_with_suffix(spec, "", true);
}
1685 
1686 #if DEVELOPMENT || DEBUG
1687 /*
1688  * Late timeout (re-)initialization, at the end of bsd_init()
1689  */
void
machine_timeout_bsd_init(void)
{
	/* The "-b" suffix lets the DT/boot-args supply distinct values for
	 * this late (end of bsd_init) re-initialization pass. */
	char const * const __unused mt_suffix = "-b";
#if SCHED_HYGIENE_DEBUG
	machine_timeout_init_with_suffix(MACHINE_TIMEOUT_SPEC_REF(interrupt_masked_timeout), mt_suffix, false);
	machine_timeout_init_with_suffix(MACHINE_TIMEOUT_SPEC_REF(sched_preemption_disable_threshold_mt), mt_suffix, false);

	/*
	 * The io timeouts can inherit from interrupt_masked_timeout.
	 * Re-initialize, as interrupt_masked_timeout may have changed.
	 */
	ml_io_init_timeouts();

	extern void preemption_disable_reset_max_durations(void);
	/*
	 * Reset the preemption disable stats, so that they are not
	 * polluted by long early boot code.
	 */
	preemption_disable_reset_max_durations();
#endif /* SCHED_HYGIENE_DEBUG */
}
1712 #endif /* DEVELOPMENT || DEBUG */
1713 
1714 #if ML_IO_TIMEOUTS_ENABLED && CONFIG_XNUPOST
1715 #include <tests/xnupost.h>
1716 
1717 extern kern_return_t ml_io_timeout_test(void);
1718 
1719 static inline void
ml_io_timeout_test_get_timeouts(uintptr_t vaddr,uint64_t * read_timeout,uint64_t * write_timeout)1720 ml_io_timeout_test_get_timeouts(uintptr_t vaddr, uint64_t *read_timeout, uint64_t *write_timeout)
1721 {
1722 	*read_timeout = 0;
1723 	*write_timeout = 0;
1724 
1725 	vm_offset_t paddr = kvtophys(vaddr);
1726 
1727 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
1728 	override_io_timeouts(vaddr, paddr, read_timeout, write_timeout);
1729 	ml_set_interrupts_enabled(istate);
1730 }
1731 
1732 static inline void
ml_io_timeout_test_get_timeouts_phys(vm_offset_t paddr,uint64_t * read_timeout,uint64_t * write_timeout)1733 ml_io_timeout_test_get_timeouts_phys(vm_offset_t paddr, uint64_t *read_timeout, uint64_t *write_timeout)
1734 {
1735 	*read_timeout = 0;
1736 	*write_timeout = 0;
1737 
1738 	boolean_t istate = ml_set_interrupts_enabled(FALSE);
1739 	override_io_timeouts(0, paddr, read_timeout, write_timeout);
1740 	ml_set_interrupts_enabled(istate);
1741 }
1742 
1743 kern_return_t
ml_io_timeout_test(void)1744 ml_io_timeout_test(void)
1745 {
1746 	const size_t SIZE = 16;
1747 	/*
1748 	 * Page align the base address to ensure that the regions are physically
1749 	 * contiguous.
1750 	 */
1751 	const uintptr_t iovaddr_base1 = (uintptr_t)kernel_pmap & ~PAGE_MASK;
1752 
1753 	const uintptr_t iovaddr_base2 = iovaddr_base1 + SIZE;
1754 	const uintptr_t vaddr1 = iovaddr_base1 + SIZE / 2;
1755 	const uintptr_t vaddr2 = iovaddr_base2 + SIZE / 2;
1756 
1757 	const vm_offset_t iopaddr_base1 = kvtophys(iovaddr_base1);
1758 	const vm_offset_t iopaddr_base2 = kvtophys(iovaddr_base2);
1759 	const vm_offset_t paddr1 = iopaddr_base1 + SIZE / 2;
1760 	const vm_offset_t paddr2 = iopaddr_base2 + SIZE / 2;
1761 
1762 	const uint64_t READ_TIMEOUT1_US = 50000, WRITE_TIMEOUT1_US = 50001;
1763 	const uint64_t READ_TIMEOUT2_US = 50002, WRITE_TIMEOUT2_US = 50003;
1764 	uint64_t read_timeout1_abs, write_timeout1_abs;
1765 	uint64_t read_timeout2_abs, write_timeout2_abs;
1766 	nanoseconds_to_absolutetime(NSEC_PER_USEC * READ_TIMEOUT1_US, &read_timeout1_abs);
1767 	nanoseconds_to_absolutetime(NSEC_PER_USEC * WRITE_TIMEOUT1_US, &write_timeout1_abs);
1768 	nanoseconds_to_absolutetime(NSEC_PER_USEC * READ_TIMEOUT2_US, &read_timeout2_abs);
1769 	nanoseconds_to_absolutetime(NSEC_PER_USEC * WRITE_TIMEOUT2_US, &write_timeout2_abs);
1770 
1771 	int err = ml_io_increase_timeouts(iovaddr_base1, 0, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1772 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for empty region");
1773 
1774 	err = ml_io_increase_timeouts(iovaddr_base1, 4097, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1775 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for region > 4096 bytes");
1776 
1777 	err = ml_io_increase_timeouts(UINTPTR_MAX, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1778 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for overflowed region");
1779 
1780 	err = ml_io_increase_timeouts(iovaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1781 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for first VA region should succeed");
1782 
1783 	err = ml_io_increase_timeouts(iovaddr_base2, SIZE, READ_TIMEOUT2_US, WRITE_TIMEOUT2_US);
1784 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for second VA region should succeed");
1785 
1786 	err = ml_io_increase_timeouts(iovaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1787 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for same region twice");
1788 
1789 	err = ml_io_increase_timeouts(vaddr1, (uint32_t)(vaddr2 - vaddr1), READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1790 	T_EXPECT_EQ_INT(err, KERN_INVALID_ARGUMENT, "Can't set timeout for overlapping regions");
1791 
1792 	uint64_t read_timeout, write_timeout;
1793 	ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1794 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1795 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1796 
1797 	ml_io_timeout_test_get_timeouts(vaddr2, &read_timeout, &write_timeout);
1798 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first region");
1799 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first region");
1800 
1801 	ml_io_timeout_test_get_timeouts(iovaddr_base2 + SIZE, &read_timeout, &write_timeout);
1802 	T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout without override");
1803 	T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout without override");
1804 
1805 	err = ml_io_reset_timeouts(iovaddr_base1 + 1, SIZE - 1);
1806 	T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for subregion");
1807 
1808 	err = ml_io_reset_timeouts(iovaddr_base2 + SIZE, SIZE);
1809 	T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for non-existent region");
1810 
1811 	err = ml_io_reset_timeouts(iovaddr_base1, SIZE);
1812 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for first VA region should succeed");
1813 
1814 	ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1815 	T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout for reset region");
1816 	T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout for reset region");
1817 
1818 	err = ml_io_reset_timeouts(iovaddr_base1, SIZE);
1819 	T_EXPECT_EQ_INT(err, KERN_NOT_FOUND, "Can't reset timeout for same region twice");
1820 
1821 	err = ml_io_reset_timeouts(iovaddr_base2, SIZE);
1822 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for second VA region should succeed");
1823 
1824 	err = ml_io_increase_timeouts_phys(iopaddr_base1, SIZE, READ_TIMEOUT1_US, WRITE_TIMEOUT1_US);
1825 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for first PA region should succeed");
1826 
1827 	err = ml_io_increase_timeouts_phys(iopaddr_base2, SIZE, READ_TIMEOUT2_US, WRITE_TIMEOUT2_US);
1828 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Setting timeout for second PA region should succeed");
1829 
1830 	ml_io_timeout_test_get_timeouts(vaddr1, &read_timeout, &write_timeout);
1831 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1832 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1833 
1834 	ml_io_timeout_test_get_timeouts(vaddr2, &read_timeout, &write_timeout);
1835 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first region");
1836 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first region");
1837 
1838 	ml_io_timeout_test_get_timeouts_phys(paddr1, &read_timeout, &write_timeout);
1839 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout1_abs, "Read timeout for first region");
1840 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout1_abs, "Write timeout for first region");
1841 
1842 	ml_io_timeout_test_get_timeouts_phys(paddr2, &read_timeout, &write_timeout);
1843 	T_EXPECT_EQ_ULLONG(read_timeout, read_timeout2_abs, "Read timeout for first physical region");
1844 	T_EXPECT_EQ_ULLONG(write_timeout, write_timeout2_abs, "Write timeout for first physical region");
1845 
1846 	err = ml_io_reset_timeouts_phys(iopaddr_base1, SIZE);
1847 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for first PA region should succeed");
1848 
1849 	err = ml_io_reset_timeouts_phys(iopaddr_base2, SIZE);
1850 	T_EXPECT_EQ_INT(err, KERN_SUCCESS, "Resetting timeout for second PA region should succeed");
1851 
1852 	ml_io_timeout_test_get_timeouts_phys(paddr1, &read_timeout, &write_timeout);
1853 	T_EXPECT_EQ_ULLONG(read_timeout, 0, "Read timeout for reset region");
1854 	T_EXPECT_EQ_ULLONG(write_timeout, 0, "Write timeout for reset region");
1855 
1856 	return KERN_SUCCESS;
1857 }
1858 #endif /* CONFIG_XNUPOST */
1859 
1860 #if DEVELOPMENT || DEBUG
1861 static int
ml_io_read_cpu_reg_test(__unused int64_t in,int64_t * out)1862 ml_io_read_cpu_reg_test(__unused int64_t in, int64_t *out)
1863 {
1864 	printf("Testing ml_io_read_cpu_reg()...\n");
1865 
1866 	ml_io_read_test_mode = 1;
1867 	boolean_t istate = ml_set_interrupts_enabled_with_debug(false, false);
1868 	(void) ml_io_read_cpu_reg((uintptr_t)1, 8, 1);
1869 	(void) ml_io_read_cpu_reg((uintptr_t)2, 8, 1);
1870 	ml_set_interrupts_enabled_with_debug(istate, false);
1871 	(void) ml_io_read_cpu_reg((uintptr_t)1, 8, 1);
1872 	(void) ml_io_read_cpu_reg((uintptr_t)2, 8, 1);
1873 	ml_io_read_test_mode = 0;
1874 
1875 	*out = 0;
1876 	return 0;
1877 }
1878 SYSCTL_TEST_REGISTER(ml_io_read_cpu_reg, ml_io_read_cpu_reg_test);
1879 #endif /* DEVELOPMENT || DEBUG */
1880