/*
 * Copyright (c) 2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/timeout.h>
#include <kern/clock.h>
#include <kern/monotonic.h>
#include <kern/recount.h>
#include <kern/debug.h>
#include <kern/backtrace.h>
#include <kern/trap_telemetry.h>
#include <machine/machine_routines.h>


kern_timeout_t panic_timeout; /* for debugging */
boolean_t kern_timeout_panic_initiated = false;

#if defined(__x86_64__)
#define ml_get_speculative_timebase ml_get_timebase
#endif

#if DEVELOPMENT || DEBUG
TUNABLE_DT_WRITEABLE(int, timeouts_are_fatal, "machine-timeouts", "timeouts-are-fatal",
    "timeouts_are_fatal", 1, TUNABLE_DT_CHECK_CHOSEN);
#endif

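/*
 * Note on the tunable above (a reading of the TUNABLE_DT_WRITEABLE
 * parameters; see kern/startup.h): timeouts_are_fatal defaults to 1 (fatal)
 * and can be overridden via the timeouts-are-fatal property of the
 * machine-timeouts device-tree node or via the timeouts_are_fatal boot-arg;
 * TUNABLE_DT_CHECK_CHOSEN additionally consults the node under /chosen.
 */
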
void
kern_timeout_restart(kern_timeout_t *to, timeout_flags_t flags)
{
#if CONFIG_CPU_COUNTERS
	if (__improbable(!(flags & TF_SAMPLE_PMC))) {
		to->start_cycles = 0;
		to->start_instrs = 0;
	} else {
		mt_cur_cpu_cycles_instrs_speculative(&to->start_cycles, &to->start_instrs);
	}
#endif /* CONFIG_CPU_COUNTERS */

	if (flags & TF_SAMPLE_INTERRUPT_TIME) {
		to->int_mt = recount_current_processor_interrupt_duration_mach();
	} else {
		to->int_mt = 0;
	}

	to->start_mt = (flags & TF_NONSPEC_TIMEBASE) ? ml_get_timebase() : ml_get_speculative_timebase();
}

void
kern_timeout_start(kern_timeout_t *to, timeout_flags_t flags)
{
	if (flags & TF_BACKTRACE) {
		(void) backtrace(&to->bt[0], TO_BT_FRAMES, NULL, NULL);
	}
	kern_timeout_restart(to, flags);
}

void
kern_timeout_end(kern_timeout_t *to, timeout_flags_t flags)
{
	to->end_mt = (flags & TF_NONSPEC_TIMEBASE) ? ml_get_timebase() : ml_get_speculative_timebase();
	if (flags & TF_SAMPLE_INTERRUPT_TIME) {
		to->int_mt = recount_current_processor_interrupt_duration_mach() - to->int_mt;
	}
}
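
/*
 * Illustrative usage (not from this file; the calling code, flag choice,
 * and threshold are assumptions): a caller brackets a critical section
 * with kern_timeout_start()/kern_timeout_end() and then compares the
 * measured duration against its threshold, e.g.:
 *
 *	kern_timeout_t to;
 *
 *	kern_timeout_start(&to, TF_SAMPLE_PMC | TF_SAMPLE_INTERRUPT_TIME);
 *	// ...critical section under measurement...
 *	kern_timeout_end(&to, TF_SAMPLE_INTERRUPT_TIME);
 *	if (kern_timeout_gross_duration(&to) >= threshold) {
 *		kern_timeout_try_panic(KERN_TIMEOUT_PREEMPTION, 0, &to,
 *		    "preemption disabled for", threshold);
 *	}
 */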

/*
 * Zero out the timeout state so that another timeout is not triggered later
 * in the processing of this one.
 */
void
kern_timeout_override(kern_timeout_t *to)
{
	to->start_mt = 0;
#if CONFIG_CPU_COUNTERS
	to->start_cycles = 0;
	to->start_instrs = 0;
#endif /* CONFIG_CPU_COUNTERS */
}
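
/*
 * Illustrative sketch of when an override is useful (assumed call site, not
 * from this file): a path that legitimately runs long, such as debugger or
 * panic handling, can clear the in-flight measurement first:
 *
 *	kern_timeout_override(&to);
 *	// ...long-running work that must not trip the timeout...
 *
 * With start_mt (and the PMC baselines) zeroed, later checks treat the
 * sample as absent; kern_timeout_cycles_instrs() below, for example,
 * reports zero when start_cycles == 0.
 */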

#if CONFIG_CPU_COUNTERS
void
kern_timeout_cycles_instrs(kern_timeout_t *to, uint64_t *cycles, uint64_t *instrs)
{
	uint64_t now_cycles, now_instrs;

	if (__improbable(to->start_cycles == 0)) {
		*cycles = 0;
		*instrs = 0;
	} else {
		mt_cur_cpu_cycles_instrs_speculative(&now_cycles, &now_instrs);
		*cycles = now_cycles - to->start_cycles;
		*instrs = now_instrs - to->start_instrs;
	}
}

void
kern_timeout_cpi(kern_timeout_t *to, uint64_t *cpi_whole, uint64_t *cpi_fractional)
{
	uint64_t cycles, instrs;

	kern_timeout_cycles_instrs(to, &cycles, &instrs);
	if (instrs == 0) {
		/* No sample was taken (start_cycles == 0); avoid dividing by zero. */
		*cpi_whole = 0;
		*cpi_fractional = 0;
		return;
	}
	*cpi_whole = cycles / instrs;
	*cpi_fractional = ((cycles * 100) / instrs) % 100;
}
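
/*
 * Worked example of the fixed-point CPI arithmetic above (illustrative
 * numbers): with cycles = 3,100,000 and instrs = 2,000,000,
 *
 *	cpi_whole      = 3100000 / 2000000                 = 1
 *	cpi_fractional = ((3100000 * 100) / 2000000) % 100 = 55
 *
 * i.e. a CPI of 1.55, carried as two integers since the kernel avoids
 * floating point.
 */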
#else /* !CONFIG_CPU_COUNTERS */
void
kern_timeout_cycles_instrs(kern_timeout_t __unused *to, uint64_t *cycles, uint64_t *instrs)
{
	*cycles = 0;
	*instrs = 0;
}

void
kern_timeout_cpi(kern_timeout_t __unused *to, uint64_t *cpi_whole, uint64_t *cpi_fractional)
{
	*cpi_whole = 0;
	*cpi_fractional = 0;
}
#endif /* CONFIG_CPU_COUNTERS */

__enum_closed_decl(timeout_mode_t, uint32_t, {
	TIMEOUT_TELEMETRY,
	TIMEOUT_PANIC
});

/*
 * This interface is a "try panic" because it will not invoke a nested
 * panic if an earlier timeout already initiated the original panic.
 */
void
kern_timeout_try_panic(kern_timeout_type_t __unused type, uint64_t __unused payload, kern_timeout_t *to, const char *prefix, uint64_t threshold)
{
	char cpi[80];
	char duration[80];
	const uint64_t gross_duration = kern_timeout_gross_duration(to);
	const uint64_t net_duration = kern_timeout_net_duration(to);
	uint64_t gross_ns, net_ns, threshold_ns;
	uint64_t gross_us, net_us, threshold_us;
	uint64_t gross_ms, net_ms, threshold_ms;
	uint64_t start_mt, end_mt;
	uint64_t __unused average_freq = 0;
	uint64_t __unused cpi_whole = 0;
#ifdef __arm64__
	const char __unused core_type = ml_get_current_core_type();
#else
	const char __unused core_type = '-';
#endif /* __arm64__ */

	/*
	 * We can recursively try to panic due to a timeout in the panic flow,
	 * so if that happens, just bail out here.
	 */
	if (kern_timeout_panic_initiated) {
		return;
	}

	absolutetime_to_nanoseconds(gross_duration, &gross_ns);
	absolutetime_to_nanoseconds(net_duration, &net_ns);
	absolutetime_to_nanoseconds(threshold, &threshold_ns);
	kern_timeout_mach_times(to, &start_mt, &end_mt);

	cpi[0] = 0;

#if CONFIG_CPU_COUNTERS
	uint64_t cycles;
	uint64_t instrs;

	/*
	 * We read these values a bit late, but reading them is somewhat
	 * expensive, so we take the slight hit in accuracy for the reported
	 * values (which aren't very stable anyway).
	 */
	kern_timeout_cycles_instrs(to, &cycles, &instrs);
	if (cycles > 0 && instrs > 0) {
		cpi_whole = cycles / instrs;
		if (gross_ns >= 1000) {
			/* Average frequency in MHz: cycles per microsecond of gross duration. */
			average_freq = cycles / (gross_ns / 1000);
		}
	}
#endif /* CONFIG_CPU_COUNTERS */

#if DEVELOPMENT || DEBUG
	timeout_mode_t mode = timeouts_are_fatal ? TIMEOUT_PANIC : TIMEOUT_TELEMETRY;
	if (mode == TIMEOUT_PANIC) {
#if CONFIG_CPU_COUNTERS && !defined(HAS_FEAT_XS)
		/*
		 * POLICY: if CPI > 100 and we are on a SoC that does not support
		 * FEAT_XS, it's likely the stall was caused by a long TLBI. This
		 * isn't an actionable radar condition for preemption or interrupt
		 * disabled timeouts, so do nothing.
		 */
		if ((type == KERN_TIMEOUT_PREEMPTION || type == KERN_TIMEOUT_INTERRUPT) &&
		    cpi_whole > 100) {
			return;
		}
#endif /* CONFIG_CPU_COUNTERS && !HAS_FEAT_XS */

#if ML_IO_TIMEOUTS_ENABLED
		/*
		 * POLICY: check the MMIO override window to see if we are still
		 * within it. If we are, abandon the attempt to panic, since
		 * the timeout is almost certainly due to a known issue causing
		 * a stall that got entangled with this core. We don't emit
		 * telemetry in this case because the MMIO overrides have their
		 * own telemetry mechanism.
		 */
		if (ml_io_check_for_mmio_overrides(start_mt)) {
			return;
		}
#endif /* ML_IO_TIMEOUTS_ENABLED */
	}

	if (mode == TIMEOUT_TELEMETRY) {
		trap_telemetry_type_t trap_type;
		switch (type) {
		case KERN_TIMEOUT_PREEMPTION:
			trap_type = TRAP_TELEMETRY_TYPE_PREEMPTION_TIMEOUT;
			break;
		case KERN_TIMEOUT_INTERRUPT:
			trap_type = TRAP_TELEMETRY_TYPE_INTERRUPT_TIMEOUT;
			break;
		case KERN_TIMEOUT_MMIO:
			trap_type = TRAP_TELEMETRY_TYPE_MMIO_TIMEOUT;
			break;
		case KERN_TIMEOUT_LOCK:
			trap_type = TRAP_TELEMETRY_TYPE_LOCK_TIMEOUT;
			break;
		default:
			panic("unknown timeout type\n");
		}
		trap_telemetry_report_latency_violation(
			trap_type,
			(trap_telemetry_latency_s) {
			.violation_cpi = cpi_whole,
			.violation_freq = average_freq,
			.violation_cpu_type = core_type,
			.violation_duration = net_ns,
			.violation_threshold = threshold_ns,
			.violation_payload = payload
		});
		return;
	}
#endif /* DEVELOPMENT || DEBUG */

	kern_timeout_panic_initiated = true;
	panic_timeout = *to;

	gross_us = gross_ns / 1000;
	net_us = net_ns / 1000;
	threshold_us = threshold_ns / 1000;
	gross_ms = gross_us / 1000;
	net_ms = net_us / 1000;
	threshold_ms = threshold_us / 1000;

#if CONFIG_CPU_COUNTERS
	if (cycles > 0 && instrs > 0) {
		uint64_t cpi_fractional;

		cpi_fractional = ((cycles * 100) / instrs) % 100;

		/* %02llu keeps the two-digit fraction zero-padded (1.05, not 1.5). */
		snprintf(cpi, sizeof(cpi), ", freq %llu MHz, type = %c, CPI = %llu.%02llu [%llu, %llu]",
		    average_freq, core_type, cpi_whole, cpi_fractional, cycles, instrs);
	}
#endif /* CONFIG_CPU_COUNTERS */

	if (gross_ns > net_ns) {
		if (threshold_ms > 0) {
			/* %03llu zero-pads the fractional milliseconds (12.045, not 12.45). */
			snprintf(duration, sizeof(duration), "gross %llu.%03llu ms, net %llu.%03llu ms >= %llu.%03llu ms",
			    gross_ms, gross_us % 1000, net_ms, net_us % 1000, threshold_ms, threshold_us % 1000);
		} else {
			snprintf(duration, sizeof(duration), "gross %llu us, net %llu us >= %llu us",
			    gross_us, net_us, threshold_us);
		}
	} else {
		if (threshold_ms > 0) {
			snprintf(duration, sizeof(duration), "%llu.%03llu ms >= %llu.%03llu ms",
			    gross_ms, gross_us % 1000, threshold_ms, threshold_us % 1000);
		} else {
			snprintf(duration, sizeof(duration), "%llu us >= %llu us",
			    gross_us, threshold_us);
		}
	}

	panic_plain("%s %s (start: %llu, end: %llu)%s", prefix, duration, start_mt, end_mt, cpi);
}
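
/*
 * Illustrative panic string (all values assumed, line-wrapped here for
 * readability; panic_plain() above emits one line): with prefix
 * "preemption disabled for", a 12.345 ms gross / 11.200 ms net duration,
 * a 5.000 ms threshold, and a 24 MHz timebase:
 *
 *	preemption disabled for gross 12.345 ms, net 11.200 ms >= 5.000 ms
 *	(start: 123456789, end: 123753069), freq 2000 MHz, type = P,
 *	CPI = 1.05 [24690000, 23514285]
 */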