xref: /xnu-11417.121.6/pexpert/arm/hwtrace/hwtrace.c (revision a1e26a70f38d1d7daa7b49b258e2f8538ad81650)
1 /*
2  * Copyright (c) 2007-2023 Apple Inc. All rights reserved.
3  * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
4  */
5 
6 
7 /* Required to know if we must compile the file. */
8 #include <pexpert/arm64/board_config.h>
9 
10 /* Generic headers. */
11 #include <pexpert/pexpert.h>
12 #include <pexpert/device_tree.h>
13 #include <machine/machine_routines.h>
14 #include <sys/sysctl.h>
15 #include <kern/clock.h>
16 
17 /* Dev headers. */
18 #if DEVELOPMENT || DEBUG
19 #include <kern/simple_lock.h>
20 #include <os/hash.h>
21 #endif /* DEVELOPMENT || DEBUG */
22 
23 /* Trace-specific headers. */
24 
25 /********
26 * Logs *
27 ********/
28 
#define PANIC_TRACE_LOG 1

/*
 * Emit a panic-trace message, prefixed with "panic_trace: " and terminated
 * with a newline. The sink is selected by panic_trace_debug:
 *   1 -> kprintf(), 2 -> printf(), any other value -> nothing is printed.
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.
 */
#define panic_trace_error(msg, args...) \
	do { \
		if (panic_trace_debug == 1) { \
			kprintf("panic_trace: " msg "\n", ##args); \
		} else if (panic_trace_debug == 2) { \
			printf("panic_trace: " msg "\n", ##args); \
		} \
	} while (0)

#if PANIC_TRACE_LOG
/* Forwards to panic_trace_error() whenever panic_trace_debug is non-zero. */
#define panic_trace_log(msg, args...) \
	do { \
		if (panic_trace_debug) { \
			panic_trace_error(msg, ##args); \
		} \
	} while (0)
#else
/* Logging compiled out: expands to an empty statement. */
#define panic_trace_log(msg, args...) do { } while (0)
#endif /* PANIC_TRACE_LOG */
36 
37 /************
38 * Externals *
39 ************/
40 
41 /*
42  * Soc base physical address.
43  * Set by pe_identify_machine.c:pe_arm_map_interrupt_controller during
44  * early boot, null before.
45  */
46 extern vm_offset_t gSocPhys;
47 
48 /*******
49 * Logs *
50 *******/
51 
52 #if DEVELOPMENT || DEBUG
53 #ifndef CT_DFT_LOGS_ON
54 #define CT_DFT_LOGS_ON 0
55 #endif /* CT_DFT_LOGS_ON */
56 #endif /* DEVELOPMENT || DEBUG */
57 
58 /****************
59 * Default state *
60 ****************/
61 
62 #if DEVELOPMENT || DEBUG
63 
64 /*
65  * When supported, panic-trace is enabled by default on some platforms.
 * This section defines the platforms on which it is enabled.
67  */
68 
69 /* Opensource -> disabled. */
70 #define DEFAULT_PANIC_TRACE_MODE panic_trace_disabled
71 
72 #endif /* DEVELOPMENT || DEBUG */
73 
74 /**********
75 * Globals *
76 **********/
77 
78 #if DEVELOPMENT || DEBUG
79 boolean_t panic_trace_disabled_for_rdar107003520 = FALSE;
80 #endif /* DEVELOPMENT || DEBUG */
81 
82 static boolean_t debug_and_trace_initialized = false;
83 
84 /************
85 * Boot-args *
86 ************/
87 
88 #if DEVELOPMENT || DEBUG
89 /*
90  * Panic trace state.
91  * Has a double meaning :
92  * - at system init, it gives the expected tracing state.
93  *   -> init code uses that to enable tracing.
94  * - after system init, used to report the tracing state.
95  */
96 TUNABLE_DT_WRITEABLE(panic_trace_t, panic_trace, "/arm-io/cpu-debug-interface",
97     "panic-trace-mode", "panic_trace", DEFAULT_PANIC_TRACE_MODE, TUNABLE_DT_NONE);
98 
99 /*
100  * Panic trace debug state. See 'Logs' section above.
101  */
102 TUNABLE_WRITEABLE(boolean_t, panic_trace_debug, "panic_trace_debug", CT_DFT_LOGS_ON);
103 
104 #endif /* DEVELOPMENT || DEBUG */
105 
106 /********
107 * Locks *
108 ********/
109 
110 /* Panic trace lock. */
111 
112 /****************
113 * Debug command *
114 ****************/
115 
116 #if DEVELOPMENT || DEBUG
117 
118 decl_simple_lock_data(, panic_hook_lock);
119 
120 TUNABLE(unsigned int, bootarg_stop_clocks, "stop_clocks", 0);
121 
// The command buffer contains the converted commands from the device tree for commanding cpu_halt, enable_trace, etc.
// DEBUG_COMMAND_BUFFER_SIZE is the number of elements in the statically allocated buffer.
#define DEBUG_COMMAND_BUFFER_SIZE 256
/*
 * One register write of a debug command.
 * A command is a run of consecutive elements; an element whose `address`
 * is 0 marks the end of the run.
 */
typedef struct command_buffer_element {
	uintptr_t address;              // mapped address the value is written to; 0 terminates a command
	uintptr_t address_pa;           // physical address of the same register (reported in logs)
	uintptr_t value;                // value written to the register
	union cpu_selector {
		uint16_t mask;              // bitmask of CPU numbers; ALL_CPUS (0) selects every CPU
		struct cpu_range {
			uint8_t min_cpu;        // first selected CPU number (inclusive)
			uint8_t max_cpu;        // last selected CPU number (inclusive)
		} range;
	} destination_cpu_selector;     // interpreted according to cpu_selector_is_range
	uint16_t delay_us;              // busy-wait applied after the write, in microseconds (0 = none)
	bool cpu_selector_is_range;     // true: use `range`, false: use `mask`
	bool is_32bit;                  // true: 32-bit store, false: uintptr_t-sized store
} command_buffer_element_t;
139 
/*
 * Layout of the first cell of a (register, value) pair read from the device
 * tree, as decoded by the macros below:
 *   bits  0-15 : register offset inside the current window
 *   bits 16-31 : CPU selector (bitmask, or max CPU in bits 16-23 and min CPU
 *                in bits 24-31 when the range flag is set)
 *   bits 32-47 : delay after the write, in microseconds
 *   bit  62    : the CPU selector is a range
 *   bit  63    : the register is 32 bits wide
 *
 * The selector mask is built from an unsigned constant: `0xFFFF << 16` does
 * not fit in a signed int (undefined behaviour) and in practice sign-extends
 * when combined with a 64-bit cell, leaking bits 32-63 into CPU_SELECTOR().
 * Every macro argument is parenthesized so compound expressions decode
 * correctly.
 */
#define CPU_SELECTOR_SHIFT              (16)
#define CPU_SELECTOR_MASK               (0xFFFFU << CPU_SELECTOR_SHIFT)
#define REGISTER_OFFSET_MASK            ((1 << CPU_SELECTOR_SHIFT) - 1)
#define REGISTER_OFFSET(register_prop)  ((register_prop) & REGISTER_OFFSET_MASK)
#define CPU_SELECTOR(register_offset)   (((register_offset) & CPU_SELECTOR_MASK) >> CPU_SELECTOR_SHIFT) // Bits 16-31 hold the cpu selector
#define MAX_WINDOW_SIZE                 0xFFFF
#define DELAY_SHIFT                     (32)
#define DELAY_MASK                      (0xFFFFULL << DELAY_SHIFT)
#define DELAY_US(register_offset)       (((register_offset) & DELAY_MASK) >> DELAY_SHIFT)
#define CPU_SELECTOR_ISRANGE_MASK       (1ULL << 62)
#define REGISTER_32BIT_MASK             (1ULL << 63)
#define ALL_CPUS                        0x0000
#define RESET_VIRTUAL_ADDRESS_WINDOW    0xFFFFFFFF

#define REGISTER_IS_32BIT(register_offset)      (((register_offset) & REGISTER_32BIT_MASK) != 0)
#define REGISTER_SIZE(register_offset)          (REGISTER_IS_32BIT(register_offset) ? sizeof(uint32_t) : sizeof(uintptr_t))
#define CPU_SELECTOR_IS_RANGE(register_offset)  (((register_offset) & CPU_SELECTOR_ISRANGE_MASK) != 0)
#define CPU_SELECTOR_MIN_CPU(register_offset)   ((CPU_SELECTOR(register_offset) & 0xff00) >> 8)
#define CPU_SELECTOR_MAX_CPU(register_offset)   (CPU_SELECTOR(register_offset) & 0x00ff)
159 
160 // Record which CPU is currently running one of our debug commands, so we can trap panic reentrancy to PE_arm_debug_panic_hook.
161 static int running_debug_command_on_cpu_number = -1;
162 
163 
164 // Determine whether the current debug command is intended for this CPU.
165 static inline bool
is_running_cpu_selected(command_buffer_element_t * command)166 is_running_cpu_selected(command_buffer_element_t *command)
167 {
168 	assert(running_debug_command_on_cpu_number >= 0);
169 	if (command->cpu_selector_is_range) {
170 		return running_debug_command_on_cpu_number >= command->destination_cpu_selector.range.min_cpu
171 		       && running_debug_command_on_cpu_number <= command->destination_cpu_selector.range.max_cpu;
172 	} else if (command->destination_cpu_selector.mask == ALL_CPUS) {
173 		return true;
174 	} else {
175 		return !!(command->destination_cpu_selector.mask & (1 << running_debug_command_on_cpu_number));
176 	}
177 }
178 
179 
180 // Pointers into debug_command_buffer for each operation. Assumes runtime will init them to zero.
181 static command_buffer_element_t *enable_stop_clocks;
182 static command_buffer_element_t *stop_clocks;
183 
184 boolean_t
PE_arm_debug_and_trace_initialized(void)185 PE_arm_debug_and_trace_initialized(void)
186 {
187 	return debug_and_trace_initialized;
188 }
189 
190 static void
pe_init_debug_command(DTEntry entryP,command_buffer_element_t ** command_buffer,const char * entry_name)191 pe_init_debug_command(DTEntry entryP, command_buffer_element_t **command_buffer, const char* entry_name)
192 {
193 	// statically allocate to prevent needing alloc at runtime
194 	static command_buffer_element_t debug_command_buffer[DEBUG_COMMAND_BUFFER_SIZE];
195 	static command_buffer_element_t *next_command_buffer_entry = debug_command_buffer;
196 
197 	// record this pointer but don't assign it to *command_buffer yet, in case we panic while half-initialized
198 	command_buffer_element_t *command_starting_index = next_command_buffer_entry;
199 
200 	uintptr_t const *reg_prop;
201 	uint32_t        prop_size, reg_window_size = 0;
202 	uintptr_t       base_address_pa = 0, debug_reg_window = 0;
203 
204 	if (command_buffer == 0) {
205 		panic_trace_log("%s: %s: no hook to assign this command to\n", __func__, entry_name);
206 		return;
207 	}
208 
209 	if (SecureDTGetProperty(entryP, entry_name, (void const **)&reg_prop, &prop_size) != kSuccess) {
210 		panic("%s: %s: failed to read property from device tree", __func__, entry_name);
211 	}
212 
213 	if (prop_size % (2 * sizeof(*reg_prop))) {
214 		panic("%s: %s: property size %u bytes is not a multiple of %lu",
215 		    __func__, entry_name, prop_size, 2 * sizeof(*reg_prop));
216 	}
217 
218 	// convert to real virt addresses and stuff commands into debug_command_buffer
219 	for (; prop_size; reg_prop += 2, prop_size -= 2 * sizeof(*reg_prop)) {
220 		if (*reg_prop == RESET_VIRTUAL_ADDRESS_WINDOW) {
221 			debug_reg_window = 0; // Create a new window
222 		} else if (debug_reg_window == 0) {
223 			// create a window from virtual address to the specified physical address
224 			base_address_pa = gSocPhys + *reg_prop;
225 			reg_window_size = ((uint32_t)*(reg_prop + 1));
226 			if (reg_window_size > MAX_WINDOW_SIZE) {
227 				panic("%s: %s: %#x-byte window at #%lx exceeds maximum size of %#x",
228 				    __func__, entry_name, reg_window_size, base_address_pa, MAX_WINDOW_SIZE );
229 			}
230 			debug_reg_window = ml_io_map(base_address_pa, reg_window_size);
231 			assert(debug_reg_window);
232 			panic_trace_log("%s: %s: %#x bytes at %#lx mapped to %#lx\n",
233 			    __func__, entry_name, reg_window_size, base_address_pa, debug_reg_window );
234 		} else {
235 			if ((REGISTER_OFFSET(*reg_prop) + REGISTER_SIZE(*reg_prop)) > reg_window_size) {
236 				panic("%s: %s[%ld]: %#lx(+%lu)-byte offset from %#lx exceeds allocated size of %#x",
237 				    __func__, entry_name, next_command_buffer_entry - command_starting_index,
238 				    REGISTER_OFFSET(*reg_prop), REGISTER_SIZE(*reg_prop), base_address_pa, reg_window_size );
239 			}
240 
241 			if (next_command_buffer_entry - debug_command_buffer >= DEBUG_COMMAND_BUFFER_SIZE - 1) {
242 				// can't use the very last entry, since we need it to terminate the command
243 				panic("%s: %s[%ld]: out of space in command buffer",
244 				    __func__, entry_name, next_command_buffer_entry - command_starting_index );
245 			}
246 
247 			next_command_buffer_entry->address    = debug_reg_window + REGISTER_OFFSET(*reg_prop);
248 			next_command_buffer_entry->address_pa = base_address_pa  + REGISTER_OFFSET(*reg_prop);
249 			next_command_buffer_entry->value      = *(reg_prop + 1);
250 #if defined(__arm64__)
251 			next_command_buffer_entry->delay_us   = DELAY_US(*reg_prop);
252 			next_command_buffer_entry->is_32bit   = REGISTER_IS_32BIT(*reg_prop);
253 #else
254 			next_command_buffer_entry->delay_us   = 0;
255 			next_command_buffer_entry->is_32bit   = false;
256 #endif
257 			if ((next_command_buffer_entry->cpu_selector_is_range = CPU_SELECTOR_IS_RANGE(*reg_prop))) {
258 				next_command_buffer_entry->destination_cpu_selector.range.min_cpu = (uint8_t)CPU_SELECTOR_MIN_CPU(*reg_prop);
259 				next_command_buffer_entry->destination_cpu_selector.range.max_cpu = (uint8_t)CPU_SELECTOR_MAX_CPU(*reg_prop);
260 			} else {
261 				next_command_buffer_entry->destination_cpu_selector.mask = (uint16_t)CPU_SELECTOR(*reg_prop);
262 			}
263 			next_command_buffer_entry++;
264 		}
265 	}
266 
267 	// null terminate the address field of the command to end it
268 	(next_command_buffer_entry++)->address = 0;
269 
270 	// save pointer into table for this command
271 	*command_buffer = command_starting_index;
272 }
273 
274 static void
pe_run_debug_command(command_buffer_element_t * command_buffer)275 pe_run_debug_command(command_buffer_element_t *command_buffer)
276 {
277 	if (!PE_arm_debug_and_trace_initialized()) {
278 		/*
279 		 * In practice this can only happen if we panicked very early,
280 		 * when only the boot CPU is online and before it has finished
281 		 * initializing the debug and trace infrastructure. Avoid an
282 		 * unhelpful nested panic() here and instead resume execution
283 		 * to handle_debugger_trap(), which logs a user friendly error
284 		 * message before spinning forever.
285 		 */
286 		return;
287 	}
288 
289 	// When both the CPUs panic, one will get stuck on the lock and the other CPU will be halted when the first executes the debug command
290 	simple_lock(&panic_hook_lock, LCK_GRP_NULL);
291 
292 	running_debug_command_on_cpu_number = cpu_number();
293 
294 	while (command_buffer && command_buffer->address) {
295 		if (is_running_cpu_selected(command_buffer)) {
296 			panic_trace_log("%s: cpu %d: reg write 0x%lx (VA 0x%lx):= 0x%lx",
297 			    __func__, running_debug_command_on_cpu_number, command_buffer->address_pa,
298 			    command_buffer->address, command_buffer->value);
299 			if (command_buffer->is_32bit) {
300 				*((volatile uint32_t*)(command_buffer->address)) = (uint32_t)(command_buffer->value);
301 			} else {
302 				*((volatile uintptr_t*)(command_buffer->address)) = command_buffer->value;      // register = value;
303 			}
304 			if (command_buffer->delay_us != 0) {
305 				uint64_t deadline;
306 				nanoseconds_to_absolutetime(command_buffer->delay_us * NSEC_PER_USEC, &deadline);
307 				deadline += ml_get_timebase();
308 				while (ml_get_timebase() < deadline) {
309 					os_compiler_barrier();
310 				}
311 			}
312 		}
313 		command_buffer++;
314 	}
315 
316 	running_debug_command_on_cpu_number = -1;
317 	simple_unlock(&panic_hook_lock);
318 }
319 
320 #endif /* DEVELOPMENT || DEBUG */
321 
322 /*****************
323 * Partial policy *
324 *****************/
325 
326 /* Debug-only section. */
327 #if DEVELOPMENT || DEBUG
328 
329 /* Util. */
330 #ifndef MIN
331 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
332 #endif /* MIN */
333 
334 /*
335  * The % of devices which will have panic_trace enabled when using a partial
336  * enablement policy.
337  */
338 static TUNABLE_DT(uint32_t, panic_trace_partial_percent,
339     "/arm-io/cpu-debug-interface", "panic-trace-partial-percent",
340     "panic_trace_partial_percent", 50, TUNABLE_DT_NONE);
341 
342 /*
343  * Stress racks opt out of panic_trace, unless overridden by the panic_trace boot-arg.
344  */
345 static void
panic_trace_apply_stress_rack_policy(void)346 panic_trace_apply_stress_rack_policy(void)
347 {
348 	DTEntry ent = NULL;
349 	DTEntry entryP = NULL;
350 	const void *propP = NULL;
351 	unsigned int size = 0;
352 
353 	if (SecureDTLookupEntry(NULL, "/chosen", &ent) == kSuccess &&
354 	    SecureDTGetProperty(ent, "stress-rack", &propP, &size) == kSuccess) {
355 		(void)entryP;
356 		if (PE_parse_boot_argn("panic_trace", NULL, 0)) {
357 			// Prefer user specified boot-arg even when running on stress racks.
358 			// Make an exception for devices with broken single-stepping.
359 		} else {
360 			panic_trace = 0;
361 		}
362 	}
363 }
364 
365 /*
366  * When the `panic_trace_partial_policy` flag is set, not all devices will have
367  * the panic_trace settings applied. The actual % is determined by
368  * `panic_trace_partial_percent`.
369  * By using the ECID instead of a random number the process is made
370  * deterministic for any given device.
371  * This function disables panic trace if the device falls into the disabled %
372  * range. It otherwise leaves the panic_trace value unmodified.
373  * Called on the boot path, thus does not lock panic_trace_lock.
374  */
375 static void
panic_trace_apply_partial_policy(void)376 panic_trace_apply_partial_policy(void)
377 {
378 	assert3u((panic_trace & panic_trace_partial_policy), !=, 0);
379 
380 	DTEntry ent = NULL;
381 	unsigned int size = 0;
382 	const void *ecid = NULL;
383 
384 	/* Grab the ECID. */
385 	if (SecureDTLookupEntry(NULL, "/chosen", &ent) != kSuccess ||
386 	    SecureDTGetProperty(ent, "unique-chip-id", &ecid, &size) != kSuccess) {
387 		panic_trace = panic_trace_disabled;
388 		return;
389 	}
390 
391 	/*
392 	 * Use os_hash_jenkins to convert the decidedly non-random ECID into
393 	 * something resembling a random number. Better (cryptographic) hash
394 	 * functions are not available at this point in boot.
395 	 */
396 	const uint32_t rand = os_hash_jenkins(ecid, size);
397 
398 	/* Sanitize the percent value. */
399 	const uint32_t percent = MIN(100, panic_trace_partial_percent);
400 
401 	/*
402 	 * Apply the ECID percent value. The bias here should be so tiny as to not
403 	 * matter for this purpose.
404 	 */
405 	if ((rand % 100) >= percent) {
406 		panic_trace = panic_trace_disabled;
407 	}
408 }
409 
410 #endif /* DEVELOPMENT || DEBUG */
411 
412 /***************
413 * External API *
414 ***************/
415 
416 #if DEVELOPMENT || DEBUG
/*
 * Trace enablement entry point. In this version of the file it performs no
 * hardware operation: it only emits the "enter" and "exit" log lines when
 * `should_log` is true.
 */
void
PE_arm_debug_enable_trace(bool should_log)
{
	if (!should_log) {
		return;
	}

	panic_trace_log("%s enter", __FUNCTION__);
	panic_trace_log("%s exit", __FUNCTION__);
}
427 #endif /* DEVELOPMENT || DEBUG */
428 
429 #if DEVELOPMENT || DEBUG
430 static void
PE_arm_panic_hook(const char * str __unused)431 PE_arm_panic_hook(const char *str __unused)
432 {
433 	(void)str; // not used
434 #if defined(__arm64__) && !APPLEVIRTUALPLATFORM
435 	/*
436 	 * For Fastsim support--inform the simulator that it can dump a
437 	 * panic trace now (so we don't capture all the panic handling).
438 	 * This constant is randomly chosen by agreement between xnu and
439 	 * Fastsim.
440 	 */
441 	__asm__ volatile ("hint #0x4f");
442 #endif /* defined(__arm64__) && !APPLEVIRTUALPLATFORM */
443 	if (bootarg_stop_clocks) {
444 		pe_run_debug_command(stop_clocks);
445 	}
446 	// disable panic trace to snapshot its ringbuffer
447 	// note: Not taking panic_trace_lock to avoid delaying cpu halt.
448 	//       This is known to be racy.
449 	if (panic_trace) {
450 		if (running_debug_command_on_cpu_number == cpu_number()) {
451 			// This is going to end badly if we don't trap, since we'd be panic-ing during our own code
452 			kprintf("## Panic Trace code caused the panic ##\n");
453 			return;  // allow the normal panic operation to occur.
454 		}
455 
456 		// Stop tracing to freeze the buffer and return to normal panic processing.
457 	}
458 }
459 #endif /* DEVELOPMENT || DEBUG */
460 
461 
462 #if DEVELOPMENT || DEBUG
463 void (*PE_arm_debug_panic_hook)(const char *str) = PE_arm_panic_hook;
464 #else
465 void(*const PE_arm_debug_panic_hook)(const char *str) = NULL;
466 #endif  // DEVELOPMENT || DEBUG
467 
/*
 * Platform-expert CPU initialization: on DEVELOPMENT and DEBUG kernels, run
 * the enable_stop_clocks debug command when the "stop_clocks" boot-arg is
 * set; then always initialize FIQ handling through pe_init_fiq().
 */
void
PE_init_cpu(void)
{
#if DEVELOPMENT || DEBUG
	const bool run_enable_stop_clocks = (bootarg_stop_clocks != 0);

	if (run_enable_stop_clocks) {
		pe_run_debug_command(enable_stop_clocks);
	}
#endif /* DEVELOPMENT || DEBUG */

	pe_init_fiq();
}
479 
480 
/* Single-step hook. Intentionally a no-op in this file. */
void
PE_singlestep_hook(void)
{
	return;
}
485 
486 void
PE_panic_hook(const char * str __unused)487 PE_panic_hook(const char *str __unused)
488 {
489 	if (PE_arm_debug_panic_hook != NULL) {
490 		PE_arm_debug_panic_hook(str);
491 	}
492 }
493 
494 /*
495  * Early part of the debug system init.
496  * Ran on the boot CPU with VM system enabled, mappings to any region
497  * allowed, carveouts not yet enabled, SPR lockdown not applied.
498  */
499 void
pe_arm_debug_init_early(void * boot_cpu_data)500 pe_arm_debug_init_early(void *boot_cpu_data)
501 {
502 	DTEntry         entryP;
503 	uintptr_t const *reg_prop;
504 	uint32_t        prop_size;
505 
506 	/* Require gSocPhys to be initialized. */
507 	if (gSocPhys == 0) {
508 		kprintf("pe_arm_init_debug: failed to initialize : gSocPhys == 0\n");
509 		return;
510 	}
511 
512 	/* Update the panic_trace start policy depending on the execution environment. */
513 #if DEVELOPMENT || DEBUG
514 	if (panic_trace != 0) {
515 		panic_trace_apply_stress_rack_policy();
516 	}
517 
518 	if ((panic_trace & panic_trace_partial_policy) != 0) {
519 		panic_trace_apply_partial_policy();
520 	}
521 #endif /* DEVELOPMENT || DEBUG */
522 
523 	/* Lookup the cpu debug interface in the device tree. */
524 	if (SecureDTFindEntry("device_type", "cpu-debug-interface", &entryP) == kSuccess) {
525 		/* Initialize the arm debug interface. */
526 		if (SecureDTGetProperty(entryP, "reg", (void const **)&reg_prop, &prop_size) == kSuccess) {
527 			ml_init_arm_debug_interface(boot_cpu_data, ml_io_map(gSocPhys + *reg_prop, *(reg_prop + 1)));
528 		}
529 
530 		/* Initialze the stop-clocks infrastructure. */
531 #if DEVELOPMENT || DEBUG
532 		if (bootarg_stop_clocks) {
533 			pe_init_debug_command(entryP, &enable_stop_clocks, "enable_stop_clocks");
534 			pe_init_debug_command(entryP, &stop_clocks, "stop_clocks");
535 		}
536 #endif
537 
538 		/* Initialize panic-trace. */
539 #if DEVELOPMENT || DEBUG
540 		simple_lock_init(&panic_hook_lock, 0); //assuming single threaded mode
541 	#endif
542 	} else {
543 #if DEVELOPMENT || DEBUG
544 		const uint32_t dependent_modes = (panic_trace_enabled | panic_trace_alt_enabled);
545 		if (bootarg_stop_clocks || (panic_trace & dependent_modes)) {
546 			panic("failed to find cpu-debug-interface node in the EDT! "
547 			    "(required by `panic_trace={0x01, 0x10}` or `stop_clocks=1`)");
548 		} else
549 #endif
550 		{
551 			kprintf("pe_arm_init_debug: failed to find cpu-debug-interface\n");
552 		}
553 	}
554 
555 
556 	/* Report init. */
557 	debug_and_trace_initialized = true;
558 }
559 
/*
 * Late part of the init of the debug system,
 * when carveouts have been allocated.
 * Nothing to do in this version of the file.
 */
void
pe_arm_debug_init_late(void)
{
	return;
}
568 
569 /*********************
570 * Panic-trace sysctl *
571 *********************/
572 
573