xref: /xnu-12377.61.12/osfmk/arm64/sleh.c (revision 4d495c6e23c53686cf65f45067f79024cf5dcee8)
1 /*
2  * Copyright (c) 2012-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <arm/caches_internal.h>
30 #include <arm/cpu_data.h>
31 #include <arm/cpu_data_internal.h>
32 #include <arm/misc_protos.h>
33 #include <arm/thread.h>
34 #include <arm/rtclock.h>
35 #include <arm/trap_internal.h> /* for IS_ARM_GDB_TRAP() et al */
36 #include <arm64/proc_reg.h>
37 #include <arm64/machine_machdep.h>
38 #include <arm64/monotonic.h>
39 #include <arm64/instructions.h>
40 
41 #include <kern/debug.h>
42 #include <kern/exc_guard.h>
43 #include <kern/restartable.h>
44 #include <kern/socd_client.h>
45 #include <kern/task.h>
46 #include <kern/thread.h>
47 #include <kern/zalloc_internal.h>
48 #include <mach/exception.h>
49 #include <mach/arm/traps.h>
50 #include <mach/vm_types.h>
51 #include <mach/machine/thread_status.h>
52 
53 #include <machine/atomic.h>
54 #include <machine/limits.h>
55 
56 #include <pexpert/arm/protos.h>
57 #include <pexpert/arm64/apple_arm64_cpu.h>
58 #include <pexpert/arm64/apple_arm64_regs.h>
59 #include <pexpert/arm64/board_config.h>
60 
61 #include <vm/vm_page.h>
62 #include <vm/pmap.h>
63 #include <vm/vm_fault.h>
64 #include <vm/vm_kern.h>
65 #include <vm/vm_map_xnu.h>
66 
67 #include <sys/errno.h>
68 #include <sys/kdebug.h>
69 #include <sys/code_signing.h>
70 #include <sys/reason.h>
71 #include <kperf/kperf.h>
72 
73 #include <kern/policy_internal.h>
74 #if CONFIG_TELEMETRY
75 #include <kern/telemetry.h>
76 #include <kern/trap_telemetry.h>
77 #endif
78 
79 #include <prng/entropy.h>
80 
81 
82 
83 
84 #include <arm64/platform_error_handler.h>
85 
86 #if KASAN_TBI
87 #include <san/kasan.h>
88 #endif /* KASAN_TBI */
89 
90 #if CONFIG_UBSAN_MINIMAL
91 #include <san/ubsan_minimal.h>
92 #endif
93 
94 #if HAS_MTE
95 #include <arm64/mte_xnu.h>
96 #endif /* HAS_MTE */
97 
98 
99 #ifndef __arm64__
100 #error Should only be compiling for arm64.
101 #endif
102 
103 #if DEBUG || DEVELOPMENT
104 #define HAS_TELEMETRY_KERNEL_BRK 1
105 #endif
106 
107 
108 #define TEST_CONTEXT32_SANITY(context) \
109 	(context->ss.ash.flavor == ARM_SAVED_STATE32 && context->ss.ash.count == ARM_SAVED_STATE32_COUNT && \
110 	 context->ns.nsh.flavor == ARM_NEON_SAVED_STATE32 && context->ns.nsh.count == ARM_NEON_SAVED_STATE32_COUNT)
111 
112 #define TEST_CONTEXT64_SANITY(context) \
113 	(context->ss.ash.flavor == ARM_SAVED_STATE64 && context->ss.ash.count == ARM_SAVED_STATE64_COUNT && \
114 	 context->ns.nsh.flavor == ARM_NEON_SAVED_STATE64 && context->ns.nsh.count == ARM_NEON_SAVED_STATE64_COUNT)
115 
116 #define ASSERT_CONTEXT_SANITY(context) \
117 	assert(TEST_CONTEXT32_SANITY(context) || TEST_CONTEXT64_SANITY(context))
118 
119 
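/*
 * COPYIN/COPYOUT dispatch on the CPSR of the saved exception state: if the
 * fault was taken from EL1 (i.e. the kernel itself was doing the access),
 * use the kernel copy routines; otherwise use the regular user copyin/copyout.
 */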
120 #define COPYIN(src, dst, size)                           \
121 	(PSR64_IS_KERNEL(get_saved_state_cpsr(state))) ? \
122 	copyin_kern(src, dst, size) :                    \
123 	copyin(src, dst, size)
124 
125 #define COPYOUT(src, dst, size)                          \
126 	(PSR64_IS_KERNEL(get_saved_state_cpsr(state))) ? \
127 	copyout_kern(src, dst, size)                   : \
128 	copyout(src, dst, size)
129 
130 // Below is for stringifying a macro parameter so it can be concatenated with a string literal
131 #define STR1(x) #x
132 #define STR(x) STR1(x)
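/* Example: given "#define FOO 42", STR(FOO) expands to the string literal "42", while STR1(FOO) would yield "FOO". */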
133 
134 #define ARM64_KDBG_CODE_KERNEL (0 << 8)
135 #define ARM64_KDBG_CODE_USER   (1 << 8)
136 #define ARM64_KDBG_CODE_GUEST  (2 << 8)
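/*
 * These values are OR'd with the ESR exception class and passed to
 * MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, ...) when emitting the synchronous
 * exception kdebug tracepoints in sleh_synchronous() below.
 */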
137 
138 _Static_assert(ARM64_KDBG_CODE_GUEST <= KDBG_CODE_MAX, "arm64 KDBG trace codes out of range");
139 _Static_assert(ARM64_KDBG_CODE_GUEST <= UINT16_MAX, "arm64 KDBG trace codes out of range");
140 
141 void panic_with_thread_kernel_state(const char *msg, arm_saved_state_t *ss) __abortlike;
142 
143 void sleh_synchronous_sp1(arm_context_t *, uint64_t, vm_offset_t) __abortlike;
144 void sleh_synchronous(arm_context_t *, uint64_t, vm_offset_t, bool);
145 
146 
147 
148 void sleh_irq(arm_saved_state_t *);
149 void sleh_fiq(arm_saved_state_t *);
150 void sleh_serror(arm_context_t *context, uint64_t esr, vm_offset_t far);
151 void sleh_invalid_stack(arm_context_t *context, uint64_t esr, vm_offset_t far) __dead2;
152 
153 static void sleh_interrupt_handler_prologue(arm_saved_state_t *, unsigned int type);
154 static void sleh_interrupt_handler_epilogue(void);
155 
156 static void handle_svc(arm_saved_state_t *);
157 static void handle_mach_absolute_time_trap(arm_saved_state_t *);
158 static void handle_mach_continuous_time_trap(arm_saved_state_t *);
159 
160 static void handle_msr_trap(arm_saved_state_t *state, uint64_t esr);
161 #if __has_feature(ptrauth_calls)
162 static void handle_pac_fail(arm_saved_state_t *state, uint64_t esr) __dead2;
163 static inline uint64_t fault_addr_bitmask(unsigned int bit_from, unsigned int bit_to);
164 #endif
165 static void handle_bti_fail(arm_saved_state_t *state, uint64_t esr);
166 extern kern_return_t arm_fast_fault(pmap_t, vm_map_address_t, vm_prot_t, bool, bool);
167 
168 static void handle_uncategorized(arm_saved_state_t *);
169 
170 static void handle_kernel_breakpoint(arm_saved_state_t *, uint64_t);
171 
172 static void handle_user_breakpoint(arm_saved_state_t *, uint64_t) __dead2;
173 
174 typedef void (*abort_inspector_t)(uint32_t, fault_status_t *, vm_prot_t *);
175 static void inspect_instruction_abort(uint32_t, fault_status_t *, vm_prot_t *);
176 static void inspect_data_abort(uint32_t, fault_status_t *, vm_prot_t *);
177 
178 static int is_vm_fault(fault_status_t);
179 static int is_translation_fault(fault_status_t);
180 static int is_alignment_fault(fault_status_t);
181 
182 typedef void (*abort_handler_t)(arm_saved_state_t *, uint64_t, vm_offset_t, fault_status_t, vm_prot_t, expected_fault_handler_t);
183 static void handle_user_abort(arm_saved_state_t *, uint64_t, vm_offset_t, fault_status_t, vm_prot_t, expected_fault_handler_t);
184 static void handle_kernel_abort(arm_saved_state_t *, uint64_t, vm_offset_t, fault_status_t, vm_prot_t, expected_fault_handler_t);
185 
186 static void handle_pc_align(arm_saved_state_t *ss) __dead2;
187 static void handle_sp_align(arm_saved_state_t *ss) __dead2;
188 static void handle_sw_step_debug(arm_saved_state_t *ss) __dead2;
189 static void handle_wf_trap(arm_saved_state_t *ss) __dead2;
190 static void handle_fp_trap(arm_saved_state_t *ss, uint64_t esr) __dead2;
191 #if HAS_ARM_FEAT_SME
192 static void handle_sme_trap(arm_saved_state_t *state, uint64_t esr);
193 #endif /* HAS_ARM_FEAT_SME */
194 
195 static void handle_watchpoint(vm_offset_t fault_addr) __dead2;
196 
197 static void handle_abort(arm_saved_state_t *, uint64_t, vm_offset_t, abort_inspector_t, abort_handler_t, expected_fault_handler_t);
198 
199 static void handle_user_trapped_instruction32(arm_saved_state_t *, uint64_t esr) __dead2;
200 
201 static void handle_simd_trap(arm_saved_state_t *, uint64_t esr) __dead2;
202 
203 extern void current_cached_proc_cred_update(void);
204 void   mach_syscall_trace_exit(unsigned int retval, unsigned int call_number);
205 
206 struct proc;
207 
208 typedef uint32_t arm64_instr_t;
209 
210 extern void
211 unix_syscall(struct arm_saved_state * regs, thread_t thread_act, struct proc * proc);
212 
213 extern void
214 mach_syscall(struct arm_saved_state*);
215 
216 #if CONFIG_SPTM
217 bool sleh_panic_lockdown_should_initiate_el1_sp0_sync(uint64_t esr, uint64_t elr, uint64_t far, uint64_t spsr);
218 #endif /* CONFIG_SPTM */
219 
220 #if CONFIG_DTRACE
221 extern kern_return_t dtrace_user_probe(arm_saved_state_t* regs);
222 extern boolean_t dtrace_tally_fault(user_addr_t);
223 
224 /*
225  * Traps for userland processing. Can't include bsd/sys/fasttrap_isa.h, so
226  * copy and paste the trap instructions over from that file. Need to keep
227  * these in sync!
228  */
229 #define FASTTRAP_ARM32_INSTR 0xe7ffdefc
230 #define FASTTRAP_THUMB32_INSTR 0xdefc
231 #define FASTTRAP_ARM64_INSTR 0xe7eeee7e
232 
233 #define FASTTRAP_ARM32_RET_INSTR 0xe7ffdefb
234 #define FASTTRAP_THUMB32_RET_INSTR 0xdefb
235 #define FASTTRAP_ARM64_RET_INSTR 0xe7eeee7d
236 
237 /* See <rdar://problem/4613924> */
238 perfCallback tempDTraceTrapHook = NULL; /* Pointer to DTrace fbt trap hook routine */
239 #endif
240 
241 
242 
243 extern void arm64_thread_exception_return(void) __dead2;
244 
245 #if defined(APPLETYPHOON)
246 #define CPU_NAME "Typhoon"
247 #elif defined(APPLETWISTER)
248 #define CPU_NAME "Twister"
249 #elif defined(APPLEHURRICANE)
250 #define CPU_NAME "Hurricane"
251 #elif defined(APPLELIGHTNING)
252 #define CPU_NAME "Lightning"
253 #elif defined(APPLEEVEREST)
254 #define CPU_NAME "Everest"
255 #elif defined(APPLEH16)
256 #define CPU_NAME "AppleH16"
257 #elif defined(APPLEACC8)
258 #define CPU_NAME "AppleACC8"
259 #else
260 #define CPU_NAME "Unknown"
261 #endif
262 
263 #if (CONFIG_KERNEL_INTEGRITY && defined(KERNEL_INTEGRITY_WT))
264 #define ESR_WT_SERROR(esr) (((esr) & 0xffffff00) == 0xbf575400)
265 #define ESR_WT_REASON(esr) ((esr) & 0xff)
266 
267 #define WT_REASON_NONE           0
268 #define WT_REASON_INTEGRITY_FAIL 1
269 #define WT_REASON_BAD_SYSCALL    2
270 #define WT_REASON_NOT_LOCKED     3
271 #define WT_REASON_ALREADY_LOCKED 4
272 #define WT_REASON_SW_REQ         5
273 #define WT_REASON_PT_INVALID     6
274 #define WT_REASON_PT_VIOLATION   7
275 #define WT_REASON_REG_VIOLATION  8
276 #endif
277 
278 #if defined(HAS_IPI)
279 void cpu_signal_handler(void);
280 extern unsigned int gFastIPI;
281 #endif /* defined(HAS_IPI) */
282 
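/*
 * Set at most once (via compare-and-swap) by panic_with_thread_kernel_state(),
 * so a nested panic still points at the register state of the original fault.
 */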
283 static arm_saved_state64_t *original_faulting_state = NULL;
284 
285 /*
286  * A self-restrict mode records which special permissive modes (none, one, or
287  * several) were active at the time of a fault. This, in part, determines how the
288  * fault will be handled.
289  */
290 __options_closed_decl(self_restrict_mode_t, unsigned int, {
291 	/* None of the special modes are active. */
292 	SELF_RESTRICT_NONE  = 0U,
293 
294 	/*
295 	 * Covers all of the other, more specific modes; this bit should be set
296 	 * whenever any other mode is active.
297 	 */
298 	SELF_RESTRICT_ANY   = (1U << 0),
299 
300 	/* Reserved */
301 
302 	/* Reserved */
303 });
304 
305 
306 TUNABLE(bool, fp_exceptions_enabled, "-fp_exceptions", false);
307 
308 extern const vm_map_address_t physmap_base;
309 extern const vm_map_address_t physmap_end;
310 extern vm_offset_t static_memory_end;
311 
312 /*
313  * Fault recovery entries (struct copyio_recovery_entry) for the copyin/copyout routines.
314  *
315  * Offsets are expressed in bytes from &copyio_recover_table
316  */
317 struct copyio_recovery_entry {
318 	ptrdiff_t cre_start;
319 	ptrdiff_t cre_end;
320 	ptrdiff_t cre_recovery;
321 #if HAS_MTE
322 	uint8_t recover_from_kernel_read_tag_check_fault;
323 	uint8_t recover_from_kernel_write_tag_check_fault;
324 	uint8_t padding[6];
325 #endif
326 };
327 
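/*
 * find_copyio_recovery_entry() below matches a faulting PC against
 * [cre_start, cre_end) to decide whether the fault hit a copyin/copyout
 * routine; the abort handler can then (presumably) resume execution at the
 * entry's cre_recovery offset instead of panicking.  Offsets relative to
 * copyio_recover_table are stored rather than absolute addresses, presumably
 * so the entries need no pointer relocation or signing.
 */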
328 extern struct copyio_recovery_entry copyio_recover_table[];
329 extern struct copyio_recovery_entry copyio_recover_table_end[];
330 
331 static inline ptrdiff_t
332 copyio_recovery_offset(uintptr_t addr)
333 {
334 	return (ptrdiff_t)(addr - (uintptr_t)copyio_recover_table);
335 }
336 
337 #if !HAS_APPLE_PAC
338 static inline uintptr_t
339 copyio_recovery_addr(ptrdiff_t offset)
340 {
341 	return (uintptr_t)copyio_recover_table + (uintptr_t)offset;
342 }
343 #endif
344 
345 static inline struct copyio_recovery_entry *
346 find_copyio_recovery_entry(uint64_t pc)
347 {
348 	ptrdiff_t offset = copyio_recovery_offset(pc);
349 	struct copyio_recovery_entry *e;
350 
351 	for (e = copyio_recover_table; e < copyio_recover_table_end; e++) {
352 		if (offset >= e->cre_start && offset < e->cre_end) {
353 			return e;
354 		}
355 	}
356 
357 	return NULL;
358 }
359 
360 static inline int
361 is_vm_fault(fault_status_t status)
362 {
363 	switch (status) {
364 	case FSC_TRANSLATION_FAULT_L0:
365 	case FSC_TRANSLATION_FAULT_L1:
366 	case FSC_TRANSLATION_FAULT_L2:
367 	case FSC_TRANSLATION_FAULT_L3:
368 	case FSC_ACCESS_FLAG_FAULT_L1:
369 	case FSC_ACCESS_FLAG_FAULT_L2:
370 	case FSC_ACCESS_FLAG_FAULT_L3:
371 	case FSC_PERMISSION_FAULT_L1:
372 	case FSC_PERMISSION_FAULT_L2:
373 	case FSC_PERMISSION_FAULT_L3:
374 		return TRUE;
375 	default:
376 		return FALSE;
377 	}
378 }
379 
380 static inline int
381 is_translation_fault(fault_status_t status)
382 {
383 	switch (status) {
384 	case FSC_TRANSLATION_FAULT_L0:
385 	case FSC_TRANSLATION_FAULT_L1:
386 	case FSC_TRANSLATION_FAULT_L2:
387 	case FSC_TRANSLATION_FAULT_L3:
388 		return TRUE;
389 	default:
390 		return FALSE;
391 	}
392 }
393 
394 static inline int
395 is_permission_fault(fault_status_t status)
396 {
397 	switch (status) {
398 	case FSC_PERMISSION_FAULT_L1:
399 	case FSC_PERMISSION_FAULT_L2:
400 	case FSC_PERMISSION_FAULT_L3:
401 		return TRUE;
402 	default:
403 		return FALSE;
404 	}
405 }
406 
407 static inline int
408 is_alignment_fault(fault_status_t status)
409 {
410 	return status == FSC_ALIGNMENT_FAULT;
411 }
412 
413 static inline int
414 is_parity_error(fault_status_t status)
415 {
416 	switch (status) {
417 #if defined(ARM64_BOARD_CONFIG_T6020)
418 		/*
419 		 * H14 Erratum (rdar://61553243): Despite having FEAT_RAS implemented,
420 		 * FSC_SYNC_PARITY_x can be reported for data and instruction aborts
421 		 * and should be interpreted as FSC_SYNC_EXT_ABORT_x
422 		 */
423 #else
424 	/*
425 	 * TODO: According to the ARM ARM, Async Parity (0b011001) is a DFSC that is
426 	 * only applicable to the AArch32 HSR register. Can this be removed?
427 	 */
428 	case FSC_ASYNC_PARITY:
429 	case FSC_SYNC_PARITY:
430 	case FSC_SYNC_PARITY_TT_L1:
431 	case FSC_SYNC_PARITY_TT_L2:
432 	case FSC_SYNC_PARITY_TT_L3:
433 		return TRUE;
434 #endif
435 	default:
436 		return FALSE;
437 	}
438 }
439 
440 static inline int
441 is_sync_external_abort(fault_status_t status)
442 {
443 	switch (status) {
444 #if defined(ARM64_BOARD_CONFIG_T6020)
445 	/*
446 	 * H14 Erratum (rdar://61553243): Despite having FEAT_RAS implemented,
447 	 * FSC_SYNC_PARITY_x can be reported for data and instruction aborts
448 	 * and should be interpreted as FSC_SYNC_EXT_ABORT_x
449 	 */
450 	case FSC_SYNC_PARITY:
451 #endif /* defined(ARM64_BOARD_CONFIG_T6020) */
452 	case FSC_SYNC_EXT_ABORT:
453 		return TRUE;
454 	default:
455 		return FALSE;
456 	}
457 }
458 
459 static inline int
460 is_table_walk_error(fault_status_t status)
461 {
462 	switch (status) {
463 	case FSC_SYNC_EXT_ABORT_TT_L1:
464 	case FSC_SYNC_EXT_ABORT_TT_L2:
465 	case FSC_SYNC_EXT_ABORT_TT_L3:
466 #if defined(ARM64_BOARD_CONFIG_T6020)
467 	/*
468 	 * H14 Erratum (rdar://61553243): Despite having FEAT_RAS implemented,
469 	 * FSC_SYNC_PARITY_x can be reported for data and instruction aborts
470 	 * and should be interpreted as FSC_SYNC_EXT_ABORT_x
471 	 */
472 	case FSC_SYNC_PARITY_TT_L1:
473 	case FSC_SYNC_PARITY_TT_L2:
474 	case FSC_SYNC_PARITY_TT_L3:
475 #endif /* defined(ARM64_BOARD_CONFIG_T6020) */
476 		return TRUE;
477 	default:
478 		return FALSE;
479 	}
480 }
481 
482 
483 #if HAS_MTE
484 static void
485 mte_send_sync_soft_mode_exception(thread_t thread, vm_map_address_t address, mach_exception_data_type_t mx_code);
486 
487 static inline int
488 is_tag_check_fault(fault_status_t status)
489 {
490 	return status == FSC_SYNC_TAG_CHECK_FAULT;
491 }
492 
493 static inline bool
494 is_canonical_memory_permission_fault(uint64_t esr)
495 {
496 	return ESR_ISS2(esr) & ISS2_DA_TND;
497 }
498 
499 static inline uint16_t
500 tag_check_fault_type(pmap_t pmap, vm_map_address_t fault_address)
501 {
502 	if (pmap_is_tagged_mapping(pmap, pmap_strip_addr(pmap, fault_address))) {
503 		return EXC_ARM_MTE_TAGCHECK_FAIL;
504 	} else {
505 		return EXC_ARM_MTE_CANONICAL_FAIL;
506 	}
507 }
508 #endif /* HAS_MTE */
509 
510 
511 static inline int
512 is_servicible_fault(fault_status_t status, uint64_t esr)
513 {
514 #if HAS_MTE
515 	if (is_tag_check_fault(status)) {
516 		/*
517 		 * Never called from the context of a kernel thread with its map switched
518 		 * to a user map, so current_task() is always the task responsible for
519 		 * the fault.
520 		 */
521 		task_t current = current_task_early();
522 		/*
523 		 * If the task is running in soft mode, we can "service" the fault by
524 		 * clearing TCF0 and letting the thread try again.
525 		 */
526 		if (current && task_has_sec_soft_mode(current)) {
527 			return TRUE;
528 		}
529 	}
530 	if (is_canonical_memory_permission_fault(esr)) {
531 		/*
532 		 * This fault was caused by a tag write to canonically tagged
533 		 * memory.  Trying to fault in the data page won't do any good.
534 		 */
535 		return FALSE;
536 	}
537 #else
538 #pragma unused(esr)
539 #endif
540 	return is_vm_fault(status);
541 }
542 
543 __dead2 __unused
544 static void
545 arm64_implementation_specific_error(arm_saved_state_t *state, uint64_t esr, vm_offset_t far)
546 {
547 #pragma unused (state, esr, far)
548 	panic_plain("Unhandled implementation specific error\n");
549 }
550 
551 #if CONFIG_KERNEL_INTEGRITY
552 #pragma clang diagnostic push
553 #pragma clang diagnostic ignored "-Wunused-parameter"
554 static void
555 kernel_integrity_error_handler(uint64_t esr, vm_offset_t far)
556 {
557 #if defined(KERNEL_INTEGRITY_WT)
558 #if (DEVELOPMENT || DEBUG)
559 	if (ESR_WT_SERROR(esr)) {
560 		switch (ESR_WT_REASON(esr)) {
561 		case WT_REASON_INTEGRITY_FAIL:
562 			panic_plain("Kernel integrity, violation in frame 0x%016lx.", far);
563 		case WT_REASON_BAD_SYSCALL:
564 			panic_plain("Kernel integrity, bad syscall.");
565 		case WT_REASON_NOT_LOCKED:
566 			panic_plain("Kernel integrity, not locked.");
567 		case WT_REASON_ALREADY_LOCKED:
568 			panic_plain("Kernel integrity, already locked.");
569 		case WT_REASON_SW_REQ:
570 			panic_plain("Kernel integrity, software request.");
571 		case WT_REASON_PT_INVALID:
572 			panic_plain("Kernel integrity, encountered invalid TTE/PTE while "
573 			    "walking 0x%016lx.", far);
574 		case WT_REASON_PT_VIOLATION:
575 			panic_plain("Kernel integrity, violation in mapping 0x%016lx.",
576 			    far);
577 		case WT_REASON_REG_VIOLATION:
578 			panic_plain("Kernel integrity, violation in system register %d.",
579 			    (unsigned) far);
580 		default:
581 			panic_plain("Kernel integrity, unknown (esr=0x%08llx).", esr);
582 		}
583 	}
584 #else
585 	if (ESR_WT_SERROR(esr)) {
586 		panic_plain("SError esr: 0x%08llx far: 0x%016lx.", esr, far);
587 	}
588 #endif
589 #endif
590 }
591 #pragma clang diagnostic pop
592 #endif
593 
594 static void
595 arm64_platform_error(arm_saved_state_t *state, uint64_t esr, vm_offset_t far, platform_error_source_t source)
596 {
597 #if CONFIG_KERNEL_INTEGRITY
598 	kernel_integrity_error_handler(esr, far);
599 #endif
600 
601 	(void)source;
602 	cpu_data_t *cdp = getCpuDatap();
603 
604 	if (PE_handle_platform_error(far)) {
605 		return;
606 	} else if (cdp->platform_error_handler != NULL) {
607 		cdp->platform_error_handler(cdp->cpu_id, far);
608 	} else {
609 		arm64_implementation_specific_error(state, esr, far);
610 	}
611 }
612 
613 
614 void
615 panic_with_thread_kernel_state(const char *msg, arm_saved_state_t *ss)
616 {
617 	boolean_t ss_valid;
618 
619 	ss_valid = is_saved_state64(ss);
620 	arm_saved_state64_t *state = saved_state64(ss);
621 
622 	os_atomic_cmpxchg(&original_faulting_state, NULL, state, seq_cst);
623 
624 	// rdar://80659177
625 	// Emit SoCD tracepoints up to twice: once the first time we call panic and
626 	// again if we encounter a nested panic after that.
627 	static int twice = 2;
628 	if (twice > 0) {
629 		twice--;
630 		SOCD_TRACE_XNU(KERNEL_STATE_PANIC,
631 		    SOCD_TRACE_MODE_STICKY_TRACEPOINT,
632 		    ADDR(state->pc),
633 		    PACK_LSB(VALUE(state->lr), VALUE(ss_valid)),
634 		    PACK_2X32(VALUE(state->esr), VALUE(state->cpsr)),
635 		    VALUE(state->far));
636 	}
637 
638 
639 
640 	panic_plain("%s at pc 0x%016llx, lr 0x%016llx (saved state: %p%s)\n"
641 	    "\t  x0:  0x%016llx x1:  0x%016llx  x2:  0x%016llx  x3:  0x%016llx\n"
642 	    "\t  x4:  0x%016llx x5:  0x%016llx  x6:  0x%016llx  x7:  0x%016llx\n"
643 	    "\t  x8:  0x%016llx x9:  0x%016llx  x10: 0x%016llx  x11: 0x%016llx\n"
644 	    "\t  x12: 0x%016llx x13: 0x%016llx  x14: 0x%016llx  x15: 0x%016llx\n"
645 	    "\t  x16: 0x%016llx x17: 0x%016llx  x18: 0x%016llx  x19: 0x%016llx\n"
646 	    "\t  x20: 0x%016llx x21: 0x%016llx  x22: 0x%016llx  x23: 0x%016llx\n"
647 	    "\t  x24: 0x%016llx x25: 0x%016llx  x26: 0x%016llx  x27: 0x%016llx\n"
648 	    "\t  x28: 0x%016llx fp:  0x%016llx  lr:  0x%016llx  sp:  0x%016llx\n"
649 	    "\t  pc:  0x%016llx cpsr: 0x%08x         esr: 0x%016llx  far: 0x%016llx\n",
650 	    msg, state->pc, state->lr, ss, (ss_valid ? "" : " INVALID"),
651 	    state->x[0], state->x[1], state->x[2], state->x[3],
652 	    state->x[4], state->x[5], state->x[6], state->x[7],
653 	    state->x[8], state->x[9], state->x[10], state->x[11],
654 	    state->x[12], state->x[13], state->x[14], state->x[15],
655 	    state->x[16], state->x[17], state->x[18], state->x[19],
656 	    state->x[20], state->x[21], state->x[22], state->x[23],
657 	    state->x[24], state->x[25], state->x[26], state->x[27],
658 	    state->x[28], state->fp, state->lr, state->sp,
659 	    state->pc, state->cpsr, state->esr, state->far);
660 }
661 
662 void
663 sleh_synchronous_sp1(arm_context_t *context, uint64_t esr, vm_offset_t far __unused)
664 {
665 	esr_exception_class_t  class = ESR_EC(esr);
666 	arm_saved_state_t    * state = &context->ss;
667 
668 	switch (class) {
669 	case ESR_EC_UNCATEGORIZED:
670 	{
671 #if (DEVELOPMENT || DEBUG)
672 		uint32_t instr = *((uint32_t*)get_saved_state_pc(state));
673 		if (IS_ARM_GDB_TRAP(instr)) {
674 			DebuggerCall(EXC_BREAKPOINT, state);
675 		}
676 		OS_FALLTHROUGH; // panic if we return from the debugger
677 #else
678 		panic_with_thread_kernel_state("Unexpected debugger trap while SP1 selected", state);
679 #endif /* (DEVELOPMENT || DEBUG) */
680 	}
681 	default:
682 		panic_with_thread_kernel_state("Synchronous exception taken while SP1 selected", state);
683 	}
684 }
685 
686 
687 __attribute__((noreturn))
688 void
689 thread_exception_return()
690 {
691 	thread_t thread = current_thread();
692 	if (thread->machine.exception_trace_code != 0) {
693 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
694 		    MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, thread->machine.exception_trace_code) | DBG_FUNC_END, 0, 0, 0, 0, 0);
695 		thread->machine.exception_trace_code = 0;
696 	}
697 
698 #if HAS_MTE
699 	thread->machine.el0_synchronous_trap = false;
700 #endif /* HAS_MTE */
701 
702 #if KASAN_TBI
703 	kasan_unpoison_curstack(true);
704 #endif /* KASAN_TBI */
705 	arm64_thread_exception_return();
706 	__builtin_unreachable();
707 }
708 
709 /*
710  * check whether task vtimers are running and set thread and CPU BSD AST
711  *
712  * must be called with interrupts masked so updates of fields are atomic
713  * must be emitted inline to avoid generating an FBT probe on the exception path
714  *
715  */
716 __attribute__((__always_inline__))
717 static inline void
718 task_vtimer_check(thread_t thread)
719 {
720 	task_t task = get_threadtask_early(thread);
721 
722 	if (__improbable(task != NULL && task->vtimers)) {
723 		thread_ast_set(thread, AST_BSD);
724 		thread->machine.CpuDatap->cpu_pending_ast |= AST_BSD;
725 	}
726 }
727 
728 #if MACH_ASSERT
729 /**
730  * A version of get_preemption_level() that works in early boot.
731  *
732  * If an exception is raised in early boot before the initial thread has been
733  * set up, then calling get_preemption_level() in the SLEH will trigger an
734  * infinitely-recursing exception. This function handles this edge case.
735  */
736 static inline int
737 sleh_get_preemption_level(void)
738 {
739 	if (__improbable(current_thread() == NULL)) {
740 		return 0;
741 	}
742 	return get_preemption_level();
743 }
744 #endif // MACH_ASSERT
745 
746 static inline bool
747 is_platform_error(uint64_t esr)
748 {
749 	esr_exception_class_t class = ESR_EC(esr);
750 	uint32_t iss = ESR_ISS(esr);
751 	fault_status_t fault_code;
752 
753 	if (class == ESR_EC_DABORT_EL0 || class == ESR_EC_DABORT_EL1) {
754 		fault_code = ISS_DA_FSC(iss);
755 	} else if (class == ESR_EC_IABORT_EL0 || class == ESR_EC_IABORT_EL1) {
756 		fault_code = ISS_IA_FSC(iss);
757 	} else {
758 		return false;
759 	}
760 
761 	return is_parity_error(fault_code) || is_sync_external_abort(fault_code) ||
762 	       is_table_walk_error(fault_code);
763 }
764 
765 void
766 sleh_synchronous(arm_context_t *context, uint64_t esr, vm_offset_t far, __unused bool did_initiate_panic_lockdown)
767 {
768 	esr_exception_class_t  class   = ESR_EC(esr);
769 	arm_saved_state_t    * state   = &context->ss;
770 	thread_t               thread  = current_thread();
771 #if MACH_ASSERT
772 	int                    preemption_level = sleh_get_preemption_level();
773 #endif
774 	expected_fault_handler_t expected_fault_handler = NULL;
775 #ifdef CONFIG_XNUPOST
776 	expected_fault_handler_t saved_expected_fault_handler = NULL;
777 	uintptr_t saved_expected_fault_addr = 0;
778 	uintptr_t saved_expected_fault_pc = 0;
779 #endif /* CONFIG_XNUPOST */
780 
781 	ASSERT_CONTEXT_SANITY(context);
782 
783 	task_vtimer_check(thread);
784 
785 #if CONFIG_DTRACE
786 	/*
787 	 * Handle kernel DTrace probes as early as possible to minimize the likelihood
788 	 * that this path will itself trigger a DTrace probe, which would lead to infinite
789 	 * probe recursion.
790 	 */
791 	if (__improbable((class == ESR_EC_UNCATEGORIZED) && tempDTraceTrapHook &&
792 	    (tempDTraceTrapHook(EXC_BAD_INSTRUCTION, state, 0, 0) == KERN_SUCCESS))) {
793 #if CONFIG_SPTM
794 		if (__improbable(did_initiate_panic_lockdown)) {
795 			panic("Unexpectedly initiated lockdown for DTrace probe?");
796 		}
797 #endif
798 		return;
799 	}
800 #endif
801 	bool is_user = PSR64_IS_USER(get_saved_state_cpsr(state));
802 
803 #if CONFIG_SPTM
804 	// Lockdown should only be initiated for kernel exceptions
805 	assert(!(is_user && did_initiate_panic_lockdown));
806 #endif /* CONFIG_SPTM */
807 
808 	/*
809 	 * Use KERNEL_DEBUG_CONSTANT_IST here to avoid producing tracepoints
810 	 * that would disclose the behavior of PT_DENY_ATTACH processes.
811 	 */
812 	if (is_user) {
813 		/* Sanitize FAR (but only if the exception was taken from userspace) */
814 		switch (class) {
815 		case ESR_EC_IABORT_EL1:
816 		case ESR_EC_IABORT_EL0:
817 			/* If this is a SEA, since we can't trust FnV, just clear FAR from the save area. */
818 			if (ISS_IA_FSC(ESR_ISS(esr)) == FSC_SYNC_EXT_ABORT) {
819 				saved_state64(state)->far = 0;
820 			}
821 			break;
822 		case ESR_EC_DABORT_EL1:
823 		case ESR_EC_DABORT_EL0:
824 			/* If this is a SEA, since we can't trust FnV, just clear FAR from the save area. */
825 			if (ISS_DA_FSC(ESR_ISS(esr)) == FSC_SYNC_EXT_ABORT) {
826 				saved_state64(state)->far = 0;
827 			}
828 			break;
829 		case ESR_EC_WATCHPT_MATCH_EL1:
830 		case ESR_EC_WATCHPT_MATCH_EL0:
831 		case ESR_EC_PC_ALIGN:
832 			break;  /* FAR_ELx is valid */
833 		default:
834 			saved_state64(state)->far = 0;
835 			break;
836 		}
837 
838 		thread->machine.exception_trace_code = (uint16_t)(ARM64_KDBG_CODE_USER | class);
839 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
840 		    MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, thread->machine.exception_trace_code) | DBG_FUNC_START,
841 		    esr, far, get_saved_state_pc(state), 0, 0);
842 	} else {
843 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
844 		    MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, ARM64_KDBG_CODE_KERNEL | class) | DBG_FUNC_START,
845 		    esr, VM_KERNEL_ADDRHIDE(far), VM_KERNEL_UNSLIDE(get_saved_state_pc(state)), 0, 0);
846 	}
847 
848 	if (__improbable(ESR_INSTR_IS_2BYTES(esr))) {
849 		/*
850 		 * We no longer support 32-bit, which means no 2-byte
851 		 * instructions.
852 		 */
853 		if (is_user) {
854 			panic("Exception on 2-byte instruction, "
855 			    "context=%p, esr=%#llx, far=%p",
856 			    context, esr, (void *)far);
857 		} else {
858 			panic_with_thread_kernel_state("Exception on 2-byte instruction", state);
859 		}
860 	}
861 
862 #ifdef CONFIG_XNUPOST
863 	if (thread->machine.expected_fault_handler != NULL) {
864 		bool matching_fault_pc = false;
865 		saved_expected_fault_handler = thread->machine.expected_fault_handler;
866 		saved_expected_fault_addr = thread->machine.expected_fault_addr;
867 		saved_expected_fault_pc = thread->machine.expected_fault_pc;
868 
869 		thread->machine.expected_fault_handler = NULL;
870 		thread->machine.expected_fault_addr = 0;
871 		thread->machine.expected_fault_pc = 0;
872 
873 #if __has_feature(ptrauth_calls)
874 		/*
875 		 * Compare only the bits of PC which make up the virtual address.
876 		 * This ignores the upper bits, which may have been corrupted by HW in
877 		 * platform dependent ways to signal pointer authentication fault.
878 		 */
879 		uint64_t fault_addr_mask = fault_addr_bitmask(0, 64 - T1SZ_BOOT - 1);
880 		uint64_t masked_expected_pc = saved_expected_fault_pc & fault_addr_mask;
881 		uint64_t masked_saved_pc = get_saved_state_pc(state) & fault_addr_mask;
882 		matching_fault_pc = masked_expected_pc == masked_saved_pc;
883 #else
884 		matching_fault_pc =
885 		    (saved_expected_fault_pc == get_saved_state_pc(state));
886 #endif /* ptrauth_call */
887 		if (saved_expected_fault_addr == far ||
888 		    matching_fault_pc) {
889 			expected_fault_handler = saved_expected_fault_handler;
890 		}
891 	}
892 #endif /* CONFIG_XNUPOST */
893 
894 	if (__improbable(is_platform_error(esr))) {
895 		/*
896 		 * Error info must be gathered in the platform error handler before the
897 		 * thread is preempted to another core/cluster, to guarantee
898 		 * accurate error details.
899 		 */
900 
901 		arm64_platform_error(state, esr, far, PLAT_ERR_SRC_SYNC);
902 #if CONFIG_SPTM
903 		if (__improbable(did_initiate_panic_lockdown)) {
904 			panic("Panic lockdown initiated for platform error");
905 		}
906 #endif
907 		return;
908 	}
909 
910 	if (is_user && class == ESR_EC_DABORT_EL0) {
911 		thread_reset_pcs_will_fault(thread);
912 	}
913 
914 #if CONFIG_SPTM
915 	if (__improbable(did_initiate_panic_lockdown && current_thread() != NULL)) {
916 		/*
917 		 * If we initiated panic lockdown, we must disable preemption before
918 		 * enabling interrupts. While unlikely, preempting the panicked thread
919 		 * after lockdown has occurred may hang the system if all cores end up
920 		 * blocked while attempting to return to user space.
921 		 */
922 		disable_preemption();
923 	}
924 #endif /* CONFIG_SPTM */
925 
926 	/* Inherit the interrupt masks from previous context */
927 	if (SPSR_INTERRUPTS_ENABLED(get_saved_state_cpsr(state))) {
928 		ml_set_interrupts_enabled(TRUE);
929 	}
930 
931 #if HAS_MTE
932 	if (is_user) {
933 		thread->machine.el0_synchronous_trap = true;
934 	}
935 #endif
936 
937 	switch (class) {
938 	case ESR_EC_SVC_64:
939 		if (!is_saved_state64(state) || !is_user) {
940 			panic("Invalid SVC_64 context");
941 		}
942 
943 		handle_svc(state);
944 		break;
945 
946 	case ESR_EC_DABORT_EL0:
947 		handle_abort(state, esr, far, inspect_data_abort, handle_user_abort, expected_fault_handler);
948 		break;
949 
950 	case ESR_EC_MSR_TRAP:
951 		handle_msr_trap(state, esr);
952 		break;
953 /**
954  * Some APPLEVIRTUALPLATFORM targets do not specify armv8.6, but they may still run
955  * on a host that implements ARM_FPAC. There's no way for such a host to disable or
956  * trap FPAC without a substantial performance penalty. Therefore, the FPAC handler
957  * here needs to be built into the guest kernels to keep the exception from falling
958  * through.
959  */
960 #if __has_feature(ptrauth_calls)
961 	case ESR_EC_PAC_FAIL:
962 #ifdef CONFIG_XNUPOST
963 		if (expected_fault_handler != NULL && expected_fault_handler(state)) {
964 			break;
965 		}
966 #endif /* CONFIG_XNUPOST */
967 		handle_pac_fail(state, esr);
968 		__builtin_unreachable();
969 
970 #endif /* __has_feature(ptrauth_calls) */
971 
972 #if HAS_ARM_FEAT_SME
973 	case ESR_EC_SME:
974 		handle_sme_trap(state, esr);
975 		break;
976 #endif /* HAS_ARM_FEAT_SME */
977 
978 	case ESR_EC_IABORT_EL0:
979 		handle_abort(state, esr, far, inspect_instruction_abort, handle_user_abort, expected_fault_handler);
980 		break;
981 
982 	case ESR_EC_IABORT_EL1:
983 #ifdef CONFIG_XNUPOST
984 		if ((expected_fault_handler != NULL) && expected_fault_handler(state)) {
985 			break;
986 		}
987 #endif /* CONFIG_XNUPOST */
988 
989 		panic_with_thread_kernel_state("Kernel instruction fetch abort", state);
990 
991 	case ESR_EC_PC_ALIGN:
992 		handle_pc_align(state);
993 		__builtin_unreachable();
994 
995 	case ESR_EC_DABORT_EL1:
996 		handle_abort(state, esr, far, inspect_data_abort, handle_kernel_abort, expected_fault_handler);
997 		break;
998 
999 	case ESR_EC_UNCATEGORIZED:
1000 		assert(!ESR_ISS(esr));
1001 
1002 #if CONFIG_XNUPOST
1003 		if (!is_user && (expected_fault_handler != NULL) && expected_fault_handler(state)) {
1004 			/*
1005 			 * The fault handler accepted the exception and handled it on its
1006 			 * own. Don't trap to the debugger/panic.
1007 			 */
1008 			break;
1009 		}
1010 #endif /* CONFIG_XNUPOST */
1011 		handle_uncategorized(&context->ss);
1012 		break;
1013 
1014 	case ESR_EC_SP_ALIGN:
1015 		handle_sp_align(state);
1016 		__builtin_unreachable();
1017 
1018 	case ESR_EC_BKPT_AARCH32:
1019 		handle_user_breakpoint(state, esr);
1020 		__builtin_unreachable();
1021 
1022 	case ESR_EC_BRK_AARCH64:
1023 #ifdef CONFIG_XNUPOST
1024 		if ((expected_fault_handler != NULL) && expected_fault_handler(state)) {
1025 			break;
1026 		}
1027 #endif /* CONFIG_XNUPOST */
1028 		if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
1029 			handle_kernel_breakpoint(state, esr);
1030 			break;
1031 		} else {
1032 			handle_user_breakpoint(state, esr);
1033 			__builtin_unreachable();
1034 		}
1035 
1036 	case ESR_EC_BKPT_REG_MATCH_EL0:
1037 		if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
1038 			handle_user_breakpoint(state, esr);
1039 		}
1040 		panic("Unsupported Class %u event code. state=%p class=%u esr=%llu far=%p",
1041 		    class, state, class, esr, (void *)far);
1042 		__builtin_unreachable();
1043 
1044 	case ESR_EC_BKPT_REG_MATCH_EL1:
1045 		panic_with_thread_kernel_state("Hardware Breakpoint Debug exception from kernel. Panic (by design)", state);
1046 		__builtin_unreachable();
1047 
1048 	case ESR_EC_SW_STEP_DEBUG_EL0:
1049 		if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
1050 			handle_sw_step_debug(state);
1051 		}
1052 		panic("Unsupported Class %u event code. state=%p class=%u esr=%llu far=%p",
1053 		    class, state, class, esr, (void *)far);
1054 		__builtin_unreachable();
1055 
1056 	case ESR_EC_SW_STEP_DEBUG_EL1:
1057 		panic_with_thread_kernel_state("Software Step Debug exception from kernel. Panic (by design)", state);
1058 		__builtin_unreachable();
1059 
1060 	case ESR_EC_WATCHPT_MATCH_EL0:
1061 		if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
1062 			handle_watchpoint(far);
1063 		}
1064 		panic("Unsupported Class %u event code. state=%p class=%u esr=%llu far=%p",
1065 		    class, state, class, esr, (void *)far);
1066 		__builtin_unreachable();
1067 
1068 	case ESR_EC_WATCHPT_MATCH_EL1:
1069 		/*
1070 		 * If we hit a watchpoint in kernel mode, it was probably in a copyin/copyout that we don't want
1071 		 * to abort.  Turn off watchpoints and keep going; we'll turn them back on in return_from_exception.
1072 		 */
1073 		if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
1074 			arm_debug_set(NULL);
1075 			break; /* return to first level handler */
1076 		}
1077 		panic("Unsupported Class %u event code. state=%p class=%u esr=%llu far=%p",
1078 		    class, state, class, esr, (void *)far);
1079 		__builtin_unreachable();
1080 
1081 	case ESR_EC_TRAP_SIMD_FP:
1082 		handle_simd_trap(state, esr);
1083 		__builtin_unreachable();
1084 
1085 	case ESR_EC_ILLEGAL_INSTR_SET:
1086 		panic("Illegal instruction set exception. state=%p class=%u esr=%llu far=%p spsr=0x%x",
1087 		    state, class, esr, (void *)far, get_saved_state_cpsr(state));
1088 		__builtin_unreachable();
1089 
1090 	case ESR_EC_MCR_MRC_CP15_TRAP:
1091 	case ESR_EC_MCRR_MRRC_CP15_TRAP:
1092 	case ESR_EC_MCR_MRC_CP14_TRAP:
1093 	case ESR_EC_LDC_STC_CP14_TRAP:
1094 	case ESR_EC_MCRR_MRRC_CP14_TRAP:
1095 		handle_user_trapped_instruction32(state, esr);
1096 		__builtin_unreachable();
1097 
1098 	case ESR_EC_WFI_WFE:
1099 		// Use of WFI or WFE instruction when they have been disabled for EL0
1100 		handle_wf_trap(state);
1101 		__builtin_unreachable();
1102 
1103 	case ESR_EC_FLOATING_POINT_64:
1104 		handle_fp_trap(state, esr);
1105 		__builtin_unreachable();
1106 	case ESR_EC_BTI_FAIL:
1107 #ifdef CONFIG_XNUPOST
1108 		if ((expected_fault_handler != NULL) && expected_fault_handler(state)) {
1109 			break;
1110 		}
1111 #endif /* CONFIG_XNUPOST */
1112 		handle_bti_fail(state, esr);
1113 		__builtin_unreachable();
1114 
1115 	default:
1116 		handle_uncategorized(state);
1117 	}
1118 
1119 #ifdef CONFIG_XNUPOST
1120 	if (saved_expected_fault_handler != NULL) {
1121 		thread->machine.expected_fault_handler = saved_expected_fault_handler;
1122 		thread->machine.expected_fault_addr = saved_expected_fault_addr;
1123 		thread->machine.expected_fault_pc = saved_expected_fault_pc;
1124 	}
1125 #endif /* CONFIG_XNUPOST */
1126 
1127 	if (is_user) {
1128 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1129 		    MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, thread->machine.exception_trace_code) | DBG_FUNC_END,
1130 		    esr, far, get_saved_state_pc(state), 0, 0);
1131 		thread->machine.exception_trace_code = 0;
1132 	} else {
1133 		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1134 		    MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, ARM64_KDBG_CODE_KERNEL | class) | DBG_FUNC_END,
1135 		    esr, VM_KERNEL_ADDRHIDE(far), VM_KERNEL_UNSLIDE(get_saved_state_pc(state)), 0, 0);
1136 	}
1137 
1138 #if CONFIG_SPTM
1139 	if (__improbable(did_initiate_panic_lockdown)) {
1140 #if CONFIG_XNUPOST
1141 		bool can_recover = !!(expected_fault_handler);
1142 #else
1143 		bool can_recover = false;
1144 #endif /* CONFIG_XNU_POST */
1145 
1146 		if (can_recover) {
1147 			/*
1148 			 * If we matched an exception handler, this was a simulated lockdown
1149 			 * and so we can recover. Re-enable preemption if we disabled it.
1150 			 */
1151 			if (current_thread() != NULL) {
1152 				enable_preemption();
1153 			}
1154 		} else {
1155 			/*
1156 			 * fleh already triggered a lockdown but we, for whatever reason,
1157 			 * didn't end up finding a reason to panic. Catch-all panic in this
1158 			 * case.
1159 			 * Note that the panic here has no security benefit as the system is
1160 			 * already hosed, this is merely for telemetry.
1161 			 */
1162 			panic_with_thread_kernel_state("Panic lockdown initiated", state);
1163 		}
1164 	}
1165 #endif /* CONFIG_SPTM */
1166 
1167 #if MACH_ASSERT
1168 	if (preemption_level != sleh_get_preemption_level()) {
1169 		panic("synchronous exception changed preemption level from %d to %d", preemption_level, sleh_get_preemption_level());
1170 	}
1171 #endif
1172 
1173 #if HAS_MTE
1174 	if (is_user) {
1175 		thread->machine.el0_synchronous_trap = false;
1176 	}
1177 #endif
1178 }
1179 
1180 /*
1181  * Uncategorized exceptions are a catch-all for general execution errors.
1182  * ARM64_TODO: For now, we assume this is for undefined instruction exceptions.
1183  */
1184 static void
1185 handle_uncategorized(arm_saved_state_t *state)
1186 {
1187 	exception_type_t           exception = EXC_BAD_INSTRUCTION;
1188 	mach_exception_data_type_t codes[2]  = {EXC_ARM_UNDEFINED};
1189 	mach_msg_type_number_t     numcodes  = 2;
1190 	uint32_t                   instr     = 0;
1191 
1192 	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
1193 
1194 #if CONFIG_DTRACE
1195 
1196 	if (PSR64_IS_USER64(get_saved_state_cpsr(state))) {
1197 		/*
1198 		 * For a 64bit user process, we care about all 4 bytes of the
1199 		 * instr.
1200 		 */
1201 		if (instr == FASTTRAP_ARM64_INSTR || instr == FASTTRAP_ARM64_RET_INSTR) {
1202 			if (dtrace_user_probe(state) == KERN_SUCCESS) {
1203 				return;
1204 			}
1205 		}
1206 	} else if (PSR64_IS_USER32(get_saved_state_cpsr(state))) {
1207 		/*
1208 		 * For a 32bit user process, we check for thumb mode, in
1209 		 * which case we only care about a 2 byte instruction length.
1210 		 * For non-thumb mode, we care about all 4 bytes of the instruction.
1211 		 */
1212 		if (get_saved_state_cpsr(state) & PSR64_MODE_USER32_THUMB) {
1213 			if (((uint16_t)instr == FASTTRAP_THUMB32_INSTR) ||
1214 			    ((uint16_t)instr == FASTTRAP_THUMB32_RET_INSTR)) {
1215 				if (dtrace_user_probe(state) == KERN_SUCCESS) {
1216 					return;
1217 				}
1218 			}
1219 		} else {
1220 			if ((instr == FASTTRAP_ARM32_INSTR) ||
1221 			    (instr == FASTTRAP_ARM32_RET_INSTR)) {
1222 				if (dtrace_user_probe(state) == KERN_SUCCESS) {
1223 					return;
1224 				}
1225 			}
1226 		}
1227 	}
1228 
1229 #endif /* CONFIG_DTRACE */
1230 
1231 	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
1232 		if (IS_ARM_GDB_TRAP(instr)) {
1233 			boolean_t interrupt_state;
1234 			exception = EXC_BREAKPOINT;
1235 
1236 			interrupt_state = ml_set_interrupts_enabled(FALSE);
1237 
1238 			/* Save off the context here (so that the debug logic
1239 			 * can see the original state of this thread).
1240 			 */
1241 			current_thread()->machine.kpcb = state;
1242 
1243 			/* Hop into the debugger (typically either due to a
1244 			 * fatal exception, an explicit panic, or a stackshot
1245 			 * request).
1246 			 */
1247 			DebuggerCall(exception, state);
1248 
1249 			current_thread()->machine.kpcb = NULL;
1250 			(void) ml_set_interrupts_enabled(interrupt_state);
1251 			return;
1252 		} else {
1253 			panic("Undefined kernel instruction: pc=%p instr=%x", (void*)get_saved_state_pc(state), instr);
1254 		}
1255 	}
1256 
1257 	/*
1258 	 * Check for GDB breakpoint via illegal opcode.
1259 	 */
1260 	if (IS_ARM_GDB_TRAP(instr)) {
1261 		exception = EXC_BREAKPOINT;
1262 		codes[0] = EXC_ARM_BREAKPOINT;
1263 		codes[1] = instr;
1264 	} else {
1265 		codes[1] = instr;
1266 	}
1267 
1268 	exception_triage(exception, codes, numcodes);
1269 	__builtin_unreachable();
1270 }
1271 
1272 #if __has_feature(ptrauth_calls)
1273 static inline const char *
1274 ptrauth_key_to_string(ptrauth_key key)
1275 {
1276 	switch (key) {
1277 	case ptrauth_key_asia:
1278 		return "IA";
1279 	case ptrauth_key_asib:
1280 		return "IB";
1281 	case ptrauth_key_asda:
1282 		return "DA";
1283 	case ptrauth_key_asdb:
1284 		return "DB";
1285 	default:
1286 		__builtin_unreachable();
1287 	}
1288 }
1289 
1290 static const char *
1291 ptrauth_handle_brk_trap(void *tstate, uint16_t comment)
1292 {
1293 	kernel_panic_reason_t pr = PERCPU_GET(panic_reason);
1294 	arm_saved_state_t *state = (arm_saved_state_t *)tstate;
1295 
1296 	ptrauth_key key = (ptrauth_key)(comment - PTRAUTH_TRAP_START);
1297 	const char *key_str = ptrauth_key_to_string(key);
1298 
1299 	snprintf(pr->buf, sizeof(pr->buf),
1300 	    "Break 0x%04X instruction exception from kernel. "
1301 	    "Ptrauth failure with %s key resulted in 0x%016llx",
1302 	    comment, key_str, saved_state64(state)->x[16]);
1303 
1304 	return pr->buf;
1305 }
1306 #endif /* __has_feature(ptrauth_calls) */
1307 
1308 #if HAS_TELEMETRY_KERNEL_BRK
1309 static uint32_t bound_chk_violations_event;
1310 
1311 static const char *
1312 xnu_soft_trap_handle_breakpoint(
1313 	void              *tstate,
1314 	uint16_t          comment)
1315 {
1316 #if CONFIG_UBSAN_MINIMAL
1317 	if (comment == UBSAN_SOFT_TRAP_SIGNED_OF) {
1318 		ubsan_handle_brk_trap(tstate, comment);
1319 	}
1320 #else
1321 	(void)tstate;
1322 #endif
1323 
1324 	if (comment == CLANG_SOFT_TRAP_BOUND_CHK) {
1325 		os_atomic_inc(&bound_chk_violations_event, relaxed);
1326 	}
1327 	return NULL;
1328 }
1329 #endif /* HAS_TELEMETRY_KERNEL_BRK */
1330 
1331 static const char *
1332 xnu_hard_trap_handle_breakpoint(void *tstate, uint16_t comment)
1333 {
1334 	kernel_panic_reason_t pr = PERCPU_GET(panic_reason);
1335 	arm_saved_state64_t *state = saved_state64(tstate);
1336 
1337 	switch (comment) {
1338 	case XNU_HARD_TRAP_SAFE_UNLINK:
1339 		snprintf(pr->buf, sizeof(pr->buf),
1340 		    "panic: corrupt list around element %p",
1341 		    (void *)state->x[8]);
1342 		return pr->buf;
1343 
1344 	case XNU_HARD_TRAP_STRING_CHK:
1345 		return "panic: string operation caused an overflow";
1346 
1347 	case XNU_HARD_TRAP_ASSERT_FAILURE:
1348 		/*
1349 		 * Read the implicit assert arguments, see:
1350 		 * ML_TRAP_REGISTER_1: x8
1351 		 * ML_TRAP_REGISTER_2: x16
1352 		 * ML_TRAP_REGISTER_3: x17
1353 		 */
1354 		panic_assert_format(pr->buf, sizeof(pr->buf),
1355 		    (struct mach_assert_hdr *)state->x[8],
1356 		    state->x[16], state->x[17]);
1357 		return pr->buf;
1358 
1359 	default:
1360 		return NULL;
1361 	}
1362 }
1363 
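/*
 * Each KERNEL_BRK_DESCRIPTOR_DEFINE() below claims a range of BRK #imm16
 * comment values.  handle_kernel_breakpoint() below looks up the descriptor
 * for the trapping comment, reports trap telemetry when the descriptor asks
 * for it, invokes its handle_breakpoint callback to build a panic string,
 * and either steps over the BRK (recoverable soft traps) or panics.
 */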
1364 #if __has_feature(ptrauth_calls)
1365 KERNEL_BRK_DESCRIPTOR_DEFINE(ptrauth_desc,
1366     .type                = TRAP_TELEMETRY_TYPE_KERNEL_BRK_PTRAUTH,
1367     .base                = PTRAUTH_TRAP_START,
1368     .max                 = PTRAUTH_TRAP_END,
1369     .options             = BRK_TELEMETRY_OPTIONS_FATAL_DEFAULT,
1370     .handle_breakpoint   = ptrauth_handle_brk_trap);
1371 #endif
1372 
1373 KERNEL_BRK_DESCRIPTOR_DEFINE(clang_desc,
1374     .type                = TRAP_TELEMETRY_TYPE_KERNEL_BRK_CLANG,
1375     .base                = CLANG_ARM_TRAP_START,
1376     .max                 = CLANG_ARM_TRAP_END,
1377     .options             = BRK_TELEMETRY_OPTIONS_FATAL_DEFAULT,
1378     .handle_breakpoint   = NULL);
1379 
1380 KERNEL_BRK_DESCRIPTOR_DEFINE(libcxx_desc,
1381     .type                = TRAP_TELEMETRY_TYPE_KERNEL_BRK_LIBCXX,
1382     .base                = LIBCXX_TRAP_START,
1383     .max                 = LIBCXX_TRAP_END,
1384     .options             = BRK_TELEMETRY_OPTIONS_FATAL_DEFAULT,
1385     .handle_breakpoint   = NULL);
1386 
1387 #if HAS_TELEMETRY_KERNEL_BRK
1388 KERNEL_BRK_DESCRIPTOR_DEFINE(xnu_soft_traps_desc,
1389     .type                = TRAP_TELEMETRY_TYPE_KERNEL_BRK_TELEMETRY,
1390     .base                = XNU_SOFT_TRAP_START,
1391     .max                 = XNU_SOFT_TRAP_END,
1392     .options             = BRK_TELEMETRY_OPTIONS_RECOVERABLE_DEFAULT(
1393 	    /* enable_telemetry */ true),
1394     .handle_breakpoint   = xnu_soft_trap_handle_breakpoint);
1395 #endif /* HAS_TELEMETRY_KERNEL_BRK */
1396 
1397 KERNEL_BRK_DESCRIPTOR_DEFINE(xnu_hard_traps_desc,
1398     .type                = TRAP_TELEMETRY_TYPE_KERNEL_BRK_XNU,
1399     .base                = XNU_HARD_TRAP_START,
1400     .max                 = XNU_HARD_TRAP_END,
1401     .options             = BRK_TELEMETRY_OPTIONS_FATAL_DEFAULT,
1402     .handle_breakpoint   = xnu_hard_trap_handle_breakpoint);
1403 
1404 static void
1405 #if !HAS_TELEMETRY_KERNEL_BRK
1406 __attribute__((noreturn))
1407 #endif
1408 handle_kernel_breakpoint(arm_saved_state_t *state, uint64_t esr)
1409 {
1410 	uint16_t comment = ISS_BRK_COMMENT(esr);
1411 	const struct kernel_brk_descriptor *desc;
1412 	const char *msg = NULL;
1413 
1414 	desc = find_kernel_brk_descriptor_by_comment(comment);
1415 
1416 	if (!desc) {
1417 		goto brk_out;
1418 	}
1419 
1420 #if HAS_TELEMETRY_KERNEL_BRK
1421 	if (desc->options.enable_trap_telemetry) {
1422 		trap_telemetry_report_exception(
1423 			/* trap_type   */ desc->type,
1424 			/* trap_code   */ comment,
1425 			/* options     */ desc->options.telemetry_options,
1426 			/* saved_state */ (void *)state);
1427 	}
1428 #endif
1429 
1430 	if (desc->handle_breakpoint) {
1431 		msg = desc->handle_breakpoint(state, comment);
1432 	}
1433 
1434 #if HAS_TELEMETRY_KERNEL_BRK
1435 	/* Still alive? Check if we should recover. */
1436 	if (desc->options.recoverable) {
1437 		add_saved_state_pc(state, 4);
1438 		return;
1439 	}
1440 #endif
1441 
1442 brk_out:
1443 	if (msg == NULL) {
1444 		kernel_panic_reason_t pr = PERCPU_GET(panic_reason);
1445 
1446 		if (comment == CLANG_ARM_TRAP_BOUND_CHK) {
1447 			msg = tsnprintf(pr->buf, sizeof(pr->buf),
1448 			    "Bounds safety trap");
1449 		} else {
1450 			msg = tsnprintf(pr->buf, sizeof(pr->buf),
1451 			    "Break 0x%04X instruction exception from kernel. "
1452 			    "Panic (by design)",
1453 			    comment);
1454 		}
1455 	}
1456 
1457 	panic_with_thread_kernel_state(msg, state);
1458 	__builtin_unreachable();
1459 #undef MSG_FMT
1460 }
1461 
1462 /*
1463  * Similar in spirit to kernel_brk_descriptor, but with less flexible semantics:
1464  * each descriptor defines a `brk` label range for use from userspace.
1465  * When one is hit, system policy may decide to kill the calling process without giving it the
1466  * opportunity to catch the exception or continue execution from a signal handler.
1467  * This is used to enforce security boundaries: userspace code may use this mechanism
1468  * to reliably terminate when internal inconsistencies are detected.
1469  * Note that we don't invariably terminate without giving the process a say: we might only enforce
1470  * such a policy if a security feature is enabled, for example.
1471  */
1472 typedef struct user_brk_label_range_descriptor {
1473 	uint16_t base;
1474 	uint16_t max;
1475 } user_brk_label_range_descriptor_t;
1476 
1477 const user_brk_label_range_descriptor_t user_brk_descriptors[] = {
1478 #if __has_feature(ptrauth_calls)
1479 	/* PAC failures detected in data by userspace */
1480 	{
1481 		/* Use the exact same label range as kernel PAC */
1482 		.base   = PTRAUTH_TRAP_START,
1483 		.max    = PTRAUTH_TRAP_END,
1484 	},
1485 #endif /* __has_feature(ptrauth_calls) */
1486 	/* Available for use by system libraries when detecting disallowed conditions */
1487 	{
1488 		/* Note this uses the same range as the kernel-specific XNU_HARD_TRAP range */
1489 		.base   = 0xB000,
1490 		.max    = 0xBFFF,
1491 	}
1492 };
1493 const int user_brk_descriptor_count = sizeof(user_brk_descriptors) / sizeof(user_brk_descriptors[0]);
1494 
1495 const static inline user_brk_label_range_descriptor_t *
1496 find_user_brk_descriptor_by_comment(uint16_t comment)
1497 {
1498 	for (int desc_idx = 0; desc_idx < user_brk_descriptor_count; desc_idx++) {
1499 		const user_brk_label_range_descriptor_t* des = &user_brk_descriptors[desc_idx];
1500 		if (comment >= des->base && comment <= des->max) {
1501 			return des;
1502 		}
1503 	}
1504 
1505 	return NULL;
1506 }
1507 
1508 static void
1509 handle_user_breakpoint(arm_saved_state_t *state, uint64_t esr __unused)
1510 {
1511 	exception_type_t           exception = EXC_BREAKPOINT;
1512 	mach_exception_data_type_t codes[2]  = {EXC_ARM_BREAKPOINT};
1513 	mach_msg_type_number_t     numcodes  = 2;
1514 
1515 	if (ESR_EC(esr) == ESR_EC_BRK_AARCH64) {
1516 		/*
1517 		 * Consult the trap labels we know about to decide whether userspace
1518 		 * should be given the opportunity to handle the exception.
1519 		 */
1520 		uint16_t brk_label = ISS_BRK_COMMENT(esr);
1521 		const struct user_brk_label_range_descriptor* descriptor = find_user_brk_descriptor_by_comment(brk_label);
1522 		/*
1523 		 * Note it's no problem if we don't recognize the label.
1524 		 * In this case we'll just go through normal exception delivery.
1525 		 */
1526 		if (descriptor != NULL) {
1527 			exception |= EXC_MAY_BE_UNRECOVERABLE_BIT;
1528 
1529 #if __has_feature(ptrauth_calls)
1530 			/*
1531 			 * We have additional policy specifically for PAC violations.
1532 			 * To make the rest of the code easier to follow, don't set
1533 			 * EXC_MAY_BE_UNRECOVERABLE_BIT here and just set EXC_PTRAUTH_BIT instead.
1534 			 * Conceptually a PAC failure is absolutely 'maybe unrecoverable', but it's
1535 			 * not really worth excising the discrepancy from the plumbing.
1536 			 */
1537 			if (descriptor->base == PTRAUTH_TRAP_START) {
1538 				exception &= ~(EXC_MAY_BE_UNRECOVERABLE_BIT);
1539 				exception |= EXC_PTRAUTH_BIT;
1540 			}
1541 #endif /* __has_feature(ptrauth_calls) */
1542 		}
1543 	}
1544 
1545 	codes[1] = get_saved_state_pc(state);
1546 	exception_triage(exception, codes, numcodes);
1547 	__builtin_unreachable();
1548 }
1549 
1550 static void
1551 handle_watchpoint(vm_offset_t fault_addr)
1552 {
1553 	exception_type_t           exception = EXC_BREAKPOINT;
1554 	mach_exception_data_type_t codes[2]  = {EXC_ARM_DA_DEBUG};
1555 	mach_msg_type_number_t     numcodes  = 2;
1556 
1557 	codes[1] = fault_addr;
1558 	exception_triage(exception, codes, numcodes);
1559 	__builtin_unreachable();
1560 }
1561 
1562 static void
1563 handle_abort(arm_saved_state_t *state, uint64_t esr, vm_offset_t fault_addr,
1564     abort_inspector_t inspect_abort, abort_handler_t handler, expected_fault_handler_t expected_fault_handler)
1565 {
1566 	fault_status_t fault_code;
1567 	vm_prot_t      fault_type;
1568 
1569 	inspect_abort(ESR_ISS(esr), &fault_code, &fault_type);
1570 	handler(state, esr, fault_addr, fault_code, fault_type, expected_fault_handler);
1571 }
1572 
1573 static void
1574 inspect_instruction_abort(uint32_t iss, fault_status_t *fault_code, vm_prot_t *fault_type)
1575 {
1576 	getCpuDatap()->cpu_stat.instr_ex_cnt++;
1577 	*fault_code = ISS_IA_FSC(iss);
1578 	*fault_type = (VM_PROT_READ | VM_PROT_EXECUTE);
1579 }
1580 
1581 static void
1582 inspect_data_abort(uint32_t iss, fault_status_t *fault_code, vm_prot_t *fault_type)
1583 {
1584 	getCpuDatap()->cpu_stat.data_ex_cnt++;
1585 	*fault_code = ISS_DA_FSC(iss);
1586 
1587 	/*
1588 	 * Cache maintenance operations always report faults as write access.
1589 	 * Change these to read access, unless they report a permission fault.
1590 	 * Only certain cache maintenance operations (e.g. 'dc ivac') require write
1591 	 * access to the mapping, but if a cache maintenance operation that only requires
1592 	 * read access generates a permission fault, then we will not be able to handle
1593 	 * the fault regardless of whether we treat it as a read or write fault.
1594 	 */
1595 	if ((iss & ISS_DA_WNR) && (!(iss & ISS_DA_CM) || is_permission_fault(*fault_code))) {
1596 		*fault_type = (VM_PROT_READ | VM_PROT_WRITE);
1597 	} else {
1598 		*fault_type = (VM_PROT_READ);
1599 	}
1600 }
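
/*
 * Worked example of the classification above, restated as a small helper: a
 * plain store (WnR set, CM clear) is treated as a write; a cache-maintenance
 * op (WnR and CM set) is demoted to a read, unless the FSC reports a
 * permission fault, in which case the write intent is preserved.
 */
static vm_prot_t
example_data_abort_fault_type(uint32_t iss)
{
	if ((iss & ISS_DA_WNR) && (!(iss & ISS_DA_CM) || is_permission_fault(ISS_DA_FSC(iss)))) {
		return VM_PROT_READ | VM_PROT_WRITE;
	}
	return VM_PROT_READ;
}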
1601 
1602 #if __has_feature(ptrauth_calls)
1603 static inline uint64_t
1604 fault_addr_bitmask(unsigned int bit_from, unsigned int bit_to)
1605 {
1606 	return ((1ULL << (bit_to - bit_from + 1)) - 1) << bit_from;
1607 }
1608 
1609 static inline bool
1610 fault_addr_bit(vm_offset_t fault_addr, unsigned int bit)
1611 {
1612 	return (bool)((fault_addr >> bit) & 1);
1613 }
1614 
1615 extern int gARM_FEAT_FPAC;
1616 extern int gARM_FEAT_FPACCOMBINE;
1617 extern int gARM_FEAT_PAuth2;
1618 
1619 /**
1620  * Determines whether a fault address taken at EL0 contains a PAC error code
1621  * corresponding to the specified kind of ptrauth key.
1622  */
1623 static bool
1624 user_fault_matches_pac_error_code(vm_offset_t fault_addr, uint64_t pc, bool data_key)
1625 {
1626 	if (gARM_FEAT_FPACCOMBINE) {
1627 		/*
1628 		 * CPUs with FPACCOMBINE always raise PAC Fail exceptions during
1629 		 * PAC failure.  If the CPU took any other kind of exception, we
1630 		 * can rule out PAC as the root cause.
1631 		 */
1632 		return false;
1633 	}
1634 
1635 	if (data_key && gARM_FEAT_FPAC) {
1636 		uint32_t instr;
1637 		int err = copyin(pc, (char *)&instr, sizeof(instr));
1638 		if (!err && !ARM64_INSTR_IS_LDRAx(instr)) {
1639 			/*
1640 			 * On FPAC-enabled devices, PAC failure can only cause
1641 			 * data aborts during "combined" LDRAx instructions.  If
1642 			 * PAC fails during a discrete AUTxx + LDR/STR
1643 			 * instruction sequence, then the AUTxx instruction
1644 			 * raises a PAC Fail exception rather than poisoning its
1645 			 * output address.
1646 			 *
1647 			 * In principle the same logic applies to instruction
1648 			 * aborts.  But we have no way to identify the exact
1649 			 * instruction that caused the abort, so we can't tell
1650 			 * if it was a combined branch + auth instruction.
1651 			 */
1652 			return false;
1653 		}
1654 	}
1655 
1656 	bool instruction_tbi = !(get_tcr() & TCR_TBID0_TBI_DATA_ONLY);
1657 	bool tbi = data_key || __improbable(instruction_tbi);
1658 
1659 	if (gARM_FEAT_PAuth2) {
1660 		/*
1661 		 * EnhancedPAC2 CPUs don't encode error codes at fixed positions, so
1662 		 * treat all non-canonical address bits like potential poison bits.
1663 		 */
1664 		uint64_t mask = fault_addr_bitmask(64 - T0SZ_BOOT, 54);
1665 		if (!tbi) {
1666 			mask |= fault_addr_bitmask(56, 63);
1667 		}
1668 		return (fault_addr & mask) != 0;
1669 	} else {
1670 		unsigned int poison_shift;
1671 		if (tbi) {
1672 			poison_shift = 53;
1673 		} else {
1674 			poison_shift = 61;
1675 		}
1676 
1677 		/* PAC error codes are always in the form key_number:NOT(key_number) */
1678 		bool poison_bit_1 = fault_addr_bit(fault_addr, poison_shift);
1679 		bool poison_bit_2 = fault_addr_bit(fault_addr, poison_shift + 1);
1680 		return poison_bit_1 != poison_bit_2;
1681 	}
1682 }
1683 #endif /* __has_feature(ptrauth_calls) */
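
#if __has_feature(ptrauth_calls)
/*
 * Worked example of the legacy (non-EnhancedPAC2) poison check above: with
 * TBI in effect, an auth failure flips exactly one of bits 53/54, so an
 * address such as 0x0020000012340000 (bit 53 set, bit 54 clear) matches,
 * while a clean user address (both bits clear) does not.  The fixed shift of
 * 53 corresponds to the TBI case handled in user_fault_matches_pac_error_code().
 */
static bool
example_legacy_pac_poison_check(vm_offset_t fault_addr)
{
	const unsigned int poison_shift = 53;
	bool poison_bit_1 = fault_addr_bit(fault_addr, poison_shift);
	bool poison_bit_2 = fault_addr_bit(fault_addr, poison_shift + 1);
	/* PAC error codes are always in the form key_number:NOT(key_number) */
	return poison_bit_1 != poison_bit_2;
}
#endif /* __has_feature(ptrauth_calls) */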
1684 
1685 /**
1686  * Determines whether the userland thread has a JIT region in RW mode, TPRO
1687  * in RW mode, or JCTL_EL0 in pointer signing mode.  A fault in any of these trusted
1688  * code paths may indicate an attack on WebKit.  Rather than letting a
1689  * potentially-compromised process try to handle the exception, the kernel kills
1690  * the process and generates a crash report.
1691  */
1692 static self_restrict_mode_t
1693 user_fault_in_self_restrict_mode(thread_t thread __unused)
1694 {
1695 	self_restrict_mode_t out = SELF_RESTRICT_NONE;
1696 
1697 	return out;
1698 }
1699 
1700 static void
1701 handle_pc_align(arm_saved_state_t *ss)
1702 {
1703 	exception_type_t exc;
1704 	mach_exception_data_type_t codes[2];
1705 	mach_msg_type_number_t numcodes = 2;
1706 
1707 	if (!PSR64_IS_USER(get_saved_state_cpsr(ss))) {
1708 		panic_with_thread_kernel_state("PC alignment exception from kernel.", ss);
1709 	}
1710 
1711 	exc = EXC_BAD_ACCESS;
1712 #if __has_feature(ptrauth_calls)
1713 	uint64_t pc = get_saved_state_pc(ss);
1714 	if (user_fault_matches_pac_error_code(pc, pc, false)) {
1715 		exc |= EXC_PTRAUTH_BIT;
1716 	}
1717 #endif /* __has_feature(ptrauth_calls) */
1718 
1719 	codes[0] = EXC_ARM_DA_ALIGN;
1720 	codes[1] = get_saved_state_pc(ss);
1721 
1722 	exception_triage(exc, codes, numcodes);
1723 	__builtin_unreachable();
1724 }
1725 
1726 static void
1727 handle_sp_align(arm_saved_state_t *ss)
1728 {
1729 	exception_type_t exc;
1730 	mach_exception_data_type_t codes[2];
1731 	mach_msg_type_number_t numcodes = 2;
1732 
1733 	if (!PSR64_IS_USER(get_saved_state_cpsr(ss))) {
1734 		panic_with_thread_kernel_state("SP alignment exception from kernel.", ss);
1735 	}
1736 
1737 	exc = EXC_BAD_ACCESS;
1738 #if __has_feature(ptrauth_calls)
1739 	if (user_fault_matches_pac_error_code(get_saved_state_sp(ss), get_saved_state_pc(ss), true)) {
1740 		exc |= EXC_PTRAUTH_BIT;
1741 	}
1742 #endif /* __has_feature(ptrauth_calls) */
1743 
1744 	codes[0] = EXC_ARM_SP_ALIGN;
1745 	codes[1] = get_saved_state_sp(ss);
1746 
1747 	exception_triage(exc, codes, numcodes);
1748 	__builtin_unreachable();
1749 }
1750 
1751 static void
1752 handle_wf_trap(arm_saved_state_t *state)
1753 {
1754 	exception_type_t exc;
1755 	mach_exception_data_type_t codes[2];
1756 	mach_msg_type_number_t numcodes = 2;
1757 	uint32_t instr = 0;
1758 
1759 	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
1760 
1761 	exc = EXC_BAD_INSTRUCTION;
1762 	codes[0] = EXC_ARM_UNDEFINED;
1763 	codes[1] = instr;
1764 
1765 	exception_triage(exc, codes, numcodes);
1766 	__builtin_unreachable();
1767 }
1768 
1769 static void
1770 handle_fp_trap(arm_saved_state_t *state, uint64_t esr)
1771 {
1772 	exception_type_t exc = EXC_ARITHMETIC;
1773 	mach_exception_data_type_t codes[2];
1774 	mach_msg_type_number_t numcodes = 2;
1775 	uint32_t instr = 0;
1776 
1777 	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
1778 		panic_with_thread_kernel_state("Floating point exception from kernel", state);
1779 	}
1780 
1781 	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
1782 	codes[1] = instr;
1783 
1784 	/* The floating point trap flags are only valid if TFV is set. */
1785 	if (!fp_exceptions_enabled) {
1786 		exc = EXC_BAD_INSTRUCTION;
1787 		codes[0] = EXC_ARM_UNDEFINED;
1788 	} else if (!(esr & ISS_FP_TFV)) {
1789 		codes[0] = EXC_ARM_FP_UNDEFINED;
1790 	} else if (esr & ISS_FP_UFF) {
1791 		codes[0] = EXC_ARM_FP_UF;
1792 	} else if (esr & ISS_FP_OFF) {
1793 		codes[0] = EXC_ARM_FP_OF;
1794 	} else if (esr & ISS_FP_IOF) {
1795 		codes[0] = EXC_ARM_FP_IO;
1796 	} else if (esr & ISS_FP_DZF) {
1797 		codes[0] = EXC_ARM_FP_DZ;
1798 	} else if (esr & ISS_FP_IDF) {
1799 		codes[0] = EXC_ARM_FP_ID;
1800 	} else if (esr & ISS_FP_IXF) {
1801 		codes[0] = EXC_ARM_FP_IX;
1802 	} else {
1803 		panic("Unrecognized floating point exception, state=%p, esr=%#llx", state, esr);
1804 	}
1805 
1806 	exception_triage(exc, codes, numcodes);
1807 	__builtin_unreachable();
1808 }
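
/*
 * Minimal sketch of the ISS decoding above: when the trapped-flags are valid
 * (ISS_FP_TFV), each cumulative exception bit maps to one EXC_ARM_FP_* code,
 * with precedence following the if/else chain in handle_fp_trap().  For
 * instance, a divide-by-zero trap reports ISS_FP_DZF and is surfaced as
 * EXC_ARM_FP_DZ.
 */
static mach_exception_data_type_t
example_fp_code_for_divide_by_zero(uint64_t esr)
{
	if ((esr & ISS_FP_TFV) && (esr & ISS_FP_DZF)) {
		return EXC_ARM_FP_DZ;
	}
	/* Without valid trap flags, the trap is reported as undefined. */
	return EXC_ARM_FP_UNDEFINED;
}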
1809 
1810 
1811 
1812 /*
1813  * handle_alignment_fault_from_user:
1814  *   state: Saved state
1815  *
1816  * Attempts to deal with an alignment fault from userspace (possibly by
1817  * emulating the faulting instruction).  If emulation fails due to an
1818  * unserviceable fault, the ESR for that fault will be stored in the
1819  * recovery_esr field of the thread by the exception code.
1820  *
1821  * Returns:
1822  *   -1:     Emulation failed (emulation of state/instr not supported)
1823  *   0:      Successfully emulated the instruction
1824  *   EFAULT: Emulation failed (probably due to permissions)
1825  *   EINVAL: Emulation failed (probably due to a bad address)
1826  */
1827 
1828 
1829 static int
1830 handle_alignment_fault_from_user(arm_saved_state_t *state, kern_return_t *vmfr)
1831 {
1832 	int ret = -1;
1833 
1834 #pragma unused (state)
1835 #pragma unused (vmfr)
1836 
1837 	return ret;
1838 }
1839 
1840 
1841 
1842 #if HAS_ARM_FEAT_SME
1843 static void
1844 handle_sme_trap(arm_saved_state_t *state, uint64_t esr)
1845 {
1846 	exception_type_t exc = EXC_BAD_INSTRUCTION;
1847 	mach_exception_data_type_t codes[2] = {EXC_ARM_UNDEFINED};
1848 	mach_msg_type_number_t numcodes = 2;
1849 
1850 	if (!PSR64_IS_USER(get_saved_state_cpsr(state))) {
1851 		panic("SME exception from kernel, state=%p, esr=%#llx", state, esr);
1852 	}
1853 	if (!arm_sme_version()) {
1854 		/*
1855 		 * If SME is disabled in software but userspace executes an SME
1856 		 * instruction anyway, then the CPU will still raise an
1857 		 * SME-specific trap.  Triage it as if the CPU raised an
1858 		 * undefined-instruction trap.
1859 		 */
1860 		exception_triage(exc, codes, numcodes);
1861 		__builtin_unreachable();
1862 	}
1863 
1864 	if (ISS_SME_SMTC(ESR_ISS(esr)) == ISS_SME_SMTC_CAPCR) {
1865 		thread_t thread = current_thread();
1866 		switch (machine_thread_sme_state_alloc(thread)) {
1867 		case KERN_SUCCESS:
1868 			return;
1869 
1870 
1871 		default:
1872 			panic("Failed to allocate SME state for thread %p", thread);
1873 		}
1874 	}
1875 
1876 	uint32_t instr;
1877 	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
1878 	codes[1] = instr;
1879 
1880 	exception_triage(exc, codes, numcodes);
1881 	__builtin_unreachable();
1882 }
1883 #endif /* HAS_ARM_FEAT_SME */
1884 
1885 static void
1886 handle_sw_step_debug(arm_saved_state_t *state)
1887 {
1888 	thread_t thread = current_thread();
1889 	exception_type_t exc;
1890 	mach_exception_data_type_t codes[2];
1891 	mach_msg_type_number_t numcodes = 2;
1892 
1893 	if (!PSR64_IS_USER(get_saved_state_cpsr(state))) {
1894 		panic_with_thread_kernel_state("SW_STEP_DEBUG exception from kernel.", state);
1895 	}
1896 
1897 	// Disable single step and unmask interrupts (in the saved state, anticipating next exception return)
1898 	if (thread->machine.DebugData != NULL) {
1899 		thread->machine.DebugData->uds.ds64.mdscr_el1 &= ~0x1;
1900 	} else {
1901 		panic_with_thread_kernel_state("SW_STEP_DEBUG exception thread DebugData is NULL.", state);
1902 	}
1903 
1904 	mask_user_saved_state_cpsr(thread->machine.upcb, 0, PSR64_SS | DAIF_ALL);
1905 
1906 	// Special encoding for gdb single step event on ARM
1907 	exc = EXC_BREAKPOINT;
1908 	codes[0] = 1;
1909 	codes[1] = 0;
1910 
1911 	exception_triage(exc, codes, numcodes);
1912 	__builtin_unreachable();
1913 }
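
/*
 * Minimal sketch of the single-step teardown above, assuming a thread whose
 * DebugData is already populated: bit 0 of MDSCR_EL1 (the SS enable) is
 * cleared in the per-thread debug state, and the pending SPSR.SS bit plus any
 * masked interrupts are cleared in the saved state so the next exception
 * return resumes normal execution.
 */
static void
example_clear_single_step(thread_t thread)
{
	if (thread->machine.DebugData != NULL) {
		thread->machine.DebugData->uds.ds64.mdscr_el1 &= ~0x1ULL;
	}
	mask_user_saved_state_cpsr(thread->machine.upcb, 0, PSR64_SS | DAIF_ALL);
}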
1914 
1915 #if MACH_ASSERT
1916 TUNABLE_WRITEABLE(self_restrict_mode_t, panic_on_jit_guard, "panic_on_jit_guard", SELF_RESTRICT_NONE);
1917 #endif /* MACH_ASSERT */
1918 
1919 static void
1920 handle_user_abort(arm_saved_state_t *state, uint64_t esr, vm_offset_t fault_addr,
1921     fault_status_t fault_code, vm_prot_t fault_type, expected_fault_handler_t expected_fault_handler)
1922 {
1923 	exception_type_t           exc      = EXC_BAD_ACCESS;
1924 	mach_exception_data_type_t codes[2];
1925 	mach_msg_type_number_t     numcodes = 2;
1926 	thread_t                   thread   = current_thread();
1927 
1928 	(void)expected_fault_handler;
1929 
1930 	if (__improbable(!SPSR_INTERRUPTS_ENABLED(get_saved_state_cpsr(state)))) {
1931 		panic_with_thread_kernel_state("User abort from non-interruptible context", state);
1932 	}
1933 
1934 	thread->iotier_override = THROTTLE_LEVEL_NONE; /* Reset IO tier override before handling abort from userspace */
1935 
1936 	if (!is_servicible_fault(fault_code, esr) &&
1937 	    thread->t_rr_state.trr_fault_state != TRR_FAULT_NONE) {
1938 		thread_reset_pcs_done_faulting(thread);
1939 	}
1940 
1941 #if HAS_MTE
1942 	if (is_tag_check_fault(fault_code)) {
1943 		pmap_t current_pmap = current_map()->pmap;
1944 		codes[0] = tag_check_fault_type(current_pmap, fault_addr);
1945 	} else if (is_canonical_memory_permission_fault(esr)) {
1946 		codes[0] = KERN_PROTECTION_FAILURE;
1947 	} else
1948 #endif
1949 	if (is_vm_fault(fault_code)) {
1950 		vm_map_t        map = thread->map;
1951 		vm_offset_t     vm_fault_addr = fault_addr;
1952 		kern_return_t   result = KERN_FAILURE;
1953 
1954 		assert(map != kernel_map);
1955 
1956 		if (!(fault_type & VM_PROT_EXECUTE)) {
1957 			vm_fault_addr = VM_USER_STRIP_TBI(fault_addr);
1958 		}
1959 
1960 		/* check to see if it is just a pmap ref/modify fault */
1961 		if (!is_translation_fault(fault_code)) {
1962 			result = arm_fast_fault(map->pmap,
1963 			    vm_fault_addr,
1964 			    fault_type, (fault_code == FSC_ACCESS_FLAG_FAULT_L3), TRUE);
1965 		}
1966 		if (result != KERN_SUCCESS) {
1967 
1968 			{
1969 				/* We have to fault the page in */
1970 				result = vm_fault(map, vm_fault_addr, fault_type,
1971 				    /* change_wiring */ FALSE, VM_KERN_MEMORY_NONE, THREAD_ABORTSAFE,
1972 				    /* caller_pmap */ NULL, /* caller_pmap_addr */ 0);
1973 			}
1974 		}
1975 		if (thread->t_rr_state.trr_fault_state != TRR_FAULT_NONE) {
1976 			thread_reset_pcs_done_faulting(thread);
1977 		}
1978 		if (result == KERN_SUCCESS || result == KERN_ABORTED) {
1979 			return;
1980 		}
1981 
1982 		/*
1983 		 * vm_fault() should never return KERN_FAILURE for page faults from user space.
1984 		 * If it does, we're leaking preemption disables somewhere in the kernel.
1985 		 */
1986 		if (__improbable(result == KERN_FAILURE)) {
1987 			panic("vm_fault() KERN_FAILURE from user fault on thread %p", thread);
1988 		}
1989 
1990 		codes[0] = result;
1991 	} else if (is_alignment_fault(fault_code)) {
1992 		kern_return_t vmfkr = KERN_SUCCESS;
1993 		thread->machine.recover_esr = 0;
1994 		thread->machine.recover_far = 0;
1995 		int result = handle_alignment_fault_from_user(state, &vmfkr);
1996 		if (result == 0) {
1997 			/* Either the instruction was successfully emulated, or the
1998 			 * copyin() of the instruction for decode/emulation failed.
1999 			 * Either way, continue or redrive the instruction.
2000 			 */
2001 			thread_exception_return();
2002 		} else if (((result == EFAULT) || (result == EINVAL)) &&
2003 		    (thread->machine.recover_esr == 0)) {
2004 			/*
2005 			 * If we didn't actually take a fault, but got one of
2006 			 * these errors, then we failed basic sanity checks of
2007 			 * the fault address.  Treat this as an invalid
2008 			 * address.
2009 			 */
2010 			codes[0] = KERN_INVALID_ADDRESS;
2011 		} else if ((result == EFAULT) &&
2012 		    (thread->machine.recover_esr)) {
2013 			/*
2014 			 * Since alignment aborts are prioritized
2015 			 * ahead of translation aborts, the misaligned
2016 			 * atomic emulation flow may have triggered a
2017 			 * VM pagefault, which the VM could not resolve.
2018 			 * Report the VM fault error in codes[]
2019 			 */
2020 
2021 			codes[0] = vmfkr;
2022 			assertf(vmfkr != KERN_SUCCESS, "Unexpected vmfkr 0x%x", vmfkr);
2023 			/* Cause ESR_EC to reflect an EL0 abort */
2024 			thread->machine.recover_esr &= ~ESR_EC_MASK;
2025 			thread->machine.recover_esr |= (ESR_EC_DABORT_EL0 << ESR_EC_SHIFT);
2026 			set_saved_state_esr(thread->machine.upcb, thread->machine.recover_esr);
2027 			set_saved_state_far(thread->machine.upcb, thread->machine.recover_far);
2028 			fault_addr = thread->machine.recover_far;
2029 		} else {
2030 			/* This was just an unsupported alignment
2031 			 * exception. Misaligned atomic emulation
2032 			 * timeouts fall in this category.
2033 			 */
2034 			codes[0] = EXC_ARM_DA_ALIGN;
2035 		}
2036 	} else if (is_parity_error(fault_code)) {
2037 #if defined(APPLE_ARM64_ARCH_FAMILY)
2038 		/*
2039 		 * Platform errors are handled in sleh_sync before interrupts are enabled.
2040 		 */
2041 #else
2042 		panic("User parity error.");
2043 #endif
2044 	} else {
2045 		codes[0] = KERN_FAILURE;
2046 	}
2047 
2048 #if CODE_SIGNING_MONITOR
2049 	/*
2050 	 * If the code reaches here, it means we weren't able to resolve the fault and we're
2051 	 * going to be sending the task an exception. On systems which have the code signing
2052 	 * monitor enabled, an execute fault which cannot be handled must result in sending
2053 	 * a SIGKILL to the task.
2054 	 */
2055 	if (is_vm_fault(fault_code) && (fault_type & VM_PROT_EXECUTE)) {
2056 		csm_code_signing_violation(current_proc(), fault_addr);
2057 	}
2058 #endif
2059 
2060 	codes[1] = fault_addr;
2061 #if __has_feature(ptrauth_calls)
2062 	bool is_data_abort = (ESR_EC(esr) == ESR_EC_DABORT_EL0);
2063 	if (user_fault_matches_pac_error_code(fault_addr, get_saved_state_pc(state), is_data_abort)) {
2064 		exc |= EXC_PTRAUTH_BIT;
2065 	}
2066 #endif /* __has_feature(ptrauth_calls) */
2067 
2068 	const self_restrict_mode_t self_restrict_mode = user_fault_in_self_restrict_mode(thread);
2069 	if ((self_restrict_mode != SELF_RESTRICT_NONE) &&
2070 	    task_is_jit_exception_fatal(get_threadtask(thread))) {
2071 		int flags = PX_KTRIAGE;
2072 		exception_info_t info = {
2073 			.os_reason = OS_REASON_SELF_RESTRICT,
2074 			.exception_type = exc,
2075 			.mx_code = codes[0],
2076 			.mx_subcode = codes[1]
2077 		};
2078 
2079 #if MACH_ASSERT
2080 		/*
2081 		 * Case: panic_on_jit_guard=1. Catch an early process creation TPRO issue causing
2082 		 * rdar://129742083. Only panic during early process creation (1 thread, few syscalls
2083 		 * issued) to avoid spurious panics.
2084 		 */
2085 		const self_restrict_mode_t self_restrict_panic_mask = panic_on_jit_guard & self_restrict_mode;
2086 		bool should_panic = ((self_restrict_panic_mask == SELF_RESTRICT_ANY) &&
2087 		    (current_task()->thread_count == 1) &&
2088 		    (thread->syscalls_unix < 24));
2089 
2090 		/*
2091 		 * Modes other than ANY will force panic, skipping checks that were done in the ANY case,
2092 		 * but allowing us to filter on a more specific scenario (e.g. TPRO, JIT, etc).  This is
2093 		 * meant to catch a TPRO issue causing rdar://145703251. Restrict to KERN_PROTECTION_FAILURE
2094 		 * only to avoid failures from the more frequent case of KERN_INVALID_ADDRESS that aren't
2095 		 * of interest for that radar.
2096 		 */
2097 		should_panic |= (codes[0] == KERN_PROTECTION_FAILURE)
2098 		    && ((self_restrict_panic_mask & ~SELF_RESTRICT_ANY) != 0);
2099 
2100 		printf("\nGUARD_REASON_JIT exc %d codes=<0x%llx,0x%llx> syscalls %d task %p thread %p va 0x%lx code 0x%x type 0x%x esr 0x%llx\n",
2101 		    exc, codes[0], codes[1], thread->syscalls_unix, current_task(), thread, fault_addr, fault_code, fault_type, esr);
2102 		if (should_panic) {
2103 			panic("GUARD_REASON_JIT exc %d codes=<0x%llx,0x%llx> syscalls %d task %p thread %p va 0x%lx code 0x%x type 0x%x esr 0x%llx state %p j %d t %d s user 0x%llx (0x%llx) jb 0x%llx (0x%llx)",
2104 			    exc, codes[0], codes[1], thread->syscalls_unix, current_task(), thread, fault_addr, fault_code, fault_type, esr, state,
2105 			    0, 0, 0ull, 0ull,
2106 			    0ull, 0ull
2107 			    );
2108 		}
2109 #endif /* MACH_ASSERT */
2110 
2111 		exit_with_mach_exception(current_proc(), info, flags);
2112 	}
2113 
2114 #if HAS_MTE
2115 	if (codes[0] == EXC_ARM_MTE_TAGCHECK_FAIL || codes[0] == EXC_ARM_MTE_CANONICAL_FAIL) {
2116 		/*
2117 		 * If soft-mode is enabled, we trigger a simulated crash, then we'll clear TCF0
2118 		 * and let the thread try again.
2119 		 */
2120 		if (task_has_sec_soft_mode(current_task())) {
2121 			mte_send_sync_soft_mode_exception(thread, /* fault_addr */ codes[1], /* mx_code */ codes[0]);
2122 			/* Disable tag checking for userspace addresses. This will be our first and last tag check fault. */
2123 			mte_disable_user_checking(current_task());
2124 
2125 			if (thread->t_rr_state.trr_fault_state != TRR_FAULT_NONE) {
2126 				thread_reset_pcs_done_faulting(thread);
2127 			}
2128 			/* Retry with tag checking disabled. */
2129 			thread_exception_return();
2130 		}
2131 
2132 		/* Hard-mode: */
2133 		int flags = PX_KTRIAGE;
2134 		exception_info_t info = {
2135 			.os_reason = OS_REASON_MTE_FAIL,
2136 			.exception_type = exc,
2137 			.mx_code = codes[0],
2138 			.mx_subcode = codes[1]
2139 		};
2140 		exit_with_mach_exception(current_proc(), info, flags);
2141 	}
2142 #endif /* HAS_MTE */
2143 
2144 	exception_triage(exc, codes, numcodes);
2145 	__builtin_unreachable();
2146 }
2147 
2148 /**
2149  * Panic because the kernel abort handler tried to apply a recovery handler that
2150  * isn't inside copyio_recover_table[].
2151  *
2152  * @param state original saved-state
2153  * @param recover invalid recovery handler
2154  */
2155 __attribute__((noreturn, used))
2156 static void
2157 panic_on_invalid_recovery_handler(arm_saved_state_t *state, struct copyio_recovery_entry *recover)
2158 {
2159 	panic("attempt to set invalid recovery handler %p on kernel saved-state %p", recover, state);
2160 }
2161 
2162 /**
2163  * Update a thread saved-state to store an error code in x0 and branch to a
2164  * copyio recovery handler.
2165  *
2166  * @param state original saved-state
2167  * @param esr ESR_ELx value for the fault taken
2168  * @param fault_addr FAR_ELx value for the fault taken
2169  * @param thread target thread
2170  * @param recover destination copyio recovery handler
2171  * @param x0 error code to populate into x0
2172  */
2173 static void
2174 handle_kernel_abort_recover_with_error_code(
2175 	arm_saved_state_t              *state,
2176 	uint64_t                        esr,
2177 	vm_offset_t                     fault_addr,
2178 	thread_t                        thread,
2179 	struct copyio_recovery_entry   *_Nonnull recover,
2180 	uint64_t                       x0)
2181 {
2182 	thread->machine.recover_esr = esr;
2183 	thread->machine.recover_far = fault_addr;
2184 	saved_state64(state)->x[0] = x0;
2185 #if defined(HAS_APPLE_PAC)
2186 	MANIPULATE_SIGNED_THREAD_STATE(state,
2187 	    "adrp	x6, _copyio_recover_table_end@page		\n"
2188 	    "add	x6, x6, _copyio_recover_table_end@pageoff	\n"
2189 	    "cmp	%[recover], x6					\n"
2190 	    "b.lt	1f						\n"
2191 	    "bl		_panic_on_invalid_recovery_handler		\n"
2192 	    "brk	#0						\n"
2193 	    "1:								\n"
2194 	    "adrp	x6, _copyio_recover_table@page			\n"
2195 	    "add	x6, x6, _copyio_recover_table@pageoff		\n"
2196 	    "subs	x7, %[recover], x6				\n"
2197 	    "b.pl	1f						\n"
2198 	    "bl		_panic_on_invalid_recovery_handler		\n"
2199 	    "brk	#0						\n"
2200 	    "1:								\n"
2201 	    "udiv	x8, x7, %[SIZEOF_RECOVER]			\n"
2202 	    "mul	x8, x8, %[SIZEOF_RECOVER]			\n"
2203 	    "cmp	x7, x8						\n"
2204 	    "b.eq	1f						\n"
2205 	    "bl		_panic_on_invalid_recovery_handler		\n"
2206 	    "brk	#0						\n"
2207 	    "1:								\n"
2208 	    "ldr	x1, [%[recover], %[CRE_RECOVERY]]		\n"
2209 	    "add	x1, x1, x6					\n"
2210 	    "str	x1, [x0, %[SS64_PC]]				\n",
2211 	    [recover] "r"(recover),
2212 	    [SIZEOF_RECOVER] "r"((sizeof(*recover))),
2213 	    [CRE_RECOVERY] "i"(offsetof(struct copyio_recovery_entry, cre_recovery))
2214 	    );
2215 #else
2216 	ptrdiff_t recover_offset = (uintptr_t)recover - (uintptr_t)copyio_recover_table;
2217 	if ((uintptr_t)recover < (uintptr_t)copyio_recover_table ||
2218 	    (uintptr_t)recover >= (uintptr_t)copyio_recover_table_end ||
2219 	    (recover_offset % sizeof(*recover)) != 0) {
2220 		panic_on_invalid_recovery_handler(state, recover);
2221 	}
2222 	saved_state64(state)->pc = copyio_recovery_addr(recover->cre_recovery);
2223 #endif
2224 }
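
/*
 * The checks encoded in the MANIPULATE_SIGNED_THREAD_STATE() assembly above
 * are the same three performed by the non-PAC fallback path: the entry must
 * lie within copyio_recover_table[], and its offset from the table base must
 * be a whole multiple of the entry size.  A minimal C restatement, for
 * reference only:
 */
static inline bool
example_recovery_entry_is_valid(const struct copyio_recovery_entry *recover)
{
	uintptr_t base = (uintptr_t)copyio_recover_table;
	uintptr_t end  = (uintptr_t)copyio_recover_table_end;
	uintptr_t addr = (uintptr_t)recover;

	return addr >= base && addr < end &&
	       ((addr - base) % sizeof(*recover)) == 0;
}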
2225 
2226 static inline void
2227 handle_kernel_abort_recover(
2228 	arm_saved_state_t              *state,
2229 	uint64_t                        esr,
2230 	vm_offset_t                     fault_addr,
2231 	thread_t                        thread,
2232 	struct copyio_recovery_entry   *_Nonnull recover)
2233 {
2234 	handle_kernel_abort_recover_with_error_code(state, esr, fault_addr, thread, recover, EFAULT);
2235 }
2236 
2237 #if HAS_MTE
2238 static void
2239 mte_send_sync_soft_mode_exception(thread_t thread, vm_map_address_t address, mach_exception_data_type_t mx_code)
2240 {
2241 	uint64_t code = mx_code | kGUARD_EXC_MTE_SOFT_MODE;
2242 	EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_VIRT_MEMORY);
2243 	EXC_GUARD_ENCODE_FLAVOR(code, kGUARD_EXC_MTE_SYNC_FAULT);
2244 	thread_guard_violation(thread, code, address, /* fatal */ false);
2245 }
2246 
2247 /*
2248  * We took a fault during a copyio routine, over a user address, in the context
2249  * of a user thread that "synchronously" asked the kernel to access a pointer.
2250  * The intention is to kill the user thread (EXC_BAD_ACCESS), but since we might
2251  * be within an interrupt context, delivery of the exception is deferred to the guard AST.
2252  */
2253 static void
2254 mte_send_sync_kernel_on_user_fault(thread_t thread, vm_map_address_t fault_addr, mach_exception_data_type_t mx_code)
2255 {
2256 	/* Soft-mode: send a sync fault simulated exception. */
2257 	if (task_has_sec_soft_mode(get_threadtask(thread))) {
2258 		mte_send_sync_soft_mode_exception(thread, fault_addr, mx_code);
2259 		return;
2260 	}
2261 
2262 	/* Hard-mode: send an old fashioned Tag Check Fault. */
2263 	set_saved_state_far(thread->machine.upcb, fault_addr);
2264 	int flags = PX_KTRIAGE;
2265 	exception_info_t info = {
2266 		.os_reason = OS_REASON_MTE_FAIL,
2267 		.exception_type = EXC_BAD_ACCESS,
2268 		.mx_code = mx_code,
2269 		.mx_subcode = fault_addr
2270 	};
2271 	exit_with_mach_exception_using_ast(info, flags, /* fatal */ true);
2272 }
2273 
2274 /*
2275  * We took a fault while servicing an AST, over a user address, on behalf of
2276  * a user process. The intention is to kill the user thread. The notification
2277  * is sent to the current thread, so we can use
2278  * thread_guard_violation()->thread_ast_mach_exception().
2279  *
2280  * @param thread current thread.
2281  * @param mx_code must be either a TAG CHECK FAIL or a CANONICAL (TAG CHECK) FAIL.
2282  * @param fault_addr the address that the fault was taken on.
2283  */
2284 static void
2285 mte_send_async_ast_fault(thread_t thread, mach_exception_data_type_t mx_code, vm_map_address_t fault_addr)
2286 {
2287 	assert(mx_code == EXC_ARM_MTE_TAGCHECK_FAIL || mx_code == EXC_ARM_MTE_CANONICAL_FAIL);
2288 	uint64_t code = mx_code;
2289 	bool soft_mode = task_has_sec_soft_mode(get_threadtask(thread));
2290 
2291 	if (soft_mode) {
2292 		code |= kGUARD_EXC_MTE_SOFT_MODE;
2293 	}
2294 
2295 	EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_VIRT_MEMORY);
2296 	EXC_GUARD_ENCODE_FLAVOR(code, kGUARD_EXC_MTE_ASYNC_USER_FAULT);
2297 
2298 	thread_guard_violation(thread, code, fault_addr, /* not fatal in soft_mode */ !soft_mode);
2299 }
2300 
2301 static void
2302 mte_record_async_tag_check_fault_address(vm_map_t map, vm_map_address_t fault_address)
2303 {
2304 	/*
2305 	 * Verify the address being reported (and the min address of the map) don't
2306 	 * conflict with any of the magic values used by this mechanism. These
2307 	 * asserts should not fire currently as the first page of VA is not mappable
2308 	 * in user maps today.
2309 	 */
2310 	assert(fault_address >= VM_ASYNC_TAG_FAULT_MIN_VALID_ADDR);
2311 	assert(vm_map_min(map) >= VM_ASYNC_TAG_FAULT_MIN_VALID_ADDR);
2312 
2313 	/*
2314 	 * Attempt to report the faulting address. If this fails, we know that a
2315 	 * faulting address has already been reported. Accordingly, we can just
2316 	 * ignore the failure and continue on since we never send more than one MTE
2317 	 * guard exception per task anyway.
2318 	 */
2319 	(void)os_atomic_cmpxchg(&map->async_tag_fault_address, 0, fault_address, relaxed);
2320 
2321 	/*
2322 	 * We cannot set the AST here, as we'd need to take a task lock and we may
2323 	 * deadlock. On exit from the switched map operation or on return from
2324 	 * the IOMD read/writeBytes path, the caller will check whether an exception
2325 	 * happened by inspecting `async_tag_fault_address` and act accordingly.
2326 	 */
2327 }
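
/*
 * Minimal sketch of the deferred check described above: the helper name is
 * hypothetical, but the real consumers on the switched-map and IOMD
 * read/writeBytes return paths perform an equivalent inspection of
 * async_tag_fault_address before deciding to deliver a guard exception.
 */
static bool
example_async_tag_fault_was_recorded(vm_map_t map)
{
	return os_atomic_load(&map->async_tag_fault_address, relaxed) != 0;
}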
2328 
2329 /*
2330  * We took a fault accessing a userspace address, while in a kernel thread that
2331  * temporarily switched to the user map in order to do work on behalf of the target process.
2332  * Record the faulting address onto the map.
2333  *
2334  * This is essentially a thin layer over mte_record_async_tag_check_fault_address(), just
2335  * adding sanity checks to ensure we don't start hitting unexpected faults.
2336  */
2337 static void
2338 mte_record_async_kernel_interposed_map_fault_address(vm_map_address_t fault_addr)
2339 {
2340 	vm_map_t map = current_map();
2341 
2342 	assert(vm_kernel_map_is_kernel(current_task()->map));
2343 	assert(!vm_kernel_map_is_kernel(map));
2344 
2345 	if (!map->owning_task && !map->terminated) {
2346 		panic("Kernel tag-check fault on %p @ %#llx prior to vm_map_setup",
2347 		    map, map->async_tag_fault_address);
2348 	}
2349 
2350 	mte_record_async_tag_check_fault_address(map, fault_addr);
2351 }
2352 
2353 static void
2354 handle_kernel_tag_check_fault(arm_saved_state_t *state, uint64_t esr, vm_offset_t fault_addr,
2355     thread_t thread, struct copyio_recovery_entry *recover)
2356 {
2357 	/*
2358 	 * MTE tag check faults are treated as non-recoverable security
2359 	 * violations, even when they're raised inside a copyio routine.
2360 	 *
2361 	 * If the fault happened while accessing a user address inside a copyio
2362 	 * routine in the context of a userspace process, assume the current
2363 	 * process supplied that address and deliver a Mach exception.  This may
2364 	 * manifest either as `EXC_BAD_ACCESS` or a Mach guard exception.
2365 	 * The former is used when the fault was raised while servicing a
2366 	 * synchronous EL0 exception, so the crashing EL0 thread state will
2367 	 * likely have something to do with the root cause.
2368 	 *
2369 	 * If such a fault happened in the context of a kernel thread, assume
2370 	 * that the kernel is doing work on behalf of the process that owns the
2371 	 * current map and set a guard exception on the map to asynchronously
2372 	 * kill it.
2373 	 *
2374 	 * If the fault happened while accessing a kernel address, and the
2375 	 * copyio handler didn't explicitly say that it needs to tolerate kernel
2376 	 * tag check faults, then panic.  (We'll assume the kernel is always
2377 	 * responsible for mistagged kernel addresses: copy_validate() should
2378 	 * keep misbehaving userspace processes from passing those in.)
2379 	 */
2380 
2381 	/* Use the TTBR selector to determine whether it's a user or kernel address. */
2382 	bool is_user_addr = (fault_addr & TTBR_SELECTOR) == 0;
2383 	/* Are we running as a kernel thread? */
2384 	bool is_kernel_thread = vm_kernel_map_is_kernel(current_task()->map);
2385 
2386 	/*
2387 	 * Only attempt recovery if we have a recovery handler associated.
2388 	 * Recovery step will differ depending on whether we faulted on a user or kernel address.
2389 	 */
2390 	if (recover) {
2391 		if (is_user_addr) {
2392 			uint64_t error_code = EFAULT;
2393 			task_t owning_task = current_task();
2394 			bool in_el0_sync_trap = thread->machine.el0_synchronous_trap &&
2395 			    current_cpu_datap()->cpu_int_state == NULL;
2396 
2397 			if (in_el0_sync_trap) {
2398 				/* "Synchronous" software exception. */
2399 				mach_exception_data_type_t code = tag_check_fault_type(current_map()->pmap, fault_addr);
2400 				mte_send_sync_kernel_on_user_fault(thread, fault_addr, code);
2401 			} else {
2402 				/* "Asynchrnous" software exception */
2403 #if DEVELOPMENT || DEBUG
2404 				if (mte_panic_on_async_fault()) {
2405 					panic_with_thread_kernel_state("Kernel AST tag check fault accessing user space", state);
2406 				}
2407 #endif /* DEVELOPMENT || DEBUG */
2408 
2409 				if (is_kernel_thread) {
2410 					/* kernel thread executes with switched map. */
2411 					mte_record_async_kernel_interposed_map_fault_address(fault_addr);
2412 					owning_task = current_map()->owning_task;
2413 				} else {
2414 					/* Asynchronous but within current_thread() */
2415 					mach_exception_data_type_t code = tag_check_fault_type(current_map()->pmap, fault_addr);
2416 					mte_send_async_ast_fault(thread, code, fault_addr);
2417 				}
2418 			}
2419 			/* If in soft-mode, retry with tag checking disabled. */
2420 			if (task_has_sec_soft_mode(owning_task)) {
2421 				mte_disable_user_checking(owning_task);
2422 				error_code = EAGAIN;
2423 			}
2424 			/*
2425 			 * We took this exception from inside kernel copyio code.  Even
2426 			 * if we're not going to retry it, the kernel thread needs to
2427 			 * clean things up by branching to the copyio recovery handler.
2428 			 */
2429 			handle_kernel_abort_recover_with_error_code(state, esr, fault_addr, thread, recover, error_code);
2430 			return;
2431 		} else {
2432 			/* Further filter on the direction of the access (read vs. write) that is actually allowed to fault. */
2433 			bool is_write_access = (ESR_ISS(esr) & ISS_DA_WNR);
2434 
2435 			if ((recover->recover_from_kernel_read_tag_check_fault && !is_write_access) ||
2436 			    (recover->recover_from_kernel_write_tag_check_fault && is_write_access)) {
2437 				/* Are we within an IOMD critical region - that will give us a task to blame. */
2438 				task_t task_providing_faultable_buffer = current_thread_get_iomd_faultable_access_buffer_provider();
2439 				if (task_providing_faultable_buffer != NULL) {
2440 					/* Same drill as the kernel thread case above: record the required information here. */
2441 #if DEVELOPMENT || DEBUG
2442 					mte_record_async_tag_check_fault_address(task_providing_faultable_buffer->map, fault_addr);
2443 					if (mte_panic_on_async_fault()) {
2444 						panic_with_thread_kernel_state("Kernel AST tag check fault accessing physmap", state);
2445 					}
2446 #else /* DEVELOPMENT || DEBUG */
2447 					mte_record_async_tag_check_fault_address(task_providing_faultable_buffer->map, 0xdeadbeef);
2448 #endif /* DEVELOPMENT || DEBUG */
2449 				}
2450 
2451 				/* No fault, just clean recovery returning EFAULT. */
2452 				handle_kernel_abort_recover(state, esr, fault_addr, thread, recover);
2453 				return;
2454 			}
2455 			/* Fallthrough to regular panic scenario. */
2456 		}
2457 	}
2458 
2459 	/*
2460 	 * Everything past this point doesn't have a recovery handler and is a fatal violation.
2461 	 * Package up and report as much useful information as possible.
2462 	 */
2463 #define MSG_FMT "Kernel tag check fault (expected tagged address: 0x%016llx)"
2464 	char msg[strlen(MSG_FMT)
2465 	- strlen("0x%016llx") + strlen("0xFFFFFFFFFFFFFFFF")
2466 	+ 1];
2467 
2468 	vm_address_t expected_tagged_address = vm_memtag_load_tag(fault_addr);
2469 	snprintf(msg, sizeof(msg), MSG_FMT, (uint64_t)expected_tagged_address);
2470 	panic_with_thread_kernel_state(msg, state);
2471 #undef MSG_FMT
2472 }
2473 #endif /* HAS_MTE */
2474 
2475 static void
2476 handle_kernel_abort(arm_saved_state_t *state, uint64_t esr, vm_offset_t fault_addr,
2477     fault_status_t fault_code, vm_prot_t fault_type, expected_fault_handler_t expected_fault_handler)
2478 {
2479 	thread_t thread = current_thread();
2480 	struct copyio_recovery_entry *recover = find_copyio_recovery_entry(
2481 		get_saved_state_pc(state));
2482 
2483 #ifndef CONFIG_XNUPOST
2484 	(void)expected_fault_handler;
2485 #endif /* CONFIG_XNUPOST */
2486 
2487 
2488 #if CONFIG_DTRACE
2489 	if (is_vm_fault(fault_code) && thread->t_dtrace_inprobe) { /* Executing under dtrace_probe? */
2490 		if (dtrace_tally_fault(fault_addr)) { /* Should a fault under dtrace be ignored? */
2491 			/*
2492 			 * Point to next instruction, or recovery handler if set.
2493 			 */
2494 			if (recover) {
2495 				handle_kernel_abort_recover(state, esr, VM_USER_STRIP_PTR(fault_addr), thread, recover);
2496 			} else {
2497 				add_saved_state_pc(state, 4);
2498 			}
2499 			return;
2500 		} else {
2501 			panic_with_thread_kernel_state("Unexpected page fault under dtrace_probe", state);
2502 		}
2503 	}
2504 #endif
2505 
2506 #if HAS_MTE
2507 	if (is_tag_check_fault(fault_code)) {
2508 #ifdef CONFIG_XNUPOST
2509 		if (expected_fault_handler && expected_fault_handler(state)) {
2510 			return;
2511 		}
2512 #endif /* CONFIG_XNUPOST */
2513 
2514 		handle_kernel_tag_check_fault(state, esr, fault_addr, thread, recover);
2515 		return;
2516 	} else
2517 #endif /* HAS_MTE */
2518 	if (is_vm_fault(fault_code)) {
2519 		kern_return_t result = KERN_FAILURE;
2520 		vm_map_t      map;
2521 		int           interruptible;
2522 
2523 #ifdef CONFIG_XNUPOST
2524 		if (expected_fault_handler && expected_fault_handler(state)) {
2525 			return;
2526 		}
2527 #endif /* CONFIG_XNUPOST */
2528 
2529 		if (VM_KERNEL_ADDRESS(fault_addr) || thread == THREAD_NULL || recover == 0) {
2530 			/*
2531 			 * If no recovery handler is supplied, always drive the fault against
2532 			 * the kernel map.  If the fault was taken against a userspace VA, indicating
2533 			 * an unprotected access to user address space, vm_fault() should fail and
2534 			 * ultimately lead to a panic here.
2535 			 */
2536 			map = kernel_map;
2537 			interruptible = THREAD_UNINT;
2538 
2539 #if CONFIG_KERNEL_TAGGING
2540 			/*
2541 			 * If kernel tagging is enabled, canonicalize the address here, so that we have a
2542 			 * chance to find it in the VM ranges. Do not mess with exec fault cases.
2543 			 */
2544 			if (!((fault_type) & VM_PROT_EXECUTE)) {
2545 				fault_addr = vm_memtag_canonicalize(map, fault_addr);
2546 			}
2547 #endif /* CONFIG_KERNEL_TAGGING */
2548 		} else {
2549 			map = thread->map;
2550 
2551 			/**
2552 			 * In the case that the recovery handler is set (e.g., during copyio
2553 			 * and dtrace probes), we don't want the vm_fault() operation to be
2554 			 * aborted early. Those code paths can't handle restarting the
2555 			 * vm_fault() operation so don't allow it to return early without
2556 			 * creating the wanted mapping.
2557 			 */
2558 			interruptible = (recover) ? THREAD_UNINT : THREAD_ABORTSAFE;
2559 
2560 #if HAS_MTE || HAS_MTE_EMULATION_SHIMS
2561 			/*
2562 			 * If we have MTE enabled on the process, allow recovery of tagged
2563 			 * addresses.
2564 			 */
2565 			if (current_task_has_sec_enabled() && recover) {
2566 				if (!((fault_type) & VM_PROT_EXECUTE)) {
2567 					fault_addr = vm_memtag_canonicalize(map, fault_addr);
2568 				}
2569 			}
2570 #endif /* HAS_MTE || HAS_MTE_EMULATION_SHIMS */
2571 		}
2572 
2573 		/*
2574 		 * Ensure no faults in the physical aperture. This could happen if
2575 		 * a page table is incorrectly allocated from the read only region
2576 		 * when running with KTRR.
2577 		 */
2578 		if (__improbable(fault_addr >= physmap_base) && (fault_addr < physmap_end)) {
2579 			panic_with_thread_kernel_state("Unexpected fault in kernel physical aperture", state);
2580 		}
2581 		if (__improbable(fault_addr >= gVirtBase && fault_addr < static_memory_end)) {
2582 			panic_with_thread_kernel_state("Unexpected fault in kernel static region", state);
2583 		}
2584 
2585 		/* check to see if it is just a pmap ref/modify fault */
2586 		if (!is_translation_fault(fault_code)) {
2587 			result = arm_fast_fault(map->pmap,
2588 			    fault_addr,
2589 			    fault_type, (fault_code == FSC_ACCESS_FLAG_FAULT_L3), FALSE);
2590 			if (result == KERN_SUCCESS) {
2591 				return;
2592 			}
2593 		}
2594 
2595 		/**
2596 		 * vm_fault() can be called with preemption disabled (and indeed this is expected for
2597 		 * certain copyio() scenarios), but can't safely be called with interrupts disabled once
2598 		 * the system has gone multi-threaded.  Other than some early-boot situations such as
2599 		 * startup kext loading, kernel paging operations should never be triggered by
2600 		 * non-interruptible code in the first place, so a fault from such a context will
2601 		 * ultimately produce a kernel data abort panic anyway.  In these cases, skip calling
2602 		 * vm_fault() to avoid masking the real kernel panic with a failed VM locking assertion.
2603 		 */
2604 		if (__probable(SPSR_INTERRUPTS_ENABLED(get_saved_state_cpsr(state)) ||
2605 		    startup_phase < STARTUP_SUB_EARLY_BOOT ||
2606 		    current_cpu_datap()->cpu_hibernate)) {
2607 			if (result != KERN_PROTECTION_FAILURE) {
2608 				// VM will query this property when deciding whether to throttle this fault; we don't
2609 				// want to throttle kernel faults taken in copyio paths. The presence of a recovery
2610 				// entry is used as a proxy for being in copyio code.
2611 				bool const was_recover = thread->recover;
2612 				thread->recover = was_recover || recover;
2613 
2614 				/*
2615 				 *  We have to "fault" the page in.
2616 				 */
2617 				result = vm_fault(map, fault_addr, fault_type,
2618 				    /* change_wiring */ FALSE, VM_KERN_MEMORY_NONE, interruptible,
2619 				    /* caller_pmap */ NULL, /* caller_pmap_addr */ 0);
2620 
2621 				thread->recover = was_recover;
2622 			}
2623 
2624 			if (result == KERN_SUCCESS) {
2625 				return;
2626 			}
2627 		}
2628 
2629 		/*
2630 		 *  If we have a recover handler, invoke it now.
2631 		 */
2632 		if (recover) {
2633 			handle_kernel_abort_recover(state, esr, fault_addr, thread, recover);
2634 			return;
2635 		}
2636 
2637 		panic_fault_address = fault_addr;
2638 	} else if (is_alignment_fault(fault_code)) {
2639 		if (recover) {
2640 			handle_kernel_abort_recover(state, esr, fault_addr, thread, recover);
2641 			return;
2642 		}
2643 		panic_with_thread_kernel_state("Unaligned kernel data abort.", state);
2644 	} else if (is_parity_error(fault_code)) {
2645 #if defined(APPLE_ARM64_ARCH_FAMILY)
2646 		/*
2647 		 * Platform errors are handled in sleh_sync before interrupts are enabled.
2648 		 */
2649 #else
2650 		panic_with_thread_kernel_state("Kernel parity error.", state);
2651 #endif
2652 	} else {
2653 		kprintf("Unclassified kernel abort (fault_code=0x%x)\n", fault_code);
2654 	}
2655 
2656 	panic_with_thread_kernel_state("Kernel data abort.", state);
2657 }
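
/*
 * Minimal sketch of the copyio-recovery lookup used at the top of
 * handle_kernel_abort(): a non-NULL entry means the faulting PC lies inside a
 * registered copyio routine, so an unresolvable fault on a user address can
 * be redirected to that routine's recovery label rather than panicking.
 */
static bool
example_pc_is_in_copyio_routine(uint64_t pc)
{
	return find_copyio_recovery_entry(pc) != NULL;
}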
2658 
2659 extern void syscall_trace(struct arm_saved_state * regs);
2660 
2661 static void
2662 handle_svc(arm_saved_state_t *state)
2663 {
2664 	int      trap_no = get_saved_state_svc_number(state);
2665 	thread_t thread  = current_thread();
2666 	struct   proc *p;
2667 
2668 #define handle_svc_kprintf(x...) /* kprintf("handle_svc: " x) */
2669 
2670 #define TRACE_SYSCALL 1
2671 #if TRACE_SYSCALL
2672 	syscall_trace(state);
2673 #endif
2674 
2675 	thread->iotier_override = THROTTLE_LEVEL_NONE; /* Reset IO tier override before handling SVC from userspace */
2676 
2677 	if (trap_no == (int)PLATFORM_SYSCALL_TRAP_NO) {
2678 		platform_syscall(state);
2679 		panic("Returned from platform_syscall()?");
2680 	}
2681 
2682 	current_cached_proc_cred_update();
2683 
2684 	if (trap_no < 0) {
2685 		switch (trap_no) {
2686 		case MACH_ARM_TRAP_ABSTIME:
2687 			handle_mach_absolute_time_trap(state);
2688 			return;
2689 		case MACH_ARM_TRAP_CONTTIME:
2690 			handle_mach_continuous_time_trap(state);
2691 			return;
2692 		}
2693 
2694 		/* Counting would arguably be better done in the handler, but this is how it's been done. */
2695 		thread->syscalls_mach++;
2696 		mach_syscall(state);
2697 	} else {
2698 		/* Counting would arguably be better done in the handler, but this is how it's been done. */
2699 		thread->syscalls_unix++;
2700 		p = get_bsdthreadtask_info(thread);
2701 
2702 		assert(p);
2703 
2704 		unix_syscall(state, thread, p);
2705 	}
2706 }
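
/*
 * Minimal sketch of the SVC dispatch convention above: after the platform
 * syscall trap number is special-cased, negative trap numbers select the Mach
 * side (with the absolute/continuous-time traps short-circuited before
 * mach_syscall()), and non-negative numbers go to the BSD unix_syscall()
 * path.  The enum and helper are purely illustrative.
 */
typedef enum {
	EXAMPLE_SVC_MACH_FAST_TRAP,   /* MACH_ARM_TRAP_ABSTIME / _CONTTIME      */
	EXAMPLE_SVC_MACH_SYSCALL,     /* other negative numbers: mach_syscall() */
	EXAMPLE_SVC_UNIX_SYSCALL,     /* non-negative numbers: unix_syscall()   */
} example_svc_class_t;

static example_svc_class_t
example_classify_svc(int trap_no)
{
	if (trap_no == MACH_ARM_TRAP_ABSTIME || trap_no == MACH_ARM_TRAP_CONTTIME) {
		return EXAMPLE_SVC_MACH_FAST_TRAP;
	}
	return (trap_no < 0) ? EXAMPLE_SVC_MACH_SYSCALL : EXAMPLE_SVC_UNIX_SYSCALL;
}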
2707 
2708 static void
2709 handle_mach_absolute_time_trap(arm_saved_state_t *state)
2710 {
2711 	uint64_t now = mach_absolute_time();
2712 	saved_state64(state)->x[0] = now;
2713 }
2714 
2715 static void
2716 handle_mach_continuous_time_trap(arm_saved_state_t *state)
2717 {
2718 	uint64_t now = mach_continuous_time();
2719 	saved_state64(state)->x[0] = now;
2720 }
2721 
2722 
2723 __attribute__((noreturn))
2724 static void
2725 handle_msr_trap(arm_saved_state_t *state, uint64_t esr)
2726 {
2727 	exception_type_t           exception = EXC_BAD_INSTRUCTION;
2728 	mach_exception_data_type_t codes[2]  = {EXC_ARM_UNDEFINED};
2729 	mach_msg_type_number_t     numcodes  = 2;
2730 	uint32_t                   instr     = 0;
2731 
2732 	if (!is_saved_state64(state)) {
2733 		panic("MSR/MRS trap (ESR 0x%llx) from 32-bit state", esr);
2734 	}
2735 
2736 	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
2737 		panic("MSR/MRS trap (ESR 0x%llx) from kernel", esr);
2738 	}
2739 
2740 	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
2741 	codes[1] = instr;
2742 
2743 	exception_triage(exception, codes, numcodes);
2744 	__builtin_unreachable();
2745 }
2746 
2747 #if __has_feature(ptrauth_calls)
2748 static void
2749 stringify_gpr(unsigned int r, char reg[4])
2750 {
2751 	switch (r) {
2752 	case 29:
2753 		strncpy(reg, "fp", 4);
2754 		return;
2755 
2756 	case 30:
2757 		strncpy(reg, "lr", 4);
2758 		return;
2759 
2760 	case 31:
2761 		strncpy(reg, "xzr", 4);
2762 		return;
2763 
2764 	default:
2765 		snprintf(reg, 4, "x%u", r);
2766 		return;
2767 	}
2768 }
2769 
2770 static void
2771 autxx_instruction_extract_reg(uint32_t instr, char reg[4])
2772 {
2773 	unsigned int rd = ARM64_INSTR_AUTxx_RD_GET(instr);
2774 	stringify_gpr(rd, reg);
2775 }
2776 
2777 static const char *
2778 autix_system_instruction_extract_reg(uint32_t instr)
2779 {
2780 	unsigned int crm_op2 = ARM64_INSTR_AUTIx_SYSTEM_CRM_OP2_GET(instr);
2781 	if (crm_op2 == ARM64_INSTR_AUTIx_SYSTEM_CRM_OP2_AUTIA1716 ||
2782 	    crm_op2 == ARM64_INSTR_AUTIx_SYSTEM_CRM_OP2_AUTIB1716) {
2783 		return "x17";
2784 	} else {
2785 		return "lr";
2786 	}
2787 }
2788 
2789 static void
2790 bxrax_instruction_extract_reg(uint32_t instr, char reg[4])
2791 {
2792 	unsigned int rn = ARM64_INSTR_BxRAx_RN_GET(instr);
2793 	stringify_gpr(rn, reg);
2794 }
2795 
2796 static void
2797 handle_pac_fail(arm_saved_state_t *state, uint64_t esr)
2798 {
2799 	exception_type_t           exception = EXC_BAD_ACCESS | EXC_PTRAUTH_BIT;
2800 	mach_exception_data_type_t codes[2]  = {EXC_ARM_PAC_FAIL};
2801 	mach_msg_type_number_t     numcodes  = 2;
2802 	uint32_t                   instr     = 0;
2803 
2804 	if (!is_saved_state64(state)) {
2805 		panic("PAC failure (ESR 0x%llx) from 32-bit state", esr);
2806 	}
2807 
2808 	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
2809 
2810 	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
2811 #define GENERIC_PAC_FAILURE_MSG_FMT "PAC failure from kernel with %s key"
2812 #define AUTXX_MSG_FMT GENERIC_PAC_FAILURE_MSG_FMT " while authing %s"
2813 #define BXRAX_MSG_FMT GENERIC_PAC_FAILURE_MSG_FMT " while branching to %s"
2814 #define RETAX_MSG_FMT GENERIC_PAC_FAILURE_MSG_FMT " while returning"
2815 #define GENERIC_MSG_FMT GENERIC_PAC_FAILURE_MSG_FMT
2816 #define MAX_PAC_MSG_FMT BXRAX_MSG_FMT
2817 
2818 		char msg[strlen(MAX_PAC_MSG_FMT)
2819 		- strlen("%s") + strlen("IA")
2820 		- strlen("%s") + strlen("xzr")
2821 		+ 1];
2822 		ptrauth_key key = (ptrauth_key)(esr & 0x3);
2823 		const char *key_str = ptrauth_key_to_string(key);
2824 
2825 		if (ARM64_INSTR_IS_AUTxx(instr)) {
2826 			char reg[4];
2827 			autxx_instruction_extract_reg(instr, reg);
2828 			snprintf(msg, sizeof(msg), AUTXX_MSG_FMT, key_str, reg);
2829 		} else if (ARM64_INSTR_IS_AUTIx_SYSTEM(instr)) {
2830 			const char *reg = autix_system_instruction_extract_reg(instr);
2831 			snprintf(msg, sizeof(msg), AUTXX_MSG_FMT, key_str, reg);
2832 		} else if (ARM64_INSTR_IS_BxRAx(instr)) {
2833 			char reg[4];
2834 			bxrax_instruction_extract_reg(instr, reg);
2835 			snprintf(msg, sizeof(msg), BXRAX_MSG_FMT, key_str, reg);
2836 		} else if (ARM64_INSTR_IS_RETAx(instr)) {
2837 			snprintf(msg, sizeof(msg), RETAX_MSG_FMT, key_str);
2838 		} else {
2839 			snprintf(msg, sizeof(msg), GENERIC_MSG_FMT, key_str);
2840 		}
2841 		panic_with_thread_kernel_state(msg, state);
2842 	}
2843 
2844 	codes[1] = instr;
2845 
2846 	exception_triage(exception, codes, numcodes);
2847 	__builtin_unreachable();
2848 }
2849 #endif /* __has_feature(ptrauth_calls) */
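
/*
 * The msg[] sizing idiom used in handle_pac_fail() (and handle_bti_fail()
 * below) relies on strlen() of string literals folding to compile-time
 * constants, so the stack buffer is exactly large enough for the worst-case
 * expansion of the format string.  A minimal standalone restatement, assuming
 * a two-character key name and an at-most-three-character register name as in
 * the original:
 */
static void
example_sized_panic_message(const char *key_str, const char *reg)
{
#define EXAMPLE_FMT "PAC failure from kernel with %s key while authing %s"
	char msg[strlen(EXAMPLE_FMT)
	- strlen("%s") + strlen("IA")
	- strlen("%s") + strlen("xzr")
	+ 1];
	snprintf(msg, sizeof(msg), EXAMPLE_FMT, key_str, reg);
	panic("%s", msg);
#undef EXAMPLE_FMT
}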
2850 
2851 __attribute__((noreturn))
2852 static void
2853 handle_bti_fail(arm_saved_state_t *state, uint64_t esr)
2854 {
2855 	uint32_t btype = (uint32_t) esr & ISS_BTI_BTYPE_MASK;
2856 
2857 	if (!is_saved_state64(state)) {
2858 		/* BTI is an AArch64-only feature, so this should not be possible. */
2859 		panic("BTI failure for 32-bit state? (ESR=0x%llx)", esr);
2860 	}
2861 
2862 	/*
2863 	 * We currently only expect BTI to be enabled for kernel pages, so panic if
2864 	 * we detect otherwise.
2865 	 */
2866 	if (!PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
2867 		panic("Unexpected non-kernel BTI failure? (ESR=0x%llx)", esr);
2868 	}
2869 
2870 #define BTI_FAIL_PTR_FMT "%04x"
2871 #define BTI_FAIL_MSG_FMT "Kernel BTI failure (BTYPE=0x" BTI_FAIL_PTR_FMT ")"
2872 	/* Size the buffer by replacing the format specifier with room for the expanded value (up to 8 hex digits) plus the NUL terminator. */
2873 	char msg[strlen(BTI_FAIL_MSG_FMT) - strlen(BTI_FAIL_PTR_FMT) + 8 + 1];
2874 	snprintf(msg, sizeof(msg), BTI_FAIL_MSG_FMT, btype);
2875 	panic_with_thread_kernel_state(msg, state);
2876 	__builtin_unreachable();
2877 }
2878 
2879 static void
2880 handle_user_trapped_instruction32(arm_saved_state_t *state, uint64_t esr)
2881 {
2882 	exception_type_t           exception = EXC_BAD_INSTRUCTION;
2883 	mach_exception_data_type_t codes[2]  = {EXC_ARM_UNDEFINED};
2884 	mach_msg_type_number_t     numcodes  = 2;
2885 	uint32_t                   instr;
2886 
2887 	if (is_saved_state64(state)) {
2888 		panic("ESR (0x%llx) for instruction trapped from U32, but saved state is 64-bit.", esr);
2889 	}
2890 
2891 	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
2892 		panic("ESR (0x%llx) for instruction trapped from U32, actually came from kernel?", esr);
2893 	}
2894 
2895 	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
2896 	codes[1] = instr;
2897 
2898 	exception_triage(exception, codes, numcodes);
2899 	__builtin_unreachable();
2900 }
2901 
2902 static void
2903 handle_simd_trap(arm_saved_state_t *state, uint64_t esr)
2904 {
2905 	exception_type_t           exception = EXC_BAD_INSTRUCTION;
2906 	mach_exception_data_type_t codes[2]  = {EXC_ARM_UNDEFINED};
2907 	mach_msg_type_number_t     numcodes  = 2;
2908 	uint32_t                   instr     = 0;
2909 
2910 	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
2911 		panic("ESR (0x%llx) for SIMD trap from userland, actually came from kernel?", esr);
2912 	}
2913 
2914 	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
2915 	codes[1] = instr;
2916 
2917 	exception_triage(exception, codes, numcodes);
2918 	__builtin_unreachable();
2919 }
2920 
2921 void
2922 sleh_irq(arm_saved_state_t *state)
2923 {
2924 	cpu_data_t * cdp __unused             = getCpuDatap();
2925 #if MACH_ASSERT
2926 	int preemption_level = sleh_get_preemption_level();
2927 #endif
2928 
2929 
2930 	sleh_interrupt_handler_prologue(state, DBG_INTR_TYPE_OTHER);
2931 
2932 #if USE_APPLEARMSMP
2933 	PE_handle_ext_interrupt();
2934 #else
2935 	/* Run the registered interrupt handler. */
2936 	cdp->interrupt_handler(cdp->interrupt_target,
2937 	    cdp->interrupt_refCon,
2938 	    cdp->interrupt_nub,
2939 	    cdp->interrupt_source);
2940 #endif
2941 
2942 	entropy_collect();
2943 
2944 
2945 	sleh_interrupt_handler_epilogue();
2946 #if MACH_ASSERT
2947 	if (preemption_level != sleh_get_preemption_level()) {
2948 		panic("irq handler %p changed preemption level from %d to %d", cdp->interrupt_handler, preemption_level, sleh_get_preemption_level());
2949 	}
2950 #endif
2951 }
2952 
2953 void
2954 sleh_fiq(arm_saved_state_t *state)
2955 {
2956 	unsigned int type   = DBG_INTR_TYPE_UNKNOWN;
2957 #if MACH_ASSERT
2958 	int preemption_level = sleh_get_preemption_level();
2959 #endif
2960 
2961 #if MONOTONIC_FIQ
2962 	uint64_t pmcr0 = 0, upmsr = 0;
2963 #endif /* MONOTONIC_FIQ */
2964 
2965 #if defined(HAS_IPI)
2966 	boolean_t    is_ipi = FALSE;
2967 	uint64_t     ipi_sr = 0;
2968 
2969 	if (gFastIPI) {
2970 		MRS(ipi_sr, "S3_5_C15_C1_1");
2971 
2972 		if (ipi_sr & ARM64_IPISR_IPI_PENDING) {
2973 			is_ipi = TRUE;
2974 		}
2975 	}
2976 
2977 	if (is_ipi) {
2978 		type = DBG_INTR_TYPE_IPI;
2979 	} else
2980 #endif /* defined(HAS_IPI) */
2981 	if (ml_get_timer_pending()) {
2982 		type = DBG_INTR_TYPE_TIMER;
2983 	}
2984 #if MONOTONIC_FIQ
2985 	/* Consult the PMI sysregs last, after IPI/timer
2986 	 * classification.
2987 	 */
2988 	else if (mt_pmi_pending(&pmcr0, &upmsr)) {
2989 		type = DBG_INTR_TYPE_PMI;
2990 	}
2991 #endif /* MONOTONIC_FIQ */
2992 
2993 	sleh_interrupt_handler_prologue(state, type);
2994 
2995 #if APPLEVIRTUALPLATFORM
2996 	uint64_t iar = __builtin_arm_rsr64("ICC_IAR0_EL1");
2997 #endif
2998 
2999 #if defined(HAS_IPI)
3000 	if (type == DBG_INTR_TYPE_IPI) {
3001 		/*
3002 		 * Order is important here: we must ack the IPI by writing IPI_SR
3003 		 * before we call cpu_signal_handler().  Otherwise, there will be
3004 		 * a window between the completion of pending-signal processing in
3005 		 * cpu_signal_handler() and the ack during which a newly-issued
3006 		 * IPI to this CPU may be lost.  ISB is required to ensure the msr
3007 		 * is retired before execution of cpu_signal_handler().
3008 		 */
3009 		MSR("S3_5_C15_C1_1", ARM64_IPISR_IPI_PENDING);
3010 		__builtin_arm_isb(ISB_SY);
3011 		cpu_signal_handler();
3012 	} else
3013 #endif /* defined(HAS_IPI) */
3014 #if MONOTONIC_FIQ
3015 	if (type == DBG_INTR_TYPE_PMI) {
3016 		ml_interrupt_masked_debug_start(mt_fiq, DBG_INTR_TYPE_PMI);
3017 		mt_fiq(getCpuDatap(), pmcr0, upmsr);
3018 		ml_interrupt_masked_debug_end();
3019 	} else
3020 #endif /* MONOTONIC_FIQ */
3021 	{
3022 		/*
3023 		 * We don't know that this is a timer, but we don't have insight into
3024 		 * the other interrupts that go down this path.
3025 		 */
3026 
3027 		cpu_data_t *cdp = getCpuDatap();
3028 
3029 		cdp->cpu_decrementer = -1; /* Large */
3030 
3031 		/*
3032 		 * ARM64_TODO: whether we're coming from userland is ignored right now.
3033 		 * We can easily thread it through, but not bothering for the
3034 		 * moment (AArch32 doesn't either).
3035 		 */
3036 		ml_interrupt_masked_debug_start(rtclock_intr, DBG_INTR_TYPE_TIMER);
3037 		rtclock_intr(TRUE);
3038 		ml_interrupt_masked_debug_end();
3039 	}
3040 
3041 #if APPLEVIRTUALPLATFORM
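	/*
	 * Complete the GIC handshake by writing the INTID to ICC_EOIR0_EL1,
	 * unless the earlier acknowledge read returned the spurious INTID.
	 */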
3042 	if (iar != GIC_SPURIOUS_IRQ) {
3043 		__builtin_arm_wsr64("ICC_EOIR0_EL1", iar);
3044 		__builtin_arm_isb(ISB_SY);
3045 	}
3046 #endif
3047 
3048 	sleh_interrupt_handler_epilogue();
3049 #if MACH_ASSERT
3050 	if (preemption_level != sleh_get_preemption_level()) {
3051 		panic("fiq type %u changed preemption level from %d to %d", type, preemption_level, sleh_get_preemption_level());
3052 	}
3053 #endif
3054 }
3055 
3056 void
3057 sleh_serror(arm_context_t *context, uint64_t esr, vm_offset_t far)
3058 {
3059 	task_vtimer_check(current_thread());
3060 
3061 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCP_SERR_ARM, 0) | DBG_FUNC_START,
3062 	    esr, VM_KERNEL_ADDRHIDE(far));
3063 	arm_saved_state_t *state = &context->ss;
3064 #if MACH_ASSERT
3065 	int preemption_level = sleh_get_preemption_level();
3066 #endif
3067 
3068 	if (PSR64_IS_USER(get_saved_state_cpsr(state))) {
3069 		/* Sanitize FAR (only if we came from userspace) */
3070 		saved_state64(state)->far = 0;
3071 	}
3072 
3073 	ASSERT_CONTEXT_SANITY(context);
3074 	arm64_platform_error(state, esr, far, PLAT_ERR_SRC_ASYNC);
3075 #if MACH_ASSERT
3076 	if (preemption_level != sleh_get_preemption_level()) {
3077 		panic("serror changed preemption level from %d to %d", preemption_level, sleh_get_preemption_level());
3078 	}
3079 #endif
3080 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCP_SERR_ARM, 0) | DBG_FUNC_END,
3081 	    esr, VM_KERNEL_ADDRHIDE(far));
3082 }
3083 
3084 void
3085 mach_syscall_trace_exit(unsigned int retval,
3086     unsigned int call_number)
3087 {
3088 	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
3089 	    MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) |
3090 	    DBG_FUNC_END, retval, 0, 0, 0, 0);
3091 }
3092 
3093 __attribute__((noreturn))
3094 void
3095 thread_syscall_return(kern_return_t error)
3096 {
3097 	thread_t thread;
3098 	struct arm_saved_state *state;
3099 
3100 	thread = current_thread();
3101 	state = get_user_regs(thread);
3102 
3103 	assert(is_saved_state64(state));
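	/* The return code is handed back to userspace in x0. */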
3104 	saved_state64(state)->x[0] = error;
3105 
3106 #if MACH_ASSERT
3107 	kern_allocation_name_t
3108 	prior __assert_only = thread_get_kernel_state(thread)->allocation_name;
3109 	assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
3110 #endif /* MACH_ASSERT */
3111 
3112 	if (kdebug_enable) {
3113 		/* Invert syscall number (negative for a mach syscall) */
3114 		mach_syscall_trace_exit(error, (-1) * get_saved_state_svc_number(state));
3115 	}
3116 
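	/* Does not return: resumes the thread in userspace with x0 set above. */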
3117 	thread_exception_return();
3118 }
3119 
3120 void
3121 syscall_trace(
3122 	struct arm_saved_state * regs __unused)
3123 {
3124 	/* kprintf("syscall: %d\n", saved_state64(regs)->x[16]);  */
3125 }
3126 
3127 static void
3128 sleh_interrupt_handler_prologue(arm_saved_state_t *state, unsigned int type)
3129 {
3130 	const bool is_user = PSR64_IS_USER(get_saved_state_cpsr(state));
3131 
3132 	if (is_user == true) {
3133 		/* Sanitize FAR (only if the interrupt occurred while the CPU was in usermode) */
3134 		saved_state64(state)->far = 0;
3135 	}
3136 
3137 	recount_enter_interrupt();
3138 
3139 	task_vtimer_check(current_thread());
3140 
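	/*
	 * Unslide kernel PCs before tracing so the KASLR slide is not exposed
	 * through the kdebug stream; user PCs are recorded as-is.
	 */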
3141 	uint64_t pc = is_user ? get_saved_state_pc(state) :
3142 	    VM_KERNEL_UNSLIDE(get_saved_state_pc(state));
3143 
3144 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START,
3145 	    0, pc, is_user, type);
3146 }
3147 
3148 static void
3149 sleh_interrupt_handler_epilogue(void)
3150 {
3151 #if KPERF
3152 	kperf_interrupt();
3153 #endif /* KPERF */
3154 	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END);
3155 	recount_leave_interrupt();
3156 }
3157 
3158 void
3159 sleh_invalid_stack(arm_context_t *context, uint64_t esr __unused, vm_offset_t far __unused)
3160 {
3161 	thread_t thread = current_thread();
3162 	vm_offset_t kernel_stack_bottom, sp;
3163 
3164 	sp = get_saved_state_sp(&context->ss);
3165 	vm_offset_t kstackptr = (vm_offset_t)thread->machine.kstackptr;
3166 	kernel_stack_bottom = round_page(kstackptr) - KERNEL_STACK_SIZE;
3167 
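	/*
	 * A stack pointer within one page below the base of the kernel stack
	 * most likely ran off the guard page, i.e. a stack overflow; anything
	 * else is treated as corruption (or a fault taken before the stack was
	 * set up).
	 */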
3168 	if ((sp < kernel_stack_bottom) && (sp >= (kernel_stack_bottom - PAGE_SIZE))) {
3169 		panic_with_thread_kernel_state("Invalid kernel stack pointer (probable overflow).", &context->ss);
3170 	}
3171 
3172 	panic_with_thread_kernel_state("Invalid kernel stack pointer (probable corruption or early boot).", &context->ss);
3173 }
3174 
3175 
3176 #if MACH_ASSERT
3177 static int trap_handled;
3178 static const char *
3179 handle_recoverable_kernel_trap(
3180 	__unused void     *tstate,
3181 	uint16_t          comment)
3182 {
3183 	assert(comment == TEST_RECOVERABLE_SOFT_TRAP);
3184 
3185 	printf("Recoverable trap handled.\n");
3186 	trap_handled = 1;
3187 
3188 	return NULL;
3189 }
3190 
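/*
 * Register a breakpoint descriptor covering the test comment so that the soft
 * trap raised by the test below is routed to handle_recoverable_kernel_trap()
 * and treated as recoverable.
 */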
3191 KERNEL_BRK_DESCRIPTOR_DEFINE(test_desc,
3192     .type                = TRAP_TELEMETRY_TYPE_KERNEL_BRK_TEST,
3193     .base                = TEST_RECOVERABLE_SOFT_TRAP,
3194     .max                 = TEST_RECOVERABLE_SOFT_TRAP,
3195     .options             = BRK_TELEMETRY_OPTIONS_RECOVERABLE_DEFAULT(
3196 	    /* enable_telemetry */ false),
3197     .handle_breakpoint   = handle_recoverable_kernel_trap);
3198 
3199 static int
3200 recoverable_kernel_trap_test(__unused int64_t in, int64_t *out)
3201 {
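	/*
	 * Raise the recoverable soft trap; the descriptor above matches its
	 * comment, so the breakpoint handler runs, sets trap_handled, and
	 * execution resumes here. *out then reports whether the handler fired.
	 */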
3202 	ml_recoverable_trap(TEST_RECOVERABLE_SOFT_TRAP);
3203 
3204 	*out = trap_handled;
3205 	return 0;
3206 }
3207 
3208 SYSCTL_TEST_REGISTER(recoverable_kernel_trap, recoverable_kernel_trap_test);
3209 
3210 #endif
3211 
3212 #if CONFIG_SPTM
3213 /**
3214  * Evaluate the panic lockdown policy for a synchronous EL1 SP0 exception
3215  *
3216  * Returns true if panic lockdown should be initiated (but does not itself do
3217  * so)
3218  */
3219 __SECURITY_STACK_DISALLOWED_PUSH
3220 bool
3221 sleh_panic_lockdown_should_initiate_el1_sp0_sync(uint64_t esr, uint64_t elr,
3222     uint64_t far, uint64_t spsr)
3223 {
3224 	const esr_exception_class_t class = ESR_EC(esr);
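	/*
	 * The saved SPSR records whether the faulting context had its DAIF
	 * exceptions masked; several cases below treat faults taken with
	 * exceptions masked as fatal once early boot is over.
	 */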
3225 	const bool any_exceptions_masked = spsr & DAIF_STANDARD_DISABLE;
3226 
3227 	switch (class) {
3228 	case ESR_EC_PC_ALIGN:   /* PC misaligned (should never happen) */
3229 	case ESR_EC_IABORT_EL1: /* Potential iPAC failure (poisoned PC) */
3230 	case ESR_EC_PAC_FAIL: { /* FPAC fail */
3231 		return true;
3232 	}
3233 
3234 	case ESR_EC_BRK_AARCH64: {
3235 		/*
3236 		 * Breakpoints are used on non-FPAC systems to signal some PAC failures
3237 		 */
3238 #if HAS_TELEMETRY_KERNEL_BRK
3239 		const struct kernel_brk_descriptor *desc;
3240 		desc = find_kernel_brk_descriptor_by_comment(ISS_BRK_COMMENT(esr));
3241 		if (desc && desc->options.recoverable) {
3242 			/*
3243 			 * We matched a breakpoint and it's recoverable, skip lockdown.
3244 			 */
3245 			return false;
3246 		}
3247 #endif /* HAS_TELEMETRY_KERNEL_BRK */
3248 
3249 		/*
3250 		 * If we don't support telemetry breakpoints and/or didn't match a
3251 		 * recoverable breakpoint, the exception is fatal.
3252 		 */
3253 		return true;
3254 	}
3255 
3256 	case ESR_EC_DABORT_EL1: {
3257 		const struct copyio_recovery_entry *cre =
3258 		    find_copyio_recovery_entry(elr);
3259 		if (cre) {
3260 #if HAS_MTE
3261 			/*
3262 			 * Tag check faults are fatal in copyio when they impact a kernel
3263 			 * address and the copyio function is not permitted to recover from
3264 			 * a tag check fault on a kernel address.
3265 			 *
3266 			 * We can determine if we faulted on a kernel address by checking
3267 			 * any of the canonical address bits. This works since
3268 			 * copy_validate will reject user addresses with any of these
3269 			 * canonical bits set before reaching the underlying copyio
3270 			 * functions, and so bits set here means this is actually a kernel
3271 			 * address.
3272 			 */
3273 			const bool is_kernel_far = far & TTBR_SELECTOR;
3274 			if (is_kernel_far &&
3275 			    is_tag_check_fault(ISS_DA_FSC(ESR_ISS(esr))) &&
3276 			    !cre->recover_from_kernel_read_tag_check_fault &&
3277 			    !cre->recover_from_kernel_write_tag_check_fault) {
3278 				return true;
3279 			}
3280 #endif /* HAS_MTE */
3281 
3282 			/*
3283 			 * copyio faults are recoverable regardless of whether or not
3284 			 * exceptions are masked.
3285 			 */
3286 			return false;
3287 		}
3288 
3289 #if HAS_MTE
3290 		/*
3291 		 * Kernel tag check faults are always fatal outside of copyio.
3292 		 */
3293 		if (is_tag_check_fault(ISS_DA_FSC(ESR_ISS(esr)))) {
3294 			return true;
3295 		}
3296 #endif /* HAS_MTE */
3297 
3298 
3299 		/*
3300 		 * Heuristic: if FAR != XPAC(FAR), the pointer was likely corrupted
3301 		 * due to PAC.
3302 		 */
3303 #if HAS_MTE
3304 		/*
3305 		 * This heuristic can misfire for TBCF/CPA2 poisoning, but
3306 		 * triggering a lockdown for these failures in the kernel is fine
3307 		 * since they are not recoverable.
3308 		 */
3309 #endif /* HAS_MTE */
3310 		const uint64_t far_stripped =
3311 		    (uint64_t)ptrauth_strip((void *)far, ptrauth_key_asda);
3312 
3313 		if (far != far_stripped) {
3314 			/* Potential dPAC failure (poisoned address) */
3315 			return true;
3316 		}
3317 
3318 		if (any_exceptions_masked && startup_phase >= STARTUP_SUB_LOCKDOWN) {
3319 			/*
3320 			 * Any data abort taken with exceptions masked is fatal if we're
3321 			 * past early boot.
3322 			 */
3323 			return true;
3324 		}
3325 
3326 		return false;
3327 	}
3328 
3329 	case ESR_EC_UNCATEGORIZED: {
3330 		/* Undefined instruction (GDBTRAP for stackshots, etc.) */
3331 		return false;
3332 	}
3333 
3334 	case ESR_EC_BTI_FAIL: {
3335 		/* Kernel BTI exceptions are always fatal */
3336 		return true;
3337 	}
3338 
3339 	default: {
3340 		if (!any_exceptions_masked) {
3341 			/*
3342 			 * When exceptions are not masked, we default-allow exceptions.
3343 			 */
3344 			return false;
3345 		}
3346 
3347 		if (startup_phase < STARTUP_SUB_LOCKDOWN) {
3348 			/*
3349 			 * Ignore early boot exceptions even if exceptions are masked.
3350 			 */
3351 			return false;
3352 		}
3353 
3354 		/* Default-deny all others when exceptions are masked */
3355 		return true;
3356 	}
3357 	}
3358 }
3359 __SECURITY_STACK_DISALLOWED_POP
3360 #endif /* CONFIG_SPTM */
3361