/*
 * Copyright (c) 2012-2023 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/misc_protos.h>
#include <arm/thread.h>
#include <arm/rtclock.h>
#include <arm/trap_internal.h> /* for IS_ARM_GDB_TRAP() et al */
#include <arm64/proc_reg.h>
#include <arm64/machine_machdep.h>
#include <arm64/monotonic.h>
#include <arm64/instructions.h>

#include <kern/debug.h>
#include <kern/exc_guard.h>
#include <kern/restartable.h>
#include <kern/socd_client.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/zalloc_internal.h>
#include <mach/exception.h>
#include <mach/arm/traps.h>
#include <mach/vm_types.h>
#include <mach/machine/thread_status.h>

#include <machine/atomic.h>
#include <machine/limits.h>

#include <pexpert/arm/protos.h>
#include <pexpert/arm64/apple_arm64_cpu.h>
#include <pexpert/arm64/apple_arm64_regs.h>
#include <pexpert/arm64/board_config.h>

#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_fault.h>
#include <vm/vm_kern.h>
#include <vm/vm_map_xnu.h>

#include <sys/errno.h>
#include <sys/kdebug.h>
#include <sys/code_signing.h>
#include <sys/reason.h>
#include <kperf/kperf.h>

#include <kern/policy_internal.h>
#if CONFIG_TELEMETRY
#include <kern/telemetry.h>
#include <kern/trap_telemetry.h>
#endif

#include <prng/entropy.h>

#include <arm64/platform_error_handler.h>

#if KASAN_TBI
#include <san/kasan.h>
#endif /* KASAN_TBI */

#if CONFIG_UBSAN_MINIMAL
#include <san/ubsan_minimal.h>
#endif

#if HAS_MTE
#include <arm64/mte_xnu.h>
#endif /* HAS_MTE */

#ifndef __arm64__
#error Should only be compiling for arm64.
#endif

#if DEBUG || DEVELOPMENT
#define HAS_TELEMETRY_KERNEL_BRK 1
#endif

#define TEST_CONTEXT32_SANITY(context) \
    (context->ss.ash.flavor == ARM_SAVED_STATE32 && context->ss.ash.count == ARM_SAVED_STATE32_COUNT && \
    context->ns.nsh.flavor == ARM_NEON_SAVED_STATE32 && context->ns.nsh.count == ARM_NEON_SAVED_STATE32_COUNT)

#define TEST_CONTEXT64_SANITY(context) \
    (context->ss.ash.flavor == ARM_SAVED_STATE64 && context->ss.ash.count == ARM_SAVED_STATE64_COUNT && \
    context->ns.nsh.flavor == ARM_NEON_SAVED_STATE64 && context->ns.nsh.count == ARM_NEON_SAVED_STATE64_COUNT)

#define ASSERT_CONTEXT_SANITY(context) \
    assert(TEST_CONTEXT32_SANITY(context) || TEST_CONTEXT64_SANITY(context))

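/*
 * COPYIN/COPYOUT dispatch on the CPSR in the saved state: faults taken while
 * the kernel was executing (e.g. inside the copyio routines) use the kernel
 * copy variants, while faults taken from EL0 use the normal user copy
 * routines.
 */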
#define COPYIN(src, dst, size) \
    (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) ? \
    copyin_kern(src, dst, size) : \
    copyin(src, dst, size)

#define COPYOUT(src, dst, size) \
    (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) ? \
    copyout_kern(src, dst, size) : \
    copyout(src, dst, size)
// Below is for stringifying a macro parameter so it can be concatenated
// with a string literal (STR expands the argument, STR1 stringifies it).
#define STR1(x) #x
#define STR(x) STR1(x)

#define ARM64_KDBG_CODE_KERNEL (0 << 8)
#define ARM64_KDBG_CODE_USER   (1 << 8)
#define ARM64_KDBG_CODE_GUEST  (2 << 8)

_Static_assert(ARM64_KDBG_CODE_GUEST <= KDBG_CODE_MAX, "arm64 KDBG trace codes out of range");
_Static_assert(ARM64_KDBG_CODE_GUEST <= UINT16_MAX, "arm64 KDBG trace codes out of range");

void panic_with_thread_kernel_state(const char *msg, arm_saved_state_t *ss) __abortlike;

void sleh_synchronous_sp1(arm_context_t *, uint64_t, vm_offset_t) __abortlike;
void sleh_synchronous(arm_context_t *, uint64_t, vm_offset_t, bool);

void sleh_irq(arm_saved_state_t *);
void sleh_fiq(arm_saved_state_t *);
void sleh_serror(arm_context_t *context, uint64_t esr, vm_offset_t far);
void sleh_invalid_stack(arm_context_t *context, uint64_t esr, vm_offset_t far) __dead2;

static void sleh_interrupt_handler_prologue(arm_saved_state_t *, unsigned int type);
static void sleh_interrupt_handler_epilogue(void);

static void handle_svc(arm_saved_state_t *);
static void handle_mach_absolute_time_trap(arm_saved_state_t *);
static void handle_mach_continuous_time_trap(arm_saved_state_t *);

static void handle_msr_trap(arm_saved_state_t *state, uint64_t esr);
#if __has_feature(ptrauth_calls)
static void handle_pac_fail(arm_saved_state_t *state, uint64_t esr) __dead2;
static inline uint64_t fault_addr_bitmask(unsigned int bit_from, unsigned int bit_to);
#endif
static void handle_bti_fail(arm_saved_state_t *state, uint64_t esr);
extern kern_return_t arm_fast_fault(pmap_t, vm_map_address_t, vm_prot_t, bool, bool);

static void handle_uncategorized(arm_saved_state_t *);

static void handle_kernel_breakpoint(arm_saved_state_t *, uint64_t);

static void handle_user_breakpoint(arm_saved_state_t *, uint64_t) __dead2;

typedef void (*abort_inspector_t)(uint32_t, fault_status_t *, vm_prot_t *);
static void inspect_instruction_abort(uint32_t, fault_status_t *, vm_prot_t *);
static void inspect_data_abort(uint32_t, fault_status_t *, vm_prot_t *);

static int is_vm_fault(fault_status_t);
static int is_translation_fault(fault_status_t);
static int is_alignment_fault(fault_status_t);

typedef void (*abort_handler_t)(arm_saved_state_t *, uint64_t, vm_offset_t, fault_status_t, vm_prot_t, expected_fault_handler_t);
static void handle_user_abort(arm_saved_state_t *, uint64_t, vm_offset_t, fault_status_t, vm_prot_t, expected_fault_handler_t);
static void handle_kernel_abort(arm_saved_state_t *, uint64_t, vm_offset_t, fault_status_t, vm_prot_t, expected_fault_handler_t);

static void handle_pc_align(arm_saved_state_t *ss) __dead2;
static void handle_sp_align(arm_saved_state_t *ss) __dead2;
static void handle_sw_step_debug(arm_saved_state_t *ss) __dead2;
static void handle_wf_trap(arm_saved_state_t *ss) __dead2;
static void handle_fp_trap(arm_saved_state_t *ss, uint64_t esr) __dead2;
#if HAS_ARM_FEAT_SME
static void handle_sme_trap(arm_saved_state_t *state, uint64_t esr);
#endif /* HAS_ARM_FEAT_SME */

static void handle_watchpoint(vm_offset_t fault_addr) __dead2;

static void handle_abort(arm_saved_state_t *, uint64_t, vm_offset_t, abort_inspector_t, abort_handler_t, expected_fault_handler_t);

static void handle_user_trapped_instruction32(arm_saved_state_t *, uint64_t esr) __dead2;

static void handle_simd_trap(arm_saved_state_t *, uint64_t esr) __dead2;

extern void current_cached_proc_cred_update(void);
void mach_syscall_trace_exit(unsigned int retval, unsigned int call_number);

struct proc;

typedef uint32_t arm64_instr_t;

extern void
unix_syscall(struct arm_saved_state *regs, thread_t thread_act, struct proc *proc);

extern void
mach_syscall(struct arm_saved_state *);

#if CONFIG_SPTM
bool sleh_panic_lockdown_should_initiate_el1_sp0_sync(uint64_t esr, uint64_t elr, uint64_t far, uint64_t spsr);
#endif /* CONFIG_SPTM */

#if CONFIG_DTRACE
extern kern_return_t dtrace_user_probe(arm_saved_state_t *regs);
extern boolean_t dtrace_tally_fault(user_addr_t);

/*
 * Traps for userland processing. Can't include bsd/sys/fasttrap_isa.h, so
 * copy and paste the trap instructions over from that file. Need to keep
 * these in sync!
 */
#define FASTTRAP_ARM32_INSTR 0xe7ffdefc
#define FASTTRAP_THUMB32_INSTR 0xdefc
#define FASTTRAP_ARM64_INSTR 0xe7eeee7e

#define FASTTRAP_ARM32_RET_INSTR 0xe7ffdefb
#define FASTTRAP_THUMB32_RET_INSTR 0xdefb
#define FASTTRAP_ARM64_RET_INSTR 0xe7eeee7d

/* See <rdar://problem/4613924> */
perfCallback tempDTraceTrapHook = NULL; /* Pointer to DTrace fbt trap hook routine */
#endif

extern void arm64_thread_exception_return(void) __dead2;

#if defined(APPLETYPHOON)
#define CPU_NAME "Typhoon"
#elif defined(APPLETWISTER)
#define CPU_NAME "Twister"
#elif defined(APPLEHURRICANE)
#define CPU_NAME "Hurricane"
#elif defined(APPLELIGHTNING)
#define CPU_NAME "Lightning"
#elif defined(APPLEEVEREST)
#define CPU_NAME "Everest"
#elif defined(APPLEH16)
#define CPU_NAME "AppleH16"
#elif defined(APPLEACC8)
#define CPU_NAME "AppleACC8"
#else
#define CPU_NAME "Unknown"
#endif

#if (CONFIG_KERNEL_INTEGRITY && defined(KERNEL_INTEGRITY_WT))
#define ESR_WT_SERROR(esr) (((esr) & 0xffffff00) == 0xbf575400)
#define ESR_WT_REASON(esr) ((esr) & 0xff)

#define WT_REASON_NONE           0
#define WT_REASON_INTEGRITY_FAIL 1
#define WT_REASON_BAD_SYSCALL    2
#define WT_REASON_NOT_LOCKED     3
#define WT_REASON_ALREADY_LOCKED 4
#define WT_REASON_SW_REQ         5
#define WT_REASON_PT_INVALID     6
#define WT_REASON_PT_VIOLATION   7
#define WT_REASON_REG_VIOLATION  8
#endif

#if defined(HAS_IPI)
void cpu_signal_handler(void);
extern unsigned int gFastIPI;
#endif /* defined(HAS_IPI) */

static arm_saved_state64_t *original_faulting_state = NULL;

/*
 * A self-restrict mode describes which (if any, or several) special permissive
 * modes are active at the time of a fault. This, in part, determines how the
 * fault will be handled.
 */
__options_closed_decl(self_restrict_mode_t, unsigned int, {
    /* None of the special modes are active. */
    SELF_RESTRICT_NONE = 0U,

    /*
     * Set whenever any of the other, more specific modes is active.
     */
    SELF_RESTRICT_ANY = (1U << 0),

    /* Reserved */

    /* Reserved */
});

TUNABLE(bool, fp_exceptions_enabled, "-fp_exceptions", false);

extern const vm_map_address_t physmap_base;
extern const vm_map_address_t physmap_end;
extern vm_offset_t static_memory_end;

/*
 * Fault recovery entries for the copyin/copyout routines.
 *
 * Offsets are expressed in bytes from &copyio_recover_table.
 */
struct copyio_recovery_entry {
    ptrdiff_t cre_start;
    ptrdiff_t cre_end;
    ptrdiff_t cre_recovery;
#if HAS_MTE
    uint8_t recover_from_kernel_read_tag_check_fault;
    uint8_t recover_from_kernel_write_tag_check_fault;
    uint8_t padding[6];
#endif
};

extern struct copyio_recovery_entry copyio_recover_table[];
extern struct copyio_recovery_entry copyio_recover_table_end[];

static inline ptrdiff_t
copyio_recovery_offset(uintptr_t addr)
{
    return (ptrdiff_t)(addr - (uintptr_t)copyio_recover_table);
}

#if !HAS_APPLE_PAC
static inline uintptr_t
copyio_recovery_addr(ptrdiff_t offset)
{
    return (uintptr_t)copyio_recover_table + (uintptr_t)offset;
}
#endif

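/*
 * Return the copyio recovery entry, if any, whose [cre_start, cre_end)
 * range covers the faulting PC (expressed as a byte offset from
 * copyio_recover_table), or NULL if the PC is not in any copyio region.
 */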
static inline struct copyio_recovery_entry *
find_copyio_recovery_entry(uint64_t pc)
{
    ptrdiff_t offset = copyio_recovery_offset(pc);
    struct copyio_recovery_entry *e;

    for (e = copyio_recover_table; e < copyio_recover_table_end; e++) {
        if (offset >= e->cre_start && offset < e->cre_end) {
            return e;
        }
    }

    return NULL;
}

static inline int
is_vm_fault(fault_status_t status)
{
    switch (status) {
    case FSC_TRANSLATION_FAULT_L0:
    case FSC_TRANSLATION_FAULT_L1:
    case FSC_TRANSLATION_FAULT_L2:
    case FSC_TRANSLATION_FAULT_L3:
    case FSC_ACCESS_FLAG_FAULT_L1:
    case FSC_ACCESS_FLAG_FAULT_L2:
    case FSC_ACCESS_FLAG_FAULT_L3:
    case FSC_PERMISSION_FAULT_L1:
    case FSC_PERMISSION_FAULT_L2:
    case FSC_PERMISSION_FAULT_L3:
        return TRUE;
    default:
        return FALSE;
    }
}

static inline int
is_translation_fault(fault_status_t status)
{
    switch (status) {
    case FSC_TRANSLATION_FAULT_L0:
    case FSC_TRANSLATION_FAULT_L1:
    case FSC_TRANSLATION_FAULT_L2:
    case FSC_TRANSLATION_FAULT_L3:
        return TRUE;
    default:
        return FALSE;
    }
}

static inline int
is_permission_fault(fault_status_t status)
{
    switch (status) {
    case FSC_PERMISSION_FAULT_L1:
    case FSC_PERMISSION_FAULT_L2:
    case FSC_PERMISSION_FAULT_L3:
        return TRUE;
    default:
        return FALSE;
    }
}

static inline int
is_alignment_fault(fault_status_t status)
{
    return status == FSC_ALIGNMENT_FAULT;
}

static inline int
is_parity_error(fault_status_t status)
{
    switch (status) {
#if defined(ARM64_BOARD_CONFIG_T6020)
    /*
     * H14 Erratum (rdar://61553243): Despite having FEAT_RAS implemented,
     * FSC_SYNC_PARITY_x can be reported for data and instruction aborts
     * and should be interpreted as FSC_SYNC_EXT_ABORT_x.
     */
#else
    /*
     * TODO: According to ARM ARM, Async Parity (0b011001) is a DFSC that is
     * only applicable to AArch32 HSR register. Can this be removed?
     */
    case FSC_ASYNC_PARITY:
    case FSC_SYNC_PARITY:
    case FSC_SYNC_PARITY_TT_L1:
    case FSC_SYNC_PARITY_TT_L2:
    case FSC_SYNC_PARITY_TT_L3:
        return TRUE;
#endif
    default:
        return FALSE;
    }
}

static inline int
is_sync_external_abort(fault_status_t status)
{
    switch (status) {
#if defined(ARM64_BOARD_CONFIG_T6020)
    /*
     * H14 Erratum (rdar://61553243): Despite having FEAT_RAS implemented,
     * FSC_SYNC_PARITY_x can be reported for data and instruction aborts
     * and should be interpreted as FSC_SYNC_EXT_ABORT_x.
     */
    case FSC_SYNC_PARITY:
#endif /* defined(ARM64_BOARD_CONFIG_T6020) */
    case FSC_SYNC_EXT_ABORT:
        return TRUE;
    default:
        return FALSE;
    }
}

static inline int
is_table_walk_error(fault_status_t status)
{
    switch (status) {
    case FSC_SYNC_EXT_ABORT_TT_L1:
    case FSC_SYNC_EXT_ABORT_TT_L2:
    case FSC_SYNC_EXT_ABORT_TT_L3:
#if defined(ARM64_BOARD_CONFIG_T6020)
    /*
     * H14 Erratum (rdar://61553243): Despite having FEAT_RAS implemented,
     * FSC_SYNC_PARITY_x can be reported for data and instruction aborts
     * and should be interpreted as FSC_SYNC_EXT_ABORT_x.
     */
    case FSC_SYNC_PARITY_TT_L1:
    case FSC_SYNC_PARITY_TT_L2:
    case FSC_SYNC_PARITY_TT_L3:
#endif /* defined(ARM64_BOARD_CONFIG_T6020) */
        return TRUE;
    default:
        return FALSE;
    }
}

#if HAS_MTE
static void
mte_send_sync_soft_mode_exception(thread_t thread, vm_map_address_t address, mach_exception_data_type_t mx_code);

static inline int
is_tag_check_fault(fault_status_t status)
{
    return status == FSC_SYNC_TAG_CHECK_FAULT;
}

static inline bool
is_canonical_memory_permission_fault(uint64_t esr)
{
    return ESR_ISS2(esr) & ISS2_DA_TND;
}

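/*
 * Classify an MTE tag check fault by the kind of mapping it hit: faults
 * against mappings that are actually tagged report EXC_ARM_MTE_TAGCHECK_FAIL,
 * while faults against canonically tagged (untagged) mappings report
 * EXC_ARM_MTE_CANONICAL_FAIL.
 */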
static inline uint16_t
tag_check_fault_type(pmap_t pmap, vm_map_address_t fault_address)
{
    if (pmap_is_tagged_mapping(pmap, pmap_strip_addr(pmap, fault_address))) {
        return EXC_ARM_MTE_TAGCHECK_FAIL;
    } else {
        return EXC_ARM_MTE_CANONICAL_FAIL;
    }
}
#endif /* HAS_MTE */

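/*
 * Returns TRUE if the fault can be serviced and the faulting context resumed:
 * ordinary VM faults and, on MTE systems, tag check faults taken by tasks
 * running in soft mode. Canonical memory permission faults are never
 * serviceable, since faulting in the data page would not help.
 */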
static inline int
is_servicible_fault(fault_status_t status, uint64_t esr)
{
#if HAS_MTE
    if (is_tag_check_fault(status)) {
        /*
         * Never called from the context of a kernel thread with its map switched
         * to a user map, so current_task() is always the task responsible for
         * the fault.
         */
        task_t current = current_task_early();
        /*
         * If the task is running in soft mode, we can "service" the fault by
         * clearing TCF0 and letting the thread try again.
         */
        if (current && task_has_sec_soft_mode(current)) {
            return TRUE;
        }
    }
    if (is_canonical_memory_permission_fault(esr)) {
        /*
         * This fault was caused by a tag write to canonically tagged
         * memory. Trying to fault in the data page won't do any good.
         */
        return FALSE;
    }
#else
#pragma unused(esr)
#endif
    return is_vm_fault(status);
}

__dead2 __unused
static void
arm64_implementation_specific_error(arm_saved_state_t *state, uint64_t esr, vm_offset_t far)
{
#pragma unused (state, esr, far)
    panic_plain("Unhandled implementation specific error\n");
}

#if CONFIG_KERNEL_INTEGRITY
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
static void
kernel_integrity_error_handler(uint64_t esr, vm_offset_t far)
{
#if defined(KERNEL_INTEGRITY_WT)
#if (DEVELOPMENT || DEBUG)
    if (ESR_WT_SERROR(esr)) {
        switch (ESR_WT_REASON(esr)) {
        case WT_REASON_INTEGRITY_FAIL:
            panic_plain("Kernel integrity, violation in frame 0x%016lx.", far);
        case WT_REASON_BAD_SYSCALL:
            panic_plain("Kernel integrity, bad syscall.");
        case WT_REASON_NOT_LOCKED:
            panic_plain("Kernel integrity, not locked.");
        case WT_REASON_ALREADY_LOCKED:
            panic_plain("Kernel integrity, already locked.");
        case WT_REASON_SW_REQ:
            panic_plain("Kernel integrity, software request.");
        case WT_REASON_PT_INVALID:
            panic_plain("Kernel integrity, encountered invalid TTE/PTE while "
                "walking 0x%016lx.", far);
        case WT_REASON_PT_VIOLATION:
            panic_plain("Kernel integrity, violation in mapping 0x%016lx.",
                far);
        case WT_REASON_REG_VIOLATION:
            panic_plain("Kernel integrity, violation in system register %d.",
                (unsigned) far);
        default:
            panic_plain("Kernel integrity, unknown (esr=0x%08llx).", esr);
        }
    }
#else
    if (ESR_WT_SERROR(esr)) {
        panic_plain("SError esr: 0x%08llx far: 0x%016lx.", esr, far);
    }
#endif
#endif
}
#pragma clang diagnostic pop
#endif

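/*
 * Handle a platform error (parity error or external abort): check kernel
 * integrity SErrors first, then give PE_handle_platform_error() the first
 * chance, then any per-CPU platform error handler, and otherwise treat the
 * error as an unhandled implementation specific error.
 */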
static void
arm64_platform_error(arm_saved_state_t *state, uint64_t esr, vm_offset_t far, platform_error_source_t source)
{
#if CONFIG_KERNEL_INTEGRITY
    kernel_integrity_error_handler(esr, far);
#endif

    (void)source;
    cpu_data_t *cdp = getCpuDatap();

    if (PE_handle_platform_error(far)) {
        return;
    } else if (cdp->platform_error_handler != NULL) {
        cdp->platform_error_handler(cdp->cpu_id, far);
    } else {
        arm64_implementation_specific_error(state, esr, far);
    }
}

void
panic_with_thread_kernel_state(const char *msg, arm_saved_state_t *ss)
{
    boolean_t ss_valid;

    ss_valid = is_saved_state64(ss);
    arm_saved_state64_t *state = saved_state64(ss);

    os_atomic_cmpxchg(&original_faulting_state, NULL, state, seq_cst);

    // rdar://80659177
    // Record SoCD tracepoints up to twice: once the first time we call panic,
    // and once more if we encounter a nested panic after that.
    static int twice = 2;
    if (twice > 0) {
        twice--;
        SOCD_TRACE_XNU(KERNEL_STATE_PANIC,
            SOCD_TRACE_MODE_STICKY_TRACEPOINT,
            ADDR(state->pc),
            PACK_LSB(VALUE(state->lr), VALUE(ss_valid)),
            PACK_2X32(VALUE(state->esr), VALUE(state->cpsr)),
            VALUE(state->far));
    }

    panic_plain("%s at pc 0x%016llx, lr 0x%016llx (saved state: %p%s)\n"
        "\t x0: 0x%016llx x1: 0x%016llx x2: 0x%016llx x3: 0x%016llx\n"
        "\t x4: 0x%016llx x5: 0x%016llx x6: 0x%016llx x7: 0x%016llx\n"
        "\t x8: 0x%016llx x9: 0x%016llx x10: 0x%016llx x11: 0x%016llx\n"
        "\t x12: 0x%016llx x13: 0x%016llx x14: 0x%016llx x15: 0x%016llx\n"
        "\t x16: 0x%016llx x17: 0x%016llx x18: 0x%016llx x19: 0x%016llx\n"
        "\t x20: 0x%016llx x21: 0x%016llx x22: 0x%016llx x23: 0x%016llx\n"
        "\t x24: 0x%016llx x25: 0x%016llx x26: 0x%016llx x27: 0x%016llx\n"
        "\t x28: 0x%016llx fp: 0x%016llx lr: 0x%016llx sp: 0x%016llx\n"
        "\t pc: 0x%016llx cpsr: 0x%08x esr: 0x%016llx far: 0x%016llx\n",
        msg, state->pc, state->lr, ss, (ss_valid ? "" : " INVALID"),
        state->x[0], state->x[1], state->x[2], state->x[3],
        state->x[4], state->x[5], state->x[6], state->x[7],
        state->x[8], state->x[9], state->x[10], state->x[11],
        state->x[12], state->x[13], state->x[14], state->x[15],
        state->x[16], state->x[17], state->x[18], state->x[19],
        state->x[20], state->x[21], state->x[22], state->x[23],
        state->x[24], state->x[25], state->x[26], state->x[27],
        state->x[28], state->fp, state->lr, state->sp,
        state->pc, state->cpsr, state->esr, state->far);
}

void
sleh_synchronous_sp1(arm_context_t *context, uint64_t esr, vm_offset_t far __unused)
{
    esr_exception_class_t class = ESR_EC(esr);
    arm_saved_state_t *state = &context->ss;

    switch (class) {
    case ESR_EC_UNCATEGORIZED:
    {
#if (DEVELOPMENT || DEBUG)
        uint32_t instr = *((uint32_t*)get_saved_state_pc(state));
        if (IS_ARM_GDB_TRAP(instr)) {
            DebuggerCall(EXC_BREAKPOINT, state);
        }
        OS_FALLTHROUGH; // panic if we return from the debugger
#else
        panic_with_thread_kernel_state("Unexpected debugger trap while SP1 selected", state);
#endif /* (DEVELOPMENT || DEBUG) */
    }
    default:
        panic_with_thread_kernel_state("Synchronous exception taken while SP1 selected", state);
    }
}

__attribute__((noreturn))
void
thread_exception_return()
{
    thread_t thread = current_thread();
    if (thread->machine.exception_trace_code != 0) {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, thread->machine.exception_trace_code) | DBG_FUNC_END, 0, 0, 0, 0, 0);
        thread->machine.exception_trace_code = 0;
    }

#if HAS_MTE
    thread->machine.el0_synchronous_trap = false;
#endif /* HAS_MTE */

#if KASAN_TBI
    kasan_unpoison_curstack(true);
#endif /* KASAN_TBI */
    arm64_thread_exception_return();
    __builtin_unreachable();
}

/*
 * check whether task vtimers are running and set thread and CPU BSD AST
 *
 * must be called with interrupts masked so updates of fields are atomic
 * must be emitted inline to avoid generating an FBT probe on the exception path
 */
__attribute__((__always_inline__))
static inline void
task_vtimer_check(thread_t thread)
{
    task_t task = get_threadtask_early(thread);

    if (__improbable(task != NULL && task->vtimers)) {
        thread_ast_set(thread, AST_BSD);
        thread->machine.CpuDatap->cpu_pending_ast |= AST_BSD;
    }
}

#if MACH_ASSERT
/**
 * A version of get_preemption_level() that works in early boot.
 *
 * If an exception is raised in early boot before the initial thread has been
 * set up, then calling get_preemption_level() in the SLEH will trigger an
 * infinitely-recursing exception. This function handles this edge case.
 */
static inline int
sleh_get_preemption_level(void)
{
    if (__improbable(current_thread() == NULL)) {
        return 0;
    }
    return get_preemption_level();
}
#endif // MACH_ASSERT

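/*
 * A platform error is a data or instruction abort whose fault status code
 * encodes a parity error, a synchronous external abort, or an external abort
 * on a translation table walk.
 */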
static inline bool
is_platform_error(uint64_t esr)
{
    esr_exception_class_t class = ESR_EC(esr);
    uint32_t iss = ESR_ISS(esr);
    fault_status_t fault_code;

    if (class == ESR_EC_DABORT_EL0 || class == ESR_EC_DABORT_EL1) {
        fault_code = ISS_DA_FSC(iss);
    } else if (class == ESR_EC_IABORT_EL0 || class == ESR_EC_IABORT_EL1) {
        fault_code = ISS_IA_FSC(iss);
    } else {
        return false;
    }

    return is_parity_error(fault_code) || is_sync_external_abort(fault_code) ||
        is_table_walk_error(fault_code);
}

void
sleh_synchronous(arm_context_t *context, uint64_t esr, vm_offset_t far, __unused bool did_initiate_panic_lockdown)
{
    esr_exception_class_t class = ESR_EC(esr);
    arm_saved_state_t *state = &context->ss;
    thread_t thread = current_thread();
#if MACH_ASSERT
    int preemption_level = sleh_get_preemption_level();
#endif
    expected_fault_handler_t expected_fault_handler = NULL;
#ifdef CONFIG_XNUPOST
    expected_fault_handler_t saved_expected_fault_handler = NULL;
    uintptr_t saved_expected_fault_addr = 0;
    uintptr_t saved_expected_fault_pc = 0;
#endif /* CONFIG_XNUPOST */

    ASSERT_CONTEXT_SANITY(context);

    task_vtimer_check(thread);

#if CONFIG_DTRACE
    /*
     * Handle kernel DTrace probes as early as possible to minimize the likelihood
     * that this path will itself trigger a DTrace probe, which would lead to infinite
     * probe recursion.
     */
    if (__improbable((class == ESR_EC_UNCATEGORIZED) && tempDTraceTrapHook &&
        (tempDTraceTrapHook(EXC_BAD_INSTRUCTION, state, 0, 0) == KERN_SUCCESS))) {
#if CONFIG_SPTM
        if (__improbable(did_initiate_panic_lockdown)) {
            panic("Unexpectedly initiated lockdown for DTrace probe?");
        }
#endif
        return;
    }
#endif
    bool is_user = PSR64_IS_USER(get_saved_state_cpsr(state));

#if CONFIG_SPTM
    // Lockdown should only be initiated for kernel exceptions
    assert(!(is_user && did_initiate_panic_lockdown));
#endif /* CONFIG_SPTM */

    /*
     * Use KERNEL_DEBUG_CONSTANT_IST here to avoid producing tracepoints
     * that would disclose the behavior of PT_DENY_ATTACH processes.
     */
    if (is_user) {
        /* Sanitize FAR (but only if the exception was taken from userspace) */
        switch (class) {
        case ESR_EC_IABORT_EL1:
        case ESR_EC_IABORT_EL0:
            /* If this is a SEA, since we can't trust FnV, just clear FAR from the save area. */
            if (ISS_IA_FSC(ESR_ISS(esr)) == FSC_SYNC_EXT_ABORT) {
                saved_state64(state)->far = 0;
            }
            break;
        case ESR_EC_DABORT_EL1:
        case ESR_EC_DABORT_EL0:
            /* If this is a SEA, since we can't trust FnV, just clear FAR from the save area. */
            if (ISS_DA_FSC(ESR_ISS(esr)) == FSC_SYNC_EXT_ABORT) {
                saved_state64(state)->far = 0;
            }
            break;
        case ESR_EC_WATCHPT_MATCH_EL1:
        case ESR_EC_WATCHPT_MATCH_EL0:
        case ESR_EC_PC_ALIGN:
            break; /* FAR_ELx is valid */
        default:
            saved_state64(state)->far = 0;
            break;
        }

        thread->machine.exception_trace_code = (uint16_t)(ARM64_KDBG_CODE_USER | class);
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, thread->machine.exception_trace_code) | DBG_FUNC_START,
            esr, far, get_saved_state_pc(state), 0, 0);
    } else {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, ARM64_KDBG_CODE_KERNEL | class) | DBG_FUNC_START,
            esr, VM_KERNEL_ADDRHIDE(far), VM_KERNEL_UNSLIDE(get_saved_state_pc(state)), 0, 0);
    }

    if (__improbable(ESR_INSTR_IS_2BYTES(esr))) {
        /*
         * We no longer support 32-bit, which means no 2-byte
         * instructions.
         */
        if (is_user) {
            panic("Exception on 2-byte instruction, "
                "context=%p, esr=%#llx, far=%p",
                context, esr, (void *)far);
        } else {
            panic_with_thread_kernel_state("Exception on 2-byte instruction", state);
        }
    }

#ifdef CONFIG_XNUPOST
    if (thread->machine.expected_fault_handler != NULL) {
        bool matching_fault_pc = false;
        saved_expected_fault_handler = thread->machine.expected_fault_handler;
        saved_expected_fault_addr = thread->machine.expected_fault_addr;
        saved_expected_fault_pc = thread->machine.expected_fault_pc;

        thread->machine.expected_fault_handler = NULL;
        thread->machine.expected_fault_addr = 0;
        thread->machine.expected_fault_pc = 0;

#if __has_feature(ptrauth_calls)
        /*
         * Compare only the bits of PC which make up the virtual address.
         * This ignores the upper bits, which may have been corrupted by HW in
         * platform dependent ways to signal pointer authentication fault.
         */
        uint64_t fault_addr_mask = fault_addr_bitmask(0, 64 - T1SZ_BOOT - 1);
        uint64_t masked_expected_pc = saved_expected_fault_pc & fault_addr_mask;
        uint64_t masked_saved_pc = get_saved_state_pc(state) & fault_addr_mask;
        matching_fault_pc = masked_expected_pc == masked_saved_pc;
#else
        matching_fault_pc =
            (saved_expected_fault_pc == get_saved_state_pc(state));
#endif /* ptrauth_calls */
        if (saved_expected_fault_addr == far ||
            matching_fault_pc) {
            expected_fault_handler = saved_expected_fault_handler;
        }
    }
#endif /* CONFIG_XNUPOST */

    if (__improbable(is_platform_error(esr))) {
        /*
         * Must gather error info in platform error handler before
         * thread is preempted to another core/cluster to guarantee
         * accurate error details
         */

        arm64_platform_error(state, esr, far, PLAT_ERR_SRC_SYNC);
#if CONFIG_SPTM
        if (__improbable(did_initiate_panic_lockdown)) {
            panic("Panic lockdown initiated for platform error");
        }
#endif
        return;
    }

    if (is_user && class == ESR_EC_DABORT_EL0) {
        thread_reset_pcs_will_fault(thread);
    }

#if CONFIG_SPTM
    if (__improbable(did_initiate_panic_lockdown && current_thread() != NULL)) {
        /*
         * If we initiated panic lockdown, we must disable preemption before
         * enabling interrupts. While unlikely, preempting the panicked thread
         * after lockdown has occurred may hang the system if all cores end up
         * blocked while attempting to return to user space.
         */
        disable_preemption();
    }
#endif /* CONFIG_SPTM */

    /* Inherit the interrupt masks from previous context */
    if (SPSR_INTERRUPTS_ENABLED(get_saved_state_cpsr(state))) {
        ml_set_interrupts_enabled(TRUE);
    }

#if HAS_MTE
    if (is_user) {
        thread->machine.el0_synchronous_trap = true;
    }
#endif

    switch (class) {
    case ESR_EC_SVC_64:
        if (!is_saved_state64(state) || !is_user) {
            panic("Invalid SVC_64 context");
        }

        handle_svc(state);
        break;

    case ESR_EC_DABORT_EL0:
        handle_abort(state, esr, far, inspect_data_abort, handle_user_abort, expected_fault_handler);
        break;

    case ESR_EC_MSR_TRAP:
        handle_msr_trap(state, esr);
        break;
    /**
     * Some APPLEVIRTUALPLATFORM targets do not specify armv8.6, but it's still
     * possible for them to be hosted by a host that implements ARM_FPAC. There
     * is no way for such a host to disable or trap FPAC without a substantial
     * performance penalty. Therefore, the FPAC handler needs to be built into
     * guest kernels to prevent the exception from falling through.
     */
#if __has_feature(ptrauth_calls)
    case ESR_EC_PAC_FAIL:
#ifdef CONFIG_XNUPOST
        if (expected_fault_handler != NULL && expected_fault_handler(state)) {
            break;
        }
#endif /* CONFIG_XNUPOST */
        handle_pac_fail(state, esr);
        __builtin_unreachable();

#endif /* __has_feature(ptrauth_calls) */

#if HAS_ARM_FEAT_SME
    case ESR_EC_SME:
        handle_sme_trap(state, esr);
        break;
#endif /* HAS_ARM_FEAT_SME */

    case ESR_EC_IABORT_EL0:
        handle_abort(state, esr, far, inspect_instruction_abort, handle_user_abort, expected_fault_handler);
        break;

    case ESR_EC_IABORT_EL1:
#ifdef CONFIG_XNUPOST
        if ((expected_fault_handler != NULL) && expected_fault_handler(state)) {
            break;
        }
#endif /* CONFIG_XNUPOST */

        panic_with_thread_kernel_state("Kernel instruction fetch abort", state);

    case ESR_EC_PC_ALIGN:
        handle_pc_align(state);
        __builtin_unreachable();

    case ESR_EC_DABORT_EL1:
        handle_abort(state, esr, far, inspect_data_abort, handle_kernel_abort, expected_fault_handler);
        break;

    case ESR_EC_UNCATEGORIZED:
        assert(!ESR_ISS(esr));

#if CONFIG_XNUPOST
        if (!is_user && (expected_fault_handler != NULL) && expected_fault_handler(state)) {
            /*
             * The fault handler accepted the exception and handled it on its
             * own. Don't trap to the debugger/panic.
             */
            break;
        }
#endif /* CONFIG_XNUPOST */
        handle_uncategorized(&context->ss);
        break;

    case ESR_EC_SP_ALIGN:
        handle_sp_align(state);
        __builtin_unreachable();

    case ESR_EC_BKPT_AARCH32:
        handle_user_breakpoint(state, esr);
        __builtin_unreachable();

    case ESR_EC_BRK_AARCH64:
#ifdef CONFIG_XNUPOST
        if ((expected_fault_handler != NULL) && expected_fault_handler(state)) {
            break;
        }
#endif /* CONFIG_XNUPOST */
        if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
            handle_kernel_breakpoint(state, esr);
            break;
        } else {
            handle_user_breakpoint(state, esr);
            __builtin_unreachable();
        }

    case ESR_EC_BKPT_REG_MATCH_EL0:
        if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
            handle_user_breakpoint(state, esr);
        }
        panic("Unsupported Class %u event code. state=%p class=%u esr=%llu far=%p",
            class, state, class, esr, (void *)far);
        __builtin_unreachable();

    case ESR_EC_BKPT_REG_MATCH_EL1:
        panic_with_thread_kernel_state("Hardware Breakpoint Debug exception from kernel. Panic (by design)", state);
        __builtin_unreachable();

    case ESR_EC_SW_STEP_DEBUG_EL0:
        if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
            handle_sw_step_debug(state);
        }
        panic("Unsupported Class %u event code. state=%p class=%u esr=%llu far=%p",
            class, state, class, esr, (void *)far);
        __builtin_unreachable();

    case ESR_EC_SW_STEP_DEBUG_EL1:
        panic_with_thread_kernel_state("Software Step Debug exception from kernel. Panic (by design)", state);
        __builtin_unreachable();

    case ESR_EC_WATCHPT_MATCH_EL0:
        if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
            handle_watchpoint(far);
        }
        panic("Unsupported Class %u event code. state=%p class=%u esr=%llu far=%p",
            class, state, class, esr, (void *)far);
        __builtin_unreachable();

    case ESR_EC_WATCHPT_MATCH_EL1:
        /*
         * If we hit a watchpoint in kernel mode, it was probably in a
         * copyin/copyout path that we don't want to abort. Turn off watchpoints
         * and keep going; we'll turn them back on in return_from_exception.
         */
        if (FSC_DEBUG_FAULT == ISS_SSDE_FSC(esr)) {
            arm_debug_set(NULL);
            break; /* return to first level handler */
        }
        panic("Unsupported Class %u event code. state=%p class=%u esr=%llu far=%p",
            class, state, class, esr, (void *)far);
        __builtin_unreachable();

    case ESR_EC_TRAP_SIMD_FP:
        handle_simd_trap(state, esr);
        __builtin_unreachable();

    case ESR_EC_ILLEGAL_INSTR_SET:
        panic("Illegal instruction set exception. state=%p class=%u esr=%llu far=%p spsr=0x%x",
            state, class, esr, (void *)far, get_saved_state_cpsr(state));
        __builtin_unreachable();

    case ESR_EC_MCR_MRC_CP15_TRAP:
    case ESR_EC_MCRR_MRRC_CP15_TRAP:
    case ESR_EC_MCR_MRC_CP14_TRAP:
    case ESR_EC_LDC_STC_CP14_TRAP:
    case ESR_EC_MCRR_MRRC_CP14_TRAP:
        handle_user_trapped_instruction32(state, esr);
        __builtin_unreachable();

    case ESR_EC_WFI_WFE:
        // Use of WFI or WFE instruction when they have been disabled for EL0
        handle_wf_trap(state);
        __builtin_unreachable();

    case ESR_EC_FLOATING_POINT_64:
        handle_fp_trap(state, esr);
        __builtin_unreachable();
    case ESR_EC_BTI_FAIL:
#ifdef CONFIG_XNUPOST
        if ((expected_fault_handler != NULL) && expected_fault_handler(state)) {
            break;
        }
#endif /* CONFIG_XNUPOST */
        handle_bti_fail(state, esr);
        __builtin_unreachable();

    default:
        handle_uncategorized(state);
    }

#ifdef CONFIG_XNUPOST
    if (saved_expected_fault_handler != NULL) {
        thread->machine.expected_fault_handler = saved_expected_fault_handler;
        thread->machine.expected_fault_addr = saved_expected_fault_addr;
        thread->machine.expected_fault_pc = saved_expected_fault_pc;
    }
#endif /* CONFIG_XNUPOST */

    if (is_user) {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, thread->machine.exception_trace_code) | DBG_FUNC_END,
            esr, far, get_saved_state_pc(state), 0, 0);
        thread->machine.exception_trace_code = 0;
    } else {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
            MACHDBG_CODE(DBG_MACH_EXCP_SYNC_ARM, ARM64_KDBG_CODE_KERNEL | class) | DBG_FUNC_END,
            esr, VM_KERNEL_ADDRHIDE(far), VM_KERNEL_UNSLIDE(get_saved_state_pc(state)), 0, 0);
    }

#if CONFIG_SPTM
    if (__improbable(did_initiate_panic_lockdown)) {
#if CONFIG_XNUPOST
        bool can_recover = !!(expected_fault_handler);
#else
        bool can_recover = false;
#endif /* CONFIG_XNUPOST */

        if (can_recover) {
            /*
             * If we matched an exception handler, this was a simulated lockdown
             * and so we can recover. Re-enable preemption if we disabled it.
             */
            if (current_thread() != NULL) {
                enable_preemption();
            }
        } else {
            /*
             * fleh already triggered a lockdown, but for whatever reason we
             * didn't end up finding a reason to panic. Catch-all panic in
             * this case. Note that the panic here has no security benefit,
             * as the system is already hosed; this is merely for telemetry.
             */
            panic_with_thread_kernel_state("Panic lockdown initiated", state);
        }
    }
#endif /* CONFIG_SPTM */

#if MACH_ASSERT
    if (preemption_level != sleh_get_preemption_level()) {
        panic("synchronous exception changed preemption level from %d to %d", preemption_level, sleh_get_preemption_level());
    }
#endif

#if HAS_MTE
    if (is_user) {
        thread->machine.el0_synchronous_trap = false;
    }
#endif
}

/*
 * Uncategorized exceptions are a catch-all for general execution errors.
 * ARM64_TODO: For now, we assume this is for undefined instruction exceptions.
 */
static void
handle_uncategorized(arm_saved_state_t *state)
{
    exception_type_t exception = EXC_BAD_INSTRUCTION;
    mach_exception_data_type_t codes[2] = {EXC_ARM_UNDEFINED};
    mach_msg_type_number_t numcodes = 2;
    uint32_t instr = 0;

    COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));

#if CONFIG_DTRACE

    if (PSR64_IS_USER64(get_saved_state_cpsr(state))) {
        /*
         * For a 64bit user process, we care about all 4 bytes of the
         * instr.
         */
        if (instr == FASTTRAP_ARM64_INSTR || instr == FASTTRAP_ARM64_RET_INSTR) {
            if (dtrace_user_probe(state) == KERN_SUCCESS) {
                return;
            }
        }
    } else if (PSR64_IS_USER32(get_saved_state_cpsr(state))) {
        /*
         * For a 32bit user process, we check for thumb mode, in which case
         * we only care about a 2-byte instruction length. For non-thumb mode,
         * we care about all 4 bytes of the instruction.
         */
        if (get_saved_state_cpsr(state) & PSR64_MODE_USER32_THUMB) {
            if (((uint16_t)instr == FASTTRAP_THUMB32_INSTR) ||
                ((uint16_t)instr == FASTTRAP_THUMB32_RET_INSTR)) {
                if (dtrace_user_probe(state) == KERN_SUCCESS) {
                    return;
                }
            }
        } else {
            if ((instr == FASTTRAP_ARM32_INSTR) ||
                (instr == FASTTRAP_ARM32_RET_INSTR)) {
                if (dtrace_user_probe(state) == KERN_SUCCESS) {
                    return;
                }
            }
        }
    }

#endif /* CONFIG_DTRACE */

    if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
        if (IS_ARM_GDB_TRAP(instr)) {
            boolean_t interrupt_state;
            exception = EXC_BREAKPOINT;

            interrupt_state = ml_set_interrupts_enabled(FALSE);

            /* Save off the context here (so that the debug logic
             * can see the original state of this thread).
             */
            current_thread()->machine.kpcb = state;

            /* Hop into the debugger (typically either due to a
             * fatal exception, an explicit panic, or a stackshot
             * request).
             */
            DebuggerCall(exception, state);

            current_thread()->machine.kpcb = NULL;
            (void) ml_set_interrupts_enabled(interrupt_state);
            return;
        } else {
            panic("Undefined kernel instruction: pc=%p instr=%x", (void*)get_saved_state_pc(state), instr);
        }
    }

    /*
     * Check for GDB breakpoint via illegal opcode.
     */
    if (IS_ARM_GDB_TRAP(instr)) {
        exception = EXC_BREAKPOINT;
        codes[0] = EXC_ARM_BREAKPOINT;
        codes[1] = instr;
    } else {
        codes[1] = instr;
    }

    exception_triage(exception, codes, numcodes);
    __builtin_unreachable();
}

#if __has_feature(ptrauth_calls)
static inline const char *
ptrauth_key_to_string(ptrauth_key key)
{
    switch (key) {
    case ptrauth_key_asia:
        return "IA";
    case ptrauth_key_asib:
        return "IB";
    case ptrauth_key_asda:
        return "DA";
    case ptrauth_key_asdb:
        return "DB";
    default:
        __builtin_unreachable();
    }
}

static const char *
ptrauth_handle_brk_trap(void *tstate, uint16_t comment)
{
    kernel_panic_reason_t pr = PERCPU_GET(panic_reason);
    arm_saved_state_t *state = (arm_saved_state_t *)tstate;

    ptrauth_key key = (ptrauth_key)(comment - PTRAUTH_TRAP_START);
    const char *key_str = ptrauth_key_to_string(key);

    snprintf(pr->buf, sizeof(pr->buf),
        "Break 0x%04X instruction exception from kernel. "
        "Ptrauth failure with %s key resulted in 0x%016llx",
        comment, key_str, saved_state64(state)->x[16]);

    return pr->buf;
}
#endif /* __has_feature(ptrauth_calls) */

#if HAS_TELEMETRY_KERNEL_BRK
static uint32_t bound_chk_violations_event;

static const char *
xnu_soft_trap_handle_breakpoint(
    void *tstate,
    uint16_t comment)
{
#if CONFIG_UBSAN_MINIMAL
    if (comment == UBSAN_SOFT_TRAP_SIGNED_OF) {
        ubsan_handle_brk_trap(tstate, comment);
    }
#else
    (void)tstate;
#endif

    if (comment == CLANG_SOFT_TRAP_BOUND_CHK) {
        os_atomic_inc(&bound_chk_violations_event, relaxed);
    }
    return NULL;
}
#endif /* HAS_TELEMETRY_KERNEL_BRK */

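/*
 * Produce a panic reason string for fatal XNU hard traps, keyed on the brk
 * comment. Returns NULL for unrecognized comments so the generic break
 * message is used instead.
 */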
static const char *
xnu_hard_trap_handle_breakpoint(void *tstate, uint16_t comment)
{
    kernel_panic_reason_t pr = PERCPU_GET(panic_reason);
    arm_saved_state64_t *state = saved_state64(tstate);

    switch (comment) {
    case XNU_HARD_TRAP_SAFE_UNLINK:
        snprintf(pr->buf, sizeof(pr->buf),
            "panic: corrupt list around element %p",
            (void *)state->x[8]);
        return pr->buf;

    case XNU_HARD_TRAP_STRING_CHK:
        return "panic: string operation caused an overflow";

    case XNU_HARD_TRAP_ASSERT_FAILURE:
        /*
         * Read the implicit assert arguments, see:
         * ML_TRAP_REGISTER_1: x8
         * ML_TRAP_REGISTER_2: x16
         * ML_TRAP_REGISTER_3: x17
         */
        panic_assert_format(pr->buf, sizeof(pr->buf),
            (struct mach_assert_hdr *)state->x[8],
            state->x[16], state->x[17]);
        return pr->buf;

    default:
        return NULL;
    }
}

#if __has_feature(ptrauth_calls)
KERNEL_BRK_DESCRIPTOR_DEFINE(ptrauth_desc,
    .type              = TRAP_TELEMETRY_TYPE_KERNEL_BRK_PTRAUTH,
    .base              = PTRAUTH_TRAP_START,
    .max               = PTRAUTH_TRAP_END,
    .options           = BRK_TELEMETRY_OPTIONS_FATAL_DEFAULT,
    .handle_breakpoint = ptrauth_handle_brk_trap);
#endif

KERNEL_BRK_DESCRIPTOR_DEFINE(clang_desc,
    .type              = TRAP_TELEMETRY_TYPE_KERNEL_BRK_CLANG,
    .base              = CLANG_ARM_TRAP_START,
    .max               = CLANG_ARM_TRAP_END,
    .options           = BRK_TELEMETRY_OPTIONS_FATAL_DEFAULT,
    .handle_breakpoint = NULL);

KERNEL_BRK_DESCRIPTOR_DEFINE(libcxx_desc,
    .type              = TRAP_TELEMETRY_TYPE_KERNEL_BRK_LIBCXX,
    .base              = LIBCXX_TRAP_START,
    .max               = LIBCXX_TRAP_END,
    .options           = BRK_TELEMETRY_OPTIONS_FATAL_DEFAULT,
    .handle_breakpoint = NULL);

#if HAS_TELEMETRY_KERNEL_BRK
KERNEL_BRK_DESCRIPTOR_DEFINE(xnu_soft_traps_desc,
    .type              = TRAP_TELEMETRY_TYPE_KERNEL_BRK_TELEMETRY,
    .base              = XNU_SOFT_TRAP_START,
    .max               = XNU_SOFT_TRAP_END,
    .options           = BRK_TELEMETRY_OPTIONS_RECOVERABLE_DEFAULT(
        /* enable_telemetry */ true),
    .handle_breakpoint = xnu_soft_trap_handle_breakpoint);
#endif /* HAS_TELEMETRY_KERNEL_BRK */

KERNEL_BRK_DESCRIPTOR_DEFINE(xnu_hard_traps_desc,
    .type              = TRAP_TELEMETRY_TYPE_KERNEL_BRK_XNU,
    .base              = XNU_HARD_TRAP_START,
    .max               = XNU_HARD_TRAP_END,
    .options           = BRK_TELEMETRY_OPTIONS_FATAL_DEFAULT,
    .handle_breakpoint = xnu_hard_trap_handle_breakpoint);

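/*
 * Handle a kernel-mode `brk #imm`: look up the registered descriptor for the
 * comment, report trap telemetry if enabled, invoke the descriptor's handler,
 * then either step over the 4-byte brk instruction (for recoverable traps) or
 * panic with the generated message.
 */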
static void
#if !HAS_TELEMETRY_KERNEL_BRK
__attribute__((noreturn))
#endif
handle_kernel_breakpoint(arm_saved_state_t *state, uint64_t esr)
{
    uint16_t comment = ISS_BRK_COMMENT(esr);
    const struct kernel_brk_descriptor *desc;
    const char *msg = NULL;

    desc = find_kernel_brk_descriptor_by_comment(comment);

    if (!desc) {
        goto brk_out;
    }

#if HAS_TELEMETRY_KERNEL_BRK
    if (desc->options.enable_trap_telemetry) {
        trap_telemetry_report_exception(
            /* trap_type   */ desc->type,
            /* trap_code   */ comment,
            /* options     */ desc->options.telemetry_options,
            /* saved_state */ (void *)state);
    }
#endif

    if (desc->handle_breakpoint) {
        msg = desc->handle_breakpoint(state, comment);
    }

#if HAS_TELEMETRY_KERNEL_BRK
    /* Still alive? Check if we should recover. */
    if (desc->options.recoverable) {
        add_saved_state_pc(state, 4);
        return;
    }
#endif

brk_out:
    if (msg == NULL) {
        kernel_panic_reason_t pr = PERCPU_GET(panic_reason);

        if (comment == CLANG_ARM_TRAP_BOUND_CHK) {
            msg = tsnprintf(pr->buf, sizeof(pr->buf),
                "Bounds safety trap");
        } else {
            msg = tsnprintf(pr->buf, sizeof(pr->buf),
                "Break 0x%04X instruction exception from kernel. "
                "Panic (by design)",
                comment);
        }
    }

    panic_with_thread_kernel_state(msg, state);
    __builtin_unreachable();
}

/*
 * Similar in spirit to kernel_brk_descriptor, but with less flexible semantics:
 * each descriptor defines a `brk` label range for use from userspace. When one
 * of these labels is used, system policy may decide to kill the calling process
 * without giving it the opportunity to catch the exception or continue execution
 * from a signal handler. This is used to enforce security boundaries: userspace
 * code may use this mechanism to reliably terminate when internal inconsistencies
 * are detected. Note that we don't invariably terminate without giving the
 * process a say: we might only enforce such a policy if a security feature is
 * enabled, for example.
 */
typedef struct user_brk_label_range_descriptor {
    uint16_t base;
    uint16_t max;
} user_brk_label_range_descriptor_t;

const user_brk_label_range_descriptor_t user_brk_descriptors[] = {
#if __has_feature(ptrauth_calls)
    /* PAC failures detected in data by userspace */
    {
        /* Use the exact same label range as kernel PAC */
        .base = PTRAUTH_TRAP_START,
        .max  = PTRAUTH_TRAP_END,
    },
#endif /* __has_feature(ptrauth_calls) */
    /* Available for use by system libraries when detecting disallowed conditions */
    {
        /* Note this uses the same range as the kernel-specific XNU_HARD_TRAP range */
        .base = 0xB000,
        .max  = 0xBFFF,
    }
};
const int user_brk_descriptor_count = sizeof(user_brk_descriptors) / sizeof(user_brk_descriptors[0]);

static inline const user_brk_label_range_descriptor_t *
find_user_brk_descriptor_by_comment(uint16_t comment)
{
    for (int desc_idx = 0; desc_idx < user_brk_descriptor_count; desc_idx++) {
        const user_brk_label_range_descriptor_t *des = &user_brk_descriptors[desc_idx];
        if (comment >= des->base && comment <= des->max) {
            return des;
        }
    }

    return NULL;
}

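/*
 * Deliver EXC_BREAKPOINT to userspace for a breakpoint exception. For AArch64
 * `brk` instructions whose comment falls in a known label range, the exception
 * is tagged as potentially unrecoverable (or as a ptrauth failure) so that
 * policy can reliably terminate the process.
 */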
static void
handle_user_breakpoint(arm_saved_state_t *state, uint64_t esr __unused)
{
    exception_type_t exception = EXC_BREAKPOINT;
    mach_exception_data_type_t codes[2] = {EXC_ARM_BREAKPOINT};
    mach_msg_type_number_t numcodes = 2;

    if (ESR_EC(esr) == ESR_EC_BRK_AARCH64) {
        /*
         * Consult the trap labels we know about to decide whether userspace
         * should be given the opportunity to handle the exception.
         */
        uint16_t brk_label = ISS_BRK_COMMENT(esr);
        const struct user_brk_label_range_descriptor *descriptor = find_user_brk_descriptor_by_comment(brk_label);
        /*
         * Note it's no problem if we don't recognize the label.
         * In this case we'll just go through normal exception delivery.
         */
        if (descriptor != NULL) {
            exception |= EXC_MAY_BE_UNRECOVERABLE_BIT;

#if __has_feature(ptrauth_calls)
            /*
             * We have additional policy specifically for PAC violations.
             * To make the rest of the code easier to follow, don't set
             * EXC_MAY_BE_UNRECOVERABLE_BIT here and just set EXC_PTRAUTH_BIT
             * instead. Conceptually a PAC failure is absolutely 'maybe
             * unrecoverable', but it's not really worth excising the
             * discrepancy from the plumbing.
             */
            if (descriptor->base == PTRAUTH_TRAP_START) {
                exception &= ~(EXC_MAY_BE_UNRECOVERABLE_BIT);
                exception |= EXC_PTRAUTH_BIT;
            }
#endif /* __has_feature(ptrauth_calls) */
        }
    }

    codes[1] = get_saved_state_pc(state);
    exception_triage(exception, codes, numcodes);
    __builtin_unreachable();
}

static void
handle_watchpoint(vm_offset_t fault_addr)
{
    exception_type_t exception = EXC_BREAKPOINT;
    mach_exception_data_type_t codes[2] = {EXC_ARM_DA_DEBUG};
    mach_msg_type_number_t numcodes = 2;

    codes[1] = fault_addr;
    exception_triage(exception, codes, numcodes);
    __builtin_unreachable();
}

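/*
 * Common entry point for instruction and data aborts: decode the fault status
 * code and access type from the ESR, then dispatch to the user or kernel
 * abort handler.
 */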
static void
handle_abort(arm_saved_state_t *state, uint64_t esr, vm_offset_t fault_addr,
    abort_inspector_t inspect_abort, abort_handler_t handler, expected_fault_handler_t expected_fault_handler)
{
    fault_status_t fault_code;
    vm_prot_t fault_type;

    inspect_abort(ESR_ISS(esr), &fault_code, &fault_type);
    handler(state, esr, fault_addr, fault_code, fault_type, expected_fault_handler);
}

static void
inspect_instruction_abort(uint32_t iss, fault_status_t *fault_code, vm_prot_t *fault_type)
{
    getCpuDatap()->cpu_stat.instr_ex_cnt++;
    *fault_code = ISS_IA_FSC(iss);
    *fault_type = (VM_PROT_READ | VM_PROT_EXECUTE);
}

static void
inspect_data_abort(uint32_t iss, fault_status_t *fault_code, vm_prot_t *fault_type)
{
    getCpuDatap()->cpu_stat.data_ex_cnt++;
    *fault_code = ISS_DA_FSC(iss);

    /*
     * Cache maintenance operations always report faults as write access.
     * Change these to read access, unless they report a permission fault.
     * Only certain cache maintenance operations (e.g. 'dc ivac') require write
     * access to the mapping, but if a cache maintenance operation that only requires
     * read access generates a permission fault, then we will not be able to handle
     * the fault regardless of whether we treat it as a read or write fault.
     */
    if ((iss & ISS_DA_WNR) && (!(iss & ISS_DA_CM) || is_permission_fault(*fault_code))) {
        *fault_type = (VM_PROT_READ | VM_PROT_WRITE);
    } else {
        *fault_type = (VM_PROT_READ);
    }
}

#if __has_feature(ptrauth_calls)
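/* Build a bitmask with bits [bit_from, bit_to] (inclusive) set. */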
1603 static inline uint64_t
fault_addr_bitmask(unsigned int bit_from,unsigned int bit_to)1604 fault_addr_bitmask(unsigned int bit_from, unsigned int bit_to)
1605 {
1606 return ((1ULL << (bit_to - bit_from + 1)) - 1) << bit_from;
1607 }
1608
1609 static inline bool
fault_addr_bit(vm_offset_t fault_addr,unsigned int bit)1610 fault_addr_bit(vm_offset_t fault_addr, unsigned int bit)
1611 {
1612 return (bool)((fault_addr >> bit) & 1);
1613 }
1614
1615 extern int gARM_FEAT_FPAC;
1616 extern int gARM_FEAT_FPACCOMBINE;
1617 extern int gARM_FEAT_PAuth2;
1618
1619 /**
1620 * Determines whether a fault address taken at EL0 contains a PAC error code
1621 * corresponding to the specified kind of ptrauth key.
1622 */
1623 static bool
user_fault_matches_pac_error_code(vm_offset_t fault_addr,uint64_t pc,bool data_key)1624 user_fault_matches_pac_error_code(vm_offset_t fault_addr, uint64_t pc, bool data_key)
1625 {
1626 if (gARM_FEAT_FPACCOMBINE) {
1627 /*
1628 * CPUs with FPACCOMBINE always raise PAC Fail exceptions during
1629 * PAC failure. If the CPU took any other kind of exception, we
1630 * can rule out PAC as the root cause.
1631 */
1632 return false;
1633 }
1634
1635 if (data_key && gARM_FEAT_FPAC) {
1636 uint32_t instr;
1637 int err = copyin(pc, (char *)&instr, sizeof(instr));
1638 if (!err && !ARM64_INSTR_IS_LDRAx(instr)) {
1639 /*
1640 * On FPAC-enabled devices, PAC failure can only cause
1641 * data aborts during "combined" LDRAx instructions. If
1642 * PAC fails during a discrete AUTxx + LDR/STR
1643 * instruction sequence, then the AUTxx instruction
1644 * raises a PAC Fail exception rather than poisoning its
1645 * output address.
1646 *
1647 * In principle the same logic applies to instruction
1648 * aborts. But we have no way to identify the exact
1649 * instruction that caused the abort, so we can't tell
1650 * if it was a combined branch + auth instruction.
1651 */
1652 return false;
1653 }
1654 }
1655
1656 bool instruction_tbi = !(get_tcr() & TCR_TBID0_TBI_DATA_ONLY);
1657 bool tbi = data_key || __improbable(instruction_tbi);
1658
1659 if (gARM_FEAT_PAuth2) {
1660 /*
1661 * EnhancedPAC2 CPUs don't encode error codes at fixed positions, so
1662 * treat all non-canonical address bits like potential poison bits.
1663 */
		uint64_t mask = fault_addr_bitmask(64 - T0SZ_BOOT, 54);
		if (!tbi) {
			mask |= fault_addr_bitmask(56, 63);
		}
		return (fault_addr & mask) != 0;
	} else {
		unsigned int poison_shift;
		if (tbi) {
			poison_shift = 53;
		} else {
			poison_shift = 61;
		}

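		/*
		 * Example (illustrative): with TBI enabled, a poisoned address
		 * has bit 53 != bit 54 (the key number and its complement),
		 * while an untouched canonical address leaves the two bits
		 * equal.
		 */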
		/* PAC error codes are always in the form key_number:NOT(key_number) */
		bool poison_bit_1 = fault_addr_bit(fault_addr, poison_shift);
		bool poison_bit_2 = fault_addr_bit(fault_addr, poison_shift + 1);
		return poison_bit_1 != poison_bit_2;
	}
}
#endif /* __has_feature(ptrauth_calls) */

/**
 * Determines whether the userland thread has a JIT region in RW mode, TPRO
 * in RW mode, or JCTL_EL0 in pointer signing mode. A fault in any of these
 * trusted code paths may indicate an attack on WebKit. Rather than letting a
 * potentially-compromised process try to handle the exception, the kernel
 * kills the process and generates a crash report.
 */
static self_restrict_mode_t
user_fault_in_self_restrict_mode(thread_t thread __unused)
{
	self_restrict_mode_t out = SELF_RESTRICT_NONE;

	return out;
}

static void
handle_pc_align(arm_saved_state_t *ss)
{
	exception_type_t exc;
	mach_exception_data_type_t codes[2];
	mach_msg_type_number_t numcodes = 2;

	if (!PSR64_IS_USER(get_saved_state_cpsr(ss))) {
		panic_with_thread_kernel_state("PC alignment exception from kernel.", ss);
	}

	exc = EXC_BAD_ACCESS;
#if __has_feature(ptrauth_calls)
	uint64_t pc = get_saved_state_pc(ss);
	if (user_fault_matches_pac_error_code(pc, pc, false)) {
		exc |= EXC_PTRAUTH_BIT;
	}
#endif /* __has_feature(ptrauth_calls) */

	codes[0] = EXC_ARM_DA_ALIGN;
	codes[1] = get_saved_state_pc(ss);

	exception_triage(exc, codes, numcodes);
	__builtin_unreachable();
}

static void
handle_sp_align(arm_saved_state_t *ss)
{
	exception_type_t exc;
	mach_exception_data_type_t codes[2];
	mach_msg_type_number_t numcodes = 2;

	if (!PSR64_IS_USER(get_saved_state_cpsr(ss))) {
		panic_with_thread_kernel_state("SP alignment exception from kernel.", ss);
	}

	exc = EXC_BAD_ACCESS;
#if __has_feature(ptrauth_calls)
	if (user_fault_matches_pac_error_code(get_saved_state_sp(ss), get_saved_state_pc(ss), true)) {
		exc |= EXC_PTRAUTH_BIT;
	}
#endif /* __has_feature(ptrauth_calls) */

	codes[0] = EXC_ARM_SP_ALIGN;
	codes[1] = get_saved_state_sp(ss);

	exception_triage(exc, codes, numcodes);
	__builtin_unreachable();
}

static void
handle_wf_trap(arm_saved_state_t *state)
{
	exception_type_t exc;
	mach_exception_data_type_t codes[2];
	mach_msg_type_number_t numcodes = 2;
	uint32_t instr = 0;

	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));

	exc = EXC_BAD_INSTRUCTION;
	codes[0] = EXC_ARM_UNDEFINED;
	codes[1] = instr;

	exception_triage(exc, codes, numcodes);
	__builtin_unreachable();
}

static void
handle_fp_trap(arm_saved_state_t *state, uint64_t esr)
{
	exception_type_t exc = EXC_ARITHMETIC;
	mach_exception_data_type_t codes[2];
	mach_msg_type_number_t numcodes = 2;
	uint32_t instr = 0;

	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
		panic_with_thread_kernel_state("Floating point exception from kernel", state);
	}

	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
	codes[1] = instr;

	/* The floating point trap flags are only valid if TFV is set. */
	if (!fp_exceptions_enabled) {
		exc = EXC_BAD_INSTRUCTION;
		codes[0] = EXC_ARM_UNDEFINED;
	} else if (!(esr & ISS_FP_TFV)) {
		codes[0] = EXC_ARM_FP_UNDEFINED;
	} else if (esr & ISS_FP_UFF) {
		codes[0] = EXC_ARM_FP_UF;
	} else if (esr & ISS_FP_OFF) {
		codes[0] = EXC_ARM_FP_OF;
	} else if (esr & ISS_FP_IOF) {
		codes[0] = EXC_ARM_FP_IO;
	} else if (esr & ISS_FP_DZF) {
		codes[0] = EXC_ARM_FP_DZ;
	} else if (esr & ISS_FP_IDF) {
		codes[0] = EXC_ARM_FP_ID;
	} else if (esr & ISS_FP_IXF) {
		codes[0] = EXC_ARM_FP_IX;
	} else {
		panic("Unrecognized floating point exception, state=%p, esr=%#llx", state, esr);
	}

	exception_triage(exc, codes, numcodes);
	__builtin_unreachable();
}



/*
 * handle_alignment_fault_from_user:
 * state: Saved state
 *
 * Attempts to deal with an alignment fault from userspace (possibly by
 * emulating the faulting instruction). If emulation fails due to an
 * unserviceable fault, the ESR for that fault will be stored in the
 * recover_esr field of the thread by the exception code.
 *
 * Returns:
 * -1:     Emulation failed (emulation of state/instr not supported)
 * 0:      Successfully emulated the instruction
 * EFAULT: Emulation failed (probably due to permissions)
 * EINVAL: Emulation failed (probably due to a bad address)
 */


static int
handle_alignment_fault_from_user(arm_saved_state_t *state, kern_return_t *vmfr)
{
	int ret = -1;

#pragma unused (state)
#pragma unused (vmfr)

	return ret;
}



#if HAS_ARM_FEAT_SME
static void
handle_sme_trap(arm_saved_state_t *state, uint64_t esr)
{
	exception_type_t exc = EXC_BAD_INSTRUCTION;
	mach_exception_data_type_t codes[2] = {EXC_ARM_UNDEFINED};
	mach_msg_type_number_t numcodes = 2;

	if (!PSR64_IS_USER(get_saved_state_cpsr(state))) {
		panic("SME exception from kernel, state=%p, esr=%#llx", state, esr);
	}
	if (!arm_sme_version()) {
		/*
		 * If SME is disabled in software but userspace executes an SME
		 * instruction anyway, then the CPU will still raise an
		 * SME-specific trap. Triage it as if the CPU raised an
		 * undefined-instruction trap.
		 */
		exception_triage(exc, codes, numcodes);
		__builtin_unreachable();
	}

	if (ISS_SME_SMTC(ESR_ISS(esr)) == ISS_SME_SMTC_CAPCR) {
		thread_t thread = current_thread();
		switch (machine_thread_sme_state_alloc(thread)) {
		case KERN_SUCCESS:
			return;


		default:
			panic("Failed to allocate SME state for thread %p", thread);
		}
	}

	uint32_t instr;
	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
	codes[1] = instr;

	exception_triage(exc, codes, numcodes);
	__builtin_unreachable();
}
#endif /* HAS_ARM_FEAT_SME */

static void
handle_sw_step_debug(arm_saved_state_t *state)
{
	thread_t thread = current_thread();
	exception_type_t exc;
	mach_exception_data_type_t codes[2];
	mach_msg_type_number_t numcodes = 2;

	if (!PSR64_IS_USER(get_saved_state_cpsr(state))) {
		panic_with_thread_kernel_state("SW_STEP_DEBUG exception from kernel.", state);
	}

	// Disable single step and unmask interrupts (in the saved state, anticipating next exception return)
	if (thread->machine.DebugData != NULL) {
		thread->machine.DebugData->uds.ds64.mdscr_el1 &= ~0x1;
	} else {
		panic_with_thread_kernel_state("SW_STEP_DEBUG exception thread DebugData is NULL.", state);
	}

	mask_user_saved_state_cpsr(thread->machine.upcb, 0, PSR64_SS | DAIF_ALL);

	// Special encoding for gdb single step event on ARM
	exc = EXC_BREAKPOINT;
	codes[0] = 1;
	codes[1] = 0;

	exception_triage(exc, codes, numcodes);
	__builtin_unreachable();
}

#if MACH_ASSERT
TUNABLE_WRITEABLE(self_restrict_mode_t, panic_on_jit_guard, "panic_on_jit_guard", SELF_RESTRICT_NONE);
#endif /* MACH_ASSERT */

static void
handle_user_abort(arm_saved_state_t *state, uint64_t esr, vm_offset_t fault_addr,
    fault_status_t fault_code, vm_prot_t fault_type, expected_fault_handler_t expected_fault_handler)
{
	exception_type_t exc = EXC_BAD_ACCESS;
	mach_exception_data_type_t codes[2];
	mach_msg_type_number_t numcodes = 2;
	thread_t thread = current_thread();

	(void)expected_fault_handler;

	if (__improbable(!SPSR_INTERRUPTS_ENABLED(get_saved_state_cpsr(state)))) {
		panic_with_thread_kernel_state("User abort from non-interruptible context", state);
	}

	thread->iotier_override = THROTTLE_LEVEL_NONE; /* Reset IO tier override before handling abort from userspace */

	if (!is_servicible_fault(fault_code, esr) &&
	    thread->t_rr_state.trr_fault_state != TRR_FAULT_NONE) {
		thread_reset_pcs_done_faulting(thread);
	}

#if HAS_MTE
	if (is_tag_check_fault(fault_code)) {
		pmap_t current_pmap = current_map()->pmap;
		codes[0] = tag_check_fault_type(current_pmap, fault_addr);
	} else if (is_canonical_memory_permission_fault(esr)) {
		codes[0] = KERN_PROTECTION_FAILURE;
	} else
#endif
	if (is_vm_fault(fault_code)) {
		vm_map_t map = thread->map;
		vm_offset_t vm_fault_addr = fault_addr;
		kern_return_t result = KERN_FAILURE;

		assert(map != kernel_map);

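		/*
		 * Data addresses may carry a TBI tag byte that the VM map does
		 * not know about, so strip it before faulting; instruction
		 * fetch addresses are derived from the PC and carry no tag.
		 */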
		if (!(fault_type & VM_PROT_EXECUTE)) {
			vm_fault_addr = VM_USER_STRIP_TBI(fault_addr);
		}

		/* check to see if it is just a pmap ref/modify fault */
		if (!is_translation_fault(fault_code)) {
			result = arm_fast_fault(map->pmap,
			    vm_fault_addr,
			    fault_type, (fault_code == FSC_ACCESS_FLAG_FAULT_L3), TRUE);
		}
		if (result != KERN_SUCCESS) {

			{
				/* We have to fault the page in */
				result = vm_fault(map, vm_fault_addr, fault_type,
				    /* change_wiring */ FALSE, VM_KERN_MEMORY_NONE, THREAD_ABORTSAFE,
				    /* caller_pmap */ NULL, /* caller_pmap_addr */ 0);
			}
		}
		if (thread->t_rr_state.trr_fault_state != TRR_FAULT_NONE) {
			thread_reset_pcs_done_faulting(thread);
		}
		if (result == KERN_SUCCESS || result == KERN_ABORTED) {
			return;
		}

		/*
		 * vm_fault() should never return KERN_FAILURE for page faults from user space.
		 * If it does, we're leaking preemption disables somewhere in the kernel.
		 */
		if (__improbable(result == KERN_FAILURE)) {
			panic("vm_fault() KERN_FAILURE from user fault on thread %p", thread);
		}

		codes[0] = result;
	} else if (is_alignment_fault(fault_code)) {
		kern_return_t vmfkr = KERN_SUCCESS;
		thread->machine.recover_esr = 0;
		thread->machine.recover_far = 0;
		int result = handle_alignment_fault_from_user(state, &vmfkr);
		if (result == 0) {
			/* Either we successfully emulated the instruction, or
			 * the instruction copyin() for decode/emulation failed.
			 * Either way, return to userspace to continue or
			 * redrive the instruction.
			 */
			thread_exception_return();
		} else if (((result == EFAULT) || (result == EINVAL)) &&
		    (thread->machine.recover_esr == 0)) {
			/*
			 * If we didn't actually take a fault, but got one of
			 * these errors, then we failed basic sanity checks of
			 * the fault address. Treat this as an invalid
			 * address.
			 */
			codes[0] = KERN_INVALID_ADDRESS;
		} else if ((result == EFAULT) &&
		    (thread->machine.recover_esr)) {
			/*
			 * Since alignment aborts are prioritized
			 * ahead of translation aborts, the misaligned
			 * atomic emulation flow may have triggered a
			 * VM pagefault, which the VM could not resolve.
			 * Report the VM fault error in codes[]
			 */

			codes[0] = vmfkr;
			assertf(vmfkr != KERN_SUCCESS, "Unexpected vmfkr 0x%x", vmfkr);
			/* Cause ESR_EC to reflect an EL0 abort */
			thread->machine.recover_esr &= ~ESR_EC_MASK;
			thread->machine.recover_esr |= (ESR_EC_DABORT_EL0 << ESR_EC_SHIFT);
			set_saved_state_esr(thread->machine.upcb, thread->machine.recover_esr);
			set_saved_state_far(thread->machine.upcb, thread->machine.recover_far);
			fault_addr = thread->machine.recover_far;
		} else {
			/* This was just an unsupported alignment
			 * exception. Misaligned atomic emulation
			 * timeouts fall in this category.
			 */
			codes[0] = EXC_ARM_DA_ALIGN;
		}
	} else if (is_parity_error(fault_code)) {
#if defined(APPLE_ARM64_ARCH_FAMILY)
		/*
		 * Platform errors are handled in sleh_sync before interrupts are enabled.
		 */
#else
		panic("User parity error.");
#endif
	} else {
		codes[0] = KERN_FAILURE;
	}

#if CODE_SIGNING_MONITOR
	/*
	 * If the code reaches here, it means we weren't able to resolve the fault and we're
	 * going to be sending the task an exception. On systems which have the code signing
	 * monitor enabled, an execute fault which cannot be handled must result in sending
	 * a SIGKILL to the task.
	 */
	if (is_vm_fault(fault_code) && (fault_type & VM_PROT_EXECUTE)) {
		csm_code_signing_violation(current_proc(), fault_addr);
	}
#endif

	codes[1] = fault_addr;
#if __has_feature(ptrauth_calls)
	bool is_data_abort = (ESR_EC(esr) == ESR_EC_DABORT_EL0);
	if (user_fault_matches_pac_error_code(fault_addr, get_saved_state_pc(state), is_data_abort)) {
		exc |= EXC_PTRAUTH_BIT;
	}
#endif /* __has_feature(ptrauth_calls) */

	const self_restrict_mode_t self_restrict_mode = user_fault_in_self_restrict_mode(thread);
	if ((self_restrict_mode != SELF_RESTRICT_NONE) &&
	    task_is_jit_exception_fatal(get_threadtask(thread))) {
		int flags = PX_KTRIAGE;
		exception_info_t info = {
			.os_reason = OS_REASON_SELF_RESTRICT,
			.exception_type = exc,
			.mx_code = codes[0],
			.mx_subcode = codes[1]
		};

#if MACH_ASSERT
		/*
		 * Case: panic_on_jit_guard=1. Catch an early process creation TPRO issue causing
		 * rdar://129742083. Only panic during early process creation (1 thread, few syscalls
		 * issued) to avoid spurious panics.
		 */
		const self_restrict_mode_t self_restrict_panic_mask = panic_on_jit_guard & self_restrict_mode;
		bool should_panic = ((self_restrict_panic_mask == SELF_RESTRICT_ANY) &&
		    (current_task()->thread_count == 1) &&
		    (thread->syscalls_unix < 24));

		/*
		 * Modes other than ANY will force panic, skipping checks that were done in the ANY case,
		 * but allowing us to filter on a more specific scenario (e.g. TPRO, JIT, etc). This is
		 * meant to catch a TPRO issue causing rdar://145703251. Restrict to KERN_PROTECTION_FAILURE
		 * only to avoid failures from the more frequent case of KERN_INVALID_ADDRESS that aren't
		 * of interest for that radar.
		 */
		should_panic |= (codes[0] == KERN_PROTECTION_FAILURE)
		    && ((self_restrict_panic_mask & ~SELF_RESTRICT_ANY) != 0);

		printf("\nGUARD_REASON_JIT exc %d codes=<0x%llx,0x%llx> syscalls %d task %p thread %p va 0x%lx code 0x%x type 0x%x esr 0x%llx\n",
		    exc, codes[0], codes[1], thread->syscalls_unix, current_task(), thread, fault_addr, fault_code, fault_type, esr);
		if (should_panic) {
			panic("GUARD_REASON_JIT exc %d codes=<0x%llx,0x%llx> syscalls %d task %p thread %p va 0x%lx code 0x%x type 0x%x esr 0x%llx state %p j %d t %d s user 0x%llx (0x%llx) jb 0x%llx (0x%llx)",
			    exc, codes[0], codes[1], thread->syscalls_unix, current_task(), thread, fault_addr, fault_code, fault_type, esr, state,
			    0, 0, 0ull, 0ull,
			    0ull, 0ull
			    );
		}
#endif /* MACH_ASSERT */

		exit_with_mach_exception(current_proc(), info, flags);
	}

#if HAS_MTE
	if (codes[0] == EXC_ARM_MTE_TAGCHECK_FAIL || codes[0] == EXC_ARM_MTE_CANONICAL_FAIL) {
		/*
		 * If soft-mode is enabled, we trigger a simulated crash, then we'll clear TCF0
		 * and let the thread try again.
		 */
		if (task_has_sec_soft_mode(current_task())) {
			mte_send_sync_soft_mode_exception(thread, /* fault_addr */ codes[1], /* mx_code */ codes[0]);
			/* Disable tag checking for userspace addresses. This will be our first and last tag check fault. */
			mte_disable_user_checking(current_task());

			if (thread->t_rr_state.trr_fault_state != TRR_FAULT_NONE) {
				thread_reset_pcs_done_faulting(thread);
			}
			/* Retry with tag checking disabled. */
			thread_exception_return();
		}

		/* Hard-mode: */
		int flags = PX_KTRIAGE;
		exception_info_t info = {
			.os_reason = OS_REASON_MTE_FAIL,
			.exception_type = exc,
			.mx_code = codes[0],
			.mx_subcode = codes[1]
		};
		exit_with_mach_exception(current_proc(), info, flags);
	}
#endif /* HAS_MTE */

	exception_triage(exc, codes, numcodes);
	__builtin_unreachable();
}

/**
 * Panic because the kernel abort handler tried to apply a recovery handler that
 * isn't inside copyio_recover_table[].
 *
 * @param state original saved-state
 * @param recover invalid recovery handler
 */
__attribute__((noreturn, used))
static void
panic_on_invalid_recovery_handler(arm_saved_state_t *state, struct copyio_recovery_entry *recover)
{
	panic("attempt to set invalid recovery handler %p on kernel saved-state %p", recover, state);
}

/**
 * Update a thread saved-state to store an error code in x0 and branch to a
 * copyio recovery handler.
 *
 * @param state original saved-state
 * @param esr ESR_ELx value for the fault taken
 * @param fault_addr FAR_ELx value for the fault taken
 * @param thread target thread
 * @param recover destination copyio recovery handler
 * @param x0 error code to populate into x0
 */
static void
handle_kernel_abort_recover_with_error_code(
	arm_saved_state_t *state,
	uint64_t esr,
	vm_offset_t fault_addr,
	thread_t thread,
	struct copyio_recovery_entry *_Nonnull recover,
	uint64_t x0)
{
	thread->machine.recover_esr = esr;
	thread->machine.recover_far = fault_addr;
	saved_state64(state)->x[0] = x0;
#if defined(HAS_APPLE_PAC)
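	/*
	 * The assembly below mirrors the C fallback in the #else branch: it
	 * panics unless `recover` lies within
	 * [copyio_recover_table, copyio_recover_table_end) at an offset that
	 * is a multiple of sizeof(struct copyio_recovery_entry), and only
	 * then loads cre_recovery and rebases it against the table base to
	 * form the new (re-signed) PC.
	 */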
	MANIPULATE_SIGNED_THREAD_STATE(state,
	    "adrp   x6, _copyio_recover_table_end@page          \n"
	    "add    x6, x6, _copyio_recover_table_end@pageoff   \n"
	    "cmp    %[recover], x6                              \n"
	    "b.lt   1f                                          \n"
	    "bl     _panic_on_invalid_recovery_handler          \n"
	    "brk    #0                                          \n"
	    "1:                                                 \n"
	    "adrp   x6, _copyio_recover_table@page              \n"
	    "add    x6, x6, _copyio_recover_table@pageoff       \n"
	    "subs   x7, %[recover], x6                          \n"
	    "b.pl   1f                                          \n"
	    "bl     _panic_on_invalid_recovery_handler          \n"
	    "brk    #0                                          \n"
	    "1:                                                 \n"
	    "udiv   x8, x7, %[SIZEOF_RECOVER]                   \n"
	    "mul    x8, x8, %[SIZEOF_RECOVER]                   \n"
	    "cmp    x7, x8                                      \n"
	    "b.eq   1f                                          \n"
	    "bl     _panic_on_invalid_recovery_handler          \n"
	    "brk    #0                                          \n"
	    "1:                                                 \n"
	    "ldr    x1, [%[recover], %[CRE_RECOVERY]]           \n"
	    "add    x1, x1, x6                                  \n"
	    "str    x1, [x0, %[SS64_PC]]                        \n",
	    [recover] "r"(recover),
	    [SIZEOF_RECOVER] "r"((sizeof(*recover))),
	    [CRE_RECOVERY] "i"(offsetof(struct copyio_recovery_entry, cre_recovery))
	    );
#else
	ptrdiff_t recover_offset = (uintptr_t)recover - (uintptr_t)copyio_recover_table;
	if ((uintptr_t)recover < (uintptr_t)copyio_recover_table ||
	    (uintptr_t)recover >= (uintptr_t)copyio_recover_table_end ||
	    (recover_offset % sizeof(*recover)) != 0) {
		panic_on_invalid_recovery_handler(state, recover);
	}
	saved_state64(state)->pc = copyio_recovery_addr(recover->cre_recovery);
#endif
}

static inline void
handle_kernel_abort_recover(
	arm_saved_state_t *state,
	uint64_t esr,
	vm_offset_t fault_addr,
	thread_t thread,
	struct copyio_recovery_entry *_Nonnull recover)
{
	handle_kernel_abort_recover_with_error_code(state, esr, fault_addr, thread, recover, EFAULT);
}

#if HAS_MTE
static void
mte_send_sync_soft_mode_exception(thread_t thread, vm_map_address_t address, mach_exception_data_type_t mx_code)
{
	uint64_t code = mx_code | kGUARD_EXC_MTE_SOFT_MODE;
	EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_VIRT_MEMORY);
	EXC_GUARD_ENCODE_FLAVOR(code, kGUARD_EXC_MTE_SYNC_FAULT);
	thread_guard_violation(thread, code, address, /* fatal */ false);
}

/*
 * We took a fault during a copyio routine, over a user address, in the context
 * of a user thread that "synchronously" asked the kernel to access a pointer.
 * The intention is to kill the user thread (EXC_BAD_ACCESS), but since we might
 * be within an interrupt context, delay sending the exception to the guard AST.
 */
static void
mte_send_sync_kernel_on_user_fault(thread_t thread, vm_map_address_t fault_addr, mach_exception_data_type_t mx_code)
{
	/* Soft-mode: send a sync fault simulated exception. */
	if (task_has_sec_soft_mode(get_threadtask(thread))) {
		mte_send_sync_soft_mode_exception(thread, fault_addr, mx_code);
		return;
	}

	/* Hard-mode: send an old fashioned Tag Check Fault. */
	set_saved_state_far(thread->machine.upcb, fault_addr);
	int flags = PX_KTRIAGE;
	exception_info_t info = {
		.os_reason = OS_REASON_MTE_FAIL,
		.exception_type = EXC_BAD_ACCESS,
		.mx_code = mx_code,
		.mx_subcode = fault_addr
	};
	exit_with_mach_exception_using_ast(info, flags, /* fatal */ true);
}

/*
 * We took a fault while servicing an AST, over a user address, on behalf of
 * a user process. The intention is to kill the user thread. The notification
 * is sent to the current thread, so we can use
 * thread_guard_violation()->thread_ast_mach_exception().
 *
 * @param thread current thread.
 * @param mx_code must be either a TAG CHECK FAIL or a CANONICAL (TAG CHECK) FAIL.
 * @param fault_addr the address that the fault was taken on.
 */
static void
mte_send_async_ast_fault(thread_t thread, mach_exception_data_type_t mx_code, vm_map_address_t fault_addr)
{
	assert(mx_code == EXC_ARM_MTE_TAGCHECK_FAIL || mx_code == EXC_ARM_MTE_CANONICAL_FAIL);
	uint64_t code = mx_code;
	bool soft_mode = task_has_sec_soft_mode(get_threadtask(thread));

	if (soft_mode) {
		code |= kGUARD_EXC_MTE_SOFT_MODE;
	}

	EXC_GUARD_ENCODE_TYPE(code, GUARD_TYPE_VIRT_MEMORY);
	EXC_GUARD_ENCODE_FLAVOR(code, kGUARD_EXC_MTE_ASYNC_USER_FAULT);

	thread_guard_violation(thread, code, fault_addr, /* not fatal in soft_mode */ !soft_mode);
}

static void
mte_record_async_tag_check_fault_address(vm_map_t map, vm_map_address_t fault_address)
{
	/*
	 * Verify the address being reported (and the min address of the map) don't
	 * conflict with any of the magic values used by this mechanism. These
	 * asserts should not fire currently as the first page of VA is not mappable
	 * in user maps today.
	 */
	assert(fault_address >= VM_ASYNC_TAG_FAULT_MIN_VALID_ADDR);
	assert(vm_map_min(map) >= VM_ASYNC_TAG_FAULT_MIN_VALID_ADDR);

	/*
	 * Attempt to report the faulting address. If this fails, we know that a
	 * faulting address has already been reported. Accordingly, we can just
	 * ignore the failure and continue on since we never send more than one MTE
	 * guard exception per task anyway.
	 */
	(void)os_atomic_cmpxchg(&map->async_tag_fault_address, 0, fault_address, relaxed);

	/*
	 * We cannot set the AST here, as we'd need to take a task lock and we may
	 * deadlock. On exit from the switched map operation or on return from
	 * the IOMD read/writeBytes path, the caller will check whether an exception
	 * happened by inspecting `async_tag_fault_address` and act accordingly.
	 */
}

/*
 * We took a fault accessing a userspace address, while in a kernel thread that
 * temporarily switched to the user map in order to do work on behalf of the target process.
 * Record onto the map the faulting address.
 *
 * This is essentially a thin layer over mte_record_async_tag_check_fault_address(),
 * just adding a few sanity checks so that we notice if we start hitting unexpected faults.
 */
static void
mte_record_async_kernel_interposed_map_fault_address(vm_map_address_t fault_addr)
{
	vm_map_t map = current_map();

	assert(vm_kernel_map_is_kernel(current_task()->map));
	assert(!vm_kernel_map_is_kernel(map));

	if (!map->owning_task && !map->terminated) {
		panic("Kernel tag-check fault on %p @ %#llx prior to vm_map_setup",
		    map, map->async_tag_fault_address);
	}

	mte_record_async_tag_check_fault_address(map, fault_addr);
}

static void
handle_kernel_tag_check_fault(arm_saved_state_t *state, uint64_t esr, vm_offset_t fault_addr,
    thread_t thread, struct copyio_recovery_entry *recover)
{
	/*
	 * MTE tag check faults are treated as non-recoverable security
	 * violations, even when they're raised inside a copyio routine.
	 *
	 * If the fault happened while accessing a user address inside a copyio
	 * routine in the context of a userspace process, assume the current
	 * process supplied that address and deliver a Mach exception. This may
	 * manifest either as `EXC_BAD_ACCESS` or a Mach guard exception.
	 * The former is used when the fault was raised while servicing a
	 * synchronous EL0 exception, so the crashing EL0 thread state will
	 * likely have something to do with the root cause.
	 *
	 * If such a fault happened in the context of a kernel thread, assume
	 * that the kernel is doing work on behalf of the process that owns the
	 * current map and set a guard exception on the map to asynchronously
	 * kill it.
	 *
	 * If the fault happened while accessing a kernel address, and the
	 * copyio handler didn't explicitly say that it needs to tolerate kernel
	 * tag check faults, then panic. (We'll assume the kernel is always
	 * responsible for mistagged kernel addresses: copy_validate() should
	 * keep misbehaving userspace processes from passing those in.)
	 */

	/* Use the TTBR selector to determine whether it's a user or kernel address. */
	bool is_user_addr = (fault_addr & TTBR_SELECTOR) == 0;
	/* Are we running as a kernel thread. */
	bool is_kernel_thread = vm_kernel_map_is_kernel(current_task()->map);

	/*
	 * Only attempt recovery if we have a recovery handler associated.
	 * Recovery step will differ depending on whether we faulted on a user or kernel address.
	 */
	if (recover) {
		if (is_user_addr) {
			uint64_t error_code = EFAULT;
			task_t owning_task = current_task();
			bool in_el0_sync_trap = thread->machine.el0_synchronous_trap &&
			    current_cpu_datap()->cpu_int_state == NULL;

			if (in_el0_sync_trap) {
				/* "Synchronous" software exception. */
				mach_exception_data_type_t code = tag_check_fault_type(current_map()->pmap, fault_addr);
				mte_send_sync_kernel_on_user_fault(thread, fault_addr, code);
			} else {
2402 /* "Asynchrnous" software exception */
#if DEVELOPMENT || DEBUG
				if (mte_panic_on_async_fault()) {
					panic_with_thread_kernel_state("Kernel AST tag check fault accessing user space", state);
				}
#endif /* DEVELOPMENT || DEBUG */

				if (is_kernel_thread) {
					/* kernel thread executes with switched map. */
					mte_record_async_kernel_interposed_map_fault_address(fault_addr);
					owning_task = current_map()->owning_task;
				} else {
					/* Asynchronous but within current_thread() */
					mach_exception_data_type_t code = tag_check_fault_type(current_map()->pmap, fault_addr);
					mte_send_async_ast_fault(thread, code, fault_addr);
				}
			}
			/* If in soft-mode, retry with tag checking disabled. */
			if (task_has_sec_soft_mode(owning_task)) {
				mte_disable_user_checking(owning_task);
				error_code = EAGAIN;
			}
			/*
			 * We took this exception from inside kernel copyio code. Even
			 * if we're not going to retry it, the kernel thread needs to
			 * clean things up by branching to the copyio recovery handler.
			 */
			handle_kernel_abort_recover_with_error_code(state, esr, fault_addr, thread, recover, error_code);
			return;
		} else {
			/* Further filter by the direction of access that is actually allowed to fault. */
			bool is_write_access = (ESR_ISS(esr) & ISS_DA_WNR);

			if ((recover->recover_from_kernel_read_tag_check_fault && !is_write_access) ||
			    (recover->recover_from_kernel_write_tag_check_fault && is_write_access)) {
				/* Are we within an IOMD critical region - that will give us a task to blame. */
				task_t task_providing_faultable_buffer = current_thread_get_iomd_faultable_access_buffer_provider();
				if (task_providing_faultable_buffer != NULL) {
					/* Same drill as the kernel thread case above: record here the required information. */
#if DEVELOPMENT || DEBUG
					mte_record_async_tag_check_fault_address(task_providing_faultable_buffer->map, fault_addr);
					if (mte_panic_on_async_fault()) {
						panic_with_thread_kernel_state("Kernel AST tag check fault accessing physmap", state);
					}
#else /* DEVELOPMENT || DEBUG */
					mte_record_async_tag_check_fault_address(task_providing_faultable_buffer->map, 0xdeadbeef);
#endif /* DEVELOPMENT || DEBUG */
				}

				/* No fault, just clean recovery returning EFAULT. */
				handle_kernel_abort_recover(state, esr, fault_addr, thread, recover);
				return;
			}
			/* Fallthrough to regular panic scenario. */
		}
	}

	/*
	 * Everything past this point doesn't have a recovery handler and is a fatal violation.
	 * Package up and report as much useful information as possible.
	 */
#define MSG_FMT "Kernel tag check fault (expected tagged address: 0x%016llx)"
	char msg[strlen(MSG_FMT)
	    - strlen("0x%016llx") + strlen("0xFFFFFFFFFFFFFFFF")
	    + 1];

	vm_address_t expected_tagged_address = vm_memtag_load_tag(fault_addr);
	snprintf(msg, sizeof(msg), MSG_FMT, (uint64_t)expected_tagged_address);
	panic_with_thread_kernel_state(msg, state);
#undef MSG_FMT
}
#endif /* HAS_MTE */

static void
handle_kernel_abort(arm_saved_state_t *state, uint64_t esr, vm_offset_t fault_addr,
    fault_status_t fault_code, vm_prot_t fault_type, expected_fault_handler_t expected_fault_handler)
{
	thread_t thread = current_thread();
	struct copyio_recovery_entry *recover = find_copyio_recovery_entry(
		get_saved_state_pc(state));

#ifndef CONFIG_XNUPOST
	(void)expected_fault_handler;
#endif /* CONFIG_XNUPOST */


#if CONFIG_DTRACE
	if (is_vm_fault(fault_code) && thread->t_dtrace_inprobe) { /* Executing under dtrace_probe? */
		if (dtrace_tally_fault(fault_addr)) { /* Should a fault under dtrace be ignored? */
			/*
			 * Point to next instruction, or recovery handler if set.
			 */
			if (recover) {
				handle_kernel_abort_recover(state, esr, VM_USER_STRIP_PTR(fault_addr), thread, recover);
			} else {
				add_saved_state_pc(state, 4);
			}
			return;
		} else {
			panic_with_thread_kernel_state("Unexpected page fault under dtrace_probe", state);
		}
	}
#endif

#if HAS_MTE
	if (is_tag_check_fault(fault_code)) {
#ifdef CONFIG_XNUPOST
		if (expected_fault_handler && expected_fault_handler(state)) {
			return;
		}
#endif /* CONFIG_XNUPOST */

		handle_kernel_tag_check_fault(state, esr, fault_addr, thread, recover);
		return;
	} else
#endif /* HAS_MTE */
	if (is_vm_fault(fault_code)) {
		kern_return_t result = KERN_FAILURE;
		vm_map_t map;
		int interruptible;

#ifdef CONFIG_XNUPOST
		if (expected_fault_handler && expected_fault_handler(state)) {
			return;
		}
#endif /* CONFIG_XNUPOST */

		if (VM_KERNEL_ADDRESS(fault_addr) || thread == THREAD_NULL || recover == 0) {
			/*
			 * If no recovery handler is supplied, always drive the fault against
			 * the kernel map. If the fault was taken against a userspace VA, indicating
			 * an unprotected access to user address space, vm_fault() should fail and
			 * ultimately lead to a panic here.
			 */
			map = kernel_map;
			interruptible = THREAD_UNINT;

#if CONFIG_KERNEL_TAGGING
			/*
			 * If kernel tagging is enabled, canonicalize the address here, so that we have a
			 * chance to find it in the VM ranges. Do not mess with exec fault cases.
			 */
			if (!((fault_type) & VM_PROT_EXECUTE)) {
				fault_addr = vm_memtag_canonicalize(map, fault_addr);
			}
#endif /* CONFIG_KERNEL_TAGGING */
		} else {
			map = thread->map;

			/**
			 * In the case that the recovery handler is set (e.g., during copyio
			 * and dtrace probes), we don't want the vm_fault() operation to be
			 * aborted early. Those code paths can't handle restarting the
			 * vm_fault() operation so don't allow it to return early without
			 * creating the wanted mapping.
			 */
			interruptible = (recover) ? THREAD_UNINT : THREAD_ABORTSAFE;

#if HAS_MTE || HAS_MTE_EMULATION_SHIMS
			/*
			 * If we have MTE enabled on the process, allow recovery of tagged
			 * addresses.
			 */
			if (current_task_has_sec_enabled() && recover) {
				if (!((fault_type) & VM_PROT_EXECUTE)) {
					fault_addr = vm_memtag_canonicalize(map, fault_addr);
				}
			}
#endif /* HAS_MTE || HAS_MTE_EMULATION_SHIMS */
		}

		/*
		 * Ensure no faults in the physical aperture. This could happen if
		 * a page table is incorrectly allocated from the read only region
		 * when running with KTRR.
		 */
		if (__improbable(fault_addr >= physmap_base) && (fault_addr < physmap_end)) {
			panic_with_thread_kernel_state("Unexpected fault in kernel physical aperture", state);
		}
		if (__improbable(fault_addr >= gVirtBase && fault_addr < static_memory_end)) {
			panic_with_thread_kernel_state("Unexpected fault in kernel static region", state);
		}

		/* check to see if it is just a pmap ref/modify fault */
		if (!is_translation_fault(fault_code)) {
			result = arm_fast_fault(map->pmap,
			    fault_addr,
			    fault_type, (fault_code == FSC_ACCESS_FLAG_FAULT_L3), FALSE);
			if (result == KERN_SUCCESS) {
				return;
			}
		}

		/**
		 * vm_fault() can be called with preemption disabled (and indeed this is expected for
		 * certain copyio() scenarios), but can't safely be called with interrupts disabled once
		 * the system has gone multi-threaded. Other than some early-boot situations such as
		 * startup kext loading, kernel paging operations should never be triggered by
		 * non-interruptible code in the first place, so a fault from such a context will
		 * ultimately produce a kernel data abort panic anyway. In these cases, skip calling
		 * vm_fault() to avoid masking the real kernel panic with a failed VM locking assertion.
		 */
		if (__probable(SPSR_INTERRUPTS_ENABLED(get_saved_state_cpsr(state)) ||
		    startup_phase < STARTUP_SUB_EARLY_BOOT ||
		    current_cpu_datap()->cpu_hibernate)) {
			if (result != KERN_PROTECTION_FAILURE) {
				// VM will query this property when deciding whether to throttle this fault; we
				// don't want to throttle kernel faults taken in copyio code. The presence of a
				// recovery entry is used as a proxy for being in copyio code.
				bool const was_recover = thread->recover;
				thread->recover = was_recover || recover;

				/*
				 * We have to "fault" the page in.
				 */
				result = vm_fault(map, fault_addr, fault_type,
				    /* change_wiring */ FALSE, VM_KERN_MEMORY_NONE, interruptible,
				    /* caller_pmap */ NULL, /* caller_pmap_addr */ 0);

				thread->recover = was_recover;
			}

			if (result == KERN_SUCCESS) {
				return;
			}
		}

		/*
		 * If we have a recover handler, invoke it now.
		 */
		if (recover) {
			handle_kernel_abort_recover(state, esr, fault_addr, thread, recover);
			return;
		}

		panic_fault_address = fault_addr;
	} else if (is_alignment_fault(fault_code)) {
		if (recover) {
			handle_kernel_abort_recover(state, esr, fault_addr, thread, recover);
			return;
		}
		panic_with_thread_kernel_state("Unaligned kernel data abort.", state);
	} else if (is_parity_error(fault_code)) {
#if defined(APPLE_ARM64_ARCH_FAMILY)
		/*
		 * Platform errors are handled in sleh_sync before interrupts are enabled.
		 */
#else
		panic_with_thread_kernel_state("Kernel parity error.", state);
#endif
	} else {
		kprintf("Unclassified kernel abort (fault_code=0x%x)\n", fault_code);
	}

	panic_with_thread_kernel_state("Kernel data abort.", state);
}

extern void syscall_trace(struct arm_saved_state * regs);

static void
handle_svc(arm_saved_state_t *state)
{
	int trap_no = get_saved_state_svc_number(state);
	thread_t thread = current_thread();
	struct proc *p;

#define handle_svc_kprintf(x...) /* kprintf("handle_svc: " x) */

#define TRACE_SYSCALL 1
#if TRACE_SYSCALL
	syscall_trace(state);
#endif

	thread->iotier_override = THROTTLE_LEVEL_NONE; /* Reset IO tier override before handling SVC from userspace */

	if (trap_no == (int)PLATFORM_SYSCALL_TRAP_NO) {
		platform_syscall(state);
		panic("Returned from platform_syscall()?");
	}

	current_cached_proc_cred_update();

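	/*
	 * Negative trap numbers select Mach traps (including the fast
	 * time-query traps special-cased below); non-negative numbers are
	 * BSD/unix syscalls dispatched via unix_syscall().
	 */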
	if (trap_no < 0) {
		switch (trap_no) {
		case MACH_ARM_TRAP_ABSTIME:
			handle_mach_absolute_time_trap(state);
			return;
		case MACH_ARM_TRAP_CONTTIME:
			handle_mach_continuous_time_trap(state);
			return;
		}

		/* Counting perhaps better in the handler, but this is how it's been done */
		thread->syscalls_mach++;
		mach_syscall(state);
	} else {
		/* Counting perhaps better in the handler, but this is how it's been done */
		thread->syscalls_unix++;
		p = get_bsdthreadtask_info(thread);

		assert(p);

		unix_syscall(state, thread, p);
	}
}

static void
handle_mach_absolute_time_trap(arm_saved_state_t *state)
{
	uint64_t now = mach_absolute_time();
	saved_state64(state)->x[0] = now;
}

static void
handle_mach_continuous_time_trap(arm_saved_state_t *state)
{
	uint64_t now = mach_continuous_time();
	saved_state64(state)->x[0] = now;
}


__attribute__((noreturn))
static void
handle_msr_trap(arm_saved_state_t *state, uint64_t esr)
{
	exception_type_t exception = EXC_BAD_INSTRUCTION;
	mach_exception_data_type_t codes[2] = {EXC_ARM_UNDEFINED};
	mach_msg_type_number_t numcodes = 2;
	uint32_t instr = 0;

	if (!is_saved_state64(state)) {
		panic("MSR/MRS trap (ESR 0x%llx) from 32-bit state", esr);
	}

	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
		panic("MSR/MRS trap (ESR 0x%llx) from kernel", esr);
	}

	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
	codes[1] = instr;

	exception_triage(exception, codes, numcodes);
	__builtin_unreachable();
}

#if __has_feature(ptrauth_calls)
static void
stringify_gpr(unsigned int r, char reg[4])
{
	switch (r) {
	case 29:
		strncpy(reg, "fp", 4);
		return;

	case 30:
		strncpy(reg, "lr", 4);
		return;

	case 31:
		strncpy(reg, "xzr", 4);
		return;

	default:
		snprintf(reg, 4, "x%u", r);
		return;
	}
}

static void
autxx_instruction_extract_reg(uint32_t instr, char reg[4])
{
	unsigned int rd = ARM64_INSTR_AUTxx_RD_GET(instr);
	stringify_gpr(rd, reg);
}

static const char *
autix_system_instruction_extract_reg(uint32_t instr)
{
	unsigned int crm_op2 = ARM64_INSTR_AUTIx_SYSTEM_CRM_OP2_GET(instr);
	if (crm_op2 == ARM64_INSTR_AUTIx_SYSTEM_CRM_OP2_AUTIA1716 ||
	    crm_op2 == ARM64_INSTR_AUTIx_SYSTEM_CRM_OP2_AUTIB1716) {
		return "x17";
	} else {
		return "lr";
	}
}

static void
bxrax_instruction_extract_reg(uint32_t instr, char reg[4])
{
	unsigned int rn = ARM64_INSTR_BxRAx_RN_GET(instr);
	stringify_gpr(rn, reg);
}

static void
handle_pac_fail(arm_saved_state_t *state, uint64_t esr)
{
	exception_type_t exception = EXC_BAD_ACCESS | EXC_PTRAUTH_BIT;
	mach_exception_data_type_t codes[2] = {EXC_ARM_PAC_FAIL};
	mach_msg_type_number_t numcodes = 2;
	uint32_t instr = 0;

	if (!is_saved_state64(state)) {
		panic("PAC failure (ESR 0x%llx) from 32-bit state", esr);
	}

	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));

	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
#define GENERIC_PAC_FAILURE_MSG_FMT "PAC failure from kernel with %s key"
#define AUTXX_MSG_FMT GENERIC_PAC_FAILURE_MSG_FMT " while authing %s"
#define BXRAX_MSG_FMT GENERIC_PAC_FAILURE_MSG_FMT " while branching to %s"
#define RETAX_MSG_FMT GENERIC_PAC_FAILURE_MSG_FMT " while returning"
#define GENERIC_MSG_FMT GENERIC_PAC_FAILURE_MSG_FMT
#define MAX_PAC_MSG_FMT BXRAX_MSG_FMT

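		/*
		 * Size the buffer for the worst case: the longest format with
		 * "%s" expanded to a two-character key name (e.g. "IA") and a
		 * three-character register name (e.g. "xzr"), plus the NUL.
		 */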
		char msg[strlen(MAX_PAC_MSG_FMT)
		    - strlen("%s") + strlen("IA")
		    - strlen("%s") + strlen("xzr")
		    + 1];
		ptrauth_key key = (ptrauth_key)(esr & 0x3);
		const char *key_str = ptrauth_key_to_string(key);

		if (ARM64_INSTR_IS_AUTxx(instr)) {
			char reg[4];
			autxx_instruction_extract_reg(instr, reg);
			snprintf(msg, sizeof(msg), AUTXX_MSG_FMT, key_str, reg);
		} else if (ARM64_INSTR_IS_AUTIx_SYSTEM(instr)) {
			const char *reg = autix_system_instruction_extract_reg(instr);
			snprintf(msg, sizeof(msg), AUTXX_MSG_FMT, key_str, reg);
		} else if (ARM64_INSTR_IS_BxRAx(instr)) {
			char reg[4];
			bxrax_instruction_extract_reg(instr, reg);
			snprintf(msg, sizeof(msg), BXRAX_MSG_FMT, key_str, reg);
		} else if (ARM64_INSTR_IS_RETAx(instr)) {
			snprintf(msg, sizeof(msg), RETAX_MSG_FMT, key_str);
		} else {
			snprintf(msg, sizeof(msg), GENERIC_MSG_FMT, key_str);
		}
		panic_with_thread_kernel_state(msg, state);
	}

	codes[1] = instr;

	exception_triage(exception, codes, numcodes);
	__builtin_unreachable();
}
#endif /* __has_feature(ptrauth_calls) */

__attribute__((noreturn))
static void
handle_bti_fail(arm_saved_state_t *state, uint64_t esr)
{
	uint32_t btype = (uint32_t) esr & ISS_BTI_BTYPE_MASK;

	if (!is_saved_state64(state)) {
		/* BTI is an AArch64-only (ARMv8.5) feature; a 32-bit BTI fault should not be possible. */
2859 panic("BTI failure for 32-bit state? (ESR=0x%llx)", esr);
2860 }
2861
2862 /*
2863 * We currently only expect BTI to be enabled for kernel pages, so panic if
2864 * we detect otherwise.
2865 */
2866 if (!PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
2867 panic("Unexpected non-kernel BTI failure? (ESR=0x%llx)", esr);
2868 }
2869
2870 #define BTI_FAIL_PTR_FMT "%04x"
2871 #define BTI_FAIL_MSG_FMT "Kernel BTI failure (BTYPE=0x" BTI_FAIL_PTR_FMT ")"
	/* Size the buffer so the BTYPE format can expand to up to 8 hex digits, plus the NUL. */
	char msg[strlen(BTI_FAIL_MSG_FMT) - strlen(BTI_FAIL_PTR_FMT) + 8 + 1];
	snprintf(msg, sizeof(msg), BTI_FAIL_MSG_FMT, btype);
	panic_with_thread_kernel_state(msg, state);
	__builtin_unreachable();
}

static void
handle_user_trapped_instruction32(arm_saved_state_t *state, uint64_t esr)
{
	exception_type_t exception = EXC_BAD_INSTRUCTION;
	mach_exception_data_type_t codes[2] = {EXC_ARM_UNDEFINED};
	mach_msg_type_number_t numcodes = 2;
	uint32_t instr;

	if (is_saved_state64(state)) {
		panic("ESR (0x%llx) for instruction trapped from U32, but saved state is 64-bit.", esr);
	}

	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
		panic("ESR (0x%llx) for instruction trapped from U32, actually came from kernel?", esr);
	}

	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
	codes[1] = instr;

	exception_triage(exception, codes, numcodes);
	__builtin_unreachable();
}

static void
handle_simd_trap(arm_saved_state_t *state, uint64_t esr)
{
	exception_type_t exception = EXC_BAD_INSTRUCTION;
	mach_exception_data_type_t codes[2] = {EXC_ARM_UNDEFINED};
	mach_msg_type_number_t numcodes = 2;
	uint32_t instr = 0;

	if (PSR64_IS_KERNEL(get_saved_state_cpsr(state))) {
		panic("ESR (0x%llx) for SIMD trap from userland, actually came from kernel?", esr);
	}

	COPYIN(get_saved_state_pc(state), (char *)&instr, sizeof(instr));
	codes[1] = instr;

	exception_triage(exception, codes, numcodes);
	__builtin_unreachable();
}

void
sleh_irq(arm_saved_state_t *state)
{
	cpu_data_t *cdp __unused = getCpuDatap();
#if MACH_ASSERT
	int preemption_level = sleh_get_preemption_level();
#endif


	sleh_interrupt_handler_prologue(state, DBG_INTR_TYPE_OTHER);

#if USE_APPLEARMSMP
	PE_handle_ext_interrupt();
#else
	/* Run the registered interrupt handler. */
	cdp->interrupt_handler(cdp->interrupt_target,
	    cdp->interrupt_refCon,
	    cdp->interrupt_nub,
	    cdp->interrupt_source);
#endif

	entropy_collect();


	sleh_interrupt_handler_epilogue();
#if MACH_ASSERT
	if (preemption_level != sleh_get_preemption_level()) {
		panic("irq handler %p changed preemption level from %d to %d", cdp->interrupt_handler, preemption_level, sleh_get_preemption_level());
	}
#endif
}

void
sleh_fiq(arm_saved_state_t *state)
{
	unsigned int type = DBG_INTR_TYPE_UNKNOWN;
#if MACH_ASSERT
	int preemption_level = sleh_get_preemption_level();
#endif

#if MONOTONIC_FIQ
	uint64_t pmcr0 = 0, upmsr = 0;
#endif /* MONOTONIC_FIQ */

#if defined(HAS_IPI)
	boolean_t is_ipi = FALSE;
	uint64_t ipi_sr = 0;

	if (gFastIPI) {
		MRS(ipi_sr, "S3_5_C15_C1_1");

		if (ipi_sr & ARM64_IPISR_IPI_PENDING) {
			is_ipi = TRUE;
		}
	}

	if (is_ipi) {
		type = DBG_INTR_TYPE_IPI;
	} else
#endif /* defined(HAS_IPI) */
	if (ml_get_timer_pending()) {
		type = DBG_INTR_TYPE_TIMER;
	}
#if MONOTONIC_FIQ
	/*
	 * Consult the PMI sysregs last, after IPI/timer
	 * classification.
	 */
	else if (mt_pmi_pending(&pmcr0, &upmsr)) {
		type = DBG_INTR_TYPE_PMI;
	}
#endif /* MONOTONIC_FIQ */

	sleh_interrupt_handler_prologue(state, type);

#if APPLEVIRTUALPLATFORM
	uint64_t iar = __builtin_arm_rsr64("ICC_IAR0_EL1");
#endif

#if defined(HAS_IPI)
	if (type == DBG_INTR_TYPE_IPI) {
		/*
		 * Order is important here: we must ack the IPI by writing IPI_SR
		 * before we call cpu_signal_handler(). Otherwise, there will be
		 * a window between the completion of pending-signal processing in
		 * cpu_signal_handler() and the ack during which a newly-issued
		 * IPI to this CPU may be lost. ISB is required to ensure the msr
		 * is retired before execution of cpu_signal_handler().
		 */
		MSR("S3_5_C15_C1_1", ARM64_IPISR_IPI_PENDING);
		__builtin_arm_isb(ISB_SY);
		cpu_signal_handler();
	} else
#endif /* defined(HAS_IPI) */
#if MONOTONIC_FIQ
	if (type == DBG_INTR_TYPE_PMI) {
		ml_interrupt_masked_debug_start(mt_fiq, DBG_INTR_TYPE_PMI);
		mt_fiq(getCpuDatap(), pmcr0, upmsr);
		ml_interrupt_masked_debug_end();
	} else
#endif /* MONOTONIC_FIQ */
	{
		/*
		 * We don't know that this is a timer, but we don't have insight into
		 * the other interrupts that go down this path.
		 */

		cpu_data_t *cdp = getCpuDatap();

		cdp->cpu_decrementer = -1; /* Large */

		/*
		 * ARM64_TODO: whether we're coming from userland is ignored right now.
		 * We can easily thread it through, but not bothering for the
		 * moment (AArch32 doesn't either).
		 */
		ml_interrupt_masked_debug_start(rtclock_intr, DBG_INTR_TYPE_TIMER);
		rtclock_intr(TRUE);
		ml_interrupt_masked_debug_end();
	}

#if APPLEVIRTUALPLATFORM
	if (iar != GIC_SPURIOUS_IRQ) {
		__builtin_arm_wsr64("ICC_EOIR0_EL1", iar);
		__builtin_arm_isb(ISB_SY);
	}
#endif

	sleh_interrupt_handler_epilogue();
#if MACH_ASSERT
	if (preemption_level != sleh_get_preemption_level()) {
		panic("fiq type %u changed preemption level from %d to %d", type, preemption_level, sleh_get_preemption_level());
	}
#endif
}

void
sleh_serror(arm_context_t *context, uint64_t esr, vm_offset_t far)
{
	task_vtimer_check(current_thread());

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCP_SERR_ARM, 0) | DBG_FUNC_START,
	    esr, VM_KERNEL_ADDRHIDE(far));
	arm_saved_state_t *state = &context->ss;
#if MACH_ASSERT
	int preemption_level = sleh_get_preemption_level();
#endif

	if (PSR64_IS_USER(get_saved_state_cpsr(state))) {
		/* Sanitize FAR (only if we came from userspace) */
		saved_state64(state)->far = 0;
	}

	ASSERT_CONTEXT_SANITY(context);
	arm64_platform_error(state, esr, far, PLAT_ERR_SRC_ASYNC);
#if MACH_ASSERT
	if (preemption_level != sleh_get_preemption_level()) {
		panic("serror changed preemption level from %d to %d", preemption_level, sleh_get_preemption_level());
	}
#endif
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCP_SERR_ARM, 0) | DBG_FUNC_END,
	    esr, VM_KERNEL_ADDRHIDE(far));
}

void
mach_syscall_trace_exit(unsigned int retval,
    unsigned int call_number)
{
	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
	    MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) |
	    DBG_FUNC_END, retval, 0, 0, 0, 0);
}

__attribute__((noreturn))
void
thread_syscall_return(kern_return_t error)
{
	thread_t thread;
	struct arm_saved_state *state;

	thread = current_thread();
	state = get_user_regs(thread);

	assert(is_saved_state64(state));
	saved_state64(state)->x[0] = error;

#if MACH_ASSERT
	kern_allocation_name_t
	    prior __assert_only = thread_get_kernel_state(thread)->allocation_name;
	assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
#endif /* MACH_ASSERT */

	if (kdebug_enable) {
		/* Invert syscall number (negative for a mach syscall) */
		mach_syscall_trace_exit(error, (-1) * get_saved_state_svc_number(state));
	}

	thread_exception_return();
}

void
syscall_trace(
	struct arm_saved_state * regs __unused)
{
	/* kprintf("syscall: %d\n", saved_state64(regs)->x[16]); */
}

static void
sleh_interrupt_handler_prologue(arm_saved_state_t *state, unsigned int type)
{
	const bool is_user = PSR64_IS_USER(get_saved_state_cpsr(state));

	if (is_user == true) {
		/* Sanitize FAR (only if the interrupt occurred while the CPU was in usermode) */
		saved_state64(state)->far = 0;
	}

	recount_enter_interrupt();

	task_vtimer_check(current_thread());

	uint64_t pc = is_user ? get_saved_state_pc(state) :
	    VM_KERNEL_UNSLIDE(get_saved_state_pc(state));

	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_START,
	    0, pc, is_user, type);
}

static void
sleh_interrupt_handler_epilogue(void)
{
#if KPERF
	kperf_interrupt();
#endif /* KPERF */
	KDBG_RELEASE(MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END);
	recount_leave_interrupt();
}

void
sleh_invalid_stack(arm_context_t *context, uint64_t esr __unused, vm_offset_t far __unused)
{
	thread_t thread = current_thread();
	vm_offset_t kernel_stack_bottom, sp;

	sp = get_saved_state_sp(&context->ss);
	vm_offset_t kstackptr = (vm_offset_t)thread->machine.kstackptr;
	kernel_stack_bottom = round_page(kstackptr) - KERNEL_STACK_SIZE;

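	/*
	 * An SP that landed within one page below the stack bottom points
	 * into the guard page, which is the signature of a stack overflow.
	 */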
3168 if ((sp < kernel_stack_bottom) && (sp >= (kernel_stack_bottom - PAGE_SIZE))) {
3169 panic_with_thread_kernel_state("Invalid kernel stack pointer (probable overflow).", &context->ss);
3170 }
3171
3172 panic_with_thread_kernel_state("Invalid kernel stack pointer (probable corruption or early boot).", &context->ss);
3173 }
3174
3175
3176 #if MACH_ASSERT
3177 static int trap_handled;
3178 static const char *
handle_recoverable_kernel_trap(__unused void * tstate,uint16_t comment)3179 handle_recoverable_kernel_trap(
3180 __unused void *tstate,
3181 uint16_t comment)
3182 {
3183 assert(comment == TEST_RECOVERABLE_SOFT_TRAP);
3184
3185 printf("Recoverable trap handled.\n");
3186 trap_handled = 1;
3187
3188 return NULL;
3189 }
3190
3191 KERNEL_BRK_DESCRIPTOR_DEFINE(test_desc,
3192 .type = TRAP_TELEMETRY_TYPE_KERNEL_BRK_TEST,
3193 .base = TEST_RECOVERABLE_SOFT_TRAP,
3194 .max = TEST_RECOVERABLE_SOFT_TRAP,
3195 .options = BRK_TELEMETRY_OPTIONS_RECOVERABLE_DEFAULT(
3196 /* enable_telemetry */ false),
3197 .handle_breakpoint = handle_recoverable_kernel_trap);
3198
3199 static int
recoverable_kernel_trap_test(__unused int64_t in,int64_t * out)3200 recoverable_kernel_trap_test(__unused int64_t in, int64_t *out)
3201 {
3202 ml_recoverable_trap(TEST_RECOVERABLE_SOFT_TRAP);
3203
3204 *out = trap_handled;
3205 return 0;
3206 }
3207
3208 SYSCTL_TEST_REGISTER(recoverable_kernel_trap, recoverable_kernel_trap_test);

#endif /* MACH_ASSERT */

#if CONFIG_SPTM
/**
 * Evaluate the panic lockdown policy for a synchronous EL1 SP0 exception.
 *
 * Returns true if panic lockdown should be initiated (but does not itself
 * initiate it).
 */
__SECURITY_STACK_DISALLOWED_PUSH
bool
sleh_panic_lockdown_should_initiate_el1_sp0_sync(uint64_t esr, uint64_t elr,
    uint64_t far, uint64_t spsr)
{
	const esr_exception_class_t class = ESR_EC(esr);
	const bool any_exceptions_masked = spsr & DAIF_STANDARD_DISABLE;

	switch (class) {
	case ESR_EC_PC_ALIGN:   /* PC misaligned (should never happen) */
	case ESR_EC_IABORT_EL1: /* Potential iPAC failure (poisoned PC) */
	case ESR_EC_PAC_FAIL: { /* FPAC fail */
		return true;
	}

	case ESR_EC_BRK_AARCH64: {
		/*
		 * Breakpoints are used on non-FPAC systems to signal some PAC failures.
		 */
#if HAS_TELEMETRY_KERNEL_BRK
		const struct kernel_brk_descriptor *desc;
		desc = find_kernel_brk_descriptor_by_comment(ISS_BRK_COMMENT(esr));
		if (desc && desc->options.recoverable) {
			/*
			 * We matched a breakpoint and it's recoverable; skip lockdown.
			 */
			return false;
		}
#endif /* HAS_TELEMETRY_KERNEL_BRK */

		/*
		 * If we don't support telemetry breakpoints, or didn't match a
		 * recoverable breakpoint, the exception is fatal.
		 */
		return true;
	}

	case ESR_EC_DABORT_EL1: {
		const struct copyio_recovery_entry *cre =
		    find_copyio_recovery_entry(elr);
		if (cre) {
#if HAS_MTE
			/*
			 * Tag check faults are fatal in copyio when they impact a kernel
			 * address and the copyio function is not permitted to recover from
			 * a tag check fault on a kernel address.
			 *
			 * We can determine whether we faulted on a kernel address by
			 * checking any of the canonical address bits. This works since
			 * copy_validate will reject user addresses with any of these
			 * canonical bits set before reaching the underlying copyio
			 * functions, so bits set here mean this is actually a kernel
			 * address.
			 */
			const bool is_kernel_far = far & TTBR_SELECTOR;
			if (is_kernel_far &&
			    is_tag_check_fault(ISS_DA_FSC(ESR_ISS(esr))) &&
			    !cre->recover_from_kernel_read_tag_check_fault &&
			    !cre->recover_from_kernel_write_tag_check_fault) {
				return true;
			}
#endif /* HAS_MTE */

			/*
			 * copyio faults are recoverable regardless of whether
			 * exceptions are masked.
			 */
			return false;
		}

#if HAS_MTE
		/*
		 * Kernel tag check faults are always fatal outside of copyio.
		 */
		if (is_tag_check_fault(ISS_DA_FSC(ESR_ISS(esr)))) {
			return true;
		}
#endif /* HAS_MTE */

		/*
		 * Heuristic: if FAR != XPAC(FAR), the pointer was likely corrupted
		 * by a PAC authentication failure.
		 */
#if HAS_MTE
		/*
		 * This heuristic can misfire for TBCF/CPA2 poisoning, but
		 * triggering a lockdown for these failures in the kernel is fine
		 * since they are not recoverable.
		 */
#endif /* HAS_MTE */
		const uint64_t far_stripped =
		    (uint64_t)ptrauth_strip((void *)far, ptrauth_key_asda);

		if (far != far_stripped) {
			/* Potential dPAC failure (poisoned address) */
			return true;
		}

		if (any_exceptions_masked && startup_phase >= STARTUP_SUB_LOCKDOWN) {
			/*
			 * Any data abort taken with exceptions masked is fatal if we're
			 * past early boot.
			 */
			return true;
		}

		return false;
	}

	case ESR_EC_UNCATEGORIZED: {
		/* Undefined instruction (GDBTRAP for stackshots, etc.) */
		return false;
	}

	case ESR_EC_BTI_FAIL: {
		/* Kernel BTI exceptions are always fatal */
		return true;
	}

	default: {
		if (!any_exceptions_masked) {
			/*
			 * When exceptions are not masked, we default-allow exceptions.
			 */
			return false;
		}

		if (startup_phase < STARTUP_SUB_LOCKDOWN) {
			/*
			 * Ignore early boot exceptions even if exceptions are masked.
			 */
			return false;
		}

		/* Default-deny all others when exceptions are masked */
		return true;
	}
	}
}
__SECURITY_STACK_DISALLOWED_POP
#endif /* CONFIG_SPTM */

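/*
 * A minimal sketch of how the predicate above is meant to be consumed by a
 * synchronous exception path. The surrounding handler name and the lockdown
 * entry point are assumptions for illustration; the real SPTM lockdown
 * plumbing lives in the exception vectors.
 */
#if 0 /* illustrative only; not built */
static void
sleh_el1_sp0_sync_sketch(uint64_t esr, uint64_t elr, uint64_t far, uint64_t spsr)
{
	if (sleh_panic_lockdown_should_initiate_el1_sp0_sync(esr, elr, far, spsr)) {
		/* hypothetical entry point; the policy check itself never locks down */
		initiate_panic_lockdown_sketch();
	}
	/* otherwise, fall through to the normal synchronous exception handler */
}
#endif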