xref: /xnu-11215.41.3/bsd/dev/arm64/fasttrap_isa.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2007-2022 Apple Inc. All rights reserved.
3  */
4 /*
5  * CDDL HEADER START
6  *
7  * The contents of this file are subject to the terms of the
8  * Common Development and Distribution License, Version 1.0 only
9  * (the "License").  You may not use this file except in compliance
10  * with the License.
11  *
12  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
13  * or http://www.opensolaris.org/os/licensing.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  *
17  * When distributing Covered Code, include this CDDL HEADER in each
18  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
19  * If applicable, add the following below this CDDL HEADER, with the
20  * fields enclosed by brackets "[]" replaced with your own identifying
21  * information: Portions Copyright [yyyy] [name of copyright owner]
22  *
23  * CDDL HEADER END
24  */
25 /*
26  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/fasttrap_isa.h>
31 #include <sys/fasttrap_impl.h>
32 #include <sys/dtrace.h>
33 #include <sys/dtrace_impl.h>
34 #include <kern/task.h>
35 #include <arm/thread.h>
36 
37 #include <sys/dtrace_ptss.h>
38 
39 #if __has_include(<ptrauth.h>)
40 #include <ptrauth.h>
41 #endif
42 
/*
 * Probe id passed to dtrace_probe() in this file to report a DTRACEFLT_UPRIV
 * fault instead of firing the user's probe. Defined elsewhere.
 */
extern dtrace_id_t dtrace_probeid_error;

/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */

/*
 * Instruction classifier, defined elsewhere. Its return value is stored in
 * fasttrap_tracepoint_t::ftt_type and compared against FASTTRAP_T_* values.
 */
extern uint8_t dtrace_decode_arm64(uint32_t instr);

/* True when x is the 32-bit instruction word 0xD503201F (A64 NOP). */
#define IS_ARM64_NOP(x) ((x) == 0xD503201F)
/*
 * Marker for is-enabled probes: the instruction word 0xD2800000
 * (presumably the encoding of `mov x0, #0` -- the handler below answers an
 * is-enabled probe by setting x0 to 1).
 */
#define IS_ARM64_IS_ENABLED(x) ((x) == 0xD2800000)
53 
54 int
fasttrap_tracepoint_init(proc_t * p,fasttrap_tracepoint_t * tp,user_addr_t pc,fasttrap_probe_type_t type)55 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp,
56     user_addr_t pc, fasttrap_probe_type_t type)
57 {
58 #pragma unused(type)
59 	uint32_t instr = 0;
60 
61 	/*
62 	 * Read the instruction at the given address out of the process's
63 	 * address space. We don't have to worry about a debugger
64 	 * changing this instruction before we overwrite it with our trap
65 	 * instruction since P_PR_LOCK is set. Since instructions can span
66 	 * pages, we potentially read the instruction in two parts. If the
67 	 * second part fails, we just zero out that part of the instruction.
68 	 */
69 	/*
70 	 * APPLE NOTE: Of course, we do not have a P_PR_LOCK, so this is racey...
71 	 */
72 
73 	if (uread(p, &instr, 4, pc) != 0) {
74 		return -1;
75 	}
76 
77 	tp->ftt_instr = instr;
78 
79 	if (tp->ftt_fntype != FASTTRAP_FN_DONE_INIT) {
80 		switch (tp->ftt_fntype) {
81 		case FASTTRAP_FN_UNKNOWN:
82 		case FASTTRAP_FN_ARM64:
83 		case FASTTRAP_FN_ARM64_32:
84 			/*
85 			 * On arm64 there is no distinction between
86 			 * arm vs. thumb mode instruction types.
87 			 */
88 			tp->ftt_fntype = FASTTRAP_FN_DONE_INIT;
89 			break;
90 
91 		case FASTTRAP_FN_USDT:
92 			if (IS_ARM64_NOP(instr) || IS_ARM64_IS_ENABLED(instr)) {
93 				tp->ftt_fntype = FASTTRAP_FN_DONE_INIT;
94 			} else {
95 				/*
96 				 * Shouldn't reach here - this means we don't
97 				 * recognize the instruction at one of the
98 				 * USDT probe locations
99 				 */
100 				return -1;
101 			}
102 
103 			break;
104 
105 		case FASTTRAP_FN_ARM:
106 		case FASTTRAP_FN_THUMB:
107 		default:
108 			/*
109 			 * If we get an arm or thumb mode type
110 			 * then we are clearly in the wrong path.
111 			 */
112 			return -1;
113 		}
114 	}
115 
116 	tp->ftt_type = dtrace_decode_arm64(instr);
117 
118 	if (tp->ftt_type == FASTTRAP_T_ARM64_EXCLUSIVE_MEM) {
119 		kprintf("Detected attempt to place DTrace probe on exclusive memory instruction (pc = 0x%llx); refusing to trace (or exclusive operation could never succeed).\n", pc);
120 		tp->ftt_type = FASTTRAP_T_INV;
121 		return -1;
122 	}
123 
124 	if (tp->ftt_type == FASTTRAP_T_INV) {
125 		/* This is an instruction we either don't recognize or can't instrument */
126 		printf("dtrace: fasttrap init64: Unrecognized instruction: %08x at %08llx\n", instr, pc);
127 		return -1;
128 	}
129 
130 	return 0;
131 }
132 
133 int
fasttrap_tracepoint_install(proc_t * p,fasttrap_tracepoint_t * tp)134 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
135 {
136 	uint32_t instr;
137 	int size;
138 
139 	if (proc_is64bit_data(p)) {
140 		size = 4;
141 		instr = FASTTRAP_ARM64_INSTR;
142 	} else {
143 		return -1;
144 	}
145 
146 	if (uwrite(p, &instr, size, tp->ftt_pc) != 0) {
147 		return -1;
148 	}
149 
150 	tp->ftt_installed = 1;
151 
152 	return 0;
153 }
154 
155 int
fasttrap_tracepoint_remove(proc_t * p,fasttrap_tracepoint_t * tp)156 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
157 {
158 	uint32_t instr;
159 	int size = 4;
160 
161 	if (proc_is64bit_data(p)) {
162 		/*
163 		 * Distinguish between read or write failures and a changed
164 		 * instruction.
165 		 */
166 		if (uread(p, &instr, size, tp->ftt_pc) != 0) {
167 			goto end;
168 		}
169 
170 		if (instr != FASTTRAP_ARM64_INSTR) {
171 			goto end;
172 		}
173 	} else {
174 		return -1;
175 	}
176 
177 	if (uwrite(p, &tp->ftt_instr, size, tp->ftt_pc) != 0) {
178 		return -1;
179 	}
180 
181 end:
182 	tp->ftt_installed = 0;
183 
184 	return 0;
185 }
186 
187 static void
fasttrap_return_common(proc_t * p,arm_saved_state_t * regs,user_addr_t pc,user_addr_t new_pc)188 fasttrap_return_common(proc_t *p, arm_saved_state_t *regs, user_addr_t pc, user_addr_t new_pc)
189 {
190 	pid_t pid = proc_getpid(p);
191 	fasttrap_tracepoint_t *tp;
192 	fasttrap_bucket_t *bucket;
193 	fasttrap_id_t *id;
194 	lck_mtx_t *pid_mtx;
195 	int retire_tp = 1;
196 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
197 	lck_mtx_lock(pid_mtx);
198 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
199 
200 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
201 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
202 		    tp->ftt_proc->ftpc_acount != 0) {
203 			break;
204 		}
205 	}
206 
207 	/*
208 	 * Don't sweat it if we can't find the tracepoint again; unlike
209 	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
210 	 * is not essential to the correct execution of the process.
211 	 */
212 	if (tp == NULL) {
213 		lck_mtx_unlock(pid_mtx);
214 		return;
215 	}
216 
217 	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
218 		fasttrap_probe_t *probe = id->fti_probe;
219 		/* ARM64_TODO  - check for FASTTRAP_T_RET */
220 		if ((tp->ftt_type != FASTTRAP_T_ARM64_RET || tp->ftt_type != FASTTRAP_T_ARM64_RETAB) &&
221 		    new_pc - probe->ftp_faddr < probe->ftp_fsize) {
222 			continue;
223 		}
224 		if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
225 			if (os_atomic_xchg(&probe->ftp_triggered, 1, relaxed)) {
226 				/* already triggered */
227 				continue;
228 			}
229 		}
230 		/*
231 		 * If we have at least one probe associated that
232 		 * is not a oneshot probe, don't remove the
233 		 * tracepoint
234 		 */
235 		else {
236 			retire_tp = 0;
237 		}
238 
239 #if defined(XNU_TARGET_OS_OSX)
240 		if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
241 			dtrace_probe(dtrace_probeid_error, 0 /* state */, id->fti_probe->ftp_id,
242 			    1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV);
243 #else
244 		if (FALSE) {
245 #endif /* defined(XNU_TARGET_OS_OSX) */
246 		} else {
247 			dtrace_probe(probe->ftp_id,
248 			    pc - id->fti_probe->ftp_faddr,
249 			    saved_state64(regs)->x[0], 0, 0, 0);
250 		}
251 	}
252 	if (retire_tp) {
253 		fasttrap_tracepoint_retire(p, tp);
254 	}
255 
256 	lck_mtx_unlock(pid_mtx);
257 }
258 
259 #if DEBUG
260 __dead2
261 #endif
262 static void
263 fasttrap_sigsegv(proc_t *p, uthread_t t, user_addr_t addr, arm_saved_state_t *regs)
264 {
265 	/* TODO: This function isn't implemented yet. In debug mode, panic the system to
266 	 * find out why we're hitting this point. In other modes, kill the process.
267 	 */
268 #if DEBUG
269 #pragma unused(p,t,addr,arm_saved_state)
270 	panic("fasttrap: sigsegv not yet implemented");
271 #else
272 #pragma unused(p,t,addr)
273 	/* Kill the process */
274 	set_saved_state_pc(regs, 0);
275 #endif
276 
277 #if 0
278 	proc_lock(p);
279 
280 	/* Set fault address and mark signal */
281 	t->uu_code = addr;
282 	t->uu_siglist |= sigmask(SIGSEGV);
283 
284 	/*
285 	 * XXX These two line may be redundant; if not, then we need
286 	 * XXX to potentially set the data address in the machine
287 	 * XXX specific thread state structure to indicate the address.
288 	 */
289 	t->uu_exception = KERN_INVALID_ADDRESS;         /* SIGSEGV */
290 	t->uu_subcode = 0;      /* XXX pad */
291 
292 	proc_unlock(p);
293 
294 	/* raise signal */
295 	signal_setast(get_machthread(t));
296 #endif
297 }
298 
299 static void
300 fasttrap_usdt_args64(fasttrap_probe_t *probe, arm_saved_state64_t *regs64, int argc,
301     uint64_t *argv)
302 {
303 	int i, x, cap = MIN(argc, probe->ftp_nargs);
304 
305 	for (i = 0; i < cap; i++) {
306 		x = probe->ftp_argmap[i];
307 
308 		/* Up to 8 args are passed in registers on arm64 */
309 		if (x < 8) {
310 			argv[i] = regs64->x[x];
311 		} else {
312 			fasttrap_fuword64_noerr(regs64->sp + (x - 8) * sizeof(uint64_t), &argv[i]);
313 		}
314 	}
315 
316 	for (; i < argc; i++) {
317 		argv[i] = 0;
318 	}
319 }
320 
321 static int
322 condition_true(int cond, int cpsr)
323 {
324 	int taken = 0;
325 	int zf = (cpsr & PSR64_Z) ? 1 : 0,
326 	    nf = (cpsr & PSR64_N) ? 1 : 0,
327 	    cf = (cpsr & PSR64_C) ? 1 : 0,
328 	    vf = (cpsr & PSR64_V) ? 1 : 0;
329 
330 	switch (cond) {
331 	case 0: taken = zf; break;
332 	case 1: taken = !zf; break;
333 	case 2: taken = cf; break;
334 	case 3: taken = !cf; break;
335 	case 4: taken = nf; break;
336 	case 5: taken = !nf; break;
337 	case 6: taken = vf; break;
338 	case 7: taken = !vf; break;
339 	case 8: taken = (cf && !zf); break;
340 	case 9: taken = (!cf || zf); break;
341 	case 10: taken = (nf == vf); break;
342 	case 11: taken = (nf != vf); break;
343 	case 12: taken = (!zf && (nf == vf)); break;
344 	case 13: taken = (zf || (nf != vf)); break;
345 	case 14: taken = 1; break;
346 	case 15: taken = 1; break;         /* always "true" for ARM, unpredictable for THUMB. */
347 	}
348 
349 	return taken;
350 }
351 
352 /*
353  * Copy out an instruction for execution in userland.
354  * Trap back to kernel to handle return to original flow of execution, because
355  * direct branches don't have sufficient range (+/- 128MB) and we
356  * cannot clobber a GPR.  Note that we have to specially handle PC-rel loads/stores
357  * as well, which have range +/- 1MB (convert to an indirect load).  Instruction buffer
358  * layout:
359  *
360  *    [ Thunked instruction sequence ]
361  *    [ Trap for return to original code and return probe handling ]
362  *
363  * This *does* make it impossible for an ldxr/stxr pair to succeed if we trace on or between
364  * them... may need to get fancy at some point.
365  */
366 static void
367 fasttrap_pid_probe_thunk_instr64(arm_saved_state_t *state, fasttrap_tracepoint_t *tp, proc_t *p, uthread_t uthread,
368     const uint32_t *instructions, uint32_t num_instrs, user_addr_t *pc_out)
369 {
370 	uint32_t local_scratch[8];
371 	user_addr_t pc = get_saved_state_pc(state);
372 	user_addr_t user_scratch_area;
373 
374 	assert(num_instrs < 8);
375 
376 	bcopy(instructions, local_scratch, num_instrs * sizeof(uint32_t));
377 	local_scratch[num_instrs] = FASTTRAP_ARM64_RET_INSTR;
378 
379 	uthread->t_dtrace_astpc = uthread->t_dtrace_scrpc = uthread->t_dtrace_scratch->addr;
380 	user_scratch_area = uthread->t_dtrace_scratch->write_addr;
381 
382 	if (user_scratch_area == (user_addr_t)0) {
383 		fasttrap_sigtrap(p, uthread, pc); // Should be killing target proc
384 		*pc_out = pc;
385 		return;
386 	}
387 
388 	if (uwrite(p, local_scratch, (num_instrs + 1) * sizeof(uint32_t), user_scratch_area) != KERN_SUCCESS) {
389 		fasttrap_sigtrap(p, uthread, pc);
390 		*pc_out = pc;
391 		return;
392 	}
393 
394 	/* We're stepping (come back to kernel to adjust PC for return to regular code). */
395 	uthread->t_dtrace_step = 1;
396 
397 	/* We may or may not be about to run a return probe (but we wouldn't thunk ret lr)*/
398 	uthread->t_dtrace_ret = (tp->ftt_retids != NULL);
399 	assert(tp->ftt_type != FASTTRAP_T_ARM64_RET);
400 	assert(tp->ftt_type != FASTTRAP_T_ARM64_RETAB);
401 
402 	/* Set address of instruction we've patched */
403 	uthread->t_dtrace_pc = pc;
404 
405 	/* Any branch would be emulated, next instruction should be one ahead */
406 	uthread->t_dtrace_npc = pc + 4;
407 
408 	/* We are certainly handling a probe */
409 	uthread->t_dtrace_on = 1;
410 
411 	/* Let's jump to the scratch area */
412 	*pc_out = uthread->t_dtrace_scratch->addr;
413 }
414 
/*
 * Sign-extend `input` from bit `sign_bit_index` up through bit 63.
 *
 * If the bit at `sign_bit_index` is set, every higher bit is set as well;
 * otherwise `input` is returned unchanged. `sign_bit_index` must be < 63.
 */
static int64_t
sign_extend(int64_t input, uint32_t sign_bit_index)
{
	uint64_t sign_mask;
	uint64_t bits = (uint64_t)input;

	assert(sign_bit_index < 63);

	sign_mask = 1ULL << sign_bit_index;
	if ((bits & sign_mask) == 0) {
		return input;
	}

	/* (sign_mask << 1) - 1 covers bits [sign_bit_index:0]; set everything above. */
	return (int64_t)(bits | ~((sign_mask << 1) - 1ULL));
}
429 
430 /*
431  * Handle xzr vs. sp, fp, lr, etc.  Will *not* read the SP.
432  */
433 static uint64_t
434 get_saved_state64_regno(arm_saved_state64_t *regs64, uint32_t regno, int use_xzr)
435 {
436 	/* Set PC to register value */
437 	switch (regno) {
438 	case 29:
439 		return regs64->fp;
440 	case 30:
441 		return regs64->lr;
442 	case 31:
443 		/* xzr */
444 		if (use_xzr) {
445 			return 0;
446 		} else {
447 			return regs64->sp;
448 		}
449 	default:
450 		return regs64->x[regno];
451 	}
452 }
453 
454 static void
455 set_saved_state_regno(arm_saved_state_t *state, uint32_t regno, int use_xzr, register_t value)
456 {
457 	/* Set PC to register value */
458 	switch (regno) {
459 	case 29:
460 		set_saved_state_fp(state, value);
461 		break;
462 	case 30:
463 		set_saved_state_lr(state, value);
464 		break;
465 	case 31:
466 		if (!use_xzr) {
467 			set_saved_state_sp(state, value);
468 		}
469 		break;
470 	default:
471 		set_saved_state_reg(state, regno, value);
472 		break;
473 	}
474 }
475 
/*
 * Extract a PC-relative word offset from an instruction.
 *
 * Takes the `numbits`-wide field starting at bit `base` of `instr`,
 * sign-extends it from its top bit, and multiplies it by 4 (left shift by
 * two) to turn the word offset into a byte offset.
 */
static uint64_t
extract_address_literal_sign_extended(uint32_t instr, uint32_t base, uint32_t numbits)
{
	const uint32_t field_mask = (1 << numbits) - 1;
	const uint64_t raw_field = (instr >> base) & field_mask;
	const uint64_t extended = sign_extend(raw_field, numbits - 1);

	/* Shift as unsigned so the result is well defined for negative offsets. */
	return extended << 2;
}
491 
492 static void
493 do_cbz_cnbz(arm_saved_state64_t *regs64, uint32_t regwidth, uint32_t instr, int is_cbz, user_addr_t *pc_out)
494 {
495 	uint32_t regno;
496 	uint64_t regval;
497 	uint64_t offset;
498 
499 	/* Extract register */
500 	regno = (instr & 0x1f);
501 	assert(regno <= 31);
502 	regval = get_saved_state64_regno(regs64, regno, 1);
503 
504 	/* Control for size */
505 	if (regwidth == 32) {
506 		regval &= 0xFFFFFFFFULL;
507 	}
508 
509 	/* Extract offset */
510 	offset = extract_address_literal_sign_extended(instr, 5, 19);
511 
512 	/* Do test */
513 	if ((is_cbz && regval == 0) || ((!is_cbz) && regval != 0)) {
514 		/* Set PC from label */
515 		*pc_out = regs64->pc + offset;
516 	} else {
517 		/* Advance PC */
518 		*pc_out = regs64->pc + 4;
519 	}
520 }
521 
522 static void
523 do_tbz_tbnz(arm_saved_state64_t *regs64, uint32_t instr, int is_tbz, user_addr_t *pc_out)
524 {
525 	uint64_t offset, regval;
526 	uint32_t bit_index, b5, b40, regno, bit_set;
527 
528 	/* Compute offset */
529 	offset = extract_address_literal_sign_extended(instr, 5, 14);
530 
531 	/* Extract bit index */
532 	b5 = (instr >> 31);
533 	b40 = ((instr >> 19) & 0x1f);
534 	bit_index = (b5 << 5) | b40;
535 	assert(bit_index <= 63);
536 
537 	/* Extract register */
538 	regno = (instr & 0x1f);
539 	assert(regno <= 31);
540 	regval = get_saved_state64_regno(regs64, regno, 1);
541 
542 	/* Test bit */
543 	bit_set = ((regval & (1 << bit_index)) != 0);
544 
545 	if ((is_tbz && (!bit_set)) || ((!is_tbz) && bit_set)) {
546 		/* Branch: unsigned addition so overflow defined */
547 		*pc_out = regs64->pc + offset;
548 	} else {
549 		/* Advance PC */
550 		*pc_out = regs64->pc + 4;
551 	}
552 }
553 
554 
555 static void
556 fasttrap_pid_probe_handle_patched_instr64(arm_saved_state_t *state, fasttrap_tracepoint_t *tp __unused, uthread_t uthread,
557     proc_t *p, uint_t is_enabled, int *was_simulated)
558 {
559 	thread_t th = get_machthread(uthread);
560 	int res1, res2;
561 	arm_saved_state64_t *regs64 = saved_state64(state);
562 	uint32_t instr = tp->ftt_instr;
563 	user_addr_t new_pc = 0;
564 
565 	/* Neon state should be threaded throw, but hack it until we have better arm/arm64 integration */
566 	arm_neon_saved_state64_t *ns64 = &(get_user_neon_regs(th)->ns_64);
567 
568 	/* is-enabled probe: set x0 to 1 and step forwards */
569 	if (is_enabled) {
570 		regs64->x[0] = 1;
571 		add_saved_state_pc(state, 4);
572 		return;
573 	}
574 
575 	/* For USDT probes, bypass all the emulation logic for the nop instruction */
576 	if (IS_ARM64_NOP(tp->ftt_instr)) {
577 		add_saved_state_pc(state, 4);
578 		return;
579 	}
580 
581 
582 	/* Only one of many cases in the switch doesn't simulate */
583 	switch (tp->ftt_type) {
584 	/*
585 	 * Function entry: emulate for speed.
586 	 * stp fp, lr, [sp, #-16]!
587 	 */
588 	case FASTTRAP_T_ARM64_STANDARD_FUNCTION_ENTRY:
589 	{
590 		/* Store values to stack */
591 		res1 = fasttrap_suword64(regs64->sp - 16, regs64->fp);
592 		res2 = fasttrap_suword64(regs64->sp - 8, regs64->lr);
593 		if (res1 != 0 || res2 != 0) {
594 			fasttrap_sigsegv(p, uthread, regs64->sp - (res1 ? 16 : 8), state);
595 #ifndef DEBUG
596 			new_pc = regs64->pc;         /* Bit of a hack */
597 			break;
598 #endif
599 		}
600 
601 		/* Move stack pointer */
602 		regs64->sp -= 16;
603 
604 		/* Move PC forward */
605 		new_pc = regs64->pc + 4;
606 		*was_simulated = 1;
607 		break;
608 	}
609 
610 	/*
611 	 * PC-relative loads/stores: emulate for correctness.
612 	 * All loads are 32bits or greater (no need to handle byte or halfword accesses).
613 	 *	LDR Wt, addr
614 	 *	LDR Xt, addr
615 	 *	LDRSW Xt, addr
616 	 *
617 	 *      LDR St, addr
618 	 *      LDR Dt, addr
619 	 *      LDR Qt, addr
620 	 *      PRFM label -> becomes a NOP
621 	 */
622 	case FASTTRAP_T_ARM64_LDR_S_PC_REL:
623 	case FASTTRAP_T_ARM64_LDR_W_PC_REL:
624 	case FASTTRAP_T_ARM64_LDR_D_PC_REL:
625 	case FASTTRAP_T_ARM64_LDR_X_PC_REL:
626 	case FASTTRAP_T_ARM64_LDR_Q_PC_REL:
627 	case FASTTRAP_T_ARM64_LDRSW_PC_REL:
628 	{
629 		uint64_t offset;
630 		uint32_t valsize, regno;
631 		user_addr_t address;
632 		union {
633 			uint32_t val32;
634 			uint64_t val64;
635 			uint128_t val128;
636 		} value;
637 
638 		/* Extract 19-bit offset, add to pc */
639 		offset = extract_address_literal_sign_extended(instr, 5, 19);
640 		address = regs64->pc + offset;
641 
642 		/* Extract destination register */
643 		regno = (instr & 0x1f);
644 		assert(regno <= 31);
645 
646 		/* Read value of desired size from memory */
647 		switch (tp->ftt_type) {
648 		case FASTTRAP_T_ARM64_LDR_S_PC_REL:
649 		case FASTTRAP_T_ARM64_LDR_W_PC_REL:
650 		case FASTTRAP_T_ARM64_LDRSW_PC_REL:
651 			valsize = 4;
652 			break;
653 		case FASTTRAP_T_ARM64_LDR_D_PC_REL:
654 		case FASTTRAP_T_ARM64_LDR_X_PC_REL:
655 			valsize = 8;
656 			break;
657 		case FASTTRAP_T_ARM64_LDR_Q_PC_REL:
658 			valsize = 16;
659 			break;
660 		default:
661 			panic("Should never get here!");
662 			valsize = -1;
663 			break;
664 		}
665 
666 		if (copyin(address, &value, valsize) != 0) {
667 			fasttrap_sigsegv(p, uthread, address, state);
668 #ifndef DEBUG
669 			new_pc = regs64->pc;         /* Bit of a hack, we know about update in fasttrap_sigsegv() */
670 			break;
671 #endif
672 		}
673 
674 		/* Stash in correct register slot */
675 		switch (tp->ftt_type) {
676 		case FASTTRAP_T_ARM64_LDR_W_PC_REL:
677 			set_saved_state_regno(state, regno, 1, value.val32);
678 			break;
679 		case FASTTRAP_T_ARM64_LDRSW_PC_REL:
680 			set_saved_state_regno(state, regno, 1, sign_extend(value.val32, 31));
681 			break;
682 		case FASTTRAP_T_ARM64_LDR_X_PC_REL:
683 			set_saved_state_regno(state, regno, 1, value.val64);
684 			break;
685 		case FASTTRAP_T_ARM64_LDR_S_PC_REL:
686 			ns64->v.s[regno][0] = value.val32;
687 			break;
688 		case FASTTRAP_T_ARM64_LDR_D_PC_REL:
689 			ns64->v.d[regno][0] = value.val64;
690 			break;
691 		case FASTTRAP_T_ARM64_LDR_Q_PC_REL:
692 			ns64->v.q[regno] = value.val128;
693 			break;
694 		default:
695 			panic("Should never get here!");
696 		}
697 
698 
699 		/* Move PC forward */
700 		new_pc = regs64->pc + 4;
701 		*was_simulated = 1;
702 		break;
703 	}
704 
705 	case FASTTRAP_T_ARM64_PRFM:
706 	{
707 		/* Becomes a NOP (architecturally permitted).  Just move PC forward */
708 		new_pc = regs64->pc + 4;
709 		*was_simulated = 1;
710 		break;
711 	}
712 
713 	/*
714 	 * End explicit memory accesses.
715 	 */
716 
717 	/*
718 	 * Branches: parse condition codes if needed, emulate for correctness and
719 	 * in the case of the indirect branches, convenience
720 	 *      B.cond
721 	 *      CBNZ Wn, label
722 	 *      CBNZ Xn, label
723 	 *      CBZ Wn, label
724 	 *      CBZ Xn, label
725 	 *      TBNZ, Xn|Wn, #uimm16, label
726 	 *      TBZ, Xn|Wn, #uimm16, label
727 	 *
728 	 *      B label
729 	 *      BL label
730 	 *
731 	 *	BLR Xm
732 	 *	BR Xm
733 	 *	RET Xm
734 	 */
735 	case FASTTRAP_T_ARM64_B_COND:
736 	{
737 		int cond;
738 
739 		/* Extract condition code */
740 		cond = (instr & 0xf);
741 
742 		/* Determine if it passes */
743 		if (condition_true(cond, regs64->cpsr)) {
744 			uint64_t offset;
745 
746 			/* Extract 19-bit target offset, add to PC */
747 			offset = extract_address_literal_sign_extended(instr, 5, 19);
748 			new_pc = regs64->pc + offset;
749 		} else {
750 			/* Move forwards */
751 			new_pc = regs64->pc + 4;
752 		}
753 
754 		*was_simulated = 1;
755 		break;
756 	}
757 
758 	case FASTTRAP_T_ARM64_CBNZ_W:
759 	{
760 		do_cbz_cnbz(regs64, 32, instr, 0, &new_pc);
761 		*was_simulated = 1;
762 		break;
763 	}
764 	case FASTTRAP_T_ARM64_CBNZ_X:
765 	{
766 		do_cbz_cnbz(regs64, 64, instr, 0, &new_pc);
767 		*was_simulated = 1;
768 		break;
769 	}
770 	case FASTTRAP_T_ARM64_CBZ_W:
771 	{
772 		do_cbz_cnbz(regs64, 32, instr, 1, &new_pc);
773 		*was_simulated = 1;
774 		break;
775 	}
776 	case FASTTRAP_T_ARM64_CBZ_X:
777 	{
778 		do_cbz_cnbz(regs64, 64, instr, 1, &new_pc);
779 		*was_simulated = 1;
780 		break;
781 	}
782 
783 	case FASTTRAP_T_ARM64_TBNZ:
784 	{
785 		do_tbz_tbnz(regs64, instr, 0, &new_pc);
786 		*was_simulated = 1;
787 		break;
788 	}
789 	case FASTTRAP_T_ARM64_TBZ:
790 	{
791 		do_tbz_tbnz(regs64, instr, 1, &new_pc);
792 		*was_simulated = 1;
793 		break;
794 	}
795 	case FASTTRAP_T_ARM64_B:
796 	case FASTTRAP_T_ARM64_BL:
797 	{
798 		uint64_t offset;
799 
800 		/* Extract offset from instruction */
801 		offset = extract_address_literal_sign_extended(instr, 0, 26);
802 
803 		/* Update LR if appropriate */
804 		if (tp->ftt_type == FASTTRAP_T_ARM64_BL) {
805 			set_saved_state_lr(state, regs64->pc + 4);
806 		}
807 
808 		/* Compute PC (unsigned addition for defined overflow) */
809 		new_pc = regs64->pc + offset;
810 		*was_simulated = 1;
811 		break;
812 	}
813 
814 	case FASTTRAP_T_ARM64_BLR:
815 	case FASTTRAP_T_ARM64_BR:
816 	{
817 		uint32_t regno;
818 
819 		/* Extract register from instruction */
820 		regno = ((instr >> 5) & 0x1f);
821 		assert(regno <= 31);
822 
823 		/* Update LR if appropriate */
824 		if (tp->ftt_type == FASTTRAP_T_ARM64_BLR) {
825 			set_saved_state_lr(state, regs64->pc + 4);
826 		}
827 
828 		/* Update PC in saved state */
829 		new_pc = get_saved_state64_regno(regs64, regno, 1);
830 		*was_simulated = 1;
831 		break;
832 	}
833 
834 	case FASTTRAP_T_ARM64_RET:
835 	{
836 		/* Extract register */
837 		unsigned regno = ((instr >> 5) & 0x1f);
838 		assert(regno <= 31);
839 
840 		/* Set PC to register value (xzr, not sp) */
841 		new_pc = get_saved_state64_regno(regs64, regno, 1);
842 
843 		*was_simulated = 1;
844 		break;
845 	}
846 	case FASTTRAP_T_ARM64_RETAB:
847 	{
848 		/* Set PC to register value (xzr, not sp) */
849 		new_pc = get_saved_state64_regno(regs64, 30, 1);
850 #if __has_feature(ptrauth_calls)
851 		new_pc = (user_addr_t) ptrauth_strip((void *)new_pc, ptrauth_key_return_address);
852 #endif
853 
854 		*was_simulated = 1;
855 		break;
856 	}
857 	/*
858 	 * End branches.
859 	 */
860 
861 	/*
862 	 * Address calculations: emulate for correctness.
863 	 *
864 	 *      ADRP Xd, label
865 	 *      ADR Xd, label
866 	 */
867 	case FASTTRAP_T_ARM64_ADRP:
868 	case FASTTRAP_T_ARM64_ADR:
869 	{
870 		uint64_t immhi, immlo, offset, result;
871 		uint32_t regno;
872 
873 		/* Extract destination register */
874 		regno = (instr & 0x1f);
875 		assert(regno <= 31);
876 
877 		/* Extract offset */
878 		immhi = ((instr & 0x00ffffe0) >> 5);                    /* bits [23,5]: 19 bits */
879 		immlo = ((instr & 0x60000000) >> 29);                   /* bits [30,29]: 2 bits */
880 
881 		/* Add to PC.  Use unsigned addition so that overflow wraps (rather than being undefined). */
882 		if (tp->ftt_type == FASTTRAP_T_ARM64_ADRP) {
883 			offset =  (immhi << 14) | (immlo << 12);                /* Concatenate bits into [32,12]*/
884 			offset = sign_extend(offset, 32);                       /* Sign extend from bit 32 */
885 			result = (regs64->pc & ~0xfffULL) + offset;             /* And add to page of current pc */
886 		} else {
887 			assert(tp->ftt_type == FASTTRAP_T_ARM64_ADR);
888 			offset =  (immhi << 2) | immlo;                         /* Concatenate bits into [20,0] */
889 			offset = sign_extend(offset, 20);                       /* Sign-extend */
890 			result = regs64->pc + offset;                           /* And add to page of current pc */
891 		}
892 
893 		/* xzr, not sp */
894 		set_saved_state_regno(state, regno, 1, result);
895 
896 		/* Move PC forward */
897 		new_pc = regs64->pc + 4;
898 		*was_simulated = 1;
899 		break;
900 	}
901 
902 	/*
903 	 *  End address calculations.
904 	 */
905 
906 	/*
907 	 * Everything else: thunk to userland
908 	 */
909 	case FASTTRAP_T_COMMON:
910 	{
911 		fasttrap_pid_probe_thunk_instr64(state, tp, p, uthread, &tp->ftt_instr, 1, &new_pc);
912 		*was_simulated = 0;
913 		break;
914 	}
915 	default:
916 	{
917 		panic("An instruction DTrace doesn't expect: %d", tp->ftt_type);
918 		break;
919 	}
920 	}
921 
922 	set_saved_state_pc(state, new_pc);
923 	return;
924 }
925 
926 int
927 fasttrap_pid_probe(arm_saved_state_t *state)
928 {
929 	proc_t *p = current_proc();
930 	fasttrap_bucket_t *bucket;
931 	lck_mtx_t *pid_mtx;
932 	fasttrap_tracepoint_t *tp, tp_local;
933 	pid_t pid;
934 	dtrace_icookie_t cookie;
935 	uint_t is_enabled = 0;
936 	int was_simulated, retire_tp = 1;
937 
938 	uint64_t pc = get_saved_state_pc(state);
939 
940 	assert(is_saved_state64(state));
941 
942 	uthread_t uthread = current_uthread();
943 
944 	/*
945 	 * It's possible that a user (in a veritable orgy of bad planning)
946 	 * could redirect this thread's flow of control before it reached the
947 	 * return probe fasttrap. In this case we need to kill the process
948 	 * since it's in a unrecoverable state.
949 	 */
950 	if (uthread->t_dtrace_step) {
951 		ASSERT(uthread->t_dtrace_on);
952 		fasttrap_sigtrap(p, uthread, (user_addr_t)pc);
953 		return 0;
954 	}
955 
956 	/*
957 	 * Clear all user tracing flags.
958 	 */
959 	uthread->t_dtrace_ft = 0;
960 	uthread->t_dtrace_pc = 0;
961 	uthread->t_dtrace_npc = 0;
962 	uthread->t_dtrace_scrpc = 0;
963 	uthread->t_dtrace_astpc = 0;
964 	uthread->t_dtrace_reg = 0;
965 
966 
967 	pid = proc_getpid(p);
968 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
969 	lck_mtx_lock(pid_mtx);
970 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
971 
972 	/*
973 	 * Lookup the tracepoint that the process just hit.
974 	 */
975 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
976 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
977 		    tp->ftt_proc->ftpc_acount != 0) {
978 			break;
979 		}
980 	}
981 
982 	/*
983 	 * If we couldn't find a matching tracepoint, either a tracepoint has
984 	 * been inserted without using the pid<pid> ioctl interface (see
985 	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
986 	 */
987 	if (tp == NULL) {
988 		lck_mtx_unlock(pid_mtx);
989 		return -1;
990 	}
991 
992 	/* Execute the actual probe */
993 	if (tp->ftt_ids != NULL) {
994 		fasttrap_id_t *id;
995 		uint64_t arg4;
996 
997 		if (is_saved_state64(state)) {
998 			arg4 = get_saved_state_reg(state, 4);
999 		} else {
1000 			return -1;
1001 		}
1002 
1003 
1004 		/* First four parameters are passed in registers */
1005 
1006 		for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
1007 			fasttrap_probe_t *probe = id->fti_probe;
1008 
1009 #if defined(XNU_TARGET_OS_OSX)
1010 			if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
1011 				dtrace_probe(dtrace_probeid_error, 0 /* state */, probe->ftp_id,
1012 				    1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV);
1013 #else
1014 			if (FALSE) {
1015 #endif /* defined(XNU_TARGET_OS_OSX) */
1016 			} else {
1017 				if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
1018 					if (os_atomic_xchg(&probe->ftp_triggered, 1, relaxed)) {
1019 						/* already triggered */
1020 						continue;
1021 					}
1022 				}
1023 				/*
1024 				 * If we have at least one probe associated that
1025 				 * is not a oneshot probe, don't remove the
1026 				 * tracepoint
1027 				 */
1028 				else {
1029 					retire_tp = 0;
1030 				}
1031 				if (id->fti_ptype == DTFTP_ENTRY) {
1032 					/*
1033 					 * We note that this was an entry
1034 					 * probe to help ustack() find the
1035 					 * first caller.
1036 					 */
1037 					cookie = dtrace_interrupt_disable();
1038 					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
1039 					dtrace_probe(probe->ftp_id,
1040 					    get_saved_state_reg(state, 0),
1041 					    get_saved_state_reg(state, 1),
1042 					    get_saved_state_reg(state, 2),
1043 					    get_saved_state_reg(state, 3),
1044 					    arg4);
1045 					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1046 					dtrace_interrupt_enable(cookie);
1047 				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1048 					/*
1049 					 * Note that in this case, we don't
1050 					 * call dtrace_probe() since it's only
1051 					 * an artificial probe meant to change
1052 					 * the flow of control so that it
1053 					 * encounters the true probe.
1054 					 */
1055 					is_enabled = 1;
1056 				} else if (probe->ftp_argmap == NULL) {
1057 					dtrace_probe(probe->ftp_id,
1058 					    get_saved_state_reg(state, 0),
1059 					    get_saved_state_reg(state, 1),
1060 					    get_saved_state_reg(state, 2),
1061 					    get_saved_state_reg(state, 3),
1062 					    arg4);
1063 				} else {
1064 					uint64_t t[5];
1065 
1066 					fasttrap_usdt_args64(probe, saved_state64(state), 5, t);
1067 					dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]);
1068 				}
1069 			}
1070 		}
1071 		if (retire_tp) {
1072 			fasttrap_tracepoint_retire(p, tp);
1073 		}
1074 	}
1075 	/*
1076 	 * We're about to do a bunch of work so we cache a local copy of
1077 	 * the tracepoint to emulate the instruction, and then find the
1078 	 * tracepoint again later if we need to light up any return probes.
1079 	 */
1080 	tp_local = *tp;
1081 	lck_mtx_unlock(pid_mtx);
1082 	tp = &tp_local;
1083 
1084 	/*
1085 	 * APPLE NOTE:
1086 	 *
1087 	 * Subroutines should update PC.
1088 	 * We're setting this earlier than Solaris does, to get a "correct"
1089 	 * ustack() output. In the Sun code,  a() -> b() -> c() -> d() is
1090 	 * reported at: d, b, a. The new way gives c, b, a, which is closer
1091 	 * to correct, as the return instruction has already exectued.
1092 	 */
1093 	fasttrap_pid_probe_handle_patched_instr64(state, tp, uthread, p, is_enabled, &was_simulated);
1094 
1095 	/*
1096 	 * If there were no return probes when we first found the tracepoint,
1097 	 * we should feel no obligation to honor any return probes that were
1098 	 * subsequently enabled -- they'll just have to wait until the next
1099 	 * time around.
1100 	 */
1101 	if (tp->ftt_retids != NULL) {
1102 		/*
1103 		 * We need to wait until the results of the instruction are
1104 		 * apparent before invoking any return probes. If this
1105 		 * instruction was emulated we can just call
1106 		 * fasttrap_return_common(); if it needs to be executed, we
1107 		 * need to wait until the user thread returns to the kernel.
1108 		 */
1109 		/*
1110 		 * It used to be that only common instructions were simulated.
1111 		 * For performance reasons, we now simulate some instructions
1112 		 * when safe and go back to userland otherwise. The was_simulated
1113 		 * flag means we don't need to go back to userland.
1114 		 */
1115 		if (was_simulated) {
1116 			fasttrap_return_common(p, state, (user_addr_t)pc, (user_addr_t)get_saved_state_pc(state));
1117 		} else {
1118 			ASSERT(uthread->t_dtrace_ret != 0);
1119 			ASSERT(uthread->t_dtrace_pc == pc);
1120 			ASSERT(uthread->t_dtrace_scrpc != 0);
1121 			ASSERT(((user_addr_t)get_saved_state_pc(state)) == uthread->t_dtrace_astpc);
1122 		}
1123 	}
1124 
1125 	return 0;
1126 }
1127 
1128 int
1129 fasttrap_return_probe(arm_saved_state_t *regs)
1130 {
1131 	proc_t *p = current_proc();
1132 	uthread_t uthread = current_uthread();
1133 	user_addr_t pc = uthread->t_dtrace_pc;
1134 	user_addr_t npc = uthread->t_dtrace_npc;
1135 
1136 	uthread->t_dtrace_pc = 0;
1137 	uthread->t_dtrace_npc = 0;
1138 	uthread->t_dtrace_scrpc = 0;
1139 	uthread->t_dtrace_astpc = 0;
1140 
1141 
1142 	/*
1143 	 * We set rp->r_pc to the address of the traced instruction so
1144 	 * that it appears to dtrace_probe() that we're on the original
1145 	 * instruction, and so that the user can't easily detect our
1146 	 * complex web of lies. dtrace_return_probe() (our caller)
1147 	 * will correctly set %pc after we return.
1148 	 */
1149 	set_saved_state_pc(regs, pc);
1150 
1151 	fasttrap_return_common(p, regs, pc, npc);
1152 
1153 	return 0;
1154 }
1155 
1156 uint64_t
1157 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1158     int aframes)
1159 {
1160 #pragma unused(arg, id, parg, aframes)
1161 	arm_saved_state_t* regs = find_user_regs(current_thread());
1162 
1163 	/* First eight arguments are in registers */
1164 	if (argno < 8) {
1165 		return saved_state64(regs)->x[argno];
1166 	}
1167 
1168 	/* Look on the stack for the rest */
1169 	uint64_t value;
1170 	uint64_t* sp = (uint64_t*) saved_state64(regs)->sp;
1171 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1172 	value = dtrace_fuword64((user_addr_t) (sp + argno - 8));
1173 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
1174 
1175 	return value;
1176 }
1177 
1178 uint64_t
1179 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1180 {
1181 #pragma unused(arg, id, parg, argno, aframes)
1182 	return 0;
1183 }
1184