xref: /xnu-8020.140.41/bsd/dev/arm/fasttrap_isa.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2007 Apple Inc. All rights reserved.
3  */
4 /*
5  * CDDL HEADER START
6  *
7  * The contents of this file are subject to the terms of the
8  * Common Development and Distribution License, Version 1.0 only
9  * (the "License").  You may not use this file except in compliance
10  * with the License.
11  *
12  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
13  * or http://www.opensolaris.org/os/licensing.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  *
17  * When distributing Covered Code, include this CDDL HEADER in each
18  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
19  * If applicable, add the following below this CDDL HEADER, with the
20  * fields enclosed by brackets "[]" replaced with your own identifying
21  * information: Portions Copyright [yyyy] [name of copyright owner]
22  *
23  * CDDL HEADER END
24  */
25 /*
26  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #include <sys/fasttrap_isa.h>
31 #include <sys/fasttrap_impl.h>
32 #include <sys/dtrace.h>
33 #include <sys/dtrace_impl.h>
34 #include <kern/task.h>
35 #include <vm/pmap.h>
36 #include <vm/vm_map.h>
37 #include <mach/mach_vm.h>
38 #include <arm/proc_reg.h>
39 #include <arm/caches_internal.h>
40 
41 #include <sys/dtrace_ptss.h>
42 #include <kern/debug.h>
43 
44 #include <pexpert/pexpert.h>
45 
46 extern dtrace_id_t dtrace_probeid_error;
47 
48 /* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
49 #define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */
50 
51 extern int dtrace_decode_arm(uint32_t instr);
52 extern int dtrace_decode_thumb(uint32_t instr);
53 
54 /*
55  * Lossless User-Land Tracing on ARM
56  * ---------------------------------
57  *
58  * The details here will be fleshed out as more of this is implemented. The
59  * basic design will be the same way as tracing works in x86.
60  *
61  * Some ARM specific issues:
62  *
63  * We need to patch differently for ARM instructions and Thumb instructions.
64  * When we hit a probe, we check to see if the mode we're currently in is the
65  * same as the mode we're patching for. If not, we remove the tracepoint and
66  * abort. This ARM/Thumb information is pulled in from the arch specific
67  * information in the fasttrap probe.
68  *
69  * On ARM, any instruction that uses registers can also use the pc as a
70  * register. This presents problems during emulation because we have copied
71  * the instruction and thus the pc can be different. Currently we've emulated
72  * any instructions that use the pc if they can be used in a return probe.
73  * Eventually we will want to support all instructions that use the pc, but
74  * to do so requires disassembling the instruction and reconstituting it by
75  * substituting a different register.
76  *
77  */
78 
/*
 * View the first two bytes of a 32-bit instruction slot as a 16-bit Thumb
 * instruction. The argument must be an lvalue.
 */
#define THUMB_INSTR(x) (*(uint16_t *)&(x))

/*
 * Sign-extend the low 'v' bits of 'x' to a full int. The left shift is done
 * on an unsigned value so that moving a set bit into the sign position is
 * well defined; the arithmetic right shift then replicates that bit.
 */
#define SIGNEXTEND(x, v) (((int)((unsigned int)(x) << (32 - (v)))) >> (32 - (v)))
/* Clear the low 'v' bits of 'x'. */
#define ALIGNADDR(x, v) (((x) >> (v)) << (v))
/* Gather bits 15:10 and 26:25 of 'x' (a CPSR value at the call sites) into one 8-bit value. */
#define GETITSTATE(x) ((((x) >> 8) & 0xFC) | (((x) >> 25) & 0x3))
/* True when the low nibble of the ITSTATE value is exactly 8. */
#define ISLASTINIT(x) (((x) & 0xF) == 8)

/*
 * Store a 16-bit / 32-bit value at address 'x'. The whole expansion is
 * parenthesized so the macros behave as a single expression wherever used.
 */
#define SET16(x, w) (*((uint16_t *)(x)) = (w))
#define SET32(x, w) (*((uint32_t *)(x)) = (w))

#define IS_ARM_NOP(x) ((x) == 0xE1A00000)
/* Marker for is-enabled probes */
#define IS_ARM_IS_ENABLED(x) ((x) == 0xE0200000)

#define IS_THUMB_NOP(x) ((x) == 0x46C0)
/* Marker for is-enabled probes */
#define IS_THUMB_IS_ENABLED(x) ((x) == 0x4040)

/* Flag bits tested in ARM load-multiple encodings. */
#define ARM_LDM_UF (1 << 23)
#define ARM_LDM_PF (1 << 24)
#define ARM_LDM_WF (1 << 21)

/* Flag bits tested in ARM single-load encodings. */
#define ARM_LDR_UF (1 << 23)
#define ARM_LDR_BF (1 << 22)
103 
104 extern int dtrace_arm_condition_true(int cond, int cpsr);
105 
106 int
fasttrap_tracepoint_init(proc_t * p,fasttrap_tracepoint_t * tp,user_addr_t pc,fasttrap_probe_type_t type)107 fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp,
108     user_addr_t pc, fasttrap_probe_type_t type)
109 {
110 #pragma unused(type)
111 	uint32_t instr;
112 
113 	/*
114 	 * Read the instruction at the given address out of the process's
115 	 * address space. We don't have to worry about a debugger
116 	 * changing this instruction before we overwrite it with our trap
117 	 * instruction since P_PR_LOCK is set. Since instructions can span
118 	 * pages, we potentially read the instruction in two parts. If the
119 	 * second part fails, we just zero out that part of the instruction.
120 	 */
121 	/*
122 	 * APPLE NOTE: Of course, we do not have a P_PR_LOCK, so this is racey...
123 	 */
124 
125 	if (uread(p, &instr, 4, pc) != 0) {
126 		return -1;
127 	}
128 
129 	/* We want &instr to always point to the saved instruction, so just copy the
130 	 * whole thing When cast to a pointer to a uint16_t, that will give us a
131 	 * pointer to the first two bytes, which is the thumb instruction.
132 	 */
133 	tp->ftt_instr = instr;
134 
135 	if (tp->ftt_fntype != FASTTRAP_FN_DONE_INIT) {
136 		switch (tp->ftt_fntype) {
137 		case FASTTRAP_FN_UNKNOWN:
138 			/* Can't instrument without any information. We can add some heuristics later if necessary. */
139 			return -1;
140 
141 		case FASTTRAP_FN_USDT:
142 			if (IS_ARM_NOP(instr) || IS_ARM_IS_ENABLED(instr)) {
143 				tp->ftt_thumb = 0;
144 			} else if (IS_THUMB_NOP(THUMB_INSTR(instr)) || IS_THUMB_IS_ENABLED(THUMB_INSTR(instr))) {
145 				tp->ftt_thumb = 1;
146 			} else {
147 				/* Shouldn't reach here - this means we don't recognize
148 				 * the instruction at one of the USDT probe locations
149 				 */
150 				return -1;
151 			}
152 			tp->ftt_fntype = FASTTRAP_FN_DONE_INIT;
153 			break;
154 
155 		case FASTTRAP_FN_ARM:
156 			tp->ftt_thumb = 0;
157 			tp->ftt_fntype = FASTTRAP_FN_DONE_INIT;
158 			break;
159 
160 		case FASTTRAP_FN_THUMB:
161 			tp->ftt_thumb = 1;
162 			tp->ftt_fntype = FASTTRAP_FN_DONE_INIT;
163 			break;
164 
165 		default:
166 			return -1;
167 		}
168 	}
169 
170 	if (tp->ftt_thumb) {
171 		tp->ftt_type = dtrace_decode_thumb(instr);
172 	} else {
173 		tp->ftt_type = dtrace_decode_arm(instr);
174 	}
175 
176 	if (tp->ftt_type == FASTTRAP_T_INV) {
177 		/* This is an instruction we either don't recognize or can't instrument */
178 		printf("dtrace: fasttrap: Unrecognized instruction: %08x at %08x\n",
179 		    (tp->ftt_thumb && dtrace_instr_size(tp->ftt_instr, tp->ftt_thumb) == 2) ? tp->ftt_instr1 : instr, pc);
180 		return -1;
181 	}
182 
183 	return 0;
184 }
185 
186 int
fasttrap_tracepoint_install(proc_t * p,fasttrap_tracepoint_t * tp)187 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
188 {
189 	/* The thumb patch is a 2 byte instruction regardless of the size of the original instruction */
190 	uint32_t instr;
191 	int size = tp->ftt_thumb ? 2 : 4;
192 
193 	if (tp->ftt_thumb) {
194 		*((uint16_t*) &instr) = FASTTRAP_THUMB_INSTR;
195 	} else {
196 		instr = FASTTRAP_ARM_INSTR;
197 	}
198 
199 	if (uwrite(p, &instr, size, tp->ftt_pc) != 0) {
200 		return -1;
201 	}
202 
203 	tp->ftt_installed = 1;
204 
205 	return 0;
206 }
207 
208 int
fasttrap_tracepoint_remove(proc_t * p,fasttrap_tracepoint_t * tp)209 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
210 {
211 	/* The thumb patch is a 2 byte instruction regardless of the size of the original instruction */
212 	uint32_t instr;
213 	int size = tp->ftt_thumb ? 2 : 4;
214 
215 	/*
216 	 * Distinguish between read or write failures and a changed
217 	 * instruction.
218 	 */
219 	if (uread(p, &instr, size, tp->ftt_pc) != 0) {
220 		goto end;
221 	}
222 	if (tp->ftt_thumb) {
223 		if (*((uint16_t*) &instr) != FASTTRAP_THUMB_INSTR) {
224 			goto end;
225 		}
226 	} else {
227 		if (instr != FASTTRAP_ARM_INSTR) {
228 			goto end;
229 		}
230 	}
231 	if (uwrite(p, &tp->ftt_instr, size, tp->ftt_pc) != 0) {
232 		return -1;
233 	}
234 
235 end:
236 	tp->ftt_installed = 0;
237 
238 	return 0;
239 }
240 
241 static void
fasttrap_return_common(proc_t * p,arm_saved_state_t * regs,user_addr_t pc,user_addr_t new_pc)242 fasttrap_return_common(proc_t *p, arm_saved_state_t *regs, user_addr_t pc, user_addr_t new_pc)
243 {
244 	pid_t pid = proc_getpid(p);
245 	fasttrap_tracepoint_t *tp;
246 	fasttrap_bucket_t *bucket;
247 	fasttrap_id_t *id;
248 	lck_mtx_t *pid_mtx;
249 	int retire_tp = 1;
250 
251 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
252 	lck_mtx_lock(pid_mtx);
253 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
254 
255 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
256 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
257 		    tp->ftt_proc->ftpc_acount != 0) {
258 			break;
259 		}
260 	}
261 
262 	/*
263 	 * Don't sweat it if we can't find the tracepoint again; unlike
264 	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
265 	 * is not essential to the correct execution of the process.
266 	 */
267 	if (tp == NULL) {
268 		lck_mtx_unlock(pid_mtx);
269 		return;
270 	}
271 
272 	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
273 		fasttrap_probe_t *probe = id->fti_probe;
274 		/*
275 		 * If there's a branch that could act as a return site, we
276 		 * need to trace it, and check here if the program counter is
277 		 * external to the function.
278 		 */
279 		if (tp->ftt_type != FASTTRAP_T_LDM_PC &&
280 		    tp->ftt_type != FASTTRAP_T_POP_PC &&
281 		    new_pc - probe->ftp_faddr < probe->ftp_fsize) {
282 			continue;
283 		}
284 
285 		if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
286 			if (os_atomic_xchg(&probe->ftp_triggered, 1, relaxed)) {
287 				/* already triggered */
288 				continue;
289 			}
290 		}
291 		/*
292 		 * If we have at least one probe associated that
293 		 * is not a oneshot probe, don't remove the
294 		 * tracepoint
295 		 */
296 		else {
297 			retire_tp = 0;
298 		}
299 #if defined(XNU_TARGET_OS_OSX)
300 		if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
301 			dtrace_probe(dtrace_probeid_error, 0 /* state */, id->fti_probe->ftp_id,
302 			    1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV);
303 #else
304 		if (FALSE) {
305 #endif /* defined(XNU_TARGET_OS_OSX) */
306 		} else {
307 			dtrace_probe(id->fti_probe->ftp_id,
308 			    pc - id->fti_probe->ftp_faddr,
309 			    regs->r[0], 0, 0, 0);
310 		}
311 	}
312 	if (retire_tp) {
313 		fasttrap_tracepoint_retire(p, tp);
314 	}
315 
316 	lck_mtx_unlock(pid_mtx);
317 }
318 
319 #if DEBUG
320 __dead2
321 #endif
322 static void
323 fasttrap_sigsegv(proc_t *p, uthread_t t, user_addr_t addr, arm_saved_state_t *regs)
324 {
325 	/* TODO: This function isn't implemented yet. In debug mode, panic the system to
326 	 * find out why we're hitting this point. In other modes, kill the process.
327 	 */
328 #if DEBUG
329 #pragma unused(p,t,addr,arm_saved_state)
330 	panic("fasttrap: sigsegv not yet implemented");
331 #else
332 #pragma unused(p,t,addr)
333 	/* Kill the process */
334 	regs->pc = 0;
335 #endif
336 
337 #if 0
338 	proc_lock(p);
339 
340 	/* Set fault address and mark signal */
341 	t->uu_code = addr;
342 	t->uu_siglist |= sigmask(SIGSEGV);
343 
344 	/*
345 	 * XXX These two line may be redundant; if not, then we need
346 	 * XXX to potentially set the data address in the machine
347 	 * XXX specific thread state structure to indicate the address.
348 	 */
349 	t->uu_exception = KERN_INVALID_ADDRESS;         /* SIGSEGV */
350 	t->uu_subcode = 0;      /* XXX pad */
351 
352 	proc_unlock(p);
353 
354 	/* raise signal */
355 	signal_setast(get_machthread(t));
356 #endif
357 }
358 
359 static void
360 fasttrap_usdt_args(fasttrap_probe_t *probe, arm_saved_state_t *regs, int argc,
361     uint32_t *argv)
362 {
363 	int i, x, cap = MIN(argc, probe->ftp_nargs);
364 
365 	for (i = 0; i < cap; i++) {
366 		x = probe->ftp_argmap[i];
367 
368 		if (x < 4) {
369 			argv[i] = regs->r[x];
370 		} else {
371 			fasttrap_fuword32_noerr(regs->sp + (x - 4) * sizeof(uint32_t), &argv[i]);
372 		}
373 	}
374 
375 	for (; i < argc; i++) {
376 		argv[i] = 0;
377 	}
378 }
379 
380 static void
381 set_thumb_flag(arm_saved_state_t *regs, user_addr_t pc)
382 {
383 	if (pc & 1) {
384 		regs->cpsr |= PSR_TF;
385 	} else {
386 		regs->cpsr &= ~PSR_TF;
387 	}
388 }
389 
390 int
391 fasttrap_pid_probe(arm_saved_state_t *regs)
392 {
393 	proc_t *p = current_proc();
394 	user_addr_t new_pc = 0;
395 	fasttrap_bucket_t *bucket;
396 	lck_mtx_t *pid_mtx;
397 	fasttrap_tracepoint_t *tp, tp_local;
398 	pid_t pid;
399 	dtrace_icookie_t cookie;
400 	uint_t is_enabled = 0;
401 	int instr_size;
402 	int was_simulated = 1, retire_tp = 1;
403 
404 	user_addr_t pc = regs->pc;
405 
406 	uthread_t uthread = current_uthread();
407 
408 	/*
409 	 * It's possible that a user (in a veritable orgy of bad planning)
410 	 * could redirect this thread's flow of control before it reached the
411 	 * return probe fasttrap. In this case we need to kill the process
412 	 * since it's in a unrecoverable state.
413 	 */
414 	if (uthread->t_dtrace_step) {
415 		ASSERT(uthread->t_dtrace_on);
416 		fasttrap_sigtrap(p, uthread, pc);
417 		return 0;
418 	}
419 
420 	/*
421 	 * Clear all user tracing flags.
422 	 */
423 	uthread->t_dtrace_ft = 0;
424 	uthread->t_dtrace_pc = 0;
425 	uthread->t_dtrace_npc = 0;
426 	uthread->t_dtrace_scrpc = 0;
427 	uthread->t_dtrace_astpc = 0;
428 
429 
430 	pid = proc_getpid(p);
431 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
432 	lck_mtx_lock(pid_mtx);
433 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
434 
435 	/*
436 	 * Lookup the tracepoint that the process just hit.
437 	 */
438 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
439 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
440 		    tp->ftt_proc->ftpc_acount != 0) {
441 			break;
442 		}
443 	}
444 
445 	/*
446 	 * If we couldn't find a matching tracepoint, either a tracepoint has
447 	 * been inserted without using the pid<pid> ioctl interface (see
448 	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
449 	 */
450 	if (tp == NULL) {
451 		lck_mtx_unlock(pid_mtx);
452 		return -1;
453 	}
454 
455 	/* Default to always execute */
456 	int condition_code = 0xE;
457 	if (tp->ftt_thumb) {
458 		uint32_t itstate = GETITSTATE(regs->cpsr);
459 		if (itstate != 0) {
460 			/* In IT block, make sure it's the last statement in the block */
461 			if (ISLASTINIT(itstate)) {
462 				condition_code = itstate >> 4;
463 			} else {
464 				printf("dtrace: fasttrap: Tried to trace instruction %08x at %08x but not at end of IT block\n",
465 				    (tp->ftt_thumb && dtrace_instr_size(tp->ftt_instr, tp->ftt_thumb) == 2) ? tp->ftt_instr1 : tp->ftt_instr, pc);
466 
467 				fasttrap_tracepoint_remove(p, tp);
468 				lck_mtx_unlock(pid_mtx);
469 				return -1;
470 			}
471 		}
472 	} else {
473 		condition_code = ARM_CONDCODE(tp->ftt_instr);
474 	}
475 
476 	if (!tp->ftt_thumb != !(regs->cpsr & PSR_TF)) {
477 		/* The ARM/Thumb mode does not match what we expected for this probe.
478 		 * Remove this probe and bail.
479 		 */
480 		fasttrap_tracepoint_remove(p, tp);
481 		lck_mtx_unlock(pid_mtx);
482 		return -1;
483 	}
484 
485 	if (tp->ftt_ids != NULL) {
486 		fasttrap_id_t *id;
487 
488 		uint32_t s4;
489 		uint32_t *stack = (uint32_t *)regs->sp;
490 
491 		/* First four parameters are passed in registers */
492 		fasttrap_fuword32_noerr((user_addr_t)(uint32_t)stack, &s4);
493 
494 		for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
495 			fasttrap_probe_t *probe = id->fti_probe;
496 
497 #if defined(XNU_TARGET_OS_OSX)
498 			if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
499 				dtrace_probe(dtrace_probeid_error, 0 /* state */, probe->ftp_id,
500 				    1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV);
501 #else
502 			if (FALSE) {
503 #endif /* defined(XNU_TARGET_OS_OSX) */
504 			} else {
505 				if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
506 					if (os_atomic_xchg(&probe->ftp_triggered, 1, relaxed)) {
507 						/* already triggered */
508 						continue;
509 					}
510 				}
511 				/*
512 				 * If we have at least probe associated that
513 				 * is not a oneshot probe, don't remove the
514 				 * tracepoint
515 				 */
516 				else {
517 					retire_tp = 0;
518 				}
519 				if (id->fti_ptype == DTFTP_ENTRY) {
520 					/*
521 					 * We note that this was an entry
522 					 * probe to help ustack() find the
523 					 * first caller.
524 					 */
525 					cookie = dtrace_interrupt_disable();
526 					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
527 					dtrace_probe(probe->ftp_id, regs->r[0], regs->r[1], regs->r[2], regs->r[3], s4);
528 					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
529 					dtrace_interrupt_enable(cookie);
530 				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
531 					/*
532 					 * Note that in this case, we don't
533 					 * call dtrace_probe() since it's only
534 					 * an artificial probe meant to change
535 					 * the flow of control so that it
536 					 * encounters the true probe.
537 					 */
538 					is_enabled = 1;
539 				} else if (probe->ftp_argmap == NULL) {
540 					dtrace_probe(probe->ftp_id, regs->r[0], regs->r[1], regs->r[2], regs->r[3], s4);
541 				} else {
542 					uint32_t t[5];
543 
544 					fasttrap_usdt_args(probe, regs, 5, t);
545 					dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]);
546 				}
547 			}
548 		}
549 		if (retire_tp) {
550 			fasttrap_tracepoint_retire(p, tp);
551 		}
552 	}
553 	/*
554 	 * We're about to do a bunch of work so we cache a local copy of
555 	 * the tracepoint to emulate the instruction, and then find the
556 	 * tracepoint again later if we need to light up any return probes.
557 	 */
558 	tp_local = *tp;
559 	lck_mtx_unlock(pid_mtx);
560 	tp = &tp_local;
561 
562 	/*
563 	 * If there's an is-enabled probe connected to this tracepoint it
564 	 * means that there was a 'eor r0,r0,r0'
565 	 * instruction that was placed there by DTrace when the binary was
566 	 * linked. As this probe is, in fact, enabled, we need to stuff 1
567 	 * into R0. Accordingly, we can bypass all the instruction
568 	 * emulation logic since we know the inevitable result. It's possible
569 	 * that a user could construct a scenario where the 'is-enabled'
570 	 * probe was on some other instruction, but that would be a rather
571 	 * exotic way to shoot oneself in the foot.
572 	 */
573 
574 	if (is_enabled) {
575 		regs->r[0] = 1;
576 		new_pc = regs->pc + (tp->ftt_thumb ? 2 : 4);
577 		goto done;
578 	}
579 
580 	/* For USDT probes, bypass all the emulation logic for the nop instruction */
581 	if ((tp->ftt_thumb && IS_THUMB_NOP(THUMB_INSTR(tp->ftt_instr))) ||
582 	    (!tp->ftt_thumb && IS_ARM_NOP(tp->ftt_instr))) {
583 		new_pc = regs->pc + (tp->ftt_thumb ? 2 : 4);
584 		goto done;
585 	}
586 
587 	instr_size = dtrace_instr_size(tp->ftt_instr, tp->ftt_thumb);
588 
589 	switch (tp->ftt_type) {
590 	case FASTTRAP_T_MOV_PC_REG:
591 	case FASTTRAP_T_CPY_PC:
592 	{
593 		if (!dtrace_arm_condition_true(condition_code, regs->cpsr)) {
594 			new_pc = pc + instr_size;
595 			break;
596 		}
597 
598 		int rm;
599 		if (tp->ftt_thumb) {
600 			rm = THUMB16_HRM(tp->ftt_instr1);
601 		} else {
602 			rm = tp->ftt_instr & 0xF;
603 		}
604 		new_pc = regs->r[rm];
605 
606 		/* This instruction does not change the Thumb state */
607 
608 		break;
609 	}
610 
611 	case FASTTRAP_T_STM_LR:
612 	case FASTTRAP_T_PUSH_LR:
613 	{
614 		/*
615 		 * This is a very common case, so we want to emulate this instruction if
616 		 * possible. However, on a push, it is possible that we might reach the end
617 		 * of a page and have to allocate a new page. Most of the time this will not
618 		 * happen, and we know that the push instruction can store at most 16 words,
619 		 * so check to see if we are far from the boundary, and if so, emulate. This
620 		 * can be made more aggressive by checking the actual number of words being
621 		 * pushed, but we won't do that for now.
622 		 *
623 		 * Some of the same issues that apply to POP_PC probably apply here also.
624 		 */
625 
626 		int reglist;
627 		int ret;
628 		uintptr_t* base;
629 
630 		if (!dtrace_arm_condition_true(condition_code, regs->cpsr)) {
631 			new_pc = pc + instr_size;
632 			break;
633 		}
634 
635 		base = (uintptr_t*) regs->sp;
636 		if (((((uintptr_t) base) - 16 * 4) >> PAGE_SHIFT) != (((uintptr_t) base) >> PAGE_SHIFT)) {
637 			/* Crosses the page boundary, go to emulation */
638 			goto instr_emulate;
639 		}
640 
641 		if (tp->ftt_thumb) {
642 			if (instr_size == 4) {
643 				/* We know we have to push lr, never push sp or pc */
644 				reglist = tp->ftt_instr2 & 0x1FFF;
645 			} else {
646 				reglist = tp->ftt_instr1 & 0xFF;
647 			}
648 		} else {
649 			/* We know we have to push lr, never push sp or pc */
650 			reglist = tp->ftt_instr & 0x1FFF;
651 		}
652 
653 		/* Push the link register */
654 		base--;
655 		ret = fasttrap_suword32((uint32_t) base, regs->lr);
656 		if (ret == -1) {
657 			fasttrap_sigsegv(p, uthread, (user_addr_t) base, regs);
658 			new_pc = regs->pc;
659 			break;
660 		}
661 
662 		/* Start pushing from $r12 */
663 		int regmask = 1 << 12;
664 		int regnum = 12;
665 
666 		while (regmask) {
667 			if (reglist & regmask) {
668 				base--;
669 				ret = fasttrap_suword32((uint32_t) base, regs->r[regnum]);
670 				if (ret == -1) {
671 					fasttrap_sigsegv(p, uthread, (user_addr_t) base, regs);
672 					new_pc = regs->pc;
673 					break;
674 				}
675 			}
676 			regmask >>= 1;
677 			regnum--;
678 		}
679 
680 		regs->sp = (uintptr_t) base;
681 
682 		new_pc = pc + instr_size;
683 
684 		break;
685 	}
686 
687 
688 	case FASTTRAP_T_LDM_PC:
689 	case FASTTRAP_T_POP_PC:
690 	{
691 		/* TODO Two issues that will eventually need to be resolved:
692 		 *
693 		 * 1. Understand what the hardware does if we have to segfault (data abort) in
694 		 * the middle of a load multiple. We currently don't have a working segfault
695 		 * handler anyway, and with no swapfile we should never segfault on this load.
696 		 * If we do, we'll just kill the process by setting the pc to 0.
697 		 *
698 		 * 2. The emulation is no longer atomic. We currently only emulate pop for
699 		 * function epilogues, and so we should never have a race here because one
700 		 * thread should never be trying to manipulate another thread's stack frames.
701 		 * That is almost certainly a bug in the program.
702 		 *
703 		 * This will need to be fixed if we ever:
704 		 *   a. Ship dtrace externally, as this could be a potential attack vector
705 		 *   b. Support instruction level tracing, as we might then pop/ldm non epilogues.
706 		 *
707 		 */
708 
709 		/* Assume ldmia! sp/pop ... pc */
710 
711 		int regnum = 0, reglist;
712 		int ret;
713 		uintptr_t* base;
714 
715 		if (!dtrace_arm_condition_true(condition_code, regs->cpsr)) {
716 			new_pc = pc + instr_size;
717 			break;
718 		}
719 
720 		if (tp->ftt_thumb) {
721 			if (instr_size == 4) {
722 				/* We know we have to load the pc, don't do it twice */
723 				reglist = tp->ftt_instr2 & 0x7FFF;
724 			} else {
725 				reglist = tp->ftt_instr1 & 0xFF;
726 			}
727 		} else {
728 			/* We know we have to load the pc, don't do it twice */
729 			reglist = tp->ftt_instr & 0x7FFF;
730 		}
731 
732 		base = (uintptr_t*) regs->sp;
733 		while (reglist) {
734 			if (reglist & 1) {
735 				ret = fasttrap_fuword32((uint32_t) base, &regs->r[regnum]);
736 				if (ret == -1) {
737 					fasttrap_sigsegv(p, uthread, (user_addr_t) base, regs);
738 					new_pc = regs->pc;
739 					break;
740 				}
741 				base++;
742 			}
743 			reglist >>= 1;
744 			regnum++;
745 		}
746 
747 		ret = fasttrap_fuword32((uint32_t) base, &new_pc);
748 		if (ret == -1) {
749 			fasttrap_sigsegv(p, uthread, (user_addr_t) base, regs);
750 			new_pc = regs->pc;
751 			break;
752 		}
753 		base++;
754 
755 		regs->sp = (uintptr_t) base;
756 
757 		set_thumb_flag(regs, new_pc);
758 
759 		break;
760 	}
761 
762 	case FASTTRAP_T_CB_N_Z:
763 	{
764 		/* Thumb mode instruction, and not permitted in IT block, so skip the condition code check */
765 		int rn = tp->ftt_instr1 & 0x7;
766 		int offset = (((tp->ftt_instr1 & 0x00F8) >> 2) | ((tp->ftt_instr1 & 0x0200) >> 3)) + 4;
767 		int nonzero = tp->ftt_instr1 & 0x0800;
768 		if (!nonzero != !(regs->r[rn] == 0)) {
769 			new_pc = pc + offset;
770 		} else {
771 			new_pc = pc + instr_size;
772 		}
773 		break;
774 	}
775 
776 	case FASTTRAP_T_B_COND:
777 	{
778 		/* Use the condition code in the instruction and ignore the ITSTATE */
779 
780 		int code, offset;
781 		if (tp->ftt_thumb) {
782 			if (instr_size == 4) {
783 				code = (tp->ftt_instr1 >> 6) & 0xF;
784 				if (code == 14 || code == 15) {
785 					panic("fasttrap: Emulation of invalid branch");
786 				}
787 				int S = (tp->ftt_instr1 >> 10) & 1,
788 				    J1 = (tp->ftt_instr2 >> 13) & 1,
789 				    J2 = (tp->ftt_instr2 >> 11) & 1;
790 				offset = 4 + SIGNEXTEND(
791 					(S << 20) | (J2 << 19) | (J1 << 18) |
792 					((tp->ftt_instr1 & 0x003F) << 12) |
793 					((tp->ftt_instr2 & 0x07FF) << 1),
794 					21);
795 			} else {
796 				code = (tp->ftt_instr1 >> 8) & 0xF;
797 				if (code == 14 || code == 15) {
798 					panic("fasttrap: Emulation of invalid branch");
799 				}
800 				offset = 4 + (SIGNEXTEND(tp->ftt_instr1 & 0xFF, 8) << 1);
801 			}
802 		} else {
803 			code = ARM_CONDCODE(tp->ftt_instr);
804 			if (code == 15) {
805 				panic("fasttrap: Emulation of invalid branch");
806 			}
807 			offset = 8 + (SIGNEXTEND(tp->ftt_instr & 0x00FFFFFF, 24) << 2);
808 		}
809 
810 		if (dtrace_arm_condition_true(code, regs->cpsr)) {
811 			new_pc = pc + offset;
812 		} else {
813 			new_pc = pc + instr_size;
814 		}
815 
816 		break;
817 	}
818 
819 	case FASTTRAP_T_B_UNCOND:
820 	{
821 		int offset;
822 
823 		/* Unconditional branches can only be taken from Thumb mode */
824 		/* (This is different from an ARM branch with condition code "always") */
825 		ASSERT(tp->ftt_thumb == 1);
826 
827 		if (!dtrace_arm_condition_true(condition_code, regs->cpsr)) {
828 			new_pc = pc + instr_size;
829 			break;
830 		}
831 
832 		if (instr_size == 4) {
833 			int S = (tp->ftt_instr1 >> 10) & 1,
834 			    J1 = (tp->ftt_instr2 >> 13) & 1,
835 			    J2 = (tp->ftt_instr2 >> 11) & 1;
836 			int I1 = (J1 != S) ? 0 : 1, I2 = (J2 != S) ? 0 : 1;
837 			offset = 4 + SIGNEXTEND(
838 				(S << 24) | (I1 << 23) | (I2 << 22) |
839 				((tp->ftt_instr1 & 0x03FF) << 12) |
840 				((tp->ftt_instr2 & 0x07FF) << 1),
841 				25);
842 		} else {
843 			uint32_t instr1 = tp->ftt_instr1;
844 			offset = 4 + (SIGNEXTEND(instr1 & 0x7FF, 11) << 1);
845 		}
846 
847 		new_pc = pc + offset;
848 
849 		break;
850 	}
851 
852 	case FASTTRAP_T_BX_REG:
853 	{
854 		int reg;
855 
856 		if (!dtrace_arm_condition_true(condition_code, regs->cpsr)) {
857 			new_pc = pc + instr_size;
858 			break;
859 		}
860 
861 		if (tp->ftt_thumb) {
862 			reg = THUMB16_HRM(tp->ftt_instr1);
863 		} else {
864 			reg = ARM_RM(tp->ftt_instr);
865 		}
866 		new_pc = regs->r[reg];
867 		set_thumb_flag(regs, new_pc);
868 
869 		break;
870 	}
871 
872 	case FASTTRAP_T_LDR_PC_IMMED:
873 	case FASTTRAP_T_VLDR_PC_IMMED:
874 		/* Handle these instructions by replacing the PC in the instruction with another
875 		 * register. They are common, so we'd like to support them, and this way we do so
876 		 * without any risk of having to simulate a segfault.
877 		 */
878 
879 		/* Fall through */
880 
881 instr_emulate:
882 	case FASTTRAP_T_COMMON:
883 	{
884 		user_addr_t addr;
885 		uint8_t scratch[32];
886 		uint_t i = 0;
887 		fasttrap_instr_t emul_instr;
888 		emul_instr.instr32 = tp->ftt_instr;
889 		int emul_instr_size;
890 
891 		/*
892 		 * Unfortunately sometimes when we emulate the instruction and have to replace the
893 		 * PC, there is no longer a thumb mode equivalent. We end up having to run the
894 		 * modified instruction in ARM mode. We use this variable to keep track of which
895 		 * mode we should emulate in. We still use the original variable to determine
896 		 * what mode to return to.
897 		 */
898 		uint8_t emul_thumb = tp->ftt_thumb;
899 		int save_reg = -1;
900 		uint32_t save_val = 0;
901 
902 		/*
903 		 * Dealing with condition codes and emulation:
904 		 * We can't just uniformly do a condition code check here because not all instructions
905 		 * have condition codes. We currently do not support an instruction by instruction trace,
906 		 * so we can assume that either: 1. We are executing a Thumb instruction, in which case
907 		 * we either are not in an IT block and should execute always, or we are last in an IT
908 		 * block. Either way, the traced instruction will run correctly, and we won't have any
909 		 * problems when we return to the original code, because we will no longer be in the IT
910 		 * block. 2. We are executing an ARM instruction, in which case we are ok as long as
911 		 * we don't attempt to change the condition code.
912 		 */
913 		if (tp->ftt_type == FASTTRAP_T_LDR_PC_IMMED) {
914 			/* We know we always have a free register (the one we plan to write the
915 			 * result value to!). So we'll replace the pc with that one.
916 			 */
917 			int new_reg;
918 			if (tp->ftt_thumb) {
919 				/* Check to see if thumb or thumb2 */
920 				if (instr_size == 2) {
921 					/*
922 					 * Sadness. We need to emulate this instruction in ARM mode
923 					 * because it has an 8 bit immediate offset. Instead of having
924 					 * to deal with condition codes in the ARM instruction, we'll
925 					 * just check the condition and abort if the condition is false.
926 					 */
927 					if (!dtrace_arm_condition_true(condition_code, regs->cpsr)) {
928 						new_pc = pc + instr_size;
929 						break;
930 					}
931 
932 					new_reg = (tp->ftt_instr1 >> 8) & 0x7;
933 					regs->r[new_reg] = ALIGNADDR(regs->pc + 4, 2);
934 					emul_thumb = 0;
935 					emul_instr.instr32 = 0xE5900000 | (new_reg << 16) | (new_reg << 12) | ((tp->ftt_instr1 & 0xFF) << 2);
936 				} else {
937 					/* Thumb2. Just replace the register. */
938 					new_reg = (tp->ftt_instr2 >> 12) & 0xF;
939 					regs->r[new_reg] = ALIGNADDR(regs->pc + 4, 2);
940 					emul_instr.instr16.instr1 &= ~0x000F;
941 					emul_instr.instr16.instr1 |= new_reg;
942 				}
943 			} else {
944 				/* ARM. Just replace the register. */
945 				new_reg = (tp->ftt_instr >> 12) & 0xF;
946 				regs->r[new_reg] = ALIGNADDR(regs->pc + 8, 2);
947 				emul_instr.instr32 &= ~0x000F0000;
948 				emul_instr.instr32 |= new_reg << 16;
949 			}
950 		} else if (tp->ftt_type == FASTTRAP_T_VLDR_PC_IMMED) {
951 			/* This instruction only uses one register, and if we're here, we know
952 			 * it must be the pc. So we'll just replace it with R0.
953 			 */
954 			save_reg = 0;
955 			save_val = regs->r[0];
956 			regs->r[save_reg] = ALIGNADDR(regs->pc + (tp->ftt_thumb ? 4 : 8), 2);
957 			if (tp->ftt_thumb) {
958 				emul_instr.instr16.instr1 &= ~0x000F;
959 			} else {
960 				emul_instr.instr32 &= ~0x000F0000;
961 			}
962 		}
963 
964 		emul_instr_size = dtrace_instr_size(emul_instr.instr32, emul_thumb);
965 
966 		/*
967 		 * At this point:
968 		 *   tp->ftt_thumb = thumb mode of original instruction
969 		 *   emul_thumb = thumb mode for emulation
970 		 *   emul_instr = instruction we are using to emulate original instruction
971 		 *   emul_instr_size = size of emulating instruction
972 		 */
973 
974 		addr = uthread->t_dtrace_scratch->addr;
975 
976 		if (addr == 0LL) {
977 			fasttrap_sigtrap(p, uthread, pc);         // Should be killing target proc
978 			new_pc = pc;
979 			break;
980 		}
981 
982 		uthread->t_dtrace_scrpc = addr;
983 		if (emul_thumb) {
984 			/*
985 			 * No way to do an unconditional branch in Thumb mode, shove the address
986 			 * onto the user stack and go to the next location with a pop. This can
987 			 * segfault if this push happens to cross a stack page, but that's ok, since
988 			 * we are running in userland, and the kernel knows how to handle userland
989 			 * stack expansions correctly.
990 			 *
991 			 * Layout of scratch space for Thumb mode:
992 			 *   Emulated instruction
993 			 *   ldr save_reg, [pc, #16] (if necessary, restore any register we clobbered)
994 			 *   push { r0, r1 }
995 			 *   ldr r0, [pc, #4]
996 			 *   str r0, [sp, #4]
997 			 *   pop { r0, pc }
998 			 *   Location we should return to in original program
999 			 *   Saved value of clobbered register (if necessary)
1000 			 */
1001 
1002 			bcopy(&emul_instr, &scratch[i], emul_instr_size); i += emul_instr_size;
1003 
1004 			if (save_reg != -1) {
1005 				uint16_t restore_inst = 0x4803;
1006 				restore_inst |= (save_reg & 0x7) << 8;
1007 				SET16(scratch + i, restore_inst); i += 2;               // ldr reg, [pc , #16]
1008 			}
1009 
1010 			SET16(scratch + i, 0xB403); i += 2;                             // push { r0, r1 }
1011 			SET16(scratch + i, 0x4801); i += 2;                             // ldr r0, [pc, #4]
1012 			SET16(scratch + i, 0x9001); i += 2;                             // str r0, [sp, #4]
1013 			SET16(scratch + i, 0xBD01); i += 2;                             // pop { r0, pc }
1014 
1015 			if (i % 4) {
1016 				SET16(scratch + i, 0); i += 2;                          // padding - saved 32 bit words must be aligned
1017 			}
1018 			SET32(scratch + i, pc + instr_size + (tp->ftt_thumb ? 1 : 0)); i += 4;          // Return address
1019 			if (save_reg != -1) {
1020 				SET32(scratch + i, save_val); i += 4;                   // saved value of clobbered register
1021 			}
1022 
1023 			uthread->t_dtrace_astpc = addr + i;
1024 			bcopy(&emul_instr, &scratch[i], emul_instr_size); i += emul_instr_size;
1025 			SET16(scratch + i, FASTTRAP_THUMB_RET_INSTR); i += 2;
1026 		} else {
1027 			/*
1028 			 * Layout of scratch space for ARM mode:
1029 			 *   Emulated instruction
1030 			 *   ldr save_reg, [pc, #12] (if necessary, restore any register we clobbered)
1031 			 *   ldr pc, [pc, #4]
1032 			 *   Location we should return to in original program
1033 			 *   Saved value of clobbered register (if necessary)
1034 			 */
1035 
1036 			bcopy(&emul_instr, &scratch[i], emul_instr_size); i += emul_instr_size;
1037 
1038 			if (save_reg != -1) {
1039 				uint32_t restore_inst = 0xE59F0004;
1040 				restore_inst |= save_reg << 12;
1041 				SET32(scratch + i, restore_inst); i += 4;               // ldr reg, [pc, #12]
1042 			}
1043 			SET32(scratch + i, 0xE51FF004); i += 4;                         // ldr pc, [pc, #4]
1044 
1045 			SET32(scratch + i, pc + instr_size + (tp->ftt_thumb ? 1 : 0)); i += 4;          // Return address
1046 			if (save_reg != -1) {
1047 				SET32(scratch + i, save_val); i += 4;                   // Saved value of clobbered register
1048 			}
1049 
1050 			uthread->t_dtrace_astpc = addr + i;
1051 			bcopy(&emul_instr, &scratch[i], emul_instr_size); i += emul_instr_size;
1052 			SET32(scratch + i, FASTTRAP_ARM_RET_INSTR); i += 4;
1053 		}
1054 
1055 		if (uwrite(p, scratch, i, uthread->t_dtrace_scratch->write_addr) != KERN_SUCCESS) {
1056 			fasttrap_sigtrap(p, uthread, pc);
1057 			new_pc = pc;
1058 			break;
1059 		}
1060 
1061 		if (tp->ftt_retids != NULL) {
1062 			uthread->t_dtrace_step = 1;
1063 			uthread->t_dtrace_ret = 1;
1064 			new_pc = uthread->t_dtrace_astpc + (emul_thumb ? 1 : 0);
1065 		} else {
1066 			new_pc = uthread->t_dtrace_scrpc + (emul_thumb ? 1 : 0);
1067 		}
1068 
1069 		uthread->t_dtrace_pc = pc;
1070 		uthread->t_dtrace_npc = pc + instr_size;
1071 		uthread->t_dtrace_on = 1;
1072 		was_simulated = 0;
1073 		set_thumb_flag(regs, new_pc);
1074 		break;
1075 	}
1076 
1077 	default:
1078 		panic("fasttrap: mishandled an instruction");
1079 	}
1080 
1081 done:
1082 	/*
1083 	 * APPLE NOTE:
1084 	 *
1085 	 * We're setting this earlier than Solaris does, to get a "correct"
1086 	 * ustack() output. In the Sun code,  a() -> b() -> c() -> d() is
1087 	 * reported at: d, b, a. The new way gives c, b, a, which is closer
1088 	 * to correct, as the return instruction has already executed.
1089 	 */
1090 	regs->pc = new_pc;
1091 
1092 	/*
1093 	 * If there were no return probes when we first found the tracepoint,
1094 	 * we should feel no obligation to honor any return probes that were
1095 	 * subsequently enabled -- they'll just have to wait until the next
1096 	 * time around.
1097 	 */
1098 	if (tp->ftt_retids != NULL) {
1099 		/*
1100 		 * We need to wait until the results of the instruction are
1101 		 * apparent before invoking any return probes. If this
1102 		 * instruction was emulated we can just call
1103 		 * fasttrap_return_common(); if it needs to be executed, we
1104 		 * need to wait until the user thread returns to the kernel.
1105 		 */
1106 		/*
1107 		 * It used to be that only common instructions were simulated.
1108 		 * For performance reasons, we now simulate some instructions
1109 		 * when safe and go back to userland otherwise. The was_simulated
1110 		 * flag means we don't need to go back to userland.
1111 		 */
1112 		if (was_simulated) {
1113 			fasttrap_return_common(p, regs, pc, new_pc);
1114 		} else {
1115 			ASSERT(uthread->t_dtrace_ret != 0);
1116 			ASSERT(uthread->t_dtrace_pc == pc);
1117 			ASSERT(uthread->t_dtrace_scrpc != 0);
1118 			ASSERT(new_pc == uthread->t_dtrace_astpc);
1119 		}
1120 	}
1121 
1122 	return 0;
1123 }
1124 
1125 int
1126 fasttrap_return_probe(arm_saved_state_t *regs)
1127 {
1128 	proc_t *p = current_proc();
1129 	uthread_t uthread = current_uthread();
1130 	user_addr_t pc = uthread->t_dtrace_pc;
1131 	user_addr_t npc = uthread->t_dtrace_npc;
1132 
1133 	uthread->t_dtrace_pc = 0;
1134 	uthread->t_dtrace_npc = 0;
1135 	uthread->t_dtrace_scrpc = 0;
1136 	uthread->t_dtrace_astpc = 0;
1137 
1138 
1139 	/*
1140 	 * We set rp->r_pc to the address of the traced instruction so
1141 	 * that it appears to dtrace_probe() that we're on the original
1142 	 * instruction, and so that the user can't easily detect our
1143 	 * complex web of lies. dtrace_return_probe() (our caller)
1144 	 * will correctly set %pc after we return.
1145 	 */
1146 	regs->pc = pc;
1147 
1148 	fasttrap_return_common(p, regs, pc, npc);
1149 
1150 	return 0;
1151 }
1152 
1153 uint64_t
1154 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1155     int aframes)
1156 {
1157 #pragma unused(arg, id, parg, aframes)
1158 	arm_saved_state_t* regs = find_user_regs(current_thread());
1159 
1160 	/* First four arguments are in registers */
1161 	if (argno < 4) {
1162 		return regs->r[argno];
1163 	}
1164 
1165 	/* Look on the stack for the rest */
1166 	uint32_t value;
1167 	uint32_t* sp = (uint32_t*) regs->sp;
1168 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1169 	value = dtrace_fuword32((user_addr_t) (sp + argno - 4));
1170 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
1171 
1172 	return value;
1173 }
1174 
1175 uint64_t
1176 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1177 {
1178 #pragma unused(arg, id, parg, argno, aframes)
1179 #if 0
1180 	return fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno);
1181 #endif
1182 
1183 	return 0;
1184 }
1185