xref: /xnu-8796.101.5/bsd/dev/i386/fasttrap_isa.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/fasttrap_isa.h>
28 #include <sys/fasttrap_impl.h>
29 #include <sys/dtrace.h>
30 #include <sys/dtrace_impl.h>
31 extern dtrace_id_t dtrace_probeid_error;
32 
33 #include "fasttrap_regset.h"
34 
35 #include <sys/dtrace_ptss.h>
36 #include <kern/debug.h>
37 
38 #include <machine/pal_routines.h>
39 
40 /* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
41 #define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */
42 
43 /*
44  * Lossless User-Land Tracing on x86
45  * ---------------------------------
46  *
47  * The execution of most instructions is not dependent on the address; for
48  * these instructions it is sufficient to copy them into the user process's
49  * address space and execute them. To effectively single-step an instruction
50  * in user-land, we copy out the following sequence of instructions to scratch
51  * space in the user thread's ulwp_t structure.
52  *
53  * We then set the program counter (%eip or %rip) to point to this scratch
54  * space. Once execution resumes, the original instruction is executed and
55  * then control flow is redirected to what was originally the subsequent
56  * instruction. If the kernel attempts to deliver a signal while single-
57  * stepping, the signal is deferred and the program counter is moved into the
58  * second sequence of instructions. The second sequence ends in a trap into
59  * the kernel where the deferred signal is then properly handled and delivered.
60  *
61  * For instructions whose execution is position dependent, we perform simple
62  * emulation. These instructions are limited to control transfer
63  * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
64  * of %rip-relative addressing that means that almost any instruction can be
65  * position dependent. For all the details on how we emulate generic
66  * instructions including %rip-relative instructions, see the code in
67  * fasttrap_pid_probe() below where we handle instructions of type
68  * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
69  */
70 
71 #define	FASTTRAP_MODRM_MOD(modrm)	(((modrm) >> 6) & 0x3)
72 #define	FASTTRAP_MODRM_REG(modrm)	(((modrm) >> 3) & 0x7)
73 #define	FASTTRAP_MODRM_RM(modrm)	((modrm) & 0x7)
74 #define	FASTTRAP_MODRM(mod, reg, rm)	(((mod) << 6) | ((reg) << 3) | (rm))
75 
76 #define	FASTTRAP_SIB_SCALE(sib)		(((sib) >> 6) & 0x3)
77 #define	FASTTRAP_SIB_INDEX(sib)		(((sib) >> 3) & 0x7)
78 #define	FASTTRAP_SIB_BASE(sib)		((sib) & 0x7)
79 
80 #define	FASTTRAP_REX_W(rex)		(((rex) >> 3) & 1)
81 #define	FASTTRAP_REX_R(rex)		(((rex) >> 2) & 1)
82 #define	FASTTRAP_REX_X(rex)		(((rex) >> 1) & 1)
83 #define	FASTTRAP_REX_B(rex)		((rex) & 1)
84 #define	FASTTRAP_REX(w, r, x, b)	\
85 	(0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))
86 
87 /*
88  * Single-byte op-codes.
89  */
90 #define	FASTTRAP_PUSHL_EBP	0x55
91 
92 #define	FASTTRAP_JO		0x70
93 #define	FASTTRAP_JNO		0x71
94 #define	FASTTRAP_JB		0x72
95 #define	FASTTRAP_JAE		0x73
96 #define	FASTTRAP_JE		0x74
97 #define	FASTTRAP_JNE		0x75
98 #define	FASTTRAP_JBE		0x76
99 #define	FASTTRAP_JA		0x77
100 #define	FASTTRAP_JS		0x78
101 #define	FASTTRAP_JNS		0x79
102 #define	FASTTRAP_JP		0x7a
103 #define	FASTTRAP_JNP		0x7b
104 #define	FASTTRAP_JL		0x7c
105 #define	FASTTRAP_JGE		0x7d
106 #define	FASTTRAP_JLE		0x7e
107 #define	FASTTRAP_JG		0x7f
108 
109 #define	FASTTRAP_NOP		0x90
110 
111 #define	FASTTRAP_MOV_EAX	0xb8
112 #define	FASTTRAP_MOV_ECX	0xb9
113 
114 #define	FASTTRAP_RET16		0xc2
115 #define	FASTTRAP_RET		0xc3
116 
117 #define	FASTTRAP_LOOPNZ		0xe0
118 #define	FASTTRAP_LOOPZ		0xe1
119 #define	FASTTRAP_LOOP		0xe2
120 #define	FASTTRAP_JCXZ		0xe3
121 
122 #define	FASTTRAP_CALL		0xe8
123 #define	FASTTRAP_JMP32		0xe9
124 #define	FASTTRAP_JMP8		0xeb
125 
126 #define	FASTTRAP_INT3		0xcc
127 #define	FASTTRAP_INT		0xcd
128 #define	T_DTRACE_RET		0x7f
129 
130 #define	FASTTRAP_2_BYTE_OP	0x0f
131 #define	FASTTRAP_GROUP5_OP	0xff
132 
133 /*
134  * Two-byte op-codes (second byte only).
135  */
136 #define	FASTTRAP_0F_JO		0x80
137 #define	FASTTRAP_0F_JNO		0x81
138 #define	FASTTRAP_0F_JB		0x82
139 #define	FASTTRAP_0F_JAE		0x83
140 #define	FASTTRAP_0F_JE		0x84
141 #define	FASTTRAP_0F_JNE		0x85
142 #define	FASTTRAP_0F_JBE		0x86
143 #define	FASTTRAP_0F_JA		0x87
144 #define	FASTTRAP_0F_JS		0x88
145 #define	FASTTRAP_0F_JNS		0x89
146 #define	FASTTRAP_0F_JP		0x8a
147 #define	FASTTRAP_0F_JNP		0x8b
148 #define	FASTTRAP_0F_JL		0x8c
149 #define	FASTTRAP_0F_JGE		0x8d
150 #define	FASTTRAP_0F_JLE		0x8e
151 #define	FASTTRAP_0F_JG		0x8f
152 
153 #define	FASTTRAP_EFLAGS_OF	0x800
154 #define	FASTTRAP_EFLAGS_DF	0x400
155 #define	FASTTRAP_EFLAGS_SF	0x080
156 #define	FASTTRAP_EFLAGS_ZF	0x040
157 #define	FASTTRAP_EFLAGS_AF	0x010
158 #define	FASTTRAP_EFLAGS_PF	0x004
159 #define	FASTTRAP_EFLAGS_CF	0x001
160 
161 /*
162  * Instruction prefixes.
163  */
164 #define	FASTTRAP_PREFIX_OPERAND	0x66
165 #define	FASTTRAP_PREFIX_ADDRESS	0x67
166 #define	FASTTRAP_PREFIX_CS	0x2E
167 #define	FASTTRAP_PREFIX_DS	0x3E
168 #define	FASTTRAP_PREFIX_ES	0x26
169 #define	FASTTRAP_PREFIX_FS	0x64
170 #define	FASTTRAP_PREFIX_GS	0x65
171 #define	FASTTRAP_PREFIX_SS	0x36
172 #define	FASTTRAP_PREFIX_LOCK	0xF0
173 #define	FASTTRAP_PREFIX_REP	0xF3
174 #define	FASTTRAP_PREFIX_REPNE	0xF2
175 
176 #define	FASTTRAP_NOREG	0xff
177 
178 /*
179  * Map between instruction register encodings and the kernel constants which
180  * correspond to indicies into struct regs.
181  */
182 
183 /*
184  * APPLE NOTE: We are cheating here. The regmap is used to decode which register
185  * a given instruction is trying to reference. OS X does not have extended registers
186  * for 32 bit apps, but the *order* is the same. So for 32 bit state, we will return:
187  *
188  * REG_RAX -> EAX
189  * REG_RCX -> ECX
190  * REG_RDX -> EDX
191  * REG_RBX -> EBX
192  * REG_RSP -> UESP
193  * REG_RBP -> EBP
194  * REG_RSI -> ESI
195  * REG_RDI -> EDI
196  *
197  * The fasttrap_getreg function knows how to make the correct transformation.
198  */
static const uint8_t regmap[16] = {
	/* Encodings 0-7: legacy registers (see APPLE NOTE above for 32-bit mapping). */
	REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI,
	/* Encodings 8-15: REX-extended registers, 64-bit processes only. */
	REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15,
};
203 
204 static user_addr_t fasttrap_getreg(x86_saved_state_t *, uint_t);
205 
206 static uint64_t
fasttrap_anarg(x86_saved_state_t * regs,int function_entry,int argno)207 fasttrap_anarg(x86_saved_state_t *regs, int function_entry, int argno)
208 {
209 	uint64_t value;
210 	int shift = function_entry ? 1 : 0;
211 
212 	x86_saved_state64_t *regs64;
213 	x86_saved_state32_t *regs32;
214 	unsigned int p_model;
215 
216         if (is_saved_state64(regs)) {
217                 regs64 = saved_state64(regs);
218 		regs32 = NULL;
219 		p_model = DATAMODEL_LP64;
220         } else {
221 		regs64 = NULL;
222                 regs32 = saved_state32(regs);
223 		p_model = DATAMODEL_ILP32;
224         }
225 
226 	if (p_model == DATAMODEL_LP64) {
227 		user_addr_t stack;
228 
229 		/*
230 		 * In 64-bit mode, the first six arguments are stored in
231 		 * registers.
232 		 */
233 		if (argno < 6)
234 			return ((&regs64->rdi)[argno]);
235 
236 		stack = regs64->isf.rsp + sizeof(uint64_t) * (argno - 6 + shift);
237 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
238 		value = dtrace_fuword64(stack);
239 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
240 	} else {
241 		uint32_t *stack = (uint32_t *)(uintptr_t)(regs32->uesp);
242 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
243 		value = dtrace_fuword32((user_addr_t)(unsigned long)&stack[argno + shift]);
244 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR);
245 	}
246 
247 	return (value);
248 }
249 
/*
 * Decode the user instruction at address `pc` in process `p` and record
 * everything the probe machinery needs into the tracepoint `tp`: the
 * instruction bytes, size, segment override, type (FASTTRAP_T_*), and for
 * control-transfer instructions the pre-computed destination/operand
 * fields. Instructions that cannot be safely emulated or copied out are
 * rejected.
 *
 * Returns 0 on success, -1 if the instruction cannot be read, fails to
 * disassemble, spans into unreadable memory, or is one we refuse to
 * instrument (int3, int n, doubled segment prefixes).
 */
/*ARGSUSED*/
int
fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, user_addr_t pc,
    fasttrap_probe_type_t type)
{
#pragma unused(type)
	uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10];
	size_t len = FASTTRAP_MAX_INSTR_SIZE;
	size_t first = MIN(len, PAGE_SIZE - (pc & PAGE_MASK));
	uint_t start = 0;
	size_t size;
	int rmindex;
	uint8_t seg, rex = 0;
	unsigned int p_model = (p->p_flag & P_LP64) ? DATAMODEL_LP64 : DATAMODEL_ILP32;

	/*
	 * Read the instruction at the given address out of the process's
	 * address space. We don't have to worry about a debugger
	 * changing this instruction before we overwrite it with our trap
	 * instruction since P_PR_LOCK is set. Since instructions can span
	 * pages, we potentially read the instruction in two parts. If the
	 * second part fails, we just zero out that part of the instruction.
	 */
	/*
	 * APPLE NOTE: Of course, we do not have a P_PR_LOCK, so this is racey...
	 */
	if (uread(p, &instr[0], first, pc) != 0)
		return (-1);
	if (len > first &&
	    uread(p, &instr[first], len - first, pc + first) != 0) {
		bzero(&instr[first], len - first);
		len = first;
	}

	/*
	 * If the disassembly fails, then we have a malformed instruction.
	 */
	if ((size = dtrace_instr_size_isa(instr, p_model, &rmindex)) <= 0)
		return (-1);

	/*
	 * Make sure the disassembler isn't completely broken.
	 */
	ASSERT(-1 <= rmindex && rmindex < (int)size);

	/*
	 * If the computed size is greater than the number of bytes read,
	 * then it was a malformed instruction possibly because it fell on a
	 * page boundary and the subsequent page was missing or because of
	 * some malicious user.
	 */
	if (size > len)
		return (-1);

	tp->ftt_size = (uint8_t)size;
	tp->ftt_segment = FASTTRAP_SEG_NONE;

	/*
	 * Find the start of the instruction's opcode by processing any
	 * legacy prefixes.
	 *
	 * The deliberate fall-through cascade below increments `seg` once
	 * per case, so after the switch `seg` identifies which
	 * segment-override prefix (if any) was just consumed: CS=1, DS=2,
	 * ES=3, FS=4, GS=5, SS=6. NOTE(review): presumably these values
	 * line up with the FASTTRAP_SEG_* constants -- confirm against
	 * fasttrap_impl.h / fasttrap_isa.h.
	 */
	for (;;) {
		seg = 0;
		switch (instr[start]) {
		case FASTTRAP_PREFIX_SS:
			seg++;
			OS_FALLTHROUGH;
		case FASTTRAP_PREFIX_GS:
			seg++;
			OS_FALLTHROUGH;
		case FASTTRAP_PREFIX_FS:
			seg++;
			OS_FALLTHROUGH;
		case FASTTRAP_PREFIX_ES:
			seg++;
			OS_FALLTHROUGH;
		case FASTTRAP_PREFIX_DS:
			seg++;
			OS_FALLTHROUGH;
		case FASTTRAP_PREFIX_CS:
			seg++;
			OS_FALLTHROUGH;
		case FASTTRAP_PREFIX_OPERAND:
		case FASTTRAP_PREFIX_ADDRESS:
		case FASTTRAP_PREFIX_LOCK:
		case FASTTRAP_PREFIX_REP:
		case FASTTRAP_PREFIX_REPNE:
			if (seg != 0) {
				/*
				 * It's illegal for an instruction to specify
				 * two segment prefixes -- give up on this
				 * illegal instruction.
				 */
				if (tp->ftt_segment != FASTTRAP_SEG_NONE)
					return (-1);

				tp->ftt_segment = seg;
			}
			start++;
			continue;
		}
		break;
	}

	/*
	 * Identify the REX prefix on 64-bit processes.
	 */
	if (p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40)
		rex = instr[start++];

	/*
	 * Now that we're pretty sure that the instruction is okay, copy the
	 * valid part to the tracepoint.
	 */
	bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE);

	/* Default: copy out and single-step; classify the opcode below. */
	tp->ftt_type = FASTTRAP_T_COMMON;
	if (instr[start] == FASTTRAP_2_BYTE_OP) {
		switch (instr[start + 1]) {
		case FASTTRAP_0F_JO:
		case FASTTRAP_0F_JNO:
		case FASTTRAP_0F_JB:
		case FASTTRAP_0F_JAE:
		case FASTTRAP_0F_JE:
		case FASTTRAP_0F_JNE:
		case FASTTRAP_0F_JBE:
		case FASTTRAP_0F_JA:
		case FASTTRAP_0F_JS:
		case FASTTRAP_0F_JNS:
		case FASTTRAP_0F_JP:
		case FASTTRAP_0F_JNP:
		case FASTTRAP_0F_JL:
		case FASTTRAP_0F_JGE:
		case FASTTRAP_0F_JLE:
		case FASTTRAP_0F_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			/* Map the long-form 0x0F 0x8x jcc onto its short 0x7x opcode. */
			tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 2];
			break;
		}
	} else if (instr[start] == FASTTRAP_GROUP5_OP) {
		uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]);
		uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]);
		uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]);

		/* Group 5 /2 = near indirect call, /4 = near indirect jmp. */
		if (reg == 2 || reg == 4) {
			uint_t i, sz;

			if (reg == 2)
				tp->ftt_type = FASTTRAP_T_CALL;
			else
				tp->ftt_type = FASTTRAP_T_JMP;

			/* ftt_code: 2 = register operand, 1 = memory operand. */
			if (mod == 3)
				tp->ftt_code = 2;
			else
				tp->ftt_code = 1;

			ASSERT(p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * See AMD x86-64 Architecture Programmer's Manual
			 * Volume 3, Section 1.2.7, Table 1-12, and
			 * Appendix A.3.1, Table A-15.
			 */
			if (mod != 3 && rm == 4) {
				/* SIB byte present: decode scale/index/base. */
				uint8_t sib = instr[start + 2];
				uint_t index = FASTTRAP_SIB_INDEX(sib);
				uint_t base = FASTTRAP_SIB_BASE(sib);

				tp->ftt_scale = FASTTRAP_SIB_SCALE(sib);

				/* index == 4 encodes "no index register". */
				tp->ftt_index = (index == 4) ?
				    FASTTRAP_NOREG :
				    regmap[index | (FASTTRAP_REX_X(rex) << 3)];
				/* mod == 0 && base == 5 means disp32, no base reg. */
				tp->ftt_base = (mod == 0 && base == 5) ?
				    FASTTRAP_NOREG :
				    regmap[base | (FASTTRAP_REX_B(rex) << 3)];

				i = 3;
				sz = mod == 1 ? 1 : 4;
			} else {
				/*
				 * In 64-bit mode, mod == 0 and r/m == 5
				 * denotes %rip-relative addressing; in 32-bit
				 * mode, the base register isn't used. In both
				 * modes, there is a 32-bit operand.
				 */
				if (mod == 0 && rm == 5) {
					if (p_model == DATAMODEL_LP64)
						tp->ftt_base = REG_RIP;
					else
						tp->ftt_base = FASTTRAP_NOREG;
					sz = 4;
				} else {
					uint8_t base = rm |
					    (FASTTRAP_REX_B(rex) << 3);

					tp->ftt_base = regmap[base];
					/* disp8 for mod 1, disp32 for mod 2, none otherwise. */
					sz = mod == 1 ? 1 : mod == 2 ? 4 : 0;
				}
				tp->ftt_index = FASTTRAP_NOREG;
				i = 2;
			}

			/* Capture the (sign-extended) displacement, if any. */
			if (sz == 1) {
				tp->ftt_dest = *(int8_t *)&instr[start + i];
			} else if (sz == 4) {
				/* LINTED - alignment */
				tp->ftt_dest = *(int32_t *)&instr[start + i];
			} else {
				tp->ftt_dest = 0;
			}
		}
	} else {
		switch (instr[start]) {
		case FASTTRAP_RET:
			tp->ftt_type = FASTTRAP_T_RET;
			break;

		case FASTTRAP_RET16:
			tp->ftt_type = FASTTRAP_T_RET16;
			/* ftt_dest holds the immediate stack-adjust amount. */
			/* LINTED - alignment */
			tp->ftt_dest = *(uint16_t *)&instr[start + 1];
			break;

		case FASTTRAP_JO:
		case FASTTRAP_JNO:
		case FASTTRAP_JB:
		case FASTTRAP_JAE:
		case FASTTRAP_JE:
		case FASTTRAP_JNE:
		case FASTTRAP_JBE:
		case FASTTRAP_JA:
		case FASTTRAP_JS:
		case FASTTRAP_JNS:
		case FASTTRAP_JP:
		case FASTTRAP_JNP:
		case FASTTRAP_JL:
		case FASTTRAP_JGE:
		case FASTTRAP_JLE:
		case FASTTRAP_JG:
			tp->ftt_type = FASTTRAP_T_JCC;
			tp->ftt_code = instr[start];
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_LOOPNZ:
		case FASTTRAP_LOOPZ:
		case FASTTRAP_LOOP:
			tp->ftt_type = FASTTRAP_T_LOOP;
			tp->ftt_code = instr[start];
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_JCXZ:
			tp->ftt_type = FASTTRAP_T_JCXZ;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_CALL:
			tp->ftt_type = FASTTRAP_T_CALL;
			tp->ftt_dest = pc + tp->ftt_size +
			    /* LINTED - alignment */
			    *(int32_t *)&instr[start + 1];
			tp->ftt_code = 0;
			break;

		case FASTTRAP_JMP32:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
				/* LINTED - alignment */
			    *(int32_t *)&instr[start + 1];
			break;
		case FASTTRAP_JMP8:
			tp->ftt_type = FASTTRAP_T_JMP;
			tp->ftt_dest = pc + tp->ftt_size +
			    (int8_t)instr[start + 1];
			break;

		case FASTTRAP_PUSHL_EBP:
			/* Only an unprefixed push %ebp is special-cased. */
			if (start == 0)
				tp->ftt_type = FASTTRAP_T_PUSHL_EBP;
			break;

		case FASTTRAP_NOP:
			ASSERT(p_model == DATAMODEL_LP64 || rex == 0);

			/*
			 * On sol64 we have to be careful not to confuse a nop
			 * (actually xchgl %eax, %eax) with an instruction using
			 * the same opcode, but that does something different
			 * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax).
			 */
			if (FASTTRAP_REX_B(rex) == 0)
				tp->ftt_type = FASTTRAP_T_NOP;
			break;

		case FASTTRAP_INT3:
			/*
			 * The pid provider shares the int3 trap with debugger
			 * breakpoints so we can't instrument them.
			 */
			ASSERT(instr[start] == FASTTRAP_INSTR);
			return (-1);

		case FASTTRAP_INT:
			/*
			 * Interrupts seem like they could be traced with
			 * no negative implications, but it's possible that
			 * a thread could be redirected by the trap handling
			 * code which would eventually return to the
			 * instruction after the interrupt. If the interrupt
			 * were in our scratch space, the subsequent
			 * instruction might be overwritten before we return.
			 * Accordingly we refuse to instrument any interrupt.
			 */
			return (-1);
		}
	}

	if (p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) {
		/*
		 * If the process is 64-bit and the instruction type is still
		 * FASTTRAP_T_COMMON -- meaning we're going to copy it out an
		 * execute it -- we need to watch for %rip-relative
		 * addressing mode. See the portion of fasttrap_pid_probe()
		 * below where we handle tracepoints with type
		 * FASTTRAP_T_COMMON for how we emulate instructions that
		 * employ %rip-relative addressing.
		 */
		if (rmindex != -1) {
			uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]);
			uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]);
			uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]);

			ASSERT(rmindex > (int)start);

			if (mod == 0 && rm == 5) {
				/*
				 * We need to be sure to avoid other
				 * registers used by this instruction. While
				 * the reg field may determine the op code
				 * rather than denoting a register, assuming
				 * that it denotes a register is always safe.
				 * We leave the REX field intact and use
				 * whatever value's there for simplicity.
				 */
				if (reg != 0) {
					tp->ftt_ripmode = FASTTRAP_RIP_1 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 0;
				} else {
					tp->ftt_ripmode = FASTTRAP_RIP_2 |
					    (FASTTRAP_RIP_X *
					    FASTTRAP_REX_B(rex));
					rm = 1;
				}

				/* Save the original ModR/M; rewrite to mod=2 (disp32) form. */
				tp->ftt_modrm = tp->ftt_instr[rmindex];
				tp->ftt_instr[rmindex] =
				    FASTTRAP_MODRM(2, reg, rm);
			}
		}
	}

	return (0);
}
624 
625 int
fasttrap_tracepoint_install(proc_t * p,fasttrap_tracepoint_t * tp)626 fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
627 {
628 	fasttrap_instr_t instr = FASTTRAP_INSTR;
629 
630 	if (uwrite(p, &instr, 1, tp->ftt_pc) != 0)
631 		return (-1);
632 
633 	tp->ftt_installed = 1;
634 
635 	return (0);
636 }
637 
638 int
fasttrap_tracepoint_remove(proc_t * p,fasttrap_tracepoint_t * tp)639 fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
640 {
641 	uint8_t instr;
642 
643 	/*
644 	 * Distinguish between read or write failures and a changed
645 	 * instruction.
646 	 */
647 	if (uread(p, &instr, 1, tp->ftt_pc) != 0)
648 		goto end;
649 	if (instr != FASTTRAP_INSTR)
650 		goto end;
651 	if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0)
652 		return (-1);
653 end:
654 	tp->ftt_installed = 0;
655 
656 	return (0);
657 }
658 
/*
 * Fire any return probes attached to the tracepoint at `pc` for process
 * `pid`. `new_pc` is the address the thread will resume at and is used to
 * skip branches that land inside the probed function (i.e. that are not
 * acting as return sites).
 */
static void
fasttrap_return_common(x86_saved_state_t *regs, user_addr_t pc, pid_t pid,
    user_addr_t new_pc)
{
	x86_saved_state64_t *regs64;
	x86_saved_state32_t *regs32;
	unsigned int p_model;
	/*
	 * NOTE(review): retire_tp is maintained below but never read in
	 * this function -- confirm whether tracepoint retirement is
	 * intentionally omitted on the return path.
	 */
	int retire_tp = 1;

	dtrace_icookie_t cookie;

	/* Decide 64- vs 32-bit once; only the matching regs pointer is valid. */
        if (is_saved_state64(regs)) {
                regs64 = saved_state64(regs);
		regs32 = NULL;
		p_model = DATAMODEL_LP64;
        } else {
		regs64 = NULL;
                regs32 = saved_state32(regs);
		p_model = DATAMODEL_ILP32;
        }

	fasttrap_tracepoint_t *tp;
	fasttrap_bucket_t *bucket;
	fasttrap_id_t *id;
	lck_mtx_t *pid_mtx;

	/* The per-CPU pid lock serializes against tracepoint teardown. */
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	lck_mtx_lock(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * Don't sweat it if we can't find the tracepoint again; unlike
	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
	 * is not essential to the correct execution of the process.
	 */
	if (tp == NULL) {
		lck_mtx_unlock(pid_mtx);
		return;
	}

	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;
		/*
		 * If there's a branch that could act as a return site, we
		 * need to trace it, and check here if the program counter is
		 * external to the function.
		 */
		if (tp->ftt_type != FASTTRAP_T_RET &&
		    tp->ftt_type != FASTTRAP_T_RET16 &&
		    new_pc - probe->ftp_faddr < probe->ftp_fsize)
			continue;

		if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
			if (os_atomic_xchg(&probe->ftp_triggered, 1, relaxed)) {
				/* already triggered */
				continue;
			}
		}
		/*
		 * If we have at least one probe associated that
		 * is not a oneshot probe, don't remove the
		 * tracepoint
		 */
		else {
			retire_tp = 0;
		}
		/*
		 * Provide a hint to the stack trace functions to add the
		 * following pc to the top of the stack since it's missing
		 * on a return probe yet highly desirable for consistency.
		 */
		cookie = dtrace_interrupt_disable();
		cpu_core[CPU->cpu_id].cpuc_missing_tos = pc;
		if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
			/* Process opted out of tracing: fire the error probe instead. */
			dtrace_probe(dtrace_probeid_error, 0 /* state */, probe->ftp_id,
				     1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV);
		} else if (p_model == DATAMODEL_LP64) {
			/* arg0 = offset within function, arg1/arg2 = return value regs. */
			dtrace_probe(probe->ftp_id,
				     pc - id->fti_probe->ftp_faddr,
				     regs64->rax, regs64->rdx, 0, 0);
		} else {
			dtrace_probe(probe->ftp_id,
				     pc - id->fti_probe->ftp_faddr,
				     regs32->eax, regs32->edx, 0, 0);
		}
		/* remove the hint */
		cpu_core[CPU->cpu_id].cpuc_missing_tos = 0;
		dtrace_interrupt_enable(cookie);
	}

	lck_mtx_unlock(pid_mtx);
}
757 
/*
 * Queue a SIGSEGV for thread `t` of process `p`, recording `addr` as the
 * faulting address, then post an AST so the signal is taken on the way
 * back to user space.
 */
static void
fasttrap_sigsegv(proc_t *p, uthread_t t, user_addr_t addr)
{
	proc_lock(p);

	/* Set fault address and mark signal */
	t->uu_code = addr;
	t->uu_siglist |= sigmask(SIGSEGV);

	/*
         * XXX These two lines may be redundant; if not, then we need
	 * XXX to potentially set the data address in the machine
	 * XXX specific thread state structure to indicate the address.
	 */
	t->uu_exception = KERN_INVALID_ADDRESS;		/* SIGSEGV */
	t->uu_subcode = 0;	/* XXX pad */

	proc_unlock(p);

	/* raise signal */
	signal_setast(get_machthread(t));
}
780 
781 static void
fasttrap_usdt_args64(fasttrap_probe_t * probe,x86_saved_state64_t * regs64,int argc,uint64_t * argv)782 fasttrap_usdt_args64(fasttrap_probe_t *probe, x86_saved_state64_t *regs64, int argc,
783     uint64_t *argv)
784 {
785 	int i, x, cap = MIN(argc, probe->ftp_nargs);
786 	user_addr_t stack = (user_addr_t)regs64->isf.rsp;
787 
788 	for (i = 0; i < cap; i++) {
789 		x = probe->ftp_argmap[i];
790 
791 		if (x < 6) {
792 			/* FIXME! This may be broken, needs testing */
793 			argv[i] = (&regs64->rdi)[x];
794 		} else {
795 			fasttrap_fuword64_noerr(stack + (x * sizeof(uint64_t)), &argv[i]);
796 		}
797 	}
798 
799 	for (; i < argc; i++) {
800 		argv[i] = 0;
801 	}
802 }
803 
804 static void
fasttrap_usdt_args32(fasttrap_probe_t * probe,x86_saved_state32_t * regs32,int argc,uint32_t * argv)805 fasttrap_usdt_args32(fasttrap_probe_t *probe, x86_saved_state32_t *regs32, int argc,
806     uint32_t *argv)
807 {
808 	int i, x, cap = MIN(argc, probe->ftp_nargs);
809 	uint32_t *stack = (uint32_t *)(uintptr_t)(regs32->uesp);
810 
811 	for (i = 0; i < cap; i++) {
812 		x = probe->ftp_argmap[i];
813 
814 		fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[x], &argv[i]);
815 	}
816 
817 	for (; i < argc; i++) {
818 		argv[i] = 0;
819 	}
820 }
821 
/*
 * FIXME!
 *
 * Validate (and translate) `*addr` against the segment selected by
 * tp->ftt_segment. This is currently a stub on xnu: it logs and returns
 * success without touching *addr. The Solaris implementation is retained
 * below under #if 0 as a reference for what a real port would need.
 */
static int
fasttrap_do_seg(fasttrap_tracepoint_t *tp, x86_saved_state_t *rp, user_addr_t *addr) // 64 bit
{
#pragma unused(tp, rp, addr)
	printf("fasttrap_do_seg() called while unimplemented.\n");
#if 0
	proc_t *p = curproc;
	user_desc_t *desc;
	uint16_t sel, ndx, type;
	uintptr_t limit;

	switch (tp->ftt_segment) {
	case FASTTRAP_SEG_CS:
		sel = rp->r_cs;
		break;
	case FASTTRAP_SEG_DS:
		sel = rp->r_ds;
		break;
	case FASTTRAP_SEG_ES:
		sel = rp->r_es;
		break;
	case FASTTRAP_SEG_FS:
		sel = rp->r_fs;
		break;
	case FASTTRAP_SEG_GS:
		sel = rp->r_gs;
		break;
	case FASTTRAP_SEG_SS:
		sel = rp->r_ss;
		break;
	}

	/*
	 * Make sure the given segment register specifies a user priority
	 * selector rather than a kernel selector.
	 */
	if (!SELISUPL(sel))
		return (-1);

	ndx = SELTOIDX(sel);

	/*
	 * Check the bounds and grab the descriptor out of the specified
	 * descriptor table.
	 */
	if (SELISLDT(sel)) {
		if (ndx > p->p_ldtlimit)
			return (-1);

		desc = p->p_ldt + ndx;

	} else {
		if (ndx >= NGDT)
			return (-1);

		desc = cpu_get_gdt() + ndx;
	}

	/*
	 * The descriptor must have user privilege level and it must be
	 * present in memory.
	 */
	if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1)
		return (-1);

	type = desc->usd_type;

	/*
	 * If the S bit in the type field is not set, this descriptor can
	 * only be used in system context.
	 */
	if ((type & 0x10) != 0x10)
		return (-1);

	limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1);

	if (tp->ftt_segment == FASTTRAP_SEG_CS) {
		/*
		 * The code/data bit and readable bit must both be set.
		 */
		if ((type & 0xa) != 0xa)
			return (-1);

		if (*addr > limit)
			return (-1);
	} else {
		/*
		 * The code/data bit must be clear.
		 */
		if ((type & 0x8) != 0)
			return (-1);

		/*
		 * If the expand-down bit is clear, we just check the limit as
		 * it would naturally be applied. Otherwise, we need to check
		 * that the address is the range [limit + 1 .. 0xffff] or
		 * [limit + 1 ... 0xffffffff] depending on if the default
		 * operand size bit is set.
		 */
		if ((type & 0x4) == 0) {
			if (*addr > limit)
				return (-1);
		} else if (desc->usd_def32) {
			if (*addr < limit + 1 || 0xffff < *addr)
				return (-1);
		} else {
			if (*addr < limit + 1 || 0xffffffff < *addr)
				return (-1);
		}
	}

	*addr += USEGD_GETBASE(desc);
#endif /* 0 */
	return (0);
}
940 
941 /*
942  * Due to variances between Solaris and xnu, I have split this into a 32 bit and 64 bit
943  * code path. It still takes an x86_saved_state_t* argument, because it must sometimes
944  * call other methods that require an x86_saved_state_t.
945  *
946  * NOTE!!!!
947  *
948  * Any changes made to this method must be echo'd in fasttrap_pid_probe64!
949  *
950  */
951 static int
fasttrap_pid_probe32(x86_saved_state_t * regs)952 fasttrap_pid_probe32(x86_saved_state_t *regs)
953 {
954 	ASSERT(is_saved_state32(regs));
955 
956 	x86_saved_state32_t *regs32  = saved_state32(regs);
957 	user_addr_t pc = regs32->eip - 1;
958 	proc_t *p = current_proc();
959 	user_addr_t new_pc = 0;
960 	fasttrap_bucket_t *bucket;
961 	lck_mtx_t *pid_mtx;
962 	fasttrap_tracepoint_t *tp, tp_local;
963 	pid_t pid;
964 	dtrace_icookie_t cookie;
965 	uint_t is_enabled = 0, retire_tp = 1;
966 
967 	uthread_t uthread = current_uthread();
968 
969 	/*
970 	 * It's possible that a user (in a veritable orgy of bad planning)
971 	 * could redirect this thread's flow of control before it reached the
972 	 * return probe fasttrap. In this case we need to kill the process
973 	 * since it's in a unrecoverable state.
974 	 */
975 	if (uthread->t_dtrace_step) {
976 		ASSERT(uthread->t_dtrace_on);
977 		fasttrap_sigtrap(p, uthread, pc);
978 		return (0);
979 	}
980 
981 	/*
982 	 * Clear all user tracing flags.
983 	 */
984 	uthread->t_dtrace_ft = 0;
985 	uthread->t_dtrace_pc = 0;
986 	uthread->t_dtrace_npc = 0;
987 	uthread->t_dtrace_scrpc = 0;
988 	uthread->t_dtrace_astpc = 0;
989 
990 
991 	pid = proc_getpid(p);
992 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
993 	lck_mtx_lock(pid_mtx);
994 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
995 
996 	/*
997 	 * Lookup the tracepoint that the process just hit.
998 	 */
999 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
1000 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
1001 		    tp->ftt_proc->ftpc_acount != 0)
1002 			break;
1003 	}
1004 
1005 	/*
1006 	 * If we couldn't find a matching tracepoint, either a tracepoint has
1007 	 * been inserted without using the pid<pid> ioctl interface (see
1008 	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
1009 	 */
1010 	if (tp == NULL) {
1011 		lck_mtx_unlock(pid_mtx);
1012 		return (-1);
1013 	}
1014 
1015 	/*
1016 	 * Set the program counter to the address of the traced instruction
1017 	 * so that it looks right in ustack() output.
1018 	 */
1019 	regs32->eip = pc;
1020 
1021 	if (tp->ftt_ids != NULL) {
1022 		fasttrap_id_t *id;
1023 
1024 		uint32_t s0, s1, s2, s3, s4, s5;
1025 		uint32_t *stack = (uint32_t *)(uintptr_t)(regs32->uesp);
1026 
1027 		/*
1028 		 * In 32-bit mode, all arguments are passed on the
1029 		 * stack. If this is a function entry probe, we need
1030 		 * to skip the first entry on the stack as it
1031 		 * represents the return address rather than a
1032 		 * parameter to the function.
1033 		 */
1034 		fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[0], &s0);
1035 		fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[1], &s1);
1036 		fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[2], &s2);
1037 		fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[3], &s3);
1038 		fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[4], &s4);
1039 		fasttrap_fuword32_noerr((user_addr_t)(unsigned long)&stack[5], &s5);
1040 
1041 		for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
1042 			fasttrap_probe_t *probe = id->fti_probe;
1043 
1044 			if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
1045 				dtrace_probe(dtrace_probeid_error, 0 /* state */, probe->ftp_id,
1046 					     1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV);
1047 			} else {
1048 				if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
1049 					if (os_atomic_xchg(&probe->ftp_triggered, 1, relaxed)) {
1050 						/* already triggered */
1051 						continue;
1052 					}
1053 				}
1054 				/*
1055 				 * If we have at least one probe associated that
1056 				 * is not a oneshot probe, don't remove the
1057 				 * tracepoint
1058 				 */
1059 				else {
1060 					retire_tp = 0;
1061 				}
1062 				if (id->fti_ptype == DTFTP_ENTRY) {
1063 					/*
1064 					 * We note that this was an entry
1065 					 * probe to help ustack() find the
1066 					 * first caller.
1067 					 */
1068 					cookie = dtrace_interrupt_disable();
1069 					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
1070 					dtrace_probe(probe->ftp_id, s1, s2,
1071 						     s3, s4, s5);
1072 					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1073 					dtrace_interrupt_enable(cookie);
1074 				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1075 					/*
1076 					 * Note that in this case, we don't
1077 					 * call dtrace_probe() since it's only
1078 					 * an artificial probe meant to change
1079 					 * the flow of control so that it
1080 					 * encounters the true probe.
1081 					 */
1082 					is_enabled = 1;
1083 				} else if (probe->ftp_argmap == NULL) {
1084 					dtrace_probe(probe->ftp_id, s0, s1,
1085 						     s2, s3, s4);
1086 				} else {
1087 					uint32_t t[5];
1088 
1089 					fasttrap_usdt_args32(probe, regs32,
1090 							     sizeof (t) / sizeof (t[0]), t);
1091 
1092 					dtrace_probe(probe->ftp_id, t[0], t[1],
1093 						     t[2], t[3], t[4]);
1094 				}
1095 			}
1096 		}
1097 		if (retire_tp) {
1098 			fasttrap_tracepoint_retire(p, tp);
1099 		}
1100 	}
1101 
1102 	/*
1103 	 * We're about to do a bunch of work so we cache a local copy of
1104 	 * the tracepoint to emulate the instruction, and then find the
1105 	 * tracepoint again later if we need to light up any return probes.
1106 	 */
1107 	tp_local = *tp;
1108 	lck_mtx_unlock(pid_mtx);
1109 	tp = &tp_local;
1110 
1111 	/*
1112 	 * Set the program counter to appear as though the traced instruction
1113 	 * had completely executed. This ensures that fasttrap_getreg() will
1114 	 * report the expected value for REG_RIP.
1115 	 */
1116 	regs32->eip = pc + tp->ftt_size;
1117 
1118 	/*
1119 	 * If there's an is-enabled probe connected to this tracepoint it
1120 	 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1121 	 * instruction that was placed there by DTrace when the binary was
1122 	 * linked. As this probe is, in fact, enabled, we need to stuff 1
1123 	 * into %eax or %rax. Accordingly, we can bypass all the instruction
1124 	 * emulation logic since we know the inevitable result. It's possible
1125 	 * that a user could construct a scenario where the 'is-enabled'
1126 	 * probe was on some other instruction, but that would be a rather
1127 	 * exotic way to shoot oneself in the foot.
1128 	 */
1129 	if (is_enabled) {
1130 		regs32->eax = 1;
1131 		new_pc = regs32->eip;
1132 		goto done;
1133 	}
1134 
1135 	/*
1136 	 * We emulate certain types of instructions to ensure correctness
1137 	 * (in the case of position dependent instructions) or optimize
1138 	 * common cases. The rest we have the thread execute back in user-
1139 	 * land.
1140 	 */
1141 	switch (tp->ftt_type) {
1142 		case FASTTRAP_T_RET:
1143 		case FASTTRAP_T_RET16:
1144 		{
1145 			user_addr_t dst;
1146 			user_addr_t addr;
1147 			int ret;
1148 
1149 			/*
1150 			 * We have to emulate _every_ facet of the behavior of a ret
1151 			 * instruction including what happens if the load from %esp
1152 			 * fails; in that case, we send a SIGSEGV.
1153 			 */
1154 			uint32_t dst32;
1155 			ret = fasttrap_fuword32((user_addr_t)regs32->uesp, &dst32);
1156 			dst = dst32;
1157 			addr = regs32->uesp + sizeof (uint32_t);
1158 
1159 			if (ret == -1) {
1160 				fasttrap_sigsegv(p, uthread, (user_addr_t)regs32->uesp);
1161 				new_pc = pc;
1162 				break;
1163 			}
1164 
1165 			if (tp->ftt_type == FASTTRAP_T_RET16)
1166 				addr += tp->ftt_dest;
1167 
1168 			regs32->uesp = addr;
1169 			new_pc = dst;
1170 			break;
1171 		}
1172 
1173 		case FASTTRAP_T_JCC:
1174 		{
1175 			uint_t taken;
1176 
1177 			switch (tp->ftt_code) {
1178 				case FASTTRAP_JO:
1179 					taken = (regs32->efl & FASTTRAP_EFLAGS_OF) != 0;
1180 					break;
1181 				case FASTTRAP_JNO:
1182 					taken = (regs32->efl & FASTTRAP_EFLAGS_OF) == 0;
1183 					break;
1184 				case FASTTRAP_JB:
1185 					taken = (regs32->efl & FASTTRAP_EFLAGS_CF) != 0;
1186 					break;
1187 				case FASTTRAP_JAE:
1188 					taken = (regs32->efl & FASTTRAP_EFLAGS_CF) == 0;
1189 					break;
1190 				case FASTTRAP_JE:
1191 					taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) != 0;
1192 					break;
1193 				case FASTTRAP_JNE:
1194 					taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) == 0;
1195 					break;
1196 				case FASTTRAP_JBE:
1197 					taken = (regs32->efl & FASTTRAP_EFLAGS_CF) != 0 ||
1198 						(regs32->efl & FASTTRAP_EFLAGS_ZF) != 0;
1199 					break;
1200 				case FASTTRAP_JA:
1201 					taken = (regs32->efl & FASTTRAP_EFLAGS_CF) == 0 &&
1202 						(regs32->efl & FASTTRAP_EFLAGS_ZF) == 0;
1203 					break;
1204 				case FASTTRAP_JS:
1205 					taken = (regs32->efl & FASTTRAP_EFLAGS_SF) != 0;
1206 					break;
1207 				case FASTTRAP_JNS:
1208 					taken = (regs32->efl & FASTTRAP_EFLAGS_SF) == 0;
1209 					break;
1210 				case FASTTRAP_JP:
1211 					taken = (regs32->efl & FASTTRAP_EFLAGS_PF) != 0;
1212 					break;
1213 				case FASTTRAP_JNP:
1214 					taken = (regs32->efl & FASTTRAP_EFLAGS_PF) == 0;
1215 					break;
1216 				case FASTTRAP_JL:
1217 					taken = ((regs32->efl & FASTTRAP_EFLAGS_SF) == 0) !=
1218 						((regs32->efl & FASTTRAP_EFLAGS_OF) == 0);
1219 					break;
1220 				case FASTTRAP_JGE:
1221 					taken = ((regs32->efl & FASTTRAP_EFLAGS_SF) == 0) ==
1222 						((regs32->efl & FASTTRAP_EFLAGS_OF) == 0);
1223 					break;
1224 				case FASTTRAP_JLE:
1225 					taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) != 0 ||
1226 						((regs32->efl & FASTTRAP_EFLAGS_SF) == 0) !=
1227 						((regs32->efl & FASTTRAP_EFLAGS_OF) == 0);
1228 					break;
1229 				case FASTTRAP_JG:
1230 					taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) == 0 &&
1231 						((regs32->efl & FASTTRAP_EFLAGS_SF) == 0) ==
1232 						((regs32->efl & FASTTRAP_EFLAGS_OF) == 0);
1233 					break;
1234 				default:
1235 					taken = FALSE;
1236 			}
1237 
1238 			if (taken)
1239 				new_pc = tp->ftt_dest;
1240 			else
1241 				new_pc = pc + tp->ftt_size;
1242 			break;
1243 		}
1244 
1245 		case FASTTRAP_T_LOOP:
1246 		{
1247 			uint_t taken;
1248 			greg_t cx = regs32->ecx--;
1249 
1250 			switch (tp->ftt_code) {
1251 				case FASTTRAP_LOOPNZ:
1252 					taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) == 0 &&
1253 						cx != 0;
1254 					break;
1255 				case FASTTRAP_LOOPZ:
1256 					taken = (regs32->efl & FASTTRAP_EFLAGS_ZF) != 0 &&
1257 						cx != 0;
1258 					break;
1259 				case FASTTRAP_LOOP:
1260 					taken = (cx != 0);
1261 					break;
1262 				default:
1263 					taken = FALSE;
1264 			}
1265 
1266 			if (taken)
1267 				new_pc = tp->ftt_dest;
1268 			else
1269 				new_pc = pc + tp->ftt_size;
1270 			break;
1271 		}
1272 
1273 		case FASTTRAP_T_JCXZ:
1274 		{
1275 			greg_t cx = regs32->ecx;
1276 
1277 			if (cx == 0)
1278 				new_pc = tp->ftt_dest;
1279 			else
1280 				new_pc = pc + tp->ftt_size;
1281 			break;
1282 		}
1283 
1284 		case FASTTRAP_T_PUSHL_EBP:
1285 		{
1286 			user_addr_t addr = regs32->uesp - sizeof (uint32_t);
1287 			int ret = fasttrap_suword32(addr, (uint32_t)regs32->ebp);
1288 
1289 			if (ret == -1) {
1290 				fasttrap_sigsegv(p, uthread, addr);
1291 				new_pc = pc;
1292 				break;
1293 			}
1294 
1295 			regs32->uesp = addr;
1296 			new_pc = pc + tp->ftt_size;
1297 			break;
1298 		}
1299 
1300 		case FASTTRAP_T_NOP:
1301 			new_pc = pc + tp->ftt_size;
1302 			break;
1303 
1304 		case FASTTRAP_T_JMP:
1305 		case FASTTRAP_T_CALL:
1306 			if (tp->ftt_code == 0) {
1307 				new_pc = tp->ftt_dest;
1308 			} else {
1309 				user_addr_t /* value ,*/ addr = tp->ftt_dest;
1310 
1311 				if (tp->ftt_base != FASTTRAP_NOREG)
1312 					addr += fasttrap_getreg(regs, tp->ftt_base);
1313 				if (tp->ftt_index != FASTTRAP_NOREG)
1314 					addr += fasttrap_getreg(regs, tp->ftt_index) <<
1315 						tp->ftt_scale;
1316 
1317 				if (tp->ftt_code == 1) {
1318 					/*
1319 					 * If there's a segment prefix for this
1320 					 * instruction, we'll need to check permissions
1321 					 * and bounds on the given selector, and adjust
1322 					 * the address accordingly.
1323 					 */
1324 					if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
1325 					    fasttrap_do_seg(tp, regs, &addr) != 0) {
1326 						fasttrap_sigsegv(p, uthread, addr);
1327 						new_pc = pc;
1328 						break;
1329 					}
1330 
1331 					uint32_t value32;
1332 					addr = (user_addr_t)(uint32_t)addr;
1333 					if (fasttrap_fuword32(addr, &value32) == -1) {
1334 						fasttrap_sigsegv(p, uthread, addr);
1335 						new_pc = pc;
1336 						break;
1337 					}
1338 					new_pc = value32;
1339 				} else {
1340 					new_pc = addr;
1341 				}
1342 			}
1343 
1344 			/*
1345 			 * If this is a call instruction, we need to push the return
1346 			 * address onto the stack. If this fails, we send the process
1347 			 * a SIGSEGV and reset the pc to emulate what would happen if
1348 			 * this instruction weren't traced.
1349 			 */
1350 			if (tp->ftt_type == FASTTRAP_T_CALL) {
1351 				user_addr_t addr = regs32->uesp - sizeof (uint32_t);
1352 				int ret = fasttrap_suword32(addr, (uint32_t)(pc + tp->ftt_size));
1353 
1354 				if (ret == -1) {
1355 					fasttrap_sigsegv(p, uthread, addr);
1356 					new_pc = pc;
1357 					break;
1358 				}
1359 
1360 				regs32->uesp = addr;
1361 			}
1362 			break;
1363 
1364 		case FASTTRAP_T_COMMON:
1365 		{
1366 			user_addr_t addr, write_addr;
1367 			uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7];
1368 			uint_t i = 0;
1369 
1370 			/*
1371 			 * Generic Instruction Tracing
1372 			 * ---------------------------
1373 			 *
1374 			 * This is the layout of the scratch space in the user-land
1375 			 * thread structure for our generated instructions.
1376 			 *
1377 			 *	32-bit mode			bytes
1378 			 *	------------------------	-----
1379 			 * a:	<original instruction>		<= 15
1380 			 *	jmp	<pc + tp->ftt_size>	    5
1381 * b:	<original instruction>		<= 15
1382 			 *	int	T_DTRACE_RET		    2
1383 			 *					-----
1384 			 *					<= 37
1385 			 *
1386 			 *	64-bit mode			bytes
1387 			 *	------------------------	-----
1388 			 * a:	<original instruction>		<= 15
1389 			 *	jmp	0(%rip)			    6
1390 			 *	<pc + tp->ftt_size>		    8
1391 			 * b:	<original instruction>		<= 15
1392 			 * 	int	T_DTRACE_RET		    2
1393 			 * 					-----
1394 			 * 					<= 46
1395 			 *
1396 			 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1397 			 * to b. If we encounter a signal on the way out of the
1398 			 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1399 			 * so that we execute the original instruction and re-enter
1400 			 * the kernel rather than redirecting to the next instruction.
1401 			 *
1402 			 * If there are return probes (so we know that we're going to
1403 			 * need to reenter the kernel after executing the original
1404 			 * instruction), the scratch space will just contain the
1405 			 * original instruction followed by an interrupt -- the same
1406 			 * data as at b.
1407 			 */
1408 
1409 			addr = uthread->t_dtrace_scratch->addr;
1410 			write_addr = uthread->t_dtrace_scratch->write_addr;
1411 
1412 			if (addr == 0LL || write_addr == 0LL) {
1413 				fasttrap_sigtrap(p, uthread, pc); // Should be killing target proc
1414 				new_pc = pc;
1415 				break;
1416 			}
1417 
1418 			ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);
1419 
1420 			uthread->t_dtrace_scrpc = addr;
1421 			bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1422 			i += tp->ftt_size;
1423 
1424 			/*
1425 			 * Set up the jmp to the next instruction; note that
1426 			 * the size of the traced instruction cancels out.
1427 			 */
1428 			scratch[i++] = FASTTRAP_JMP32;
1429 			/* LINTED - alignment */
1430 			*(uint32_t *)&scratch[i] = pc - addr - 5;
1431 			i += sizeof (uint32_t);
1432 
1433 			uthread->t_dtrace_astpc = addr + i;
1434 			bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
1435 			i += tp->ftt_size;
1436 			scratch[i++] = FASTTRAP_INT;
1437 			scratch[i++] = T_DTRACE_RET;
1438 
1439 			ASSERT(i <= sizeof (scratch));
1440 
1441 			if (fasttrap_copyout(scratch, write_addr, i)) {
1442 				fasttrap_sigtrap(p, uthread, pc);
1443 				new_pc = pc;
1444 				break;
1445 			}
1446 
1447 			if (tp->ftt_retids != NULL) {
1448 				uthread->t_dtrace_step = 1;
1449 				uthread->t_dtrace_ret = 1;
1450 				new_pc = uthread->t_dtrace_astpc;
1451 			} else {
1452 				new_pc = uthread->t_dtrace_scrpc;
1453 			}
1454 
1455 			uthread->t_dtrace_pc = pc;
1456 			uthread->t_dtrace_npc = pc + tp->ftt_size;
1457 			uthread->t_dtrace_on = 1;
1458 			break;
1459 		}
1460 
1461 		default:
1462 			panic("fasttrap: mishandled an instruction");
1463 	}
1464 
1465 done:
1466 	/*
1467 	 * APPLE NOTE:
1468 	 *
1469 	 * We're setting this earlier than Solaris does, to get a "correct"
1470 	 * ustack() output. In the Sun code,  a() -> b() -> c() -> d() is
1471 	 * reported at: d, b, a. The new way gives c, b, a, which is closer
1472 	 * to correct, as the return instruction has already executed.
1473 	 */
1474 	regs32->eip = new_pc;
1475 
1476 	/*
1477 	 * If there were no return probes when we first found the tracepoint,
1478 	 * we should feel no obligation to honor any return probes that were
1479 	 * subsequently enabled -- they'll just have to wait until the next
1480 	 * time around.
1481 	 */
1482 	if (tp->ftt_retids != NULL) {
1483 		/*
1484 		 * We need to wait until the results of the instruction are
1485 		 * apparent before invoking any return probes. If this
1486 		 * instruction was emulated we can just call
1487 		 * fasttrap_return_common(); if it needs to be executed, we
1488 		 * need to wait until the user thread returns to the kernel.
1489 		 */
1490 		if (tp->ftt_type != FASTTRAP_T_COMMON) {
1491 			fasttrap_return_common(regs, pc, pid, new_pc);
1492 		} else {
1493 			ASSERT(uthread->t_dtrace_ret != 0);
1494 			ASSERT(uthread->t_dtrace_pc == pc);
1495 			ASSERT(uthread->t_dtrace_scrpc != 0);
1496 			ASSERT(new_pc == uthread->t_dtrace_astpc);
1497 		}
1498 	}
1499 
1500 	return (0);
1501 }
1502 
1503 /*
1504  * Due to variances between Solaris and xnu, I have split this into a 32 bit and 64 bit
1505  * code path. It still takes an x86_saved_state_t* argument, because it must sometimes
1506  * call other methods that require a x86_saved_state_t.
1507  *
1508  * NOTE!!!!
1509  *
1510  * Any changes made to this method must be echoed in fasttrap_pid_probe32!
1511  *
1512  */
1513 static int
fasttrap_pid_probe64(x86_saved_state_t * regs)1514 fasttrap_pid_probe64(x86_saved_state_t *regs)
1515 {
1516 	ASSERT(is_saved_state64(regs));
1517 
1518 	x86_saved_state64_t *regs64 = saved_state64(regs);
1519 	user_addr_t pc = regs64->isf.rip - 1;
1520 	proc_t *p = current_proc();
1521 	user_addr_t new_pc = 0;
1522 	fasttrap_bucket_t *bucket;
1523 	lck_mtx_t *pid_mtx;
1524 	fasttrap_tracepoint_t *tp, tp_local;
1525 	pid_t pid;
1526 	dtrace_icookie_t cookie;
1527 	uint_t is_enabled = 0;
1528 	int retire_tp = 1;
1529 
1530 	uthread_t uthread = current_uthread();
1531 
1532 	/*
1533 	 * It's possible that a user (in a veritable orgy of bad planning)
1534 	 * could redirect this thread's flow of control before it reached the
1535 	 * return probe fasttrap. In this case we need to kill the process
1536 	 * since it's in an unrecoverable state.
1537 	 */
1538 	if (uthread->t_dtrace_step) {
1539 		ASSERT(uthread->t_dtrace_on);
1540 		fasttrap_sigtrap(p, uthread, pc);
1541 		return (0);
1542 	}
1543 
1544 	/*
1545 	 * Clear all user tracing flags.
1546 	 */
1547 	uthread->t_dtrace_ft = 0;
1548 	uthread->t_dtrace_pc = 0;
1549 	uthread->t_dtrace_npc = 0;
1550 	uthread->t_dtrace_scrpc = 0;
1551 	uthread->t_dtrace_astpc = 0;
1552 	uthread->t_dtrace_regv = 0;
1553 
1554 
1555 	pid = proc_getpid(p);
1556 	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
1557 	lck_mtx_lock(pid_mtx);
1558 	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
1559 
1560 	/*
1561 	 * Lookup the tracepoint that the process just hit.
1562 	 */
1563 	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
1564 		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
1565 		    tp->ftt_proc->ftpc_acount != 0)
1566 			break;
1567 	}
1568 
1569 	/*
1570 	 * If we couldn't find a matching tracepoint, either a tracepoint has
1571 	 * been inserted without using the pid<pid> ioctl interface (see
1572 	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
1573 	 */
1574 	if (tp == NULL) {
1575 		lck_mtx_unlock(pid_mtx);
1576 		return (-1);
1577 	}
1578 
1579 	/*
1580 	 * Set the program counter to the address of the traced instruction
1581 	 * so that it looks right in ustack() output.
1582 	 */
1583 	regs64->isf.rip = pc;
1584 
1585 	if (tp->ftt_ids != NULL) {
1586 		fasttrap_id_t *id;
1587 
1588 		for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
1589 			fasttrap_probe_t *probe = id->fti_probe;
1590 
1591 			if (probe->ftp_prov->ftp_provider_type == DTFTP_PROVIDER_ONESHOT) {
1592 				if (os_atomic_xchg(&probe->ftp_triggered, 1, relaxed)) {
1593 					/* already triggered */
1594 					continue;
1595 				}
1596 			}
1597 			/*
1598 	 * If we have at least one probe associated that
1599 			 * is not a oneshot probe, don't remove the
1600 			 * tracepoint
1601 			 */
1602 			else {
1603 				retire_tp = 0;
1604 			}
1605 			if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) {
1606 				dtrace_probe(dtrace_probeid_error, 0 /* state */, probe->ftp_id,
1607 					     1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV);
1608 			} else if (id->fti_ptype == DTFTP_ENTRY) {
1609 				/*
1610 				 * We note that this was an entry
1611 				 * probe to help ustack() find the
1612 				 * first caller.
1613 				 */
1614 				cookie = dtrace_interrupt_disable();
1615 				DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
1616 				dtrace_probe(probe->ftp_id, regs64->rdi,
1617 					     regs64->rsi, regs64->rdx, regs64->rcx,
1618 					     regs64->r8);
1619 				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
1620 				dtrace_interrupt_enable(cookie);
1621 			} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
1622 				/*
1623 				 * Note that in this case, we don't
1624 				 * call dtrace_probe() since it's only
1625 				 * an artificial probe meant to change
1626 				 * the flow of control so that it
1627 				 * encounters the true probe.
1628 				 */
1629 				is_enabled = 1;
1630 			} else if (probe->ftp_argmap == NULL) {
1631 				dtrace_probe(probe->ftp_id, regs64->rdi,
1632 					     regs64->rsi, regs64->rdx, regs64->rcx,
1633 					     regs64->r8);
1634 			} else {
1635 				uint64_t t[5];
1636 
1637 				fasttrap_usdt_args64(probe, regs64,
1638 						     sizeof (t) / sizeof (t[0]), t);
1639 
1640 				dtrace_probe(probe->ftp_id, t[0], t[1],
1641 					     t[2], t[3], t[4]);
1642 			}
1643 
1644 		}
1645 		if (retire_tp) {
1646 			fasttrap_tracepoint_retire(p, tp);
1647 		}
1648 	}
1649 
1650 	/*
1651 	 * We're about to do a bunch of work so we cache a local copy of
1652 	 * the tracepoint to emulate the instruction, and then find the
1653 	 * tracepoint again later if we need to light up any return probes.
1654 	 */
1655 	tp_local = *tp;
1656 	lck_mtx_unlock(pid_mtx);
1657 	tp = &tp_local;
1658 
1659 	/*
1660 	 * Set the program counter to appear as though the traced instruction
1661 	 * had completely executed. This ensures that fasttrap_getreg() will
1662 	 * report the expected value for REG_RIP.
1663 	 */
1664 	regs64->isf.rip = pc + tp->ftt_size;
1665 
1666 	/*
1667 	 * If there's an is-enabled probe connected to this tracepoint it
1668 	 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1669 	 * instruction that was placed there by DTrace when the binary was
1670 	 * linked. As this probe is, in fact, enabled, we need to stuff 1
1671 	 * into %eax or %rax. Accordingly, we can bypass all the instruction
1672 	 * emulation logic since we know the inevitable result. It's possible
1673 	 * that a user could construct a scenario where the 'is-enabled'
1674 	 * probe was on some other instruction, but that would be a rather
1675 	 * exotic way to shoot oneself in the foot.
1676 	 */
1677 	if (is_enabled) {
1678 		regs64->rax = 1;
1679 		new_pc = regs64->isf.rip;
1680 		goto done;
1681 	}
1682 
1683 	/*
1684 	 * We emulate certain types of instructions to ensure correctness
1685 	 * (in the case of position dependent instructions) or optimize
1686 	 * common cases. The rest we have the thread execute back in user-
1687 	 * land.
1688 	 */
1689 	switch (tp->ftt_type) {
1690 		case FASTTRAP_T_RET:
1691 		case FASTTRAP_T_RET16:
1692 		{
1693 			user_addr_t dst;
1694 			user_addr_t addr;
1695 			int ret;
1696 
1697 			/*
1698 			 * We have to emulate _every_ facet of the behavior of a ret
1699 			 * instruction including what happens if the load from %esp
1700 			 * fails; in that case, we send a SIGSEGV.
1701 			 */
1702 			ret = fasttrap_fuword64((user_addr_t)regs64->isf.rsp, &dst);
1703 			addr = regs64->isf.rsp + sizeof (uint64_t);
1704 
1705 			if (ret == -1) {
1706 				fasttrap_sigsegv(p, uthread, (user_addr_t)regs64->isf.rsp);
1707 				new_pc = pc;
1708 				break;
1709 			}
1710 
1711 			if (tp->ftt_type == FASTTRAP_T_RET16)
1712 				addr += tp->ftt_dest;
1713 
1714 			regs64->isf.rsp = addr;
1715 			new_pc = dst;
1716 			break;
1717 		}
1718 
1719 		case FASTTRAP_T_JCC:
1720 		{
1721 			uint_t taken;
1722 
1723 			switch (tp->ftt_code) {
1724 				case FASTTRAP_JO:
1725 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_OF) != 0;
1726 					break;
1727 				case FASTTRAP_JNO:
1728 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0;
1729 					break;
1730 				case FASTTRAP_JB:
1731 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_CF) != 0;
1732 					break;
1733 				case FASTTRAP_JAE:
1734 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_CF) == 0;
1735 					break;
1736 				case FASTTRAP_JE:
1737 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) != 0;
1738 					break;
1739 				case FASTTRAP_JNE:
1740 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) == 0;
1741 					break;
1742 				case FASTTRAP_JBE:
1743 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_CF) != 0 ||
1744 						(regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) != 0;
1745 					break;
1746 				case FASTTRAP_JA:
1747 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_CF) == 0 &&
1748 						(regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) == 0;
1749 					break;
1750 				case FASTTRAP_JS:
1751 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_SF) != 0;
1752 					break;
1753 				case FASTTRAP_JNS:
1754 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0;
1755 					break;
1756 				case FASTTRAP_JP:
1757 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_PF) != 0;
1758 					break;
1759 				case FASTTRAP_JNP:
1760 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_PF) == 0;
1761 					break;
1762 				case FASTTRAP_JL:
1763 					taken = ((regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0) !=
1764 						((regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0);
1765 					break;
1766 				case FASTTRAP_JGE:
1767 					taken = ((regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0) ==
1768 						((regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0);
1769 					break;
1770 				case FASTTRAP_JLE:
1771 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) != 0 ||
1772 						((regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0) !=
1773 						((regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0);
1774 					break;
1775 				case FASTTRAP_JG:
1776 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
1777 						((regs64->isf.rflags & FASTTRAP_EFLAGS_SF) == 0) ==
1778 						((regs64->isf.rflags & FASTTRAP_EFLAGS_OF) == 0);
1779 					break;
1780 				default:
1781 					taken = FALSE;
1782 			}
1783 
1784 			if (taken)
1785 				new_pc = tp->ftt_dest;
1786 			else
1787 				new_pc = pc + tp->ftt_size;
1788 			break;
1789 		}
1790 
1791 		case FASTTRAP_T_LOOP:
1792 		{
1793 			uint_t taken;
1794 			uint64_t cx = regs64->rcx--;
1795 
1796 			switch (tp->ftt_code) {
1797 				case FASTTRAP_LOOPNZ:
1798 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
1799 						cx != 0;
1800 					break;
1801 				case FASTTRAP_LOOPZ:
1802 					taken = (regs64->isf.rflags & FASTTRAP_EFLAGS_ZF) != 0 &&
1803 						cx != 0;
1804 					break;
1805 				case FASTTRAP_LOOP:
1806 					taken = (cx != 0);
1807 					break;
1808 				default:
1809 					taken = FALSE;
1810 			}
1811 
1812 			if (taken)
1813 				new_pc = tp->ftt_dest;
1814 			else
1815 				new_pc = pc + tp->ftt_size;
1816 			break;
1817 		}
1818 
1819 		case FASTTRAP_T_JCXZ:
1820 		{
1821 			uint64_t cx = regs64->rcx;
1822 
1823 			if (cx == 0)
1824 				new_pc = tp->ftt_dest;
1825 			else
1826 				new_pc = pc + tp->ftt_size;
1827 			break;
1828 		}
1829 
1830 		case FASTTRAP_T_PUSHL_EBP:
1831 		{
1832 			user_addr_t addr = regs64->isf.rsp - sizeof (uint64_t);
1833 			int ret = fasttrap_suword64(addr, (uint64_t)regs64->rbp);
1834 
1835 			if (ret == -1) {
1836 				fasttrap_sigsegv(p, uthread, addr);
1837 				new_pc = pc;
1838 				break;
1839 			}
1840 
1841 			regs64->isf.rsp = addr;
1842 			new_pc = pc + tp->ftt_size;
1843 			break;
1844 		}
1845 
1846 		case FASTTRAP_T_NOP:
1847 			new_pc = pc + tp->ftt_size;
1848 			break;
1849 
1850 		case FASTTRAP_T_JMP:
1851 		case FASTTRAP_T_CALL:
1852 			if (tp->ftt_code == 0) {
1853 				new_pc = tp->ftt_dest;
1854 			} else {
1855 				user_addr_t value, addr = tp->ftt_dest;
1856 
1857 				if (tp->ftt_base != FASTTRAP_NOREG)
1858 					addr += fasttrap_getreg(regs, tp->ftt_base);
1859 				if (tp->ftt_index != FASTTRAP_NOREG)
1860 					addr += fasttrap_getreg(regs, tp->ftt_index) <<
1861 						tp->ftt_scale;
1862 
1863 				if (tp->ftt_code == 1) {
1864 					/*
1865 					 * If there's a segment prefix for this
1866 					 * instruction, we'll need to check permissions
1867 					 * and bounds on the given selector, and adjust
1868 					 * the address accordingly.
1869 					 */
1870 					if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
1871 					    fasttrap_do_seg(tp, regs, &addr) != 0) {
1872 						fasttrap_sigsegv(p, uthread, addr);
1873 						new_pc = pc;
1874 						break;
1875 					}
1876 
1877 					if (fasttrap_fuword64(addr, &value) == -1) {
1878 						fasttrap_sigsegv(p, uthread, addr);
1879 						new_pc = pc;
1880 						break;
1881 					}
1882 					new_pc = value;
1883 				} else {
1884 					new_pc = addr;
1885 				}
1886 			}
1887 
1888 			/*
1889 			 * If this is a call instruction, we need to push the return
1890 			 * address onto the stack. If this fails, we send the process
1891 			 * a SIGSEGV and reset the pc to emulate what would happen if
1892 			 * this instruction weren't traced.
1893 			 */
1894 			if (tp->ftt_type == FASTTRAP_T_CALL) {
1895 				user_addr_t addr = regs64->isf.rsp - sizeof (uint64_t);
1896 				int ret = fasttrap_suword64(addr, pc + tp->ftt_size);
1897 
1898 				if (ret == -1) {
1899 					fasttrap_sigsegv(p, uthread, addr);
1900 					new_pc = pc;
1901 					break;
1902 				}
1903 
1904 				regs64->isf.rsp = addr;
1905 			}
1906 			break;
1907 
1908 		case FASTTRAP_T_COMMON:
1909 		{
1910 			user_addr_t addr, write_addr;
1911 			uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];
1912 			uint_t i = 0;
1913 
1914 			/*
1915 			 * Generic Instruction Tracing
1916 			 * ---------------------------
1917 			 *
1918 			 * This is the layout of the scratch space in the user-land
1919 			 * thread structure for our generated instructions.
1920 			 *
1921 			 *	32-bit mode			bytes
1922 			 *	------------------------	-----
1923 			 * a:	<original instruction>		<= 15
1924 			 *	jmp	<pc + tp->ftt_size>	    5
1925 * b:	<original instruction>		<= 15
1926 			 *	int	T_DTRACE_RET		    2
1927 			 *					-----
1928 			 *					<= 37
1929 			 *
1930 			 *	64-bit mode			bytes
1931 			 *	------------------------	-----
1932 			 * a:	<original instruction>		<= 15
1933 			 *	jmp	0(%rip)			    6
1934 			 *	<pc + tp->ftt_size>		    8
1935 			 * b:	<original instruction>		<= 15
1936 			 * 	int	T_DTRACE_RET		    2
1937 			 * 					-----
1938 			 * 					<= 46
1939 			 *
1940 			 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1941 			 * to b. If we encounter a signal on the way out of the
1942 			 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1943 			 * so that we execute the original instruction and re-enter
1944 			 * the kernel rather than redirecting to the next instruction.
1945 			 *
1946 			 * If there are return probes (so we know that we're going to
1947 			 * need to reenter the kernel after executing the original
1948 			 * instruction), the scratch space will just contain the
1949 			 * original instruction followed by an interrupt -- the same
1950 			 * data as at b.
1951 			 *
1952 			 * %rip-relative Addressing
1953 			 * ------------------------
1954 			 *
1955 			 * There's a further complication in 64-bit mode due to %rip-
1956 			 * relative addressing. While this is clearly a beneficial
1957 			 * architectural decision for position independent code, it's
1958 			 * hard not to see it as a personal attack against the pid
1959 			 * provider since before there was a relatively small set of
1960 			 * instructions to emulate; with %rip-relative addressing,
1961 			 * almost every instruction can potentially depend on the
1962 			 * address at which it's executed. Rather than emulating
1963 			 * the broad spectrum of instructions that can now be
1964 			 * position dependent, we emulate jumps and others as in
1965 			 * 32-bit mode, and take a different tack for instructions
1966 			 * using %rip-relative addressing.
1967 			 *
1968 			 * For every instruction that uses the ModRM byte, the
1969 			 * in-kernel disassembler reports its location. We use the
1970 			 * ModRM byte to identify that an instruction uses
1971 			 * %rip-relative addressing and to see what other registers
1972 			 * the instruction uses. To emulate those instructions,
1973 			 * we modify the instruction to be %rax-relative rather than
1974 			 * %rip-relative (or %rcx-relative if the instruction uses
1975 			 * %rax; or %r8- or %r9-relative if the REX.B is present so
1976 			 * we don't have to rewrite the REX prefix). We then load
1977 			 * the value that %rip would have been into the scratch
1978 			 * register and generate an instruction to reset the scratch
1979 			 * register back to its original value. The instruction
1980 			 * sequence looks like this:
1981 			 *
1982 			 *	64-mode %rip-relative		bytes
1983 			 *	------------------------	-----
1984 			 * a:	<modified instruction>		<= 15
1985 			 *	movq	$<value>, %<scratch>	    6
1986 			 *	jmp	0(%rip)			    6
1987 			 *	<pc + tp->ftt_size>		    8
1988 			 * b:	<modified instruction>  	<= 15
1989 			 * 	int	T_DTRACE_RET		    2
1990 			 * 					-----
1991 			 *					   52
1992 			 *
1993 			 * We set curthread->t_dtrace_regv so that upon receiving
1994 			 * a signal we can reset the value of the scratch register.
1995 			 */
1996 
1997 			addr = uthread->t_dtrace_scratch->addr;
1998 			write_addr = uthread->t_dtrace_scratch->write_addr;
1999 
2000 			if (addr == 0LL || write_addr == 0LL) {
2001 				fasttrap_sigtrap(p, uthread, pc); // Should be killing target proc
2002 				new_pc = pc;
2003 				break;
2004 			}
2005 
2006 			ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);
2007 
2008 			uthread->t_dtrace_scrpc = addr;
2009 			bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
2010 			i += tp->ftt_size;
2011 
2012 			if (tp->ftt_ripmode != 0) {
2013 				uint64_t* reg;
2014 
2015 				ASSERT(tp->ftt_ripmode &
2016 				       (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));
2017 
2018 				/*
2019 				 * If this was a %rip-relative instruction, we change
2020 				 * it to be either a %rax- or %rcx-relative
2021 				 * instruction (depending on whether those registers
2022 				 * are used as another operand; or %r8- or %r9-
2023 				 * relative depending on the value of REX.B). We then
2024 				 * set that register and generate a movq instruction
2025 				 * to reset the value.
2026 				 */
2027 				if (tp->ftt_ripmode & FASTTRAP_RIP_X)
2028 					scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
2029 				else
2030 					scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);
2031 
2032 				if (tp->ftt_ripmode & FASTTRAP_RIP_1)
2033 					scratch[i++] = FASTTRAP_MOV_EAX;
2034 				else
2035 					scratch[i++] = FASTTRAP_MOV_ECX;
2036 
2037 				switch (tp->ftt_ripmode) {
2038 					case FASTTRAP_RIP_1:
2039 						reg = &regs64->rax;
2040 						uthread->t_dtrace_reg = REG_RAX;
2041 						break;
2042 					case FASTTRAP_RIP_2:
2043 						reg = &regs64->rcx;
2044 						uthread->t_dtrace_reg = REG_RCX;
2045 						break;
2046 					case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
2047 						reg = &regs64->r8;
2048 						uthread->t_dtrace_reg = REG_R8;
2049 						break;
2050 					case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
2051 						reg = &regs64->r9;
2052 						uthread->t_dtrace_reg = REG_R9;
2053 						break;
2054 					default:
2055 						reg = NULL;
2056 						panic("unhandled ripmode in fasttrap_pid_probe64");
2057 				}
2058 
2059 				/* LINTED - alignment */
2060 				*(uint64_t *)&scratch[i] = *reg;
2061 				uthread->t_dtrace_regv = *reg;
2062 				*reg = pc + tp->ftt_size;
2063 				i += sizeof (uint64_t);
2064 			}
2065 
2066 			/*
2067 			 * Generate the branch instruction to what would have
2068 			 * normally been the subsequent instruction. In 32-bit mode,
2069 			 * this is just a relative branch; in 64-bit mode this is a
2070 			 * %rip-relative branch that loads the 64-bit pc value
2071 			 * immediately after the jmp instruction.
2072 			 */
2073 			scratch[i++] = FASTTRAP_GROUP5_OP;
2074 			scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
2075 			/* LINTED - alignment */
2076 			*(uint32_t *)&scratch[i] = 0;
2077 			i += sizeof (uint32_t);
2078 			/* LINTED - alignment */
2079 			*(uint64_t *)&scratch[i] = pc + tp->ftt_size;
2080 			i += sizeof (uint64_t);
2081 
2082 			uthread->t_dtrace_astpc = addr + i;
2083 			bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
2084 			i += tp->ftt_size;
2085 			scratch[i++] = FASTTRAP_INT;
2086 			scratch[i++] = T_DTRACE_RET;
2087 
2088 			ASSERT(i <= sizeof (scratch));
2089 
2090 			if (fasttrap_copyout(scratch, write_addr, i)) {
2091 				fasttrap_sigtrap(p, uthread, pc);
2092 				new_pc = pc;
2093 				break;
2094 			}
2095 
2096 			if (tp->ftt_retids != NULL) {
2097 				uthread->t_dtrace_step = 1;
2098 				uthread->t_dtrace_ret = 1;
2099 				new_pc = uthread->t_dtrace_astpc;
2100 			} else {
2101 				new_pc = uthread->t_dtrace_scrpc;
2102 			}
2103 
2104 			uthread->t_dtrace_pc = pc;
2105 			uthread->t_dtrace_npc = pc + tp->ftt_size;
2106 			uthread->t_dtrace_on = 1;
2107 			break;
2108 		}
2109 
2110 		default:
2111 			panic("fasttrap: mishandled an instruction");
2112 	}
2113 
2114 done:
2115 	/*
2116 	 * APPLE NOTE:
2117 	 *
2118 	 * We're setting this earlier than Solaris does, to get a "correct"
2119 	 * ustack() output. In the Sun code,  a() -> b() -> c() -> d() is
2120 	 * reported at: d, b, a. The new way gives c, b, a, which is closer
	 * to correct, as the return instruction has already executed.
2122 	 */
2123 	regs64->isf.rip = new_pc;
2124 
2125 
2126 	/*
2127 	 * If there were no return probes when we first found the tracepoint,
2128 	 * we should feel no obligation to honor any return probes that were
2129 	 * subsequently enabled -- they'll just have to wait until the next
2130 	 * time around.
2131 	 */
2132 	if (tp->ftt_retids != NULL) {
2133 		/*
2134 		 * We need to wait until the results of the instruction are
2135 		 * apparent before invoking any return probes. If this
2136 		 * instruction was emulated we can just call
2137 		 * fasttrap_return_common(); if it needs to be executed, we
2138 		 * need to wait until the user thread returns to the kernel.
2139 		 */
2140 		if (tp->ftt_type != FASTTRAP_T_COMMON) {
2141 			fasttrap_return_common(regs, pc, pid, new_pc);
2142 		} else {
2143 			ASSERT(uthread->t_dtrace_ret != 0);
2144 			ASSERT(uthread->t_dtrace_pc == pc);
2145 			ASSERT(uthread->t_dtrace_scrpc != 0);
2146 			ASSERT(new_pc == uthread->t_dtrace_astpc);
2147 		}
2148 	}
2149 
2150 	return (0);
2151 }
2152 
2153 int
fasttrap_pid_probe(x86_saved_state_t * regs)2154 fasttrap_pid_probe(x86_saved_state_t *regs)
2155 {
2156         if (is_saved_state64(regs))
2157 		return fasttrap_pid_probe64(regs);
2158 
2159 	return fasttrap_pid_probe32(regs);
2160 }
2161 
2162 int
fasttrap_return_probe(x86_saved_state_t * regs)2163 fasttrap_return_probe(x86_saved_state_t *regs)
2164 {
2165 	x86_saved_state64_t *regs64;
2166 	x86_saved_state32_t *regs32;
2167 	unsigned int p_model;
2168 
2169         if (is_saved_state64(regs)) {
2170                 regs64 = saved_state64(regs);
2171 		regs32 = NULL;
2172 		p_model = DATAMODEL_LP64;
2173         } else {
2174 		regs64 = NULL;
2175                 regs32 = saved_state32(regs);
2176 		p_model = DATAMODEL_ILP32;
2177         }
2178 
2179 	proc_t *p = current_proc();
2180 	uthread_t uthread = current_uthread();
2181 	user_addr_t pc = uthread->t_dtrace_pc;
2182 	user_addr_t npc = uthread->t_dtrace_npc;
2183 
2184 	uthread->t_dtrace_pc = 0;
2185 	uthread->t_dtrace_npc = 0;
2186 	uthread->t_dtrace_scrpc = 0;
2187 	uthread->t_dtrace_astpc = 0;
2188 
2189 
2190 	/*
2191 	 * We set rp->r_pc to the address of the traced instruction so
2192 	 * that it appears to dtrace_probe() that we're on the original
2193 	 * instruction, and so that the user can't easily detect our
2194 	 * complex web of lies. dtrace_return_probe() (our caller)
2195 	 * will correctly set %pc after we return.
2196 	 */
2197 	if (p_model == DATAMODEL_LP64)
2198 		regs64->isf.rip = pc;
2199 	else
2200 		regs32->eip = pc;
2201 
2202 	fasttrap_return_common(regs, pc, proc_getpid(p), npc);
2203 
2204 	return (0);
2205 }
2206 
2207 uint64_t
fasttrap_pid_getarg(void * arg,dtrace_id_t id,void * parg,int argno,int aframes)2208 fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
2209     int aframes)
2210 {
2211 	pal_register_cache_state(current_thread(), VALID);
2212 #pragma unused(arg, id, parg, aframes)
2213 	return (fasttrap_anarg((x86_saved_state_t *)find_user_regs(current_thread()), 1, argno));
2214 }
2215 
2216 uint64_t
fasttrap_usdt_getarg(void * arg,dtrace_id_t id,void * parg,int argno,int aframes)2217 fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
2218     int aframes)
2219 {
2220 	pal_register_cache_state(current_thread(), VALID);
2221 #pragma unused(arg, id, parg, aframes)
2222 	return (fasttrap_anarg((x86_saved_state_t *)find_user_regs(current_thread()), 0, argno));
2223 }
2224 
2225 /*
2226  * APPLE NOTE: See comments by regmap array definition. We are cheating
2227  * when returning 32 bit registers.
2228  */
2229 static user_addr_t
fasttrap_getreg(x86_saved_state_t * regs,uint_t reg)2230 fasttrap_getreg(x86_saved_state_t *regs, uint_t reg)
2231 {
2232 	if (is_saved_state64(regs)) {
2233 		x86_saved_state64_t *regs64 = saved_state64(regs);
2234 
2235 		switch (reg) {
2236 			case REG_RAX:		return regs64->rax;
2237 			case REG_RCX:		return regs64->rcx;
2238 			case REG_RDX:		return regs64->rdx;
2239 			case REG_RBX:		return regs64->rbx;
2240 			case REG_RSP:		return regs64->isf.rsp;
2241 			case REG_RBP:		return regs64->rbp;
2242 			case REG_RSI:		return regs64->rsi;
2243 			case REG_RDI:		return regs64->rdi;
2244 			case REG_R8:		return regs64->r8;
2245 			case REG_R9:		return regs64->r9;
2246 			case REG_R10:		return regs64->r10;
2247 			case REG_R11:		return regs64->r11;
2248 			case REG_R12:		return regs64->r12;
2249 			case REG_R13:		return regs64->r13;
2250 			case REG_R14:		return regs64->r14;
2251 			case REG_R15:		return regs64->r15;
2252 			case REG_TRAPNO:	return regs64->isf.trapno;
2253 			case REG_ERR:		return regs64->isf.err;
2254 			case REG_RIP:		return regs64->isf.rip;
2255 			case REG_CS:		return regs64->isf.cs;
2256 			case REG_RFL:		return regs64->isf.rflags;
2257 			case REG_SS:		return regs64->isf.ss;
2258 			case REG_FS:		return regs64->fs;
2259 			case REG_GS:		return regs64->gs;
2260 			case REG_ES:
2261 			case REG_DS:
2262 			case REG_FSBASE:
2263 			case REG_GSBASE:
2264 				// Important to distinguish these requests (which should be legal) from other values.
2265 				panic("dtrace: unimplemented x86_64 getreg()");
2266 		}
2267 
2268 		panic("dtrace: unhandled x86_64 getreg() constant");
2269 	} else {
2270 		x86_saved_state32_t *regs32 = saved_state32(regs);
2271 
2272 		switch (reg) {
2273 			case REG_RAX:		return regs32->eax;
2274 			case REG_RCX:		return regs32->ecx;
2275 			case REG_RDX:		return regs32->edx;
2276 			case REG_RBX:		return regs32->ebx;
2277 			case REG_RSP:		return regs32->uesp;
2278 			case REG_RBP:		return regs32->ebp;
2279 			case REG_RSI:		return regs32->esi;
2280 			case REG_RDI:		return regs32->edi;
2281 		}
2282 
2283 		panic("dtrace: unhandled i386 getreg() constant");
2284 	}
2285 
2286 	return 0;
2287 }
2288