xref: /xnu-11215.41.3/bsd/dev/i386/fbt_x86.c (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <kern/thread.h>
28 #include <mach/thread_status.h>
29 #include <mach/vm_param.h>
30 #include <mach-o/loader.h>
31 #include <mach-o/nlist.h>
32 #include <libkern/kernel_mach_header.h>
33 #include <libkern/OSAtomic.h>
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/errno.h>
38 #include <sys/stat.h>
39 #include <sys/ioctl.h>
40 #include <sys/conf.h>
41 #include <sys/fcntl.h>
42 #include <miscfs/devfs/devfs.h>
43 
44 #include <sys/dtrace.h>
45 #include <sys/dtrace_impl.h>
46 #include <sys/fbt.h>
47 
48 #include <sys/dtrace_glue.h>
49 
50 #include <san/kasan.h>
51 #include <machine/trap.h>
52 
53 
54 #define DTRACE_INVOP_NOP_SKIP 1
55 #define DTRACE_INVOP_MOVL_ESP_EBP 10
56 #define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
57 #define DTRACE_INVOP_MOV_RSP_RBP 11
58 #define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
59 #define DTRACE_INVOP_POP_RBP 12
60 #define DTRACE_INVOP_POP_RBP_SKIP 1
61 #define DTRACE_INVOP_LEAVE_SKIP 1
62 
63 #define	FBT_PUSHL_EBP			0x55
64 #define	FBT_MOVL_ESP_EBP0_V0	0x8b
65 #define	FBT_MOVL_ESP_EBP1_V0	0xec
66 #define	FBT_MOVL_ESP_EBP0_V1	0x89
67 #define	FBT_MOVL_ESP_EBP1_V1	0xe5
68 
69 #define	FBT_PUSH_RBP			0x55
70 #define	FBT_REX_RSP_RBP			0x48
71 #define	FBT_MOV_RSP_RBP0		0x89
72 #define	FBT_MOV_RSP_RBP1		0xe5
73 #define	FBT_POP_RBP				0x5d
74 
75 #define	FBT_POPL_EBP			0x5d
76 #define	FBT_RET					0xc3
77 #define	FBT_RET_IMM16			0xc2
78 #define	FBT_LEAVE				0xc9
79 #define	FBT_JMP_SHORT_REL		0xeb /* Jump short, relative, displacement relative to next instr. */
80 #define	FBT_JMP_NEAR_REL		0xe9 /* Jump near, relative, displacement relative to next instr. */
81 #define	FBT_JMP_FAR_ABS			0xea /* Jump far, absolute, address given in operand */
82 #define FBT_RET_LEN				1
83 #define FBT_RET_IMM16_LEN		3
84 #define	FBT_JMP_SHORT_REL_LEN	2
85 #define	FBT_JMP_NEAR_REL_LEN	5
86 #define	FBT_JMP_FAR_ABS_LEN		5
87 
88 #define	FBT_PATCHVAL			0xf0
89 #define FBT_AFRAMES_ENTRY		7
90 #define FBT_AFRAMES_RETURN		6
91 
92 #define	FBT_ENTRY	"entry"
93 #define	FBT_RETURN	"return"
94 #define	FBT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
95 
96 extern dtrace_provider_id_t	fbt_id;
97 extern fbt_probe_t		**fbt_probetab;
98 extern int			fbt_probetab_mask;
99 
100 kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int);
101 
102 int
fbt_invop(uintptr_t addr,uintptr_t * state,uintptr_t rval)103 fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
104 {
105 	fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
106 
107 	for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
108 		if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
109 
110 			if (fbt->fbtp_roffset == 0) {
111 				x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
112 
113 				CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp)
114 				/* 64-bit ABI, arguments passed in registers. */
115 				dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
116 				CPU->cpu_dtrace_caller = 0;
117 			} else {
118 
119 				dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
120 				CPU->cpu_dtrace_caller = 0;
121 			}
122 
123 			return (fbt->fbtp_rval);
124 		}
125 	}
126 
127 	return (0);
128 }
129 
130 #define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0))
131 #define FBT_EXCEPTION_CODE T_INVALID_OPCODE
132 
/*
 * fbt_perfCallback — kernel trap callback for FBT.
 *
 * Invoked from the kernel trap path when an invalid-opcode fault
 * (T_INVALID_OPCODE) may have been caused by an FBT patch byte
 * (FBT_PATCHVAL, 0xf0). Dispatches to dtrace_invop() and then emulates
 * the instruction that was overwritten: a NOP (sdt), the prologue
 * mov %rsp,%rbp, or the epilogue pop %rbp / leave.
 *
 * trapno       - trap number from the fault
 * tagged_regs  - tagged saved-state; only the 64-bit flavor is handled
 * lo_spp       - in/out: stack pointer recorded by the trampolines,
 *                adjusted here when the stack is shifted
 * Returns KERN_SUCCESS if the fault was an FBT probe and was emulated,
 * KERN_FAILURE otherwise (trap handling continues normally).
 */
kern_return_t
fbt_perfCallback(
                int         		trapno,
                x86_saved_state_t 	*tagged_regs,
		uintptr_t		*lo_spp,
                __unused int        unused2)
{
	kern_return_t retval = KERN_FAILURE;
	x86_saved_state64_t *saved_state = saved_state64(tagged_regs);

	/* Only handle #UD faults taken from kernel mode (CPL 0). */
	if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
		boolean_t oldlevel;
		uint64_t rsp_probe, fp, delta = 0;
		uintptr_t old_sp;
		uint32_t *pDst;
		int emul;


		oldlevel = ml_set_interrupts_enabled(FALSE);

		/* Calculate where the stack pointer was when the probe instruction "fired." */
		rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */

		/*
		 * Export the PC bracketing the dtrace_invop() call so the
		 * stack walker can recognize (and skip) this callsite.
		 */
		__asm__ volatile(
			"Ldtrace_invop_callsite_pre_label:\n"
			".data\n"
			".private_extern _dtrace_invop_callsite_pre\n"
			"_dtrace_invop_callsite_pre:\n"
			"  .quad Ldtrace_invop_callsite_pre_label\n"
			".text\n"
				 );

		emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );

		__asm__ volatile(
			"Ldtrace_invop_callsite_post_label:\n"
			".data\n"
			".private_extern _dtrace_invop_callsite_post\n"
			"_dtrace_invop_callsite_post:\n"
			"  .quad Ldtrace_invop_callsite_post_label\n"
			".text\n"
				 );

		/* Emulate the instruction the patch byte replaced. */
		switch (emul) {
		case DTRACE_INVOP_NOP:
			saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP;	/* Skip over the patched NOP (planted by sdt). */
			retval = KERN_SUCCESS;
			break;

		case DTRACE_INVOP_MOV_RSP_RBP:
			saved_state->rbp = rsp_probe;							/* Emulate patched mov %rsp,%rbp */
			saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP;	/* Skip over the bytes of the patched mov %rsp,%rbp */
			retval = KERN_SUCCESS;
			break;

		case DTRACE_INVOP_POP_RBP:
		case DTRACE_INVOP_LEAVE:
/*
 * Emulate first micro-op of patched leave: mov %rbp,%rsp
 * fp points just below the return address slot for target's ret
 * and at the slot holding the frame pointer saved by the target's prologue.
 */
			fp = saved_state->rbp;
/* Emulate second micro-op of patched leave: patched pop %rbp
 * savearea rbp is set for the frame of the caller to target
 * The *live* %rsp will be adjusted below for pop increment(s)
 */
			saved_state->rbp = *(uint64_t *)fp;
/* Skip over the patched leave */
			saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
/*
 * Lift the stack to account for the emulated leave
 * Account for words local in this frame
 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
 *
 * NOTE: delta is counted in 32-bit words (uint32_t), so byte
 * adjustments below use (delta << 2).
 */
			delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */
/* Account for popping off the rbp (just accomplished by the emulation
 * above...) — two 32-bit words == one 64-bit slot.
 */
			delta += 2;
			saved_state->isf.rsp += (delta << 2);
/* Obtain the stack pointer recorded by the trampolines */
			old_sp = *lo_spp;
/* Shift contents of stack: move everything between the trampoline sp
 * and the popped frame up by 'delta' words, walking top-down so the
 * overlapping copy is safe.
 */
			for (pDst = (uint32_t *)fp;
			     pDst > (((uint32_t *)old_sp));
				 pDst--)
				*pDst = pDst[-delta];

#if KASAN
			/*
			 * The above has moved stack objects so they are no longer in sync
			 * with the shadow.
			 */
			uintptr_t base = (uintptr_t)((uint32_t *)old_sp - delta);
			uintptr_t size = (uintptr_t)fp - base;
			if (base >= VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
				kasan_unpoison_stack(base, size);
			}
#endif

/* Track the stack lift in "saved_state": the saved state itself lives
 * on the stack that was just shifted, so re-point at its new location.
 */
			saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
/* Adjust the stack pointer utilized by the trampolines */
			*lo_spp = old_sp + (delta << 2);

			retval = KERN_SUCCESS;
			break;

		default:
			retval = KERN_FAILURE;
			break;
		}

		/* Trick trap_from_kernel into not attempting to handle pending AST_URGENT */
		saved_state->isf.trapno = T_PREEMPT;

		ml_set_interrupts_enabled(oldlevel);
	}

	return retval;
}
255 
/*
 * fbt_provide_probe — discover and register FBT probes for one symbol.
 *
 * Scans the instruction range [symbolStart, instrHigh) belonging to
 * 'symbolName' in module 'modname'/'ctl':
 *   - Creates (or chains onto) one "entry" probe at the prologue's
 *     mov %rsp,%rbp (the push %rbp must be immediately followed by it).
 *   - Creates (or chains onto) "return" probes at each epilogue's
 *     pop %rbp or leave that is immediately followed by a ret/jmp.
 * Probes are inserted into fbt_probetab and enabled on the spot if a
 * sibling probe with the same id is already patched live.
 */
void
fbt_provide_probe(struct modctl *ctl, const char *modname, const char* symbolName, machine_inst_t* symbolStart, machine_inst_t* instrHigh)
{
	unsigned int			j;
	unsigned int			doenable = 0;
	dtrace_id_t			thisid;

	fbt_probe_t *newfbt, *retfbt, *entryfbt;
	machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
	int size;

	/*
	 * Guard against null symbols
	 */
	if (!symbolStart || !instrHigh || instrHigh < symbolStart) {
		kprintf("dtrace: %s has an invalid address\n", symbolName);
		return;
	}

	/*
	 * Look for the push %rbp within the first few instructions; bail out
	 * of the scan early on a ret (empty/leaf function) or a decode failure.
	 */
	for (j = 0, instr = symbolStart, theInstr = 0;
	     (j < 4) && (instrHigh > (instr + 2)); j++) {
		theInstr = instr[0];
		if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
			break;

		if ((size = dtrace_instr_size(instr)) <= 0)
			break;

		instr += size;
	}

	/* No recognizable frame-pointer prologue: not instrumentable. */
	if (theInstr != FBT_PUSH_RBP)
		return;

	i1 = instr[1];
	i2 = instr[2];
	i3 = instr[3];

	limit = (machine_inst_t *)instrHigh;

	/* Require push %rbp to be immediately followed by mov %rsp,%rbp (48 89 e5). */
	if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
		instr += 1; /* Advance to the mov %rsp,%rbp */
		theInstr = i1;
	} else {
		return;
	}
#if 0
	else {
		/*
		 * Sometimes, the compiler will schedule an intervening instruction
		 * in the function prologue. Example:
		 *
		 * _mach_vm_read:
		 * 000006d8        pushl   %ebp
		 * 000006d9        movl    $0x00000004,%edx
		 * 000006de        movl    %esp,%ebp
		 *
		 * Try the next instruction, to see if it is a movl %esp,%ebp
		 */

		instr += 1; /* Advance past the pushl %ebp */
		if ((size = dtrace_instr_size(instr)) <= 0)
			return;

		instr += size;

		if ((instr + 1) >= limit)
			return;

		i1 = instr[0];
		i2 = instr[1];

		if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
		    !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
			return;

		/* instr already points at the movl %esp,%ebp */
		theInstr = i1;
	}
#endif
	/* Create (or extend) the entry probe at the mov %rsp,%rbp. */
	thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
	newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
	strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );

	if (thisid != 0) {
		/*
		 * The dtrace_probe previously existed, so we have to hook
		 * the newfbt entry onto the end of the existing fbt's chain.
		 * If we find an fbt entry that was previously patched to
		 * fire, (as indicated by the current patched value), then
		 * we want to enable this newfbt on the spot.
		 */
		entryfbt = dtrace_probe_arg (fbt_id, thisid);
		ASSERT (entryfbt != NULL);
		for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
			if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
				doenable++;

			if (entryfbt->fbtp_next == NULL) {
				entryfbt->fbtp_next = newfbt;
				newfbt->fbtp_id = entryfbt->fbtp_id;
				break;
			}
		}
	}
	else {
		/*
		 * The dtrace_probe did not previously exist, so we
		 * create it and hook in the newfbt.  Since the probe is
		 * new, we obviously do not need to enable it on the spot.
		 */
		newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
		doenable = 0;
	}

	newfbt->fbtp_patchpoint = instr;
	newfbt->fbtp_ctl = ctl;
	newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
	newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
	newfbt->fbtp_savedval = theInstr;
	newfbt->fbtp_patchval = FBT_PATCHVAL;
	newfbt->fbtp_currentval = 0;
	newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
	fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;

	if (doenable)
		fbt_enable(NULL, newfbt->fbtp_id, newfbt);

	/*
	 * The fbt entry chain is in place, one entry point per symbol.
	 * The fbt return chain can have multiple return points per symbol.
	 * Here we find the end of the fbt return chain.
	 */

	doenable=0;

	thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
	if (thisid != 0) {
		/* The dtrace_probe previously existed, so we have to
		 * find the end of the existing fbt chain.  If we find
		 * an fbt return that was previously patched to fire,
		 * (as indicated by the current patched value), then
		 * we want to enable any new fbts on the spot.
		 */
		retfbt = dtrace_probe_arg (fbt_id, thisid);
		ASSERT(retfbt != NULL);
		for (;  retfbt != NULL; retfbt =  retfbt->fbtp_next) {
			if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
				doenable++;
			if(retfbt->fbtp_next == NULL)
				break;
		}
	}
	else {
		doenable = 0;
		retfbt = NULL;
	}

	/* Scan the remainder of the function for return points. */
again:
	if (instr >= limit)
		return;

	/*
	 * If this disassembly fails, then we've likely walked off into
	 * a jump table or some other unsuitable area.  Bail out of the
	 * disassembly now.
	 */
	if ((size = dtrace_instr_size(instr)) <= 0)
		return;

	/*
	 * We (desperately) want to avoid erroneously instrumenting a
	 * jump table, especially given that our markers are pretty
	 * short:  two bytes on x86, and just one byte on amd64.  To
	 * determine if we're looking at a true instruction sequence
	 * or an inline jump table that happens to contain the same
	 * byte sequences, we resort to some heuristic sleeze:  we
	 * treat this instruction as being contained within a pointer,
	 * and see if that pointer points to within the body of the
	 * function.  If it does, we refuse to instrument it.
	 */
	for (j = 0; j < sizeof (uintptr_t); j++) {
		uintptr_t check = (uintptr_t)instr - j;
		uint8_t *ptr;

		if (check < (uintptr_t)symbolStart)
			break;

		if (check + sizeof (uintptr_t) > (uintptr_t)limit)
			continue;

		ptr = *(uint8_t **)check;

		if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
			/* Looks like a jump-table entry: skip this instruction. */
			instr += size;
			goto again;
		}
	}

	/*
	 * OK, it's an instruction.
	 */
	theInstr = instr[0];

	/* Walked onto the start of the next routine? If so, bail out of this function. */
	if (theInstr == FBT_PUSH_RBP)
		return;

	/* Only single-byte pop %rbp (0x5d) or leave (0xc9) can mark an epilogue. */
	if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
		instr += size;
		goto again;
	}

	/*
	 * Found the pop %rbp; or leave.
	 */
	machine_inst_t *patch_instr = instr;

	/*
	 * Scan forward for a "ret", or "jmp".
	 */
	instr += size;
	if (instr >= limit)
		return;

	size = dtrace_instr_size(instr);
	if (size <= 0) /* Failed instruction decode? */
		return;

	theInstr = instr[0];

	/* The epilogue candidate must be directly followed by ret or a tail jump. */
	if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
	    !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
	    !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
	    !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
	    !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
		return;

	/*
	 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
	 */
	newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
	strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );

	if (retfbt == NULL) {
		/* First return point for this symbol: create the probe. */
		newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
						      symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
	} else {
		/* Additional return point: chain onto the existing probe. */
		retfbt->fbtp_next = newfbt;
		newfbt->fbtp_id = retfbt->fbtp_id;
	}

	retfbt = newfbt;
	newfbt->fbtp_patchpoint = patch_instr;
	newfbt->fbtp_ctl = ctl;
	newfbt->fbtp_loadcnt = ctl->mod_loadcnt;

	if (*patch_instr == FBT_POP_RBP) {
		newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
	} else {
		ASSERT(*patch_instr == FBT_LEAVE);
		newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
	}
	/* Offset of the return site within the function (arg0 of the probe). */
	newfbt->fbtp_roffset =
	(uintptr_t)(patch_instr - (uint8_t *)symbolStart);

	newfbt->fbtp_savedval = *patch_instr;
	newfbt->fbtp_patchval = FBT_PATCHVAL;
	newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
	fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;

	if (doenable)
		fbt_enable(NULL, newfbt->fbtp_id, newfbt);

	/* Keep scanning: a function can have multiple return points. */
	instr += size;
	goto again;
}
533 
534