xref: /xnu-8792.61.2/bsd/dev/i386/dtrace_isa.c (revision 42e220869062b56f8d7d0726fd4c88954f87902c)
1 /*
2  * Copyright (c) 2005-2018 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <kern/thread.h>
30 #include <mach/thread_status.h>
31 
32 typedef x86_saved_state_t savearea_t;
33 
34 #include <stdarg.h>
35 #include <string.h>
36 #include <sys/malloc.h>
37 #include <sys/time.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40 #include <sys/proc_internal.h>
41 #include <sys/kauth.h>
42 #include <sys/dtrace.h>
43 #include <sys/dtrace_impl.h>
44 #include <libkern/OSAtomic.h>
45 #include <i386/x86_hypercall.h>
46 #include <kern/thread_call.h>
47 #include <kern/task.h>
48 #include <kern/sched_prim.h>
49 #include <miscfs/devfs/devfs.h>
50 #include <mach/vm_param.h>
51 #include <machine/pal_routines.h>
52 #include <i386/cpuid.h>
53 #include <i386/mp.h>
54 #include <machine/trap.h>
55 
56 /*
57  * APPLE NOTE:  The regmap is used to decode which 64bit uregs[] register
58  * is being accessed when passed the 32bit uregs[] constant (based on
59  * the reg.d translator file). The dtrace_getreg() is smart enough to handle
60  * the register mappings.   The register set definitions are the same as
61  * those used by the fasttrap_getreg code.
62  */
63 #include "fasttrap_regset.h"
/* Maps the 19 i386 uregs[] indices (GS..SS) onto 64-bit register numbers. */
static const uint8_t regmap[19] = {
    REG_GS,		/* GS */
    REG_FS,		/* FS */
    REG_ES,		/* ES */
    REG_DS,		/* DS */
    REG_RDI,		/* EDI */
    REG_RSI,		/* ESI */
    REG_RBP,		/* EBP, REG_FP  */
    REG_RSP,		/* ESP */
    REG_RBX,		/* EBX */
    REG_RDX,		/* EDX, REG_R1  */
    REG_RCX,		/* ECX */
    REG_RAX,		/* EAX, REG_R0  */
    REG_TRAPNO,		/* TRAPNO */
    REG_ERR,		/* ERR */
    REG_RIP,		/* EIP, REG_PC  */
    REG_CS,		/* CS */
    REG_RFL,		/* EFL, REG_PS  */
    REG_RSP,		/* UESP, REG_SP */
    REG_SS		/* SS */
};
85 
86 extern dtrace_id_t      dtrace_probeid_error;   /* special ERROR probe */
87 
88 void
dtrace_probe_error(dtrace_state_t * state,dtrace_epid_t epid,int which,int fltoffs,int fault,uint64_t illval)89 dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
90     int fltoffs, int fault, uint64_t illval)
91 {
92     /*
93      * For the case of the error probe firing lets
94      * stash away "illval" here, and special-case retrieving it in DIF_VARIABLE_ARG.
95      */
96     state->dts_arg_error_illval = illval;
97     dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault );
98 }
99 
100 /*
101  * Atomicity and synchronization
102  */
/*
 * Producer-side memory barrier: "sfence" orders all prior stores before
 * any subsequent stores become globally visible.
 */
void
dtrace_membar_producer(void)
{
	__asm__ volatile("sfence");
}
108 
/*
 * Consumer-side memory barrier: "lfence" orders all prior loads before
 * any subsequent loads.
 */
void
dtrace_membar_consumer(void)
{
	__asm__ volatile("lfence");
}
114 
115 /*
116  * Interrupt manipulation
117  * XXX dtrace_getipl() can be called from probe context.
118  */
/*
 * Report whether we are at "interrupt priority level": 1 when running
 * in interrupt context, 0 otherwise.
 *
 * XXX Drat, get_interrupt_level is MACH_KERNEL_PRIVATE in
 * osfmk/kern/cpu_data.h, so approximate it via ml_at_interrupt_context().
 */
int
dtrace_getipl(void)
{
	if (ml_at_interrupt_context()) {
		return 1;
	}
	return 0;
}
129 
130 /*
131  * MP coordination
132  */
/*
 * Argument bundle handed to xcRemote() during a cross-CPU call: the
 * target cpu id (or DTRACE_CPUALL), the callback and its argument.
 */
typedef struct xcArg {
	processorid_t cpu;
	dtrace_xcall_t f;
	void *arg;
} xcArg_t;
138 
139 static void
xcRemote(void * foo)140 xcRemote( void *foo )
141 {
142 	xcArg_t *pArg = (xcArg_t *)foo;
143 
144 	if ( pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL ) {
145 		(pArg->f)(pArg->arg);
146 	}
147 }
148 
149 
150 /*
151  * dtrace_xcall() is not called from probe context.
152  */
153 void
dtrace_xcall(processorid_t cpu,dtrace_xcall_t f,void * arg)154 dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
155 {
156 	xcArg_t xcArg;
157 
158 	xcArg.cpu = cpu;
159 	xcArg.f = f;
160 	xcArg.arg = arg;
161 
162 	if (cpu == DTRACE_CPUALL) {
163 		mp_cpus_call (CPUMASK_ALL, ASYNC, xcRemote, (void*)&xcArg);
164 	}
165 	else {
166 		mp_cpus_call (cpu_to_cpumask((cpu_t)cpu), ASYNC, xcRemote, (void*)&xcArg);
167 	}
168 }
169 
170 /*
171  * Runtime and ABI
172  */
/*
 * Fetch one saved user register by DTrace register index.
 *
 * For 64-bit tasks, indices 0..SS are first remapped through regmap[]
 * so that the 32-bit reg.d constants resolve to the correct 64-bit
 * slot; indices above SS are taken as native 64-bit register numbers
 * biased by (SS + 1).  For 32-bit tasks the x86_saved_state32_t is
 * indexed directly as an array of 32-bit words starting at gs.
 *
 * Returns 0 and raises CPU_DTRACE_ILLOP on NULL savearea or an index
 * with no saved slot (DS/ES are not preserved for 64-bit tasks).
 */
uint64_t
dtrace_getreg(struct regs *savearea, uint_t reg)
{
	boolean_t is64Bit = proc_is64bit(current_proc());
	x86_saved_state_t *regs = (x86_saved_state_t *)savearea;

	if (regs == NULL) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	if (is64Bit) {
	    if (reg <= SS) {
		reg = regmap[reg];	/* 32-bit constant -> 64-bit register */
	    } else {
		reg -= (SS + 1);	/* native 64-bit register number */
	    }

	    switch (reg) {
	    case REG_RDI:
		return (uint64_t)(regs->ss_64.rdi);
	    case REG_RSI:
		return (uint64_t)(regs->ss_64.rsi);
	    case REG_RDX:
		return (uint64_t)(regs->ss_64.rdx);
	    case REG_RCX:
		return (uint64_t)(regs->ss_64.rcx);
	    case REG_R8:
		return (uint64_t)(regs->ss_64.r8);
	    case REG_R9:
		return (uint64_t)(regs->ss_64.r9);
	    case REG_RAX:
		return (uint64_t)(regs->ss_64.rax);
	    case REG_RBX:
		return (uint64_t)(regs->ss_64.rbx);
	    case REG_RBP:
		return (uint64_t)(regs->ss_64.rbp);
	    case REG_R10:
		return (uint64_t)(regs->ss_64.r10);
	    case REG_R11:
		return (uint64_t)(regs->ss_64.r11);
	    case REG_R12:
		return (uint64_t)(regs->ss_64.r12);
	    case REG_R13:
		return (uint64_t)(regs->ss_64.r13);
	    case REG_R14:
		return (uint64_t)(regs->ss_64.r14);
	    case REG_R15:
		return (uint64_t)(regs->ss_64.r15);
	    case REG_FS:
		return (uint64_t)(regs->ss_64.fs);
	    case REG_GS:
		return (uint64_t)(regs->ss_64.gs);
	    case REG_TRAPNO:
		return (uint64_t)(regs->ss_64.isf.trapno);
	    case REG_ERR:
		return (uint64_t)(regs->ss_64.isf.err);
	    case REG_RIP:
		return (uint64_t)(regs->ss_64.isf.rip);
	    case REG_CS:
		return (uint64_t)(regs->ss_64.isf.cs);
	    case REG_SS:
		return (uint64_t)(regs->ss_64.isf.ss);
	    case REG_RFL:
		return (uint64_t)(regs->ss_64.isf.rflags);
	    case REG_RSP:
		return (uint64_t)(regs->ss_64.isf.rsp);
	    case REG_DS:
	    case REG_ES:
	    default:
		/* No saved slot for this index. */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	    }

	} else {   /* is 32bit user */
		/* beyond register SS */
		if (reg > x86_SAVED_STATE32_COUNT - 1) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}
		/* ss_32 is laid out gs-first; index it as a word array. */
		return (uint64_t)((unsigned int *)(&(regs->ss_32.gs)))[reg];
	}
}
256 
/*
 * Read a VMCS field via the "vmread" instruction (backs DTrace's
 * vmregs[] variable).  Returns 0 on any failure, raising
 * CPU_DTRACE_BADADDR with the offending index when vmread itself
 * reports failure.
 */
uint64_t
dtrace_getvmreg(uint_t ndx)
{
	uint64_t reg = 0;
	bool failed = false;

	/*
	 * Any change in the vmread final opcode must be reflected in
	 * dtrace_handle_trap below, which recognizes this exact 3-byte
	 * encoding and skips it when it raises #UD.
	 *
	 * "ja 1f" is taken only when CF == 0 and ZF == 0, i.e. when
	 * vmread succeeded; otherwise we fall through and set failed.
	 */
	__asm__ __volatile__(
		"vmread %2, %0\n"
		"ja 1f\n"
		"mov $1, %1\n"
		"1:\n"
	: "=a" (reg), "+r" (failed) : "D" ((uint64_t)ndx));

	/*
	 * Check for fault in vmread first. If DTrace has recovered the fault caused
	 * by vmread above then the value in failed will be unreliable.
	 */
	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ILLOP)) {
		return 0;
	}

	/* If vmread executed but failed because CF or ZF is 1, report failure. */
	if (failed) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = ndx;
		return 0;
	}

	return reg;
}
288 
/*
 * Issue the "vmcall" hypercall using the register file described by
 * *regs.  The hypercall's result registers are written back into *regs
 * and the post-call rflags value is returned through *rflags.
 */
static void
dtrace_vmcall(x86_saved_state_t *regs, uint64_t *rflags)
{
	uint64_t flags = 0;

	/*
	 * No constraints available for r8 or r9 which means they must be
	 * handled explicitly.
	 */
	__asm__ volatile (
	     "   movq %12, %%r8  \n"
	     "   movq %13, %%r9  \n"
	     "   vmcall          \n"
	     "   movq %%r8, %5   \n"
	     "   movq %%r9, %6   \n"
	     "   pushfq          \n"
	     "   popq %7         \n"

	    : "=a" (regs->ss_64.rax),
	      "=D" (regs->ss_64.rdi),
	      "=S" (regs->ss_64.rsi),
	      "=d" (regs->ss_64.rdx),
	      "=c" (regs->ss_64.rcx),
	      "=r" (regs->ss_64.r8),  /* %5 */
	      "=r" (regs->ss_64.r9),  /* %6 */
	      "=r" (flags)            /* %7 */

	    : "a"  (regs->ss_64.rax),
	      "D"  (regs->ss_64.rdi),
	      "S"  (regs->ss_64.rsi),
	      "d"  (regs->ss_64.rdx),
	      "c"  (regs->ss_64.rcx),
	      "r"  (regs->ss_64.r8),  /* %12 */
	      "r"  (regs->ss_64.r9)   /* %13 */

	    : "memory", "r8", "r9");

	*rflags = flags;

	return;
}
330 
/*
 * Execute CPUID with the leaf/subleaf loaded from regs (rax/rcx) and
 * write the resulting eax/ebx/ecx/edx back into regs.
 */
static inline void
dtrace_cpuid(x86_saved_state_t *regs)
{
	__asm__ volatile (
	     "cpuid"
	    : "=a" (regs->ss_64.rax),
	      "=b" (regs->ss_64.rbx),
	      "=c" (regs->ss_64.rcx),
	      "=d" (regs->ss_64.rdx)

	    : "a"  (regs->ss_64.rax),
	      "b"  (regs->ss_64.rbx),
	      "c"  (regs->ss_64.rcx),
	      "d"  (regs->ss_64.rdx));
}
346 
/*
 * Detect whether the Apple paravirtualization interface ("apple-pv-xnu")
 * is present and advertises the requested feature flag.
 *
 * The probing is done once and cached in function-local statics.
 * NOTE(review): the checked/features pair is not synchronized; assumed
 * benign because concurrent callers would compute identical values —
 * confirm against the call sites.
 */
static bool
dtrace_applepv_available(uint64_t flag)
{
	static bool checked = false;
	static uint64_t features = 0;

	if (checked) {
		return (features & flag) != 0;
	}

	x86_saved_state_t regs = {0};

	/* CPUID leaf 1: basic feature information. */
	regs.ss_64.rax = 1;
	dtrace_cpuid(&regs);

	/* Bit 31 (of ECX) - HV bit: a hypervisor is present. */
	if ((regs.ss_64.rcx & _Bit(31)) != 0) {
		/* Scan the hypervisor leaf ranges for the Apple PV signature. */
		for (uint32_t base = 0x40000100; base < 0x40010000; base += 0x100) {
			regs.ss_64.rax = base;
			dtrace_cpuid(&regs);

			/* "apple-pv-xnu" packed little-endian into EBX:ECX:EDX */
			if (regs.ss_64.rbx != 0x6c707061 ||
			    regs.ss_64.rcx != 0x76702d65 ||
			    regs.ss_64.rdx != 0x756e782d) {
				continue;
			}

			/* EAX of the signature leaf names the feature leaf. */
			uint64_t feature_leaf = regs.ss_64.rax;

			regs.ss_64.rax = base + APPLEPV_INTERFACE_LEAF_INDEX;
			dtrace_cpuid(&regs);

			/* Interface signature must be "AH#1" */
			if (regs.ss_64.rax != 0x31234841) {
				continue;
			}

			/* Find features. */
			regs.ss_64.rax = feature_leaf;
			dtrace_cpuid(&regs);

			features = regs.ss_64.rdx;
			break;
		}
	}

	checked = true;
	return (features & flag) != 0;
}
397 
/*
 * Trigger a guest coredump via the Apple PV "trigger dump" hypercall,
 * returning the dump's filename through filename/len.
 *
 * filename is left empty when the hypercall fails; if the coredump
 * feature is absent, CPU_DTRACE_ILLOP is raised as well.
 */
void
dtrace_livedump(char *filename, size_t len)
{
	x86_saved_state_t regs = {
	    .ss_64.rax = HVG_HCALL_CODE(HVG_HCALL_TRIGGER_DUMP),
	    .ss_64.rdi = HVG_HCALL_DUMP_OPTION_REGULAR,
	};

	/* Pre-set the empty string so all failure paths report "no dump". */
	if (len > 0) {
		filename[0] = '\0';
	}

	if (!dtrace_applepv_available(CPUID_LEAF_FEATURE_COREDUMP)) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return;
	}

	uint64_t rflags = 0;
	dtrace_vmcall(&regs, &rflags);
	if ((rflags & EFL_CF) != 0) {
		/* An empty filename indicates failure to dump. */
		return;
	}

	/*
	 * Extract the filename: up to 56 bytes packed into the seven
	 * returned registers; the zeroed tail of str guarantees
	 * NUL-termination.
	 */
	char str[57] = {'\0'};
	memcpy(&str[0],  &regs.ss_64.rax, 8);
	memcpy(&str[8],  &regs.ss_64.rdi, 8);
	memcpy(&str[16], &regs.ss_64.rsi, 8);
	memcpy(&str[24], &regs.ss_64.rdx, 8);
	memcpy(&str[32], &regs.ss_64.rcx, 8);
	memcpy(&str[40], &regs.ss_64.r8,  8);
	memcpy(&str[48], &regs.ss_64.r9,  8);

	(void) strlcpy(filename, str, len);
}
434 
/*
 * Byte offset from a user frame pointer to the saved return address,
 * for 32-bit and 64-bit user frames respectively.
 */
#define RETURN_OFFSET 4
#define RETURN_OFFSET64 8
437 
/*
 * Walk a user-space frame-pointer chain starting at (pc, sp), storing
 * up to pcstack_limit frames into pcstack (when non-NULL), and return
 * the number of frames seen.
 *
 * The walk stops when pc or sp becomes 0, when the buffer fills, or
 * when a user-memory fetch faults — in which case the per-CPU fault
 * flag is cleared and the stack is silently truncated.
 */
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc,
    user_addr_t sp)
{
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

#if 0
	uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack crawl */
	size_t s1, s2;
#endif
	int ret = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	ASSERT(pcstack == NULL || pcstack_limit > 0);

#if 0 /* XXX signal stack crawl */
	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	while (pc != 0) {
		ret++;
		if (pcstack != NULL) {
			*pcstack++ = (uint64_t)pc;
			pcstack_limit--;
			if (pcstack_limit <= 0)
				break;
		}

		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext32_t *ucp = (ucontext32_t *)oldcontext;
				greg32_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		}
		else
#endif
		{
			/*
			 * Return address sits RETURN_OFFSET{,64} above the
			 * frame pointer; the saved frame pointer is at *sp.
			 */
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

		/* Truncate ustack if the iterator causes fault. */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

	return (ret);
}
517 
518 
519 /*
520  * The return value indicates if we've modified the stack.
521  */
/*
 * The return value indicates if we've modified the stack.
 *
 * Fix up the top of a user stack walk: in an entry probe the frame
 * pointer has not yet been pushed, so the current pc is recorded as a
 * "missing top of stack" and *pc is rewound to the caller (read from
 * *sp).  Outside entry probes, a previously recorded missing-TOS
 * (cpuc_missing_tos) is emitted if present.  pcstack/pcstack_limit may
 * be NULL when only the count adjustment is wanted.
 */
static int
dtrace_adjust_stack(uint64_t **pcstack, int *pcstack_limit, user_addr_t *pc,
                    user_addr_t sp)
{
    volatile uint16_t *flags = (volatile uint16_t *) &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
    int64_t missing_tos;
    int rc = 0;
    boolean_t is64Bit = proc_is64bit(current_proc());

    ASSERT(pc != NULL);

    if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
        /*
         * If we found ourselves in an entry probe, the frame pointer has not
         * yet been pushed (that happens in the
         * function prologue).  The best approach is to
	 * add the current pc as a missing top of stack,
         * and back the pc up to the caller, which is stored  at the
         * current stack pointer address since the call
         * instruction puts it there right before
         * the branch.
         */

        missing_tos = *pc;

        if (is64Bit)
            *pc = dtrace_fuword64(sp);
        else
            *pc = dtrace_fuword32(sp);

	/* Truncate ustack if the iterator causes fault. */
	if (*flags & CPU_DTRACE_FAULT) {
		*flags &= ~CPU_DTRACE_FAULT;
	}
    } else {
        /*
         * We might have a top of stack override, in which case we just
         * add that frame without question to the top.  This
         * happens in return probes where you have a valid
         * frame pointer, but it's for the callers frame
         * and you'd like to add the pc of the return site
         * to the frame.
         */
        missing_tos = cpu_core[CPU->cpu_id].cpuc_missing_tos;
    }

    if (missing_tos != 0) {
        if (pcstack != NULL && pcstack_limit != NULL) {
            /*
	     * If the missing top of stack has been filled out, then
	     * we add it and adjust the size.
             */
	    *(*pcstack)++ = missing_tos;
	    (*pcstack_limit)--;
	}
        /*
	 * return 1 because we would have changed the
	 * stack whether or not it was passed in.  This
	 * ensures the stack count is correct
	 */
         rc = 1;
    }
    return rc;
}
586 
/*
 * Capture the current thread's user-mode call stack into pcstack.
 *
 * Slot 0 holds the pid; remaining slots are frames walked from the user
 * frame pointer.  Any unused tail slots are zeroed, including when no
 * user context exists at all.
 */
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	int n;
	boolean_t is64Bit = proc_is64bit(current_proc());

	/* Bail out if a previous access has already faulted. */
	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	/* Make sure the saved user register state is current before reading it. */
	pal_register_cache_state(thread, VALID);
	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	/* Slot 0: pid of the traced process. */
	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

        /*
	 * The return value indicates if we've modified the stack.
	 * Since there is nothing else to fix up in either case,
	 * we can safely ignore it here.
	 */
	(void)dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp);

	if(pcstack_limit <= 0)
	    return;

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */
	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
	ASSERT(n >= 0);
	ASSERT(n <= pcstack_limit);

	pcstack += n;
	pcstack_limit -= n;

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
658 
/*
 * Count the depth of the current thread's user stack without recording
 * the frames.  Returns 0 when there is no user context and -1 when a
 * fault is already pending.
 */
int
dtrace_getustackdepth(void)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	int n = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (thread == NULL)
		return 0;

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (-1);

	/* Make sure the saved user register state is current before reading it. */
	pal_register_cache_state(thread, VALID);
	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		return 0;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

	if (dtrace_adjust_stack(NULL, NULL, &pc, sp) == 1) {
	    /*
	     * we would have adjusted the stack if we had
	     * supplied one (that is what rc == 1 means).
	     * Also, as a side effect, the pc might have
	     * been fixed up, which is good for calling
	     * in to dtrace_getustack_common.
	     */
	    n++;
	}

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */

	n += dtrace_getustack_common(NULL, 0, pc, fp);

	return (n);
}
711 
/*
 * Capture the current thread's user stack with frame pointers: pcstack
 * receives pid + pcs, fpstack the corresponding frame pointers.  Unused
 * pcstack tail slots are zeroed.
 *
 * NOTE(review): fpstack is advanced in lockstep with pcstack but only
 * pcstack_limit is checked — callers must size fpstack at least as
 * large as pcstack.
 */
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	savearea_t *regs;
	user_addr_t pc, sp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if 0
	uintptr_t oldcontext;
	size_t s1, s2;
#endif
	boolean_t is64Bit = proc_is64bit(current_proc());

	/* Bail out if a previous access has already faulted. */
	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	regs = (savearea_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	/* Slot 0: pid of the traced process. */
	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	/*
	 * NOTE(review): unlike dtrace_getupcstack(), this reads the
	 * 32-bit saved-state members unconditionally (and seeds the walk
	 * from ebp), even though is64Bit is consulted for the frame walk
	 * below — confirm this is intended for 64-bit tasks.
	 */
	pc = regs->ss_32.eip;
	sp = regs->ss_32.ebp;

#if 0 /* XXX signal stack crawl */
	oldcontext = lwp->lwp_oldcontext;

	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	if(dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp) == 1) {
            /*
	     * we made a change.
	     */
	    *fpstack++ = 0;
	    if (pcstack_limit <= 0)
		return;
	}

	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = sp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		}
		else
#endif
		{
			/*
			 * Return address sits RETURN_OFFSET{,64} above the
			 * frame pointer; the saved frame pointer is at *sp.
			 */
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

		/* Truncate ustack if the iterator causes fault. */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

zero:
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
825 
/*
 * Capture the kernel call stack into pcstack (at most pcstack_limit
 * entries) by walking frame pointers from the current frame.
 *
 * aframes counts artificial (DTrace-internal) frames to skip; when the
 * last one is consumed and cpu_dtrace_caller is set, that saved caller
 * is emitted in its place.  intrpc, when non-NULL, is recorded as the
 * first entry.  A walk that begins on the interrupt stack hops over to
 * the thread's kernel stack when the chain leaves the interrupt stack
 * bounds.  Unused tail slots are zeroed.
 */
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
		  uint32_t *intrpc)
{
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	struct frame *nextfp, *minfp, *stacktop;
	int depth = 0;
	int last = 0;
	uintptr_t pc;
	uintptr_t caller = CPU->cpu_dtrace_caller;
	int on_intr;

	/* Pick the bounds of the stack we are currently on. */
	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
		stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
	else
		stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);

	minfp = fp;

	/* Skip this function's own frame as well. */
	aframes++;

	if (intrpc != NULL && depth < pcstack_limit)
		pcstack[depth++] = (pc_t)intrpc;

	while (depth < pcstack_limit) {
		nextfp = *(struct frame **)fp;
		pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64);

		/* Sanity: the chain must move strictly upward within bounds. */
		if (nextfp <= minfp || nextfp >= stacktop) {
			if (on_intr) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

				minfp = (struct frame *)kstack_base;
				stacktop = (struct frame *)(kstack_base + kernel_stack_size);

				on_intr = 0;
				continue;
			}
			/*
			 * This is the last frame we can process; indicate
			 * that we should return after processing this frame.
			 */
			last = 1;
		}

		if (aframes > 0) {
			if (--aframes == 0 && caller != 0) {
				/*
				 * We've just run out of artificial frames,
				 * and we have a valid caller -- fill it in
				 * now.
				 */
				ASSERT(depth < pcstack_limit);
				pcstack[depth++] = (pc_t)caller;
				caller = 0;
			}
		} else {
			if (depth < pcstack_limit)
				pcstack[depth++] = (pc_t)pc;
		}

		if (last) {
			/* Zero the remainder and stop. */
			while (depth < pcstack_limit)
				pcstack[depth++] = 0;
			return;
		}

		fp = nextfp;
		minfp = fp;
	}
}
900 
/*
 * Minimal view of an x86 stack frame: the saved frame pointer followed
 * by the return address pushed by the call instruction.
 */
struct frame {
	struct frame *backchain;
	uintptr_t retaddr;
};
905 
/*
 * Fetch argument "arg" of the probe that fired, aframes artificial
 * frames above us.
 *
 * If the walk crosses the dtrace_invop callsite (we got here via an
 * INVOP trap), the arguments are recovered from the trap-saved state:
 * args 0..5 from the saved register area starting at rdi, the rest from
 * the interrupted stack.  Otherwise the provider called dtrace_probe()
 * directly and only stack-passed arguments (index > 5, past the probe
 * ID) can be recovered from our own frame chain.
 */
uint64_t
dtrace_getarg(int arg, int aframes, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t val = 0;
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	uintptr_t *stack;
	uintptr_t pc;
	int i;


    /*
     * A total of 6 arguments are passed via registers; any argument with
     * index of 5 or lower is therefore in a register.
     */
    int inreg = 5;

	for (i = 1; i <= aframes; i++) {
		fp = fp->backchain;
		pc = fp->retaddr;

		if (dtrace_invop_callsite_pre != NULL
			&& pc  >  (uintptr_t)dtrace_invop_callsite_pre
			&& pc  <= (uintptr_t)dtrace_invop_callsite_post) {
			/*
			 * In the case of x86_64, we will use the pointer to the
			 * save area structure that was pushed when we took the
			 * trap.  To get this structure, we must increment
			 * beyond the frame structure. If the
			 * argument that we're seeking is passed on the stack,
			 * we'll pull the true stack pointer out of the saved
			 * registers and decrement our argument by the number
			 * of arguments passed in registers; if the argument
			 * we're seeking is passed in registers, we can just
			 * load it directly.
			 */

			/* fp points to frame of dtrace_invop() activation. */
			fp = fp->backchain; /* to fbt_perfcallback() activation. */
			fp = fp->backchain; /* to kernel_trap() activation. */
			fp = fp->backchain; /* to trap_from_kernel() activation. */

			/* The saved state was pushed just above that frame. */
			x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)&fp[1];
			x86_saved_state64_t *saved_state = saved_state64(tagged_regs);

			if (arg <= inreg) {
				stack = (uintptr_t *)(void*)&saved_state->rdi;
			} else {
				fp = (struct frame *)(saved_state->isf.rsp);
				stack = (uintptr_t *)&fp[1]; /* Find marshalled
								arguments */
				arg -= inreg + 1;
			}
			goto load;
		}
	}

	/*
	 * We know that we did not come through a trap to get into
	 * dtrace_probe() --  We arrive here when the provider has
	 * called dtrace_probe() directly.
	 * The probe ID is the first argument to dtrace_probe().
	 * We must advance beyond that to get the argX.
	 */
	arg++; /* Advance past probeID */

	if (arg <= inreg) {
		/*
		 * This shouldn't happen.  If the argument is passed in a
		 * register then it should have been, well, passed in a
		 * register...
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	arg -= (inreg + 1);
	stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */

load:
	/* Only load the argument slot if DTrace deems the address safe. */
	if (dtrace_canload((uint64_t)(stack + arg), sizeof(uint64_t),
		mstate, vstate)) {
		/* dtrace_probe arguments arg0 ... arg4 are 64bits wide */
		val = dtrace_load64((uint64_t)(stack + arg));
	}

	return (val);
}
993 
994 /*
995  * Load/Store Safety
996  */
997 void
dtrace_toxic_ranges(void (* func)(uintptr_t base,uintptr_t limit))998 dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
999 {
1000 	/*
1001 	 * "base" is the smallest toxic address in the range, "limit" is the first
1002 	 * VALID address greater than "base".
1003 	 */
1004 	func(0x0, VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1005 	if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0)
1006 			func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0);
1007 }
1008 
1009 /*
1010  * Trap Safety
1011  */
1012 extern boolean_t dtrace_handle_trap(int, x86_saved_state_t *);
1013 
/*
 * Give DTrace a chance to consume a kernel trap taken while a probe is
 * executing with CPU_DTRACE_NOFAULT set.  Returns TRUE when the trap
 * was handled (and the faulting instruction skipped), FALSE otherwise.
 */
boolean_t
dtrace_handle_trap(int trapno, x86_saved_state_t *state)
{
	x86_saved_state64_t *saved_state = saved_state64(state);

	if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)) {
		return FALSE;
	}

	/*
	 * General purpose solution would require pulling in disassembler. Right now there
	 * is only one specific case to be handled so it is hardcoded here:
	 * the vmread issued by dtrace_getvmreg() raising #UD.
	 */
	if (trapno == T_INVALID_OPCODE) {
		uint8_t *inst = (uint8_t *)saved_state->isf.rip;

		/* vmread %rdi, %rax */
		if (inst[0] == 0x0f && inst[1] == 0x78 && inst[2] == 0xf8) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			/* Skip the 3-byte instruction and resume execution. */
			saved_state->isf.rip += 3;
			return TRUE;
		}
	}

	return FALSE;
}
1040