1 /*
2 * Copyright (c) 2005-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <kern/thread.h>
30 #include <mach/thread_status.h>
31
32 typedef x86_saved_state_t savearea_t;
33
34 #include <stdarg.h>
35 #include <string.h>
36 #include <sys/malloc.h>
37 #include <sys/time.h>
38 #include <sys/systm.h>
39 #include <sys/proc.h>
40 #include <sys/proc_internal.h>
41 #include <sys/kauth.h>
42 #include <sys/dtrace.h>
43 #include <sys/dtrace_impl.h>
44 #include <libkern/OSAtomic.h>
45 #include <i386/x86_hypercall.h>
46 #include <kern/thread_call.h>
47 #include <kern/task.h>
48 #include <kern/sched_prim.h>
49 #include <miscfs/devfs/devfs.h>
50 #include <mach/vm_param.h>
51 #include <machine/pal_routines.h>
52 #include <i386/cpuid.h>
53 #include <i386/mp.h>
54 #include <machine/trap.h>
55
56 /*
57 * APPLE NOTE: The regmap is used to decode which 64bit uregs[] register
58 * is being accessed when passed the 32bit uregs[] constant (based on
59 * the reg.d translator file). The dtrace_getreg() is smart enough to handle
60 * the register mappings. The register set definitions are the same as
61 * those used by the fasttrap_getreg code.
62 */
63 #include "fasttrap_regset.h"
static const uint8_t regmap[19] = {
	REG_GS,		/* GS */
	REG_FS,		/* FS */
	REG_ES,		/* ES */
	REG_DS,		/* DS */
	REG_RDI,	/* EDI */
	REG_RSI,	/* ESI */
	REG_RBP,	/* EBP, REG_FP */
	REG_RSP,	/* ESP */
	REG_RBX,	/* EBX */
	REG_RDX,	/* EDX, REG_R1 */
	REG_RCX,	/* ECX */
	REG_RAX,	/* EAX, REG_R0 */
	REG_TRAPNO,	/* TRAPNO */
	REG_ERR,	/* ERR */
	REG_RIP,	/* EIP, REG_PC */
	REG_CS,		/* CS */
	REG_RFL,	/* EFL, REG_PS */
	REG_RSP,	/* UESP, REG_SP */
	REG_SS		/* SS */
};
85
86 extern dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
87
88 void
dtrace_probe_error(dtrace_state_t * state,dtrace_epid_t epid,int which,int fltoffs,int fault,uint64_t illval)89 dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
90 int fltoffs, int fault, uint64_t illval)
91 {
92 /*
93 * For the case of the error probe firing lets
94 * stash away "illval" here, and special-case retrieving it in DIF_VARIABLE_ARG.
95 */
96 state->dts_arg_error_illval = illval;
97 dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault );
98 }
99
100 /*
101 * Atomicity and synchronization
102 */
void
dtrace_membar_producer(void)
{
	/* Store fence: order all prior stores before any later stores. */
	__asm__ volatile("sfence");
}
108
void
dtrace_membar_consumer(void)
{
	/* Load fence: order all prior loads before any later loads. */
	__asm__ volatile("lfence");
}
114
115 /*
116 * Interrupt manipulation
117 * XXX dtrace_getipl() can be called from probe context.
118 */
int
dtrace_getipl(void)
{
	/*
	 * XXX Drat, get_interrupt_level is MACH_KERNEL_PRIVATE
	 * in osfmk/kern/cpu_data.h, so approximate the interrupt
	 * priority level as a boolean instead:
	 * 1 when at interrupt context, 0 otherwise.
	 */
	if (ml_at_interrupt_context()) {
		return 1;
	}
	return 0;
}
129
130 /*
131 * MP coordination
132 */
/* Argument bundle marshalled through mp_cpus_call() to xcRemote(). */
typedef struct xcArg {
	processorid_t cpu;	/* target CPU id, or DTRACE_CPUALL */
	dtrace_xcall_t f;	/* function to invoke on the target CPU */
	void *arg;		/* opaque argument handed to f */
} xcArg_t;
138
139 static void
xcRemote(void * foo)140 xcRemote( void *foo )
141 {
142 xcArg_t *pArg = (xcArg_t *)foo;
143
144 if ( pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL ) {
145 (pArg->f)(pArg->arg);
146 }
147 }
148
149
150 /*
151 * dtrace_xcall() is not called from probe context.
152 */
153 void
dtrace_xcall(processorid_t cpu,dtrace_xcall_t f,void * arg)154 dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
155 {
156 xcArg_t xcArg;
157
158 xcArg.cpu = cpu;
159 xcArg.f = f;
160 xcArg.arg = arg;
161
162 if (cpu == DTRACE_CPUALL) {
163 mp_cpus_call (CPUMASK_ALL, ASYNC, xcRemote, (void*)&xcArg);
164 }
165 else {
166 mp_cpus_call (cpu_to_cpumask((cpu_t)cpu), ASYNC, xcRemote, (void*)&xcArg);
167 }
168 }
169
170 /*
171 * Runtime and ABI
172 */
/*
 * Fetch a saved user register by reg.d index.  For 64-bit processes, 32-bit
 * constants (<= SS) are first translated through regmap[]; larger values are
 * rebased past the 32-bit set.  For 32-bit processes the index addresses the
 * x86_saved_state32 layout directly (starting at gs).  Sets CPU_DTRACE_ILLOP
 * and returns 0 on a NULL savearea or an unmappable register.
 */
uint64_t
dtrace_getreg(struct regs *savearea, uint_t reg)
{
	boolean_t is64Bit = proc_is64bit(current_proc());
	x86_saved_state_t *regs = (x86_saved_state_t *)savearea;

	if (regs == NULL) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	if (is64Bit) {
		/* Translate a 32-bit register constant to its 64-bit slot. */
		if (reg <= SS) {
			reg = regmap[reg];
		} else {
			reg -= (SS + 1);
		}

		switch (reg) {
		case REG_RDI:
			return (uint64_t)(regs->ss_64.rdi);
		case REG_RSI:
			return (uint64_t)(regs->ss_64.rsi);
		case REG_RDX:
			return (uint64_t)(regs->ss_64.rdx);
		case REG_RCX:
			return (uint64_t)(regs->ss_64.rcx);
		case REG_R8:
			return (uint64_t)(regs->ss_64.r8);
		case REG_R9:
			return (uint64_t)(regs->ss_64.r9);
		case REG_RAX:
			return (uint64_t)(regs->ss_64.rax);
		case REG_RBX:
			return (uint64_t)(regs->ss_64.rbx);
		case REG_RBP:
			return (uint64_t)(regs->ss_64.rbp);
		case REG_R10:
			return (uint64_t)(regs->ss_64.r10);
		case REG_R11:
			return (uint64_t)(regs->ss_64.r11);
		case REG_R12:
			return (uint64_t)(regs->ss_64.r12);
		case REG_R13:
			return (uint64_t)(regs->ss_64.r13);
		case REG_R14:
			return (uint64_t)(regs->ss_64.r14);
		case REG_R15:
			return (uint64_t)(regs->ss_64.r15);
		case REG_FS:
			return (uint64_t)(regs->ss_64.fs);
		case REG_GS:
			return (uint64_t)(regs->ss_64.gs);
		case REG_TRAPNO:
			return (uint64_t)(regs->ss_64.isf.trapno);
		case REG_ERR:
			return (uint64_t)(regs->ss_64.isf.err);
		case REG_RIP:
			return (uint64_t)(regs->ss_64.isf.rip);
		case REG_CS:
			return (uint64_t)(regs->ss_64.isf.cs);
		case REG_SS:
			return (uint64_t)(regs->ss_64.isf.ss);
		case REG_RFL:
			return (uint64_t)(regs->ss_64.isf.rflags);
		case REG_RSP:
			return (uint64_t)(regs->ss_64.isf.rsp);
		case REG_DS:
		case REG_ES:
		default:
			/* DS/ES are not saved in the 64-bit state. */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}

	} else { /* is 32bit user */
		/* beyond register SS */
		if (reg > x86_SAVED_STATE32_COUNT - 1) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}
		/* Index directly into the 32-bit save area, starting at gs. */
		return (uint64_t)((unsigned int *)(&(regs->ss_32.gs)))[reg];
	}
}
256
/*
 * Read a VMCS field via VMREAD.  If the instruction faults (e.g. not in VMX
 * root operation), dtrace_handle_trap() recovers it and sets
 * CPU_DTRACE_ILLOP; if VMREAD executes but signals failure via CF/ZF, report
 * CPU_DTRACE_BADADDR with the offending index.  Returns 0 on any failure.
 */
uint64_t
dtrace_getvmreg(uint_t ndx)
{
	uint64_t reg = 0;
	bool failed = false;

	/* Any change in the vmread final opcode must be reflected in dtrace_handle_trap below. */
	__asm__ __volatile__(
		"vmread %2, %0\n"
		"ja 1f\n"
		"mov $1, %1\n"
		"1:\n"
	: "=a" (reg), "+r" (failed) : "D" ((uint64_t)ndx));

	/*
	 * Check for fault in vmreg first. If DTrace has recovered the fault cause by
	 * vmread above then the value in failed will be unreliable.
	 */
	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ILLOP)) {
		return 0;
	}

	/* If vmread succeeded but failed because CF or ZF is 1 report fail. */
	if (failed) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = ndx;
		return 0;
	}

	return reg;
}
288
/*
 * Issue a VMCALL hypercall using the register state in *regs, writing the
 * hypervisor's outputs back into *regs and the resulting RFLAGS into *rflags.
 * The asm constraint ordering (%0-%13) is load-bearing; do not reorder.
 */
static void
dtrace_vmcall(x86_saved_state_t *regs, uint64_t *rflags)
{
	uint64_t flags = 0;

	/*
	 * No constraints available for r8 or r9 which means they must be
	 * handled explicitly.
	 */
	__asm__ volatile (
		" movq %12, %%r8 \n"
		" movq %13, %%r9 \n"
		" vmcall \n"
		" movq %%r8, %5 \n"
		" movq %%r9, %6 \n"
		" pushfq \n"
		" popq %7 \n"

	: "=a" (regs->ss_64.rax),
	  "=D" (regs->ss_64.rdi),
	  "=S" (regs->ss_64.rsi),
	  "=d" (regs->ss_64.rdx),
	  "=c" (regs->ss_64.rcx),
	  "=r" (regs->ss_64.r8),  /* %5 */
	  "=r" (regs->ss_64.r9),  /* %6 */
	  "=r" (flags)            /* %7 */

	: "a" (regs->ss_64.rax),
	  "D" (regs->ss_64.rdi),
	  "S" (regs->ss_64.rsi),
	  "d" (regs->ss_64.rdx),
	  "c" (regs->ss_64.rcx),
	  "r" (regs->ss_64.r8),   /* %12 */
	  "r" (regs->ss_64.r9)    /* %13 */

	: "memory", "r8", "r9");

	*rflags = flags;

	return;
}
330
/*
 * Execute CPUID with eax/ebx/ecx/edx taken from *regs, storing the results
 * back into the same fields.
 */
static inline void
dtrace_cpuid(x86_saved_state_t *regs)
{
	__asm__ volatile (
		"cpuid"
		: "=a" (regs->ss_64.rax),
		  "=b" (regs->ss_64.rbx),
		  "=c" (regs->ss_64.rcx),
		  "=d" (regs->ss_64.rdx)

		: "a" (regs->ss_64.rax),
		  "b" (regs->ss_64.rbx),
		  "c" (regs->ss_64.rcx),
		  "d" (regs->ss_64.rdx));
}
346
347 static bool
dtrace_applepv_available(uint64_t flag)348 dtrace_applepv_available(uint64_t flag)
349 {
350 static bool checked = false;
351 static uint64_t features = 0;
352
353 if (checked) {
354 return (features & flag) != 0;
355 }
356
357 x86_saved_state_t regs = {0};
358
359 regs.ss_64.rax = 1;
360 dtrace_cpuid(®s);
361
362 /* Bit 31 - HV bit. */
363 if ((regs.ss_64.rcx & _Bit(31)) != 0) {
364 for (uint32_t base = 0x40000100; base < 0x40010000; base += 0x100) {
365 regs.ss_64.rax = base;
366 dtrace_cpuid(®s);
367
368 /* "apple-pv-xnu" */
369 if (regs.ss_64.rbx != 0x6c707061 ||
370 regs.ss_64.rcx != 0x76702d65 ||
371 regs.ss_64.rdx != 0x756e782d) {
372 continue;
373 }
374
375 uint64_t feature_leaf = regs.ss_64.rax;
376
377 regs.ss_64.rax = base + APPLEPV_INTERFACE_LEAF_INDEX;
378 dtrace_cpuid(®s);
379
380 /* "AH#1" */
381 if (regs.ss_64.rax != 0x31234841) {
382 continue;
383 }
384
385 /* Find features. */
386 regs.ss_64.rax = feature_leaf;
387 dtrace_cpuid(®s);
388
389 features = regs.ss_64.rdx;
390 break;
391 }
392 }
393
394 checked = true;
395 return (features & flag) != 0;
396 }
397
398 void
dtrace_livedump(char * filename,size_t len)399 dtrace_livedump(char *filename, size_t len)
400 {
401 x86_saved_state_t regs = {
402 .ss_64.rax = HVG_HCALL_CODE(HVG_HCALL_TRIGGER_DUMP),
403 .ss_64.rdi = HVG_HCALL_DUMP_OPTION_REGULAR,
404 };
405
406 if (len > 0) {
407 filename[0] = '\0';
408 }
409
410 if (!dtrace_applepv_available(CPUID_LEAF_FEATURE_COREDUMP)) {
411 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
412 return;
413 }
414
415 uint64_t rflags = 0;
416 dtrace_vmcall(®s, &rflags);
417 if ((rflags & EFL_CF) != 0) {
418 /* An empty filename indicates failure to dump. */
419 return;
420 }
421
422 /* Extract the filename. */
423 char str[57] = {'\0'};
424 memcpy(&str[0], ®s.ss_64.rax, 8);
425 memcpy(&str[8], ®s.ss_64.rdi, 8);
426 memcpy(&str[16], ®s.ss_64.rsi, 8);
427 memcpy(&str[24], ®s.ss_64.rdx, 8);
428 memcpy(&str[32], ®s.ss_64.rcx, 8);
429 memcpy(&str[40], ®s.ss_64.r8, 8);
430 memcpy(&str[48], ®s.ss_64.r9, 8);
431
432 (void) strlcpy(filename, str, len);
433 }
434
435 #define RETURN_OFFSET 4
436 #define RETURN_OFFSET64 8
437
/*
 * Walk a user-mode frame-pointer chain starting at (pc, sp), recording up to
 * pcstack_limit return addresses into pcstack (when non-NULL) and returning
 * the number of frames seen.  Reads user memory with dtrace_fuword32/64; any
 * fault truncates the walk and clears CPU_DTRACE_FAULT.
 */
static int
dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc,
    user_addr_t sp)
{
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

#if 0
	uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack crawl */
	size_t s1, s2;
#endif
	int ret = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	ASSERT(pcstack == NULL || pcstack_limit > 0);

#if 0 /* XXX signal stack crawl */
	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	while (pc != 0) {
		ret++;
		if (pcstack != NULL) {
			*pcstack++ = (uint64_t)pc;
			pcstack_limit--;
			if (pcstack_limit <= 0)
				break;
		}

		/* A zero frame pointer terminates the chain. */
		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext32_t *ucp = (ucontext32_t *)oldcontext;
				greg32_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		}
		else
#endif
		{
			/* Return address sits one word above the saved fp. */
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

		/* Truncate ustack if the iterator causes fault. */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

	return (ret);
}
517
518
519 /*
520 * The return value indicates if we've modified the stack.
521 */
/*
 * Possibly prepend a "missing top of stack" frame before the normal walk:
 * for entry probes the frame pointer is not yet pushed, so the current pc is
 * recorded and *pc is backed up to the caller found at *sp; for return
 * probes the per-CPU cpuc_missing_tos override is used.
 * The return value indicates if we've modified the stack (1) or not (0);
 * pcstack/pcstack_limit may be NULL when only the count is wanted.
 */
static int
dtrace_adjust_stack(uint64_t **pcstack, int *pcstack_limit, user_addr_t *pc,
    user_addr_t sp)
{
	volatile uint16_t *flags = (volatile uint16_t *) &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	int64_t missing_tos;
	int rc = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	ASSERT(pc != NULL);

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/*
		 * If we found ourselves in an entry probe, the frame pointer has not
		 * yet been pushed (that happens in the
		 * function prologue). The best approach is to
		 * add the current pc as a missing top of stack,
		 * and back the pc up to the caller, which is stored at the
		 * current stack pointer address since the call
		 * instruction puts it there right before
		 * the branch.
		 */

		missing_tos = *pc;

		if (is64Bit)
			*pc = dtrace_fuword64(sp);
		else
			*pc = dtrace_fuword32(sp);

		/* Truncate ustack if the iterator causes fault. */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
		}
	} else {
		/*
		 * We might have a top of stack override, in which case we just
		 * add that frame without question to the top. This
		 * happens in return probes where you have a valid
		 * frame pointer, but it's for the callers frame
		 * and you'd like to add the pc of the return site
		 * to the frame.
		 */
		missing_tos = cpu_core[CPU->cpu_id].cpuc_missing_tos;
	}

	if (missing_tos != 0) {
		if (pcstack != NULL && pcstack_limit != NULL) {
			/*
			 * If the missing top of stack has been filled out, then
			 * we add it and adjust the size.
			 */
			*(*pcstack)++ = missing_tos;
			(*pcstack_limit)--;
		}
		/*
		 * return 1 because we would have changed the
		 * stack whether or not it was passed in. This
		 * ensures the stack count is correct
		 */
		rc = 1;
	}
	return rc;
}
586
/*
 * Capture the current thread's user-mode call stack into pcstack.  The first
 * slot holds the pid; remaining slots hold return addresses, zero-filled to
 * pcstack_limit.  Bails out early (without zeroing) if a DTrace fault is
 * already pending.
 */
void
dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	int n;
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	/* Ensure the user register state is flushed and readable. */
	pal_register_cache_state(thread, VALID);
	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	/* First slot is the pid. */
	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

	/*
	 * The return value indicates if we've modified the stack.
	 * Since there is nothing else to fix up in either case,
	 * we can safely ignore it here.
	 */
	(void)dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp);

	if(pcstack_limit <= 0)
		return;

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */
	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
	ASSERT(n >= 0);
	ASSERT(n <= pcstack_limit);

	pcstack += n;
	pcstack_limit -= n;

zero:
	/* Zero-fill any unused slots. */
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
658
/*
 * Return the current thread's user stack depth (frame count) without
 * recording addresses; -1 if a DTrace fault is pending, 0 when there is no
 * usable user context.
 */
int
dtrace_getustackdepth(void)
{
	thread_t thread = current_thread();
	x86_saved_state_t *regs;
	user_addr_t pc, sp, fp;
	int n = 0;
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (thread == NULL)
		return 0;

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (-1);

	/* Ensure the user register state is flushed and readable. */
	pal_register_cache_state(thread, VALID);
	regs = (x86_saved_state_t *)find_user_regs(thread);
	if (regs == NULL)
		return 0;

	if (is64Bit) {
		pc = regs->ss_64.isf.rip;
		sp = regs->ss_64.isf.rsp;
		fp = regs->ss_64.rbp;
	} else {
		pc = regs->ss_32.eip;
		sp = regs->ss_32.uesp;
		fp = regs->ss_32.ebp;
	}

	if (dtrace_adjust_stack(NULL, NULL, &pc, sp) == 1) {
		/*
		 * we would have adjusted the stack if we had
		 * supplied one (that is what rc == 1 means).
		 * Also, as a side effect, the pc might have
		 * been fixed up, which is good for calling
		 * in to dtrace_getustack_common.
		 */
		n++;
	}

	/*
	 * Note that unlike ppc, the x86 code does not use
	 * CPU_DTRACE_USTACK_FP. This is because x86 always
	 * traces from the fp, even in syscall/profile/fbt
	 * providers.
	 */

	n += dtrace_getustack_common(NULL, 0, pc, fp);

	return (n);
}
711
/*
 * Like dtrace_getupcstack(), but also records each frame's frame pointer
 * into fpstack in parallel with the pc entries.  Only pcstack is bounded by
 * pcstack_limit and zero-filled; fpstack is assumed to be at least as large.
 *
 * NOTE(review): pc/sp are read from the ss_32 view unconditionally even
 * though the walk below honors is64Bit — confirm whether a 64-bit path was
 * intended here, matching dtrace_getupcstack().
 */
void
dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
{
	thread_t thread = current_thread();
	savearea_t *regs;
	user_addr_t pc, sp;
	volatile uint16_t *flags =
	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if 0
	uintptr_t oldcontext;
	size_t s1, s2;
#endif
	boolean_t is64Bit = proc_is64bit(current_proc());

	if (*flags & CPU_DTRACE_FAULT)
		return;

	if (pcstack_limit <= 0)
		return;

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL)
		goto zero;

	regs = (savearea_t *)find_user_regs(thread);
	if (regs == NULL)
		goto zero;

	/* First slot is the pid. */
	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0)
		return;

	pc = regs->ss_32.eip;
	sp = regs->ss_32.ebp;

#if 0 /* XXX signal stack crawl */
	oldcontext = lwp->lwp_oldcontext;

	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof (struct frame) + 2 * sizeof (long);
		s2 = s1 + sizeof (siginfo_t);
	} else {
		s1 = sizeof (struct frame32) + 3 * sizeof (int);
		s2 = s1 + sizeof (siginfo32_t);
	}
#endif

	if(dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp) == 1) {
		/*
		 * we made a change.
		 */
		*fpstack++ = 0;
		if (pcstack_limit <= 0)
			return;
	}

	while (pc != 0) {
		*pcstack++ = (uint64_t)pc;
		*fpstack++ = sp;
		pcstack_limit--;
		if (pcstack_limit <= 0)
			break;

		/* A zero frame pointer terminates the chain. */
		if (sp == 0)
			break;

#if 0 /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext_t *ucp = (ucontext_t *)oldcontext;
				greg_t *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		}
		else
#endif
		{
			/* Return address sits one word above the saved fp. */
			if (is64Bit) {
				pc = dtrace_fuword64((sp + RETURN_OFFSET64));
				sp = dtrace_fuword64(sp);
			} else {
				pc = dtrace_fuword32((sp + RETURN_OFFSET));
				sp = dtrace_fuword32(sp);
			}
		}

		/* Truncate ustack if the iterator causes fault. */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

zero:
	/* Zero-fill any unused pcstack slots. */
	while (pcstack_limit-- > 0)
		*pcstack++ = 0;
}
825
/*
 * Capture the kernel call stack into pcstack, zero-filled to pcstack_limit.
 * `aframes` artificial frames (DTrace's own) are skipped; when they run out
 * and cpu_dtrace_caller is set, that caller is substituted.  `intrpc`, when
 * non-NULL, is recorded first as the interrupted pc.  The walk hops from the
 * interrupt stack to the thread stack when needed, and stops at the first
 * frame pointer that leaves [minfp, stacktop).
 */
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
    uint32_t *intrpc)
{
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	struct frame *nextfp, *minfp, *stacktop;
	int depth = 0;
	int last = 0;
	uintptr_t pc;
	uintptr_t caller = CPU->cpu_dtrace_caller;
	int on_intr;

	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
		stacktop = (struct frame *)dtrace_get_cpu_int_stack_top();
	else
		stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);

	minfp = fp;

	/* Account for this function's own frame. */
	aframes++;

	if (intrpc != NULL && depth < pcstack_limit)
		pcstack[depth++] = (pc_t)intrpc;

	while (depth < pcstack_limit) {
		nextfp = *(struct frame **)fp;
		pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64);

		if (nextfp <= minfp || nextfp >= stacktop) {
			if (on_intr) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());

				minfp = (struct frame *)kstack_base;
				stacktop = (struct frame *)(kstack_base + kernel_stack_size);

				on_intr = 0;
				continue;
			}
			/*
			 * This is the last frame we can process; indicate
			 * that we should return after processing this frame.
			 */
			last = 1;
		}

		if (aframes > 0) {
			if (--aframes == 0 && caller != 0) {
				/*
				 * We've just run out of artificial frames,
				 * and we have a valid caller -- fill it in
				 * now.
				 */
				ASSERT(depth < pcstack_limit);
				pcstack[depth++] = (pc_t)caller;
				caller = 0;
			}
		} else {
			if (depth < pcstack_limit)
				pcstack[depth++] = (pc_t)pc;
		}

		if (last) {
			/* Zero-fill the remainder and stop. */
			while (depth < pcstack_limit)
				pcstack[depth++] = 0;
			return;
		}

		fp = nextfp;
		minfp = fp;	/* Require strictly ascending frames. */
	}
}
900
/*
 * Minimal x86 stack frame: the saved frame pointer followed by the return
 * address, as laid out by `call` + `push %rbp; mov %rsp, %rbp`.
 */
struct frame {
	struct frame *backchain;	/* caller's frame pointer */
	uintptr_t retaddr;		/* return address into the caller */
};
905
/*
 * Fetch probe argument `arg` by walking back `aframes` frames from here.
 * If the walk crosses the dtrace_invop() callsite window, the argument is
 * pulled from the trap-time save area (registers for arg <= 5, otherwise the
 * interrupted stack); otherwise the provider called dtrace_probe() directly
 * and the argument is read from dtrace_probe()'s marshalled stack arguments.
 * Loads are vetted with dtrace_canload(); returns 0 on failure.
 */
uint64_t
dtrace_getarg(int arg, int aframes, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t val = 0;
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	uintptr_t *stack;
	uintptr_t pc;
	int i;


	/*
	 * A total of 6 arguments are passed via registers; any argument with
	 * index of 5 or lower is therefore in a register.
	 */
	int inreg = 5;

	for (i = 1; i <= aframes; i++) {
		fp = fp->backchain;
		pc = fp->retaddr;

		if (dtrace_invop_callsite_pre != NULL
			&& pc  >  (uintptr_t)dtrace_invop_callsite_pre
			&& pc  <= (uintptr_t)dtrace_invop_callsite_post) {
			/*
			 * In the case of x86_64, we will use the pointer to the
			 * save area structure that was pushed when we took the
			 * trap.  To get this structure, we must increment
			 * beyond the frame structure. If the
			 * argument that we're seeking is passed on the stack,
			 * we'll pull the true stack pointer out of the saved
			 * registers and decrement our argument by the number
			 * of arguments passed in registers; if the argument
			 * we're seeking is passed in registers, we can just
			 * load it directly.
			 */

			/* fp points to frame of dtrace_invop() activation. */
			fp = fp->backchain; /* to fbt_perfcallback() activation. */
			fp = fp->backchain; /* to kernel_trap() activation. */
			fp = fp->backchain; /* to trap_from_kernel() activation. */

			/* The save area was pushed just above that frame. */
			x86_saved_state_t   *tagged_regs = (x86_saved_state_t *)&fp[1];
			x86_saved_state64_t *saved_state = saved_state64(tagged_regs);

			if (arg <= inreg) {
				/* rdi..r9 are contiguous in the save area. */
				stack = (uintptr_t *)(void*)&saved_state->rdi;
			} else {
				fp = (struct frame *)(saved_state->isf.rsp);
				stack = (uintptr_t *)&fp[1]; /* Find marshalled
								arguments */
				arg -= inreg + 1;
			}
			goto load;
		}
	}

	/*
	 * We know that we did not come through a trap to get into
	 * dtrace_probe() --  We arrive here when the provider has
	 * called dtrace_probe() directly.
	 * The probe ID is the first argument to dtrace_probe().
	 * We must advance beyond that to get the argX.
	 */
	arg++; /* Advance past probeID */

	if (arg <= inreg) {
		/*
		 * This shouldn't happen.  If the argument is passed in a
		 * register then it should have been, well, passed in a
		 * register...
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	arg -= (inreg + 1);
	stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */

load:
	/* Validate the load address before dereferencing probe-context data. */
	if (dtrace_canload((uint64_t)(stack + arg), sizeof(uint64_t),
		mstate, vstate)) {
		/* dtrace_probe arguments arg0 ... arg4 are 64bits wide */
		val = dtrace_load64((uint64_t)(stack + arg));
	}

	return (val);
}
993
994 /*
995 * Load/Store Safety
996 */
997 void
dtrace_toxic_ranges(void (* func)(uintptr_t base,uintptr_t limit))998 dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
999 {
1000 /*
1001 * "base" is the smallest toxic address in the range, "limit" is the first
1002 * VALID address greater than "base".
1003 */
1004 func(0x0, VM_MIN_KERNEL_AND_KEXT_ADDRESS);
1005 if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0)
1006 func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0);
1007 }
1008
1009 /*
1010 * Trap Safety
1011 */
1012 extern boolean_t dtrace_handle_trap(int, x86_saved_state_t *);
1013
/*
 * Attempt to recover a kernel trap taken while DTrace was executing with
 * CPU_DTRACE_NOFAULT set.  Returns TRUE if the trap was recognized and the
 * saved rip was advanced past the faulting instruction; FALSE otherwise.
 */
boolean_t
dtrace_handle_trap(int trapno, x86_saved_state_t *state)
{
	x86_saved_state64_t *saved_state = saved_state64(state);

	/* Only recover faults DTrace itself provoked under NOFAULT. */
	if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)) {
		return FALSE;
	}

	/*
	 * General purpose solution would require pulling in disassembler. Right now there
	 * is only one specific case to be handled so it is hardcoded here.
	 */
	if (trapno == T_INVALID_OPCODE) {
		uint8_t *inst = (uint8_t *)saved_state->isf.rip;

		/* vmread %rdi, %rax — the exact encoding emitted by dtrace_getvmreg(). */
		if (inst[0] == 0x0f && inst[1] == 0x78 && inst[2] == 0xf8) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			saved_state->isf.rip += 3;	/* Skip the 3-byte vmread. */
			return TRUE;
		}
	}

	return FALSE;
}
1040