xref: /xnu-8020.140.41/bsd/dev/arm/dtrace_isa.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc) !
1 /*
2  * Copyright (c) 2005-2018 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <arm/caches_internal.h>
30 #include <arm/proc_reg.h>
31 
32 #include <kern/thread.h>
33 #include <mach/thread_status.h>
34 
35 #include <stdarg.h>
36 #include <string.h>
37 #include <sys/malloc.h>
38 #include <sys/time.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/proc_internal.h>
42 #include <sys/kauth.h>
43 #include <sys/dtrace.h>
44 #include <sys/dtrace_impl.h>
45 #include <machine/atomic.h>
46 #include <kern/simple_lock.h>
47 #include <kern/sched_prim.h>            /* for thread_wakeup() */
48 #include <kern/thread_call.h>
49 #include <kern/task.h>
50 #include <miscfs/devfs/devfs.h>
51 #include <mach/vm_param.h>
52 #include <machine/atomic.h>
53 
/*
 * Defined outside this file. dtrace_getpcstack() uses it to fetch the
 * saved kernel register state of a thread and treats a NULL result as
 * "no saved state available".
 */
extern struct arm_saved_state *find_kern_regs(thread_t);

extern dtrace_id_t      dtrace_probeid_error;   /* special ERROR probe */
/* Local alias for the saved register state used by the user stack walkers. */
typedef arm_saved_state_t savearea_t;

/* Forward declaration of the condition-code evaluator defined in this file. */
int dtrace_arm_condition_true(int condition, int cpsr);
60 
61 /*
62  * Atomicity and synchronization
63  */
64 inline void
dtrace_membar_producer(void)65 dtrace_membar_producer(void)
66 {
67 	__builtin_arm_dmb(DMB_ISH);
68 }
69 
70 inline void
dtrace_membar_consumer(void)71 dtrace_membar_consumer(void)
72 {
73 	__builtin_arm_dmb(DMB_ISH);
74 }
75 
76 /*
77  * Interrupt manipulation
78  * XXX dtrace_getipl() can be called from probe context.
79  */
/*
 * Report a coarse interrupt level.
 *
 * Returns 1 when ml_at_interrupt_context() reports interrupt context,
 * and 0 otherwise.
 */
int
dtrace_getipl(void)
{
	/*
	 * XXX Drat, get_interrupt_level() is MACH_KERNEL_PRIVATE in
	 * osfmk/kern/cpu_data.h, so only a yes/no answer is available here.
	 */
	if (ml_at_interrupt_context()) {
		return 1;
	}

	return 0;
}
90 
91 /*
92  * MP coordination
93  */
94 
/* Held by dtrace_xcall() so that only one cross-call is in flight at a time. */
static LCK_MTX_DECLARE_ATTR(dt_xc_lock, &dtrace_lck_grp, &dtrace_lck_attr);
/* Passed to cpu_broadcast_xcall(); xcRemote() decrements it and wakes waiters at zero. */
static uint32_t dt_xc_sync;

/* Request handed to xcRemote() on every CPU reached by the broadcast. */
typedef struct xcArg {
	processorid_t   cpu;    /* CPU id to run on, or DTRACE_CPUALL */
	dtrace_xcall_t  f;      /* function to invoke on the selected CPU(s) */
	void           *arg;    /* opaque argument forwarded to f */
} xcArg_t;
103 
104 static void
xcRemote(void * foo)105 xcRemote(void *foo)
106 {
107 	xcArg_t *pArg = (xcArg_t *) foo;
108 
109 	if (pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL) {
110 		(pArg->f)(pArg->arg);
111 	}
112 
113 	if (os_atomic_dec(&dt_xc_sync, relaxed) == 0) {
114 		thread_wakeup((event_t) &dt_xc_sync);
115 	}
116 }
117 
118 /*
119  * dtrace_xcall() is not called from probe context.
120  */
121 void
dtrace_xcall(processorid_t cpu,dtrace_xcall_t f,void * arg)122 dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg)
123 {
124 	/* Only one dtrace_xcall in flight allowed */
125 	lck_mtx_lock(&dt_xc_lock);
126 
127 	xcArg_t xcArg;
128 
129 	xcArg.cpu = cpu;
130 	xcArg.f = f;
131 	xcArg.arg = arg;
132 
133 	cpu_broadcast_xcall(&dt_xc_sync, TRUE, xcRemote, (void*) &xcArg);
134 
135 	lck_mtx_unlock(&dt_xc_lock);
136 	return;
137 }
138 
139 /*
140  * Runtime and ABI
141  */
142 uint64_t
dtrace_getreg(struct regs * savearea,uint_t reg)143 dtrace_getreg(struct regs * savearea, uint_t reg)
144 {
145 	struct arm_saved_state *regs = (struct arm_saved_state *) savearea;
146 	if (regs == NULL) {
147 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
148 		return 0;
149 	}
150 	/* beyond register limit? */
151 	if (reg > ARM_SAVED_STATE32_COUNT - 1) {
152 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
153 		return 0;
154 	}
155 
156 	return (uint64_t) ((unsigned int *) (&(regs->r)))[reg];
157 }
158 
159 uint64_t
dtrace_getvmreg(uint_t ndx)160 dtrace_getvmreg(uint_t ndx)
161 {
162 #pragma unused(ndx)
163 	DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
164 	return 0;
165 }
166 
167 void
dtrace_livedump(char * filename,size_t len)168 dtrace_livedump(char *filename, size_t len)
169 {
170 #pragma unused(filename)
171 #pragma unused(len)
172 	DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
173 }
174 
/*
 * Byte offset added to a frame address to locate the return address
 * stored in that frame (used by the user and kernel stack walkers).
 */
#define RETURN_OFFSET 4
176 
177 static int
dtrace_getustack_common(uint64_t * pcstack,int pcstack_limit,user_addr_t pc,user_addr_t sp)178 dtrace_getustack_common(uint64_t * pcstack, int pcstack_limit, user_addr_t pc,
179     user_addr_t sp)
180 {
181 	volatile uint16_t *flags = (volatile uint16_t *) &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
182 	int ret = 0;
183 
184 	ASSERT(pcstack == NULL || pcstack_limit > 0);
185 
186 	while (pc != 0) {
187 		ret++;
188 		if (pcstack != NULL) {
189 			*pcstack++ = (uint64_t) pc;
190 			pcstack_limit--;
191 			if (pcstack_limit <= 0) {
192 				break;
193 			}
194 		}
195 
196 		if (sp == 0) {
197 			break;
198 		}
199 
200 		pc = dtrace_fuword32((sp + RETURN_OFFSET));
201 		sp = dtrace_fuword32(sp);
202 
203 		/* Truncate ustack if the iterator causes fault. */
204 		if (*flags & CPU_DTRACE_FAULT) {
205 			*flags &= ~CPU_DTRACE_FAULT;
206 			break;
207 		}
208 	}
209 
210 	return ret;
211 }
212 
213 void
dtrace_getupcstack(uint64_t * pcstack,int pcstack_limit)214 dtrace_getupcstack(uint64_t * pcstack, int pcstack_limit)
215 {
216 	thread_t        thread = current_thread();
217 	savearea_t     *regs;
218 	user_addr_t     pc, sp;
219 	volatile uint16_t *flags = (volatile uint16_t *) &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
220 	int n;
221 
222 	if (*flags & CPU_DTRACE_FAULT) {
223 		return;
224 	}
225 
226 	if (pcstack_limit <= 0) {
227 		return;
228 	}
229 
230 	/*
231 	 * If there's no user context we still need to zero the stack.
232 	 */
233 	if (thread == NULL) {
234 		goto zero;
235 	}
236 
237 	regs = (savearea_t *) find_user_regs(thread);
238 	if (regs == NULL) {
239 		goto zero;
240 	}
241 
242 	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
243 	pcstack_limit--;
244 
245 	if (pcstack_limit <= 0) {
246 		return;
247 	}
248 
249 	pc = regs->pc;
250 	sp = regs->sp;
251 
252 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
253 		*pcstack++ = (uint64_t) pc;
254 		pcstack_limit--;
255 		if (pcstack_limit <= 0) {
256 			return;
257 		}
258 
259 		pc = regs->lr;
260 	}
261 
262 	n = dtrace_getustack_common(pcstack, pcstack_limit, pc, regs->r[7]);
263 
264 	ASSERT(n >= 0);
265 	ASSERT(n <= pcstack_limit);
266 
267 	pcstack += n;
268 	pcstack_limit -= n;
269 
270 zero:
271 	while (pcstack_limit-- > 0) {
272 		*pcstack++ = 0ULL;
273 	}
274 }
275 
276 int
dtrace_getustackdepth(void)277 dtrace_getustackdepth(void)
278 {
279 	thread_t        thread = current_thread();
280 	savearea_t     *regs;
281 	user_addr_t     pc, sp;
282 	int             n = 0;
283 
284 	if (thread == NULL) {
285 		return 0;
286 	}
287 
288 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) {
289 		return -1;
290 	}
291 
292 	regs = (savearea_t *) find_user_regs(thread);
293 	if (regs == NULL) {
294 		return 0;
295 	}
296 
297 	pc = regs->pc;
298 	sp = regs->sp;
299 
300 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
301 		n++;
302 		pc = regs->lr;
303 	}
304 
305 	/*
306 	 * Note that unlike ppc, the arm code does not use
307 	 * CPU_DTRACE_USTACK_FP. This is because arm always
308 	 * traces from the sp, even in syscall/profile/fbt
309 	 * providers.
310 	 */
311 
312 	n += dtrace_getustack_common(NULL, 0, pc, regs->r[7]);
313 
314 	return n;
315 }
316 
/*
 * Record the current thread's user-mode call stack as PCs plus the
 * frame address each PC was read from.
 *
 * pcstack:       output array of PCs; slot 0 receives the pid
 * fpstack:       output array of frame addresses; it is not advanced
 *                for the pid slot, so fpstack[i] pairs with pcstack[i + 1]
 * pcstack_limit: number of slots in pcstack
 *
 * Nothing is written when CPU_DTRACE_FAULT is already set or when
 * pcstack_limit is not positive. Unused pcstack slots are zero-filled.
 *
 * NOTE(review): only pcstack is zero-filled at the end; fpstack is left
 * untouched past the last recorded frame -- confirm callers do not
 * rely on padding there.
 */
void
dtrace_getufpstack(uint64_t * pcstack, uint64_t * fpstack, int pcstack_limit)
{
	/* XXX ARMTODO 64vs32 */
	thread_t        thread = current_thread();
	savearea_t      *regs;
	user_addr_t     pc, sp;

	volatile        uint16_t  *flags = (volatile uint16_t *) &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

#if 0
	uintptr_t oldcontext;
	size_t          s1, s2;
#endif

	if (*flags & CPU_DTRACE_FAULT) {
		return;
	}

	if (pcstack_limit <= 0) {
		return;
	}

	/*
	 * If there's no user context we still need to zero the stack.
	 */
	if (thread == NULL) {
		goto zero;
	}

	regs = (savearea_t *) find_user_regs(thread);
	if (regs == NULL) {
		goto zero;
	}

	/* The first pcstack slot carries the pid rather than a PC. */
	*pcstack++ = (uint64_t)dtrace_proc_selfpid();
	pcstack_limit--;

	if (pcstack_limit <= 0) {
		return;
	}

	/*
	 * NOTE(review): this walker follows the chain from regs->sp, while
	 * dtrace_getupcstack() starts from regs->r[7] -- confirm which is
	 * intended (see the ARMTODO above).
	 */
	pc = regs->pc;
	sp = regs->sp;

#if 0                           /* XXX signal stack crawl */
	oldcontext = lwp->lwp_oldcontext;

	if (p->p_model == DATAMODEL_NATIVE) {
		s1 = sizeof(struct frame) + 2 * sizeof(long);
		s2 = s1 + sizeof(siginfo_t);
	} else {
		s1 = sizeof(struct frame32) + 3 * sizeof(int);
		s2 = s1 + sizeof(siginfo32_t);
	}
#endif

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
		/* Entry frame: report the current pc with a zero frame address. */
		*pcstack++ = (uint64_t) pc;
		*fpstack++ = 0;
		pcstack_limit--;
		if (pcstack_limit <= 0) {
			return;
		}

		/* The next pc is read from the word at sp. */
		pc = dtrace_fuword32(sp);
	}
	while (pc != 0 && sp != 0) {
		*pcstack++ = (uint64_t) pc;
		*fpstack++ = sp;
		pcstack_limit--;
		if (pcstack_limit <= 0) {
			break;
		}

#if 0                           /* XXX signal stack crawl */
		if (oldcontext == sp + s1 || oldcontext == sp + s2) {
			if (p->p_model == DATAMODEL_NATIVE) {
				ucontext_t     *ucp = (ucontext_t *) oldcontext;
				greg_t         *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fulword(&gregs[REG_FP]);
				pc = dtrace_fulword(&gregs[REG_PC]);

				oldcontext = dtrace_fulword(&ucp->uc_link);
			} else {
				ucontext_t     *ucp = (ucontext_t *) oldcontext;
				greg_t         *gregs = ucp->uc_mcontext.gregs;

				sp = dtrace_fuword32(&gregs[EBP]);
				pc = dtrace_fuword32(&gregs[EIP]);

				oldcontext = dtrace_fuword32(&ucp->uc_link);
			}
		} else
#endif
		{
			/* Read the return address before advancing sp to the next frame. */
			pc = dtrace_fuword32((sp + RETURN_OFFSET));
			sp = dtrace_fuword32(sp);
		}

		/* Truncate ustack if the iterator causes fault. */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			break;
		}
	}

zero:
	while (pcstack_limit-- > 0) {
		*pcstack++ = 0ULL;
	}
}
430 
431 void
dtrace_getpcstack(pc_t * pcstack,int pcstack_limit,int aframes,uint32_t * intrpc)432 dtrace_getpcstack(pc_t * pcstack, int pcstack_limit, int aframes,
433     uint32_t * intrpc)
434 {
435 	struct frame   *fp = (struct frame *) __builtin_frame_address(0);
436 	struct frame   *nextfp, *minfp, *stacktop;
437 	int             depth = 0;
438 	int             on_intr;
439 	int             last = 0;
440 	uintptr_t       pc;
441 	uintptr_t       caller = CPU->cpu_dtrace_caller;
442 
443 	if ((on_intr = CPU_ON_INTR(CPU)) != 0) {
444 		stacktop = (struct frame *) dtrace_get_cpu_int_stack_top();
445 	} else {
446 		stacktop = (struct frame *) (dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);
447 	}
448 
449 	minfp = fp;
450 
451 	aframes++;
452 
453 	if (intrpc != NULL && depth < pcstack_limit) {
454 		pcstack[depth++] = (pc_t) intrpc;
455 	}
456 
457 	while (depth < pcstack_limit) {
458 		nextfp = *(struct frame **) fp;
459 		pc = *(uintptr_t *) (((uint32_t) fp) + RETURN_OFFSET);
460 
461 		if (nextfp <= minfp || nextfp >= stacktop) {
462 			if (on_intr) {
463 				/*
464 				 * Hop from interrupt stack to thread stack.
465 				 */
466 				arm_saved_state_t *arm_kern_regs = (arm_saved_state_t *) find_kern_regs(current_thread());
467 				if (arm_kern_regs) {
468 					nextfp = (struct frame *)arm_kern_regs->r[7];
469 
470 					vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread());
471 
472 					minfp = (struct frame *)kstack_base;
473 					stacktop = (struct frame *)(kstack_base + kernel_stack_size);
474 
475 					on_intr = 0;
476 
477 					if (nextfp <= minfp || nextfp >= stacktop) {
478 						last = 1;
479 					}
480 				} else {
481 					/*
482 					 * If this thread was on the interrupt stack, but did not
483 					 * take an interrupt (i.e, the idle thread), there is no
484 					 * explicit saved state for us to use.
485 					 */
486 					last = 1;
487 				}
488 			} else {
489 				/*
490 				 * This is the last frame we can process; indicate
491 				 * that we should return after processing this frame.
492 				 */
493 				last = 1;
494 			}
495 		}
496 		if (aframes > 0) {
497 			if (--aframes == 0 && caller != (uintptr_t)NULL) {
498 				/*
499 				 * We've just run out of artificial frames,
500 				 * and we have a valid caller -- fill it in
501 				 * now.
502 				 */
503 				ASSERT(depth < pcstack_limit);
504 				pcstack[depth++] = (pc_t) caller;
505 				caller = (uintptr_t)NULL;
506 			}
507 		} else {
508 			if (depth < pcstack_limit) {
509 				pcstack[depth++] = (pc_t) pc;
510 			}
511 		}
512 
513 		if (last) {
514 			while (depth < pcstack_limit) {
515 				pcstack[depth++] = (pc_t) NULL;
516 			}
517 			return;
518 		}
519 		fp = nextfp;
520 		minfp = fp;
521 	}
522 }
523 
/*
 * Return the size in bytes of an instruction.
 *
 * instr:      instruction word; in Thumb mode only the low 16 bits
 *             (the first halfword) are examined
 * thumb_mode: non-zero to decode as Thumb, zero for ARM
 *
 * ARM instructions are always 4 bytes. A Thumb instruction is 4 bytes
 * when the top five bits of its first halfword are greater than 0x1C
 * (0x1D, 0x1E or 0x1F), and 2 bytes otherwise.
 */
int
dtrace_instr_size(uint32_t instr, int thumb_mode)
{
	if (!thumb_mode) {
		return 4;
	}

	/*
	 * Extract the low halfword by value. This selects the same bits the
	 * previous pointer cast read on little-endian targets, without
	 * accessing a uint32_t through a uint16_t lvalue.
	 */
	const uint16_t first_halfword = (uint16_t)(instr & 0xFFFFu);

	if (((first_halfword >> 11) & 0x1F) > 0x1C) {
		return 4;
	}

	return 2;
}
538 
/*
 * Argument retrieval is not implemented here: all parameters are
 * ignored and a fixed sentinel value is returned. The disabled block
 * below is unported reference code (see the ARMTODO) and is never
 * compiled.
 */
uint64_t
dtrace_getarg(int arg, int aframes, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
#pragma unused(arg, aframes, mstate, vstate)
#if 0
	/* XXX ARMTODO */
	uint64_t val;
	uintptr_t *fp = (uintptr_t *)__builtin_frame_address(0);
	uintptr_t *stack;
	uintptr_t pc;
	int i;

	for (i = 1; i <= aframes; i++) {
		fp = fp[0];
		pc = fp[1];

		if (dtrace_invop_callsite_pre != NULL
		    && pc > (uintptr_t)dtrace_invop_callsite_pre
		    && pc <= (uintptr_t)dtrace_invop_callsite_post) {
			/*
			 * If we pass through the invalid op handler, we will
			 * use the pointer that it passed to the stack as the
			 * second argument to dtrace_invop() as the pointer to
			 * the frame we're hunting for.
			 */

			stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
			fp = (struct frame *)stack[1]; /* Grab *second* argument */
			stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			val = (uint64_t)(stack[arg]);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			return val;
		}
	}

	/*
	 * Arrive here when provider has called dtrace_probe directly.
	 */
	stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
	stack++; /* Advance past probeID */

	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	val = *(((uint64_t *)stack) + arg); /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
	return val;
#endif
	/* Fixed placeholder value returned for every call. */
	return 0xfeedfacedeafbeadLL;
}
588 
589 void
dtrace_probe_error(dtrace_state_t * state,dtrace_epid_t epid,int which,int fltoffs,int fault,uint64_t illval)590 dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
591     int fltoffs, int fault, uint64_t illval)
592 {
593 	/* XXX ARMTODO */
594 	/*
595 	 * For the case of the error probe firing lets
596 	 * stash away "illval" here, and special-case retrieving it in DIF_VARIABLE_ARG.
597 	 */
598 	state->dts_arg_error_illval = illval;
599 	dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault );
600 }
601 
602 void
dtrace_toxic_ranges(void (* func)(uintptr_t base,uintptr_t limit))603 dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
604 {
605 	/* XXX ARMTODO check copied from ppc/x86*/
606 	/*
607 	 * "base" is the smallest toxic address in the range, "limit" is the first
608 	 * VALID address greater than "base".
609 	 */
610 	func(0x0, VM_MIN_KERNEL_ADDRESS);
611 	if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0) {
612 		func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0);
613 	}
614 }
615 
616 int
dtrace_arm_condition_true(int cond,int cpsr)617 dtrace_arm_condition_true(int cond, int cpsr)
618 {
619 	int taken = 0;
620 	int zf = (cpsr & PSR_ZF) ? 1 : 0,
621 	    nf = (cpsr & PSR_NF) ? 1 : 0,
622 	    cf = (cpsr & PSR_CF) ? 1 : 0,
623 	    vf = (cpsr & PSR_VF) ? 1 : 0;
624 
625 	switch (cond) {
626 	case 0: taken = zf; break;
627 	case 1: taken = !zf; break;
628 	case 2: taken = cf; break;
629 	case 3: taken = !cf; break;
630 	case 4: taken = nf; break;
631 	case 5: taken = !nf; break;
632 	case 6: taken = vf; break;
633 	case 7: taken = !vf; break;
634 	case 8: taken = (cf && !zf); break;
635 	case 9: taken = (!cf || zf); break;
636 	case 10: taken = (nf == vf); break;
637 	case 11: taken = (nf != vf); break;
638 	case 12: taken = (!zf && (nf == vf)); break;
639 	case 13: taken = (zf || (nf != vf)); break;
640 	case 14: taken = 1; break;
641 	case 15: taken = 1; break;         /* always "true" for ARM, unpredictable for THUMB. */
642 	}
643 
644 	return taken;
645 }
646 
/*
 * Make patched instructions visible by flushing the data cache and
 * then invalidating the instruction cache.
 */
void
dtrace_flush_caches(void)
{
	/*
	 * TODO: flushing only the modified cache line caused problems, so
	 * the entire cache is flushed until a way to flush just the patched
	 * block is worked out.
	 */
	FlushPoU_Dcache();
	InvalidatePoU_Icache();
}
656