1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <ptrauth.h>
27
28 #include <kern/thread.h>
29 #include <mach/thread_status.h>
30
31 /* XXX All of these should really be derived from syscall_sw.h */
32 #if defined (__x86_64__)
33 #define SYSCALL_CLASS_SHIFT 24
34 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
35 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
36 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/errno.h>
43 #include <sys/ioctl.h>
44 #include <sys/conf.h>
45 #include <sys/fcntl.h>
46 #include <sys/syscall.h>
47 #include <miscfs/devfs/devfs.h>
48
49 #include <sys/dtrace.h>
50 #include <sys/dtrace_impl.h>
51 #include <sys/systrace_args.h>
52 #include "systrace.h"
53 #include <sys/stat.h>
54 #include <sys/systm.h>
55 #include <sys/conf.h>
56 #include <sys/user.h>
57
58 #include <machine/pal_routines.h>
59
60 #if defined (__x86_64__)
61 #define SYSTRACE_ARTIFICIAL_FRAMES 2
62 #define MACHTRACE_ARTIFICIAL_FRAMES 3
63 #elif defined(__arm64__)
64 #define SYSTRACE_ARTIFICIAL_FRAMES 2
65 #define MACHTRACE_ARTIFICIAL_FRAMES 3
66 #else
67 #error Unknown Architecture
68 #endif
69
70 #define SYSTRACE_NARGS (int)(sizeof(((uthread_t)NULL)->uu_arg) / sizeof(((uthread_t)NULL)->uu_arg[0]))
71
72 #include <sys/sysent.h>
73 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
74 #define NSYSCALL nsysent /* and is less than 500 or so */
75
76 extern const char *syscallnames[];
77
78 #include <sys/dtrace_glue.h>
79 #define casptr dtrace_casptr
80 #define membar_enter dtrace_membar_producer
81
82 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
83 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
84
85 static LCK_MTX_DECLARE_ATTR(dtrace_systrace_lock,
86 &dtrace_lck_grp, &dtrace_lck_attr); /* probe state lock */
87
88 systrace_sysent_t *systrace_sysent = NULL;
89 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
90
91 static uint64_t systrace_getargval(void *, dtrace_id_t, void *, int, int);
92 static void systrace_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
93
94 void
systrace_stub(dtrace_id_t id,uint64_t arg0,uint64_t arg1,uint64_t arg2,uint64_t arg3,uint64_t arg4)95 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
96 uint64_t arg2, uint64_t arg3, uint64_t arg4)
97 {
98 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
99 }
100
101 int32_t
dtrace_systrace_syscall(struct proc * pp,void * uap,int * rv)102 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
103 {
104 unsigned short code; /* The system call number */
105
106 systrace_sysent_t *sy;
107 dtrace_id_t id;
108 int32_t rval;
109 syscall_arg_t *ip = (syscall_arg_t *)uap;
110 uint64_t uargs[SYSTRACE_NARGS] = {0};
111
112 #if defined (__x86_64__)
113 {
114 pal_register_cache_state(current_thread(), VALID);
115 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
116
117 if (is_saved_state64(tagged_regs)) {
118 x86_saved_state64_t *regs = saved_state64(tagged_regs);
119 code = regs->rax & SYSCALL_NUMBER_MASK;
120 /*
121 * Check for indirect system call... system call number
122 * passed as 'arg0'
123 */
124 if (code == 0) {
125 code = regs->rdi;
126 }
127 } else {
128 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
129
130 if (code == 0) {
131 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof(int));
132 code = fuword(params);
133 }
134 }
135 }
136 #elif defined(__arm64__)
137 {
138 /*
139 * On arm64, syscall numbers depend on a flavor (indirect or not)
140 * ... and for u32 can be in either r0 or r12
141 * ... and for u64 can be in either x0 or x16
142 */
143
144 /* see bsd/dev/arm/systemcalls.c:arm_get_syscall_number */
145 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
146
147 if (is_saved_state32(arm_regs)) {
148 /* Check for indirect system call */
149 if (saved_state32(arm_regs)->r[12] != 0) {
150 code = saved_state32(arm_regs)->r[12];
151 } else {
152 code = saved_state32(arm_regs)->r[0];
153 }
154 } else {
155 /* Check for indirect system call */
156 if (saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM] != 0) {
157 code = saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
158 } else {
159 code = saved_state64(arm_regs)->x[0];
160 }
161 }
162 }
163 #else
164 #error Unknown Architecture
165 #endif
166
167 // Bounds "check" the value of code a la unix_syscall
168 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
169
170 systrace_args(code, ip, uargs);
171
172 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
173 uthread_t uthread = current_uthread();
174 if (uthread) {
175 uthread->t_dtrace_syscall_args = uargs;
176 }
177
178 static_assert(SYSTRACE_NARGS >= 5, "not enough system call arguments");
179 (*systrace_probe)(id, uargs[0], uargs[1], uargs[2], uargs[3], uargs[4]);
180
181 if (uthread) {
182 uthread->t_dtrace_syscall_args = NULL;
183 }
184 }
185
186
187
188 #if 0 /* XXX */
189 /*
190 * APPLE NOTE: Not implemented.
191 * We want to explicitly allow DTrace consumers to stop a process
192 * before it actually executes the meat of the syscall.
193 */
194 p = ttoproc(curthread);
195 mutex_enter(&p->p_lock);
196 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
197 curthread->t_dtrace_stop = 0;
198 stop(PR_REQUESTED, 0);
199 }
200 mutex_exit(&p->p_lock);
201 #endif
202
203 rval = (*sy->stsy_underlying)(pp, uap, rv);
204
205 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
206 uint64_t munged_rv0, munged_rv1;
207 uthread_t uthread = current_uthread();
208
209 if (uthread) {
210 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
211 }
212 /*
213 * "Decode" rv for use in the call to dtrace_probe()
214 */
215 if (rval == ERESTART) {
216 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
217 munged_rv1 = -1LL;
218 } else if (rval != EJUSTRETURN) {
219 if (rval) {
220 munged_rv0 = -1LL; /* Mimic what libc will do. */
221 munged_rv1 = -1LL;
222 } else {
223 switch (sy->stsy_return_type) {
224 case _SYSCALL_RET_INT_T:
225 munged_rv0 = rv[0];
226 munged_rv1 = rv[1];
227 break;
228 case _SYSCALL_RET_UINT_T:
229 munged_rv0 = ((u_int)rv[0]);
230 munged_rv1 = ((u_int)rv[1]);
231 break;
232 case _SYSCALL_RET_OFF_T:
233 case _SYSCALL_RET_UINT64_T:
234 munged_rv0 = *(u_int64_t *)rv;
235 munged_rv1 = 0LL;
236 break;
237 case _SYSCALL_RET_ADDR_T:
238 case _SYSCALL_RET_SIZE_T:
239 case _SYSCALL_RET_SSIZE_T:
240 munged_rv0 = *(user_addr_t *)rv;
241 munged_rv1 = 0LL;
242 break;
243 case _SYSCALL_RET_NONE:
244 munged_rv0 = 0LL;
245 munged_rv1 = 0LL;
246 break;
247 default:
248 munged_rv0 = 0LL;
249 munged_rv1 = 0LL;
250 break;
251 }
252 }
253 } else {
254 munged_rv0 = 0LL;
255 munged_rv1 = 0LL;
256 }
257
258 /*
259 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
260 *
261 * "This is a bit of an historical artifact. At first, the syscall provider just
262 * had its return value in arg0, and the fbt and pid providers had their return
263 * values in arg1 (so that we could use arg0 for the offset of the return site).
264 *
265 * We inevitably started writing scripts where we wanted to see the return
266 * values from probes in all three providers, and we made this script easier
267 * to write by replicating the syscall return values in arg1 to match fbt and
268 * pid. We debated briefly about removing the return value from arg0, but
269 * decided that it would be less confusing to have the same data in two places
270 * than to have some non-helpful, non-intuitive value in arg0.
271 *
272 * This change was made 4/23/2003 according to the DTrace project's putback log."
273 */
274 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
275 }
276
277 return rval;
278 }
279
280 void
dtrace_systrace_syscall_return(unsigned short code,int rval,int * rv)281 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
282 {
283 systrace_sysent_t *sy;
284 dtrace_id_t id;
285
286 // Bounds "check" the value of code a la unix_syscall_return
287 sy = (code >= nsysent) ? &systrace_sysent[SYS_invalid] : &systrace_sysent[code];
288
289 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
290 uint64_t munged_rv0, munged_rv1;
291 uthread_t uthread = current_uthread();
292
293 if (uthread) {
294 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
295 }
296 /*
297 * "Decode" rv for use in the call to dtrace_probe()
298 */
299 if (rval == ERESTART) {
300 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
301 munged_rv1 = -1LL;
302 } else if (rval != EJUSTRETURN) {
303 if (rval) {
304 munged_rv0 = -1LL; /* Mimic what libc will do. */
305 munged_rv1 = -1LL;
306 } else {
307 switch (sy->stsy_return_type) {
308 case _SYSCALL_RET_INT_T:
309 munged_rv0 = rv[0];
310 munged_rv1 = rv[1];
311 break;
312 case _SYSCALL_RET_UINT_T:
313 munged_rv0 = ((u_int)rv[0]);
314 munged_rv1 = ((u_int)rv[1]);
315 break;
316 case _SYSCALL_RET_OFF_T:
317 case _SYSCALL_RET_UINT64_T:
318 munged_rv0 = *(u_int64_t *)rv;
319 munged_rv1 = 0LL;
320 break;
321 case _SYSCALL_RET_ADDR_T:
322 case _SYSCALL_RET_SIZE_T:
323 case _SYSCALL_RET_SSIZE_T:
324 munged_rv0 = *(user_addr_t *)rv;
325 munged_rv1 = 0LL;
326 break;
327 case _SYSCALL_RET_NONE:
328 munged_rv0 = 0LL;
329 munged_rv1 = 0LL;
330 break;
331 default:
332 munged_rv0 = 0LL;
333 munged_rv1 = 0LL;
334 break;
335 }
336 }
337 } else {
338 munged_rv0 = 0LL;
339 munged_rv1 = 0LL;
340 }
341
342 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
343 }
344 }
345
346 #define SYSTRACE_SHIFT 16
347 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
348 #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
349 #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
350 #define SYSTRACE_RETURN(id) (id)
351
352 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
353 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
354 #endif
355
356 static dtrace_provider_id_t systrace_id;
357
358 /*
359 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
360 * See balanced undef below.
361 */
362 #define systrace_init _systrace_init
363
364 static void
systrace_init(const struct sysent * actual,systrace_sysent_t ** interposed)365 systrace_init(const struct sysent *actual, systrace_sysent_t **interposed)
366 {
367 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
368 * from bsd/sys/sysent.h */
369 unsigned int i;
370
371 if (ssysent == NULL) {
372 *interposed = ssysent = kmem_zalloc(sizeof(systrace_sysent_t) *
373 NSYSCALL, KM_SLEEP);
374 }
375
376 for (i = 0; i < NSYSCALL; i++) {
377 /* Use of volatile protects the if statement below from being optimized away */
378 const volatile struct sysent *a = &actual[i];
379 systrace_sysent_t *s = &ssysent[i];
380
381 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
382 continue;
383 }
384
385 if (a->sy_callc == dtrace_systrace_syscall) {
386 continue;
387 }
388
389 s->stsy_underlying = a->sy_callc;
390 s->stsy_return_type = a->sy_return_type;
391 }
392 }
393
394
395 /*ARGSUSED*/
396 static void
systrace_provide(void * arg,const dtrace_probedesc_t * desc)397 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
398 {
399 #pragma unused(arg) /* __APPLE__ */
400 unsigned int i;
401
402 if (desc != NULL) {
403 return;
404 }
405
406 systrace_init(sysent, &systrace_sysent);
407
408 for (i = 0; i < NSYSCALL; i++) {
409 if (systrace_sysent[i].stsy_underlying == NULL) {
410 continue;
411 }
412
413 if (dtrace_probe_lookup(systrace_id, NULL,
414 syscallnames[i], "entry") != 0) {
415 continue;
416 }
417
418 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
419 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
420 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
421 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
422 "return", SYSTRACE_ARTIFICIAL_FRAMES,
423 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
424
425 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
426 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
427 }
428 }
429 #undef systrace_init
430
431 /*ARGSUSED*/
432 static void
systrace_destroy(void * arg,dtrace_id_t id,void * parg)433 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
434 {
435 #pragma unused(arg,id) /* __APPLE__ */
436
437 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
438
439 #pragma unused(sysnum) /* __APPLE__ */
440 /*
441 * There's nothing to do here but assert that we have actually been
442 * disabled.
443 */
444 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
445 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
446 } else {
447 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
448 }
449 }
450
451 /*ARGSUSED*/
452 static int
systrace_enable(void * arg,dtrace_id_t id,void * parg)453 systrace_enable(void *arg, dtrace_id_t id, void *parg)
454 {
455 #pragma unused(arg) /* __APPLE__ */
456
457 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
458 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
459 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
460
461 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
462 systrace_sysent[sysnum].stsy_entry = id;
463 } else {
464 systrace_sysent[sysnum].stsy_return = id;
465 }
466
467 if (enabled) {
468 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
469 return 0;
470 }
471
472 lck_mtx_lock(&dtrace_systrace_lock);
473 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
474 /* It is not possible to write to sysent[] directly because it is const. */
475 vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_systrace_syscall);
476 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
477 }
478 lck_mtx_unlock(&dtrace_systrace_lock);
479
480 return 0;
481 }
482
483 /*ARGSUSED*/
484 static void
systrace_disable(void * arg,dtrace_id_t id,void * parg)485 systrace_disable(void *arg, dtrace_id_t id, void *parg)
486 {
487 #pragma unused(arg,id) /* __APPLE__ */
488
489 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
490 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
491 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
492
493 if (disable) {
494 /*
495 * Usage of volatile protects the if statement below from being optimized away.
496 *
497 * Compilers are clever and know that const array values can't change in time
498 * and the if below is always false. That is because it can't see that DTrace
499 * injects dtrace_systrace_syscall dynamically and violates constness of the
500 * array.
501 */
502 volatile const struct sysent *syscallent = &sysent[sysnum];
503
504 lck_mtx_lock(&dtrace_systrace_lock);
505 if (syscallent->sy_callc == dtrace_systrace_syscall) {
506 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying,
507 (vm_offset_t)&syscallent->sy_callc, sizeof(vm_offset_t));
508 }
509 lck_mtx_unlock(&dtrace_systrace_lock);
510 }
511
512 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
513 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
514 } else {
515 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
516 }
517 }
518
519 static dtrace_pattr_t systrace_attr = {
520 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
521 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
522 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
523 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
524 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
525 };
526
527 static dtrace_pops_t systrace_pops = {
528 .dtps_provide = systrace_provide,
529 .dtps_provide_module = NULL,
530 .dtps_enable = systrace_enable,
531 .dtps_disable = systrace_disable,
532 .dtps_suspend = NULL,
533 .dtps_resume = NULL,
534 .dtps_getargdesc = systrace_getargdesc,
535 .dtps_getargval = systrace_getargval,
536 .dtps_usermode = NULL,
537 .dtps_destroy = systrace_destroy
538 };
539
540 static int
systrace_attach(dev_info_t * devi)541 systrace_attach(dev_info_t *devi)
542 {
543 systrace_probe = (void*)&dtrace_probe;
544 membar_enter();
545
546 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
547 DDI_PSEUDO, 0) == DDI_FAILURE ||
548 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
549 &systrace_pops, NULL, &systrace_id) != 0) {
550 systrace_probe = systrace_stub;
551 ddi_remove_minor_node(devi, NULL);
552 return DDI_FAILURE;
553 }
554
555 return DDI_SUCCESS;
556 }
557
558
559 /*
560 * APPLE NOTE: systrace_detach not implemented
561 */
562 #if !defined(__APPLE__)
563 static int
systrace_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)564 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
565 {
566 switch (cmd) {
567 case DDI_DETACH:
568 break;
569 case DDI_SUSPEND:
570 return DDI_SUCCESS;
571 default:
572 return DDI_FAILURE;
573 }
574
575 if (dtrace_unregister(systrace_id) != 0) {
576 return DDI_FAILURE;
577 }
578
579 ddi_remove_minor_node(devi, NULL);
580 systrace_probe = systrace_stub;
581 return DDI_SUCCESS;
582 }
583 #endif /* __APPLE__ */
584
585
586 typedef kern_return_t (*mach_call_t)(void *);
587
588 /* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
589 #if CONFIG_REQUIRES_U32_MUNGING
590 typedef void mach_munge_t(void *);
591 #elif __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
592 typedef int mach_munge_t(const void *, void *);
593 #endif
594
595 typedef struct {
596 unsigned char mach_trap_arg_count; /* Number of trap arguments (Arch independant) */
597 unsigned char mach_trap_u32_words; /* number of 32-bit words to copyin for U32 */
598 unsigned char mach_trap_returns_port;
599 unsigned char __mach_trap_padding;
600 kern_return_t (*mach_trap_function)(void *);
601 #if CONFIG_REQUIRES_U32_MUNGING || (__arm__ && (__BIGGEST_ALIGNMENT__ > 4))
602 mach_munge_t *mach_trap_arg_munge32; /* system call argument munger routine for 32-bit */
603 #endif
604 #if MACH_ASSERT
605 const char *mach_trap_name;
606 #endif /* MACH_ASSERT */
607 } mach_trap_t;
608
609
610 #define MACH_TRAP_TABLE_COUNT 128
611
612 extern const mach_trap_t mach_trap_table[MACH_TRAP_TABLE_COUNT];
613 extern const int mach_trap_count;
614 extern const char * const mach_syscall_name_table[MACH_TRAP_TABLE_COUNT];
615
616
617 /* XXX From osfmk/i386/bsd_i386.c */
618 struct mach_call_args {
619 syscall_arg_t arg1;
620 syscall_arg_t arg2;
621 syscall_arg_t arg3;
622 syscall_arg_t arg4;
623 syscall_arg_t arg5;
624 syscall_arg_t arg6;
625 syscall_arg_t arg7;
626 syscall_arg_t arg8;
627 syscall_arg_t arg9;
628 };
629
630 #undef NSYSCALL
631 #define NSYSCALL mach_trap_count
632
633 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
634 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
635 #endif
636
637 typedef struct machtrace_sysent {
638 dtrace_id_t stsy_entry;
639 dtrace_id_t stsy_return;
640 kern_return_t (*stsy_underlying)(void *);
641 int32_t stsy_return_type;
642 } machtrace_sysent_t;
643
644 static machtrace_sysent_t *machtrace_sysent = NULL;
645
646 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
647 uint64_t, uint64_t, uint64_t);
648
649 static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
650
651 static dtrace_provider_id_t machtrace_id;
652
653 static kern_return_t
dtrace_machtrace_syscall(struct mach_call_args * args)654 dtrace_machtrace_syscall(struct mach_call_args *args)
655 {
656 int code; /* The mach call number */
657
658 machtrace_sysent_t *sy;
659 dtrace_id_t id;
660 kern_return_t rval;
661 #if 0 /* XXX */
662 proc_t *p;
663 #endif
664 syscall_arg_t *ip = (syscall_arg_t *)args;
665 mach_call_t mach_call;
666
667 #if defined (__x86_64__)
668 {
669 pal_register_cache_state(current_thread(), VALID);
670 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
671
672 if (is_saved_state64(tagged_regs)) {
673 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
674 } else {
675 code = -saved_state32(tagged_regs)->eax;
676 }
677 }
678 #elif defined(__arm64__)
679 {
680 /* From arm/thread_status.h:get_saved_state_svc_number */
681 arm_saved_state_t *arm_regs = (arm_saved_state_t *) find_user_regs(current_thread());
682 if (is_saved_state32(arm_regs)) {
683 code = (int)saved_state32(arm_regs)->r[12];
684 } else {
685 code = (int)saved_state64(arm_regs)->x[ARM64_SYSCALL_CODE_REG_NUM];
686 }
687
688 /* From bsd/arm64.c:mach_syscall */
689 ASSERT(code < 0); /* Otherwise it would be a Unix syscall */
690 code = -code;
691 }
692 #else
693 #error Unknown Architecture
694 #endif
695
696 sy = &machtrace_sysent[code];
697
698 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
699 uthread_t uthread = current_uthread();
700
701 if (uthread) {
702 uthread->t_dtrace_syscall_args = (void *)ip;
703 }
704
705 (*machtrace_probe)(id, *ip, *(ip + 1), *(ip + 2), *(ip + 3), *(ip + 4));
706
707 if (uthread) {
708 uthread->t_dtrace_syscall_args = (void *)0;
709 }
710 }
711
712 #if 0 /* XXX */
713 /*
714 * APPLE NOTE: Not implemented.
715 * We want to explicitly allow DTrace consumers to stop a process
716 * before it actually executes the meat of the syscall.
717 */
718 p = ttoproc(curthread);
719 mutex_enter(&p->p_lock);
720 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
721 curthread->t_dtrace_stop = 0;
722 stop(PR_REQUESTED, 0);
723 }
724 mutex_exit(&p->p_lock);
725 #endif
726
727 mach_call = (mach_call_t)(*sy->stsy_underlying);
728 rval = mach_call(args);
729
730 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
731 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
732 }
733
734 return rval;
735 }
736
737 static void
machtrace_init(const mach_trap_t * actual,machtrace_sysent_t ** interposed)738 machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
739 {
740 machtrace_sysent_t *msysent = *interposed;
741 int i;
742
743 if (msysent == NULL) {
744 *interposed = msysent = kmem_zalloc(sizeof(machtrace_sysent_t) *
745 NSYSCALL, KM_SLEEP);
746 }
747
748 for (i = 0; i < NSYSCALL; i++) {
749 const volatile mach_trap_t *a = &actual[i];
750 machtrace_sysent_t *s = &msysent[i];
751
752 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) {
753 continue;
754 }
755
756 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall)) {
757 continue;
758 }
759
760 s->stsy_underlying = a->mach_trap_function;
761 }
762 }
763
764 /*ARGSUSED*/
765 static void
machtrace_provide(void * arg,const dtrace_probedesc_t * desc)766 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
767 {
768 #pragma unused(arg) /* __APPLE__ */
769
770 int i;
771
772 if (desc != NULL) {
773 return;
774 }
775
776 machtrace_init(mach_trap_table, &machtrace_sysent);
777
778 for (i = 0; i < NSYSCALL; i++) {
779 if (machtrace_sysent[i].stsy_underlying == NULL) {
780 continue;
781 }
782
783 if (dtrace_probe_lookup(machtrace_id, NULL,
784 mach_syscall_name_table[i], "entry") != 0) {
785 continue;
786 }
787
788 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
789 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
790 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
791 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
792 "return", MACHTRACE_ARTIFICIAL_FRAMES,
793 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
794
795 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
796 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
797 }
798 }
799
800 /*ARGSUSED*/
801 static void
machtrace_destroy(void * arg,dtrace_id_t id,void * parg)802 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
803 {
804 #pragma unused(arg,id) /* __APPLE__ */
805 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
806
807 #pragma unused(sysnum) /* __APPLE__ */
808
809 /*
810 * There's nothing to do here but assert that we have actually been
811 * disabled.
812 */
813 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
814 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
815 } else {
816 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
817 }
818 }
819
820 /*ARGSUSED*/
821 static int
machtrace_enable(void * arg,dtrace_id_t id,void * parg)822 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
823 {
824 #pragma unused(arg) /* __APPLE__ */
825
826 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
827 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
828 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
829
830 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
831 machtrace_sysent[sysnum].stsy_entry = id;
832 } else {
833 machtrace_sysent[sysnum].stsy_return = id;
834 }
835
836 if (enabled) {
837 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
838 return 0;
839 }
840
841 lck_mtx_lock(&dtrace_systrace_lock);
842
843 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
844 /* It is not possible to write to mach_trap_table[] directly because it is const. */
845 vm_offset_t dss = ptrauth_nop_cast(vm_offset_t, &dtrace_machtrace_syscall);
846 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
847 }
848
849 lck_mtx_unlock(&dtrace_systrace_lock);
850
851 return 0;
852 }
853
854 /*ARGSUSED*/
855 static void
machtrace_disable(void * arg,dtrace_id_t id,void * parg)856 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
857 {
858 #pragma unused(arg,id) /* __APPLE__ */
859
860 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
861 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
862 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
863
864 if (disable) {
865 /*
866 * Usage of volatile protects the if statement below from being optimized away.
867 *
868 * Compilers are clever and know that const array values can't change in time
869 * and the if below is always false. That is because it can't see that DTrace
870 * injects dtrace_machtrace_syscall dynamically and violates constness of the
871 * array.
872 */
873 volatile const mach_trap_t *machtrap = &mach_trap_table[sysnum];
874
875 lck_mtx_lock(&dtrace_systrace_lock);
876 if (machtrap->mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
877 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying,
878 (vm_offset_t)&machtrap->mach_trap_function, sizeof(vm_offset_t));
879 }
880 lck_mtx_unlock(&dtrace_systrace_lock);
881 }
882
883 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
884 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
885 } else {
886 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
887 }
888 }
889
890 static dtrace_pattr_t machtrace_attr = {
891 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
892 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
893 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
894 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
895 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
896 };
897
898 static dtrace_pops_t machtrace_pops = {
899 .dtps_provide = machtrace_provide,
900 .dtps_provide_module = NULL,
901 .dtps_enable = machtrace_enable,
902 .dtps_disable = machtrace_disable,
903 .dtps_suspend = NULL,
904 .dtps_resume = NULL,
905 .dtps_getargdesc = NULL,
906 .dtps_getargval = machtrace_getarg,
907 .dtps_usermode = NULL,
908 .dtps_destroy = machtrace_destroy
909 };
910
911 static int
machtrace_attach(dev_info_t * devi)912 machtrace_attach(dev_info_t *devi)
913 {
914 machtrace_probe = dtrace_probe;
915 membar_enter();
916
917 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
918 DDI_PSEUDO, 0) == DDI_FAILURE ||
919 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
920 &machtrace_pops, NULL, &machtrace_id) != 0) {
921 machtrace_probe = (void*)&systrace_stub;
922 ddi_remove_minor_node(devi, NULL);
923 return DDI_FAILURE;
924 }
925
926 return DDI_SUCCESS;
927 }
928
929 d_open_t _systrace_open;
930
/*
 * Open handler for the systrace character device.  Every open succeeds;
 * none of the arguments are examined.
 */
int
_systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
	(void)dev;
	(void)flags;
	(void)devtype;
	(void)p;

	return 0;
}
937
938 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
939
940 static struct cdevsw systrace_cdevsw =
941 {
942 .d_open = _systrace_open,
943 .d_close = eno_opcl,
944 .d_read = eno_rdwrt,
945 .d_write = eno_rdwrt,
946 .d_ioctl = eno_ioctl,
947 .d_stop = eno_stop,
948 .d_reset = eno_reset,
949 .d_select = eno_select,
950 .d_mmap = eno_mmap,
951 .d_strategy = eno_strat,
952 .d_reserved_1 = eno_getc,
953 .d_reserved_2 = eno_putc,
954 };
955
956 void systrace_init( void );
957
958 void
systrace_init(void)959 systrace_init( void )
960 {
961 if (dtrace_sdt_probes_restricted()) {
962 return;
963 }
964
965 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
966
967 if (majdevno < 0) {
968 printf("systrace_init: failed to allocate a major number!\n");
969 return;
970 }
971
972 systrace_attach((dev_info_t*)(uintptr_t)majdevno);
973 machtrace_attach((dev_info_t*)(uintptr_t)majdevno);
974 }
975 #undef SYSTRACE_MAJOR
976
977 static uint64_t
systrace_getargval(void * arg,dtrace_id_t id,void * parg,int argno,int aframes)978 systrace_getargval(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
979 {
980 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
981 uint64_t val = 0;
982 uint64_t *uargs = NULL;
983
984 uthread_t uthread = current_uthread();
985
986 if (uthread) {
987 uargs = uthread->t_dtrace_syscall_args;
988 }
989 if (!uargs) {
990 return 0;
991 }
992 if (argno < 0 || argno >= SYSTRACE_NARGS) {
993 return 0;
994 }
995
996 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
997 val = uargs[argno];
998 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
999 return val;
1000 }
1001
1002 static void
systrace_getargdesc(void * arg,dtrace_id_t id,void * parg,dtrace_argdesc_t * desc)1003 systrace_getargdesc(void *arg, dtrace_id_t id, void *parg,
1004 dtrace_argdesc_t *desc)
1005 {
1006 #pragma unused(arg, id)
1007 int sysnum = SYSTRACE_SYSNUM(parg);
1008 uthread_t uthread = current_uthread();
1009 uint64_t *uargs = NULL;
1010
1011 if (!uthread) {
1012 desc->dtargd_ndx = DTRACE_ARGNONE;
1013 return;
1014 }
1015
1016 uargs = uthread->t_dtrace_syscall_args;
1017
1018 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
1019 systrace_entry_setargdesc(sysnum, desc->dtargd_ndx,
1020 desc->dtargd_native, sizeof(desc->dtargd_native));
1021 } else {
1022 systrace_return_setargdesc(sysnum, desc->dtargd_ndx,
1023 desc->dtargd_native, sizeof(desc->dtargd_native));
1024 }
1025
1026 if (desc->dtargd_native[0] == '\0') {
1027 desc->dtargd_ndx = DTRACE_ARGNONE;
1028 }
1029 }
1030
1031 static uint64_t
machtrace_getarg(void * arg,dtrace_id_t id,void * parg,int argno,int aframes)1032 machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1033 {
1034 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1035 uint64_t val = 0;
1036 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1037
1038 uthread_t uthread = current_uthread();
1039
1040 if (uthread) {
1041 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1042 }
1043
1044 if (!stack) {
1045 return 0;
1046 }
1047
1048 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1049 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1050 val = (uint64_t)*(stack + argno);
1051 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1052 return val;
1053 }
1054