xref: /xnu-8792.81.2/tests/ktrace/kperf_tests.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 // Copyright (c) 2017-2021 Apple Inc.  All rights reserved.
2 
3 #include <darwintest.h>
4 #include <darwintest_utils.h>
5 #include <dispatch/dispatch.h>
6 #include <inttypes.h>
7 #include <ktrace/session.h>
8 #include <ktrace/private.h>
9 #include <sys/kdebug.h>
10 #include <sys/syscall.h>
11 #include <kperf/kpc.h>
12 #include <kperf/kperf.h>
13 #include <kperfdata/kpdecode.h>
14 #include <os/assumes.h>
15 #include <stdint.h>
16 #include <sys/sysctl.h>
17 
18 #include "kperf_helpers.h"
19 #include "ktrace_helpers.h"
20 #include "ktrace_meta.h"
21 #include "../drop_priv.h"
22 
// Upper bounds for the static per-CPU and per-thread tracking arrays used by
// the tests below; actual counts are checked against these at runtime.
#define MAX_CPUS    64
#define MAX_THREADS 64

// Cleared by completion handlers to ask all spinning_thread() instances to
// exit their busy loops.
// NOTE(review): volatile is not a synchronization primitive; an atomic_bool
// would be the strictly-correct choice for this cross-thread flag -- confirm
// this matches the project's conventions before changing.
volatile static bool running_threads = true;
27 
28 static void *
spinning_thread(void * semp)29 spinning_thread(void *semp)
30 {
31 	T_QUIET;
32 	T_ASSERT_NOTNULL(semp, "semaphore passed to thread should not be NULL");
33 	dispatch_semaphore_signal(*(dispatch_semaphore_t *)semp);
34 
35 	while (running_threads) {
36 		;
37 	}
38 	return NULL;
39 }
40 
// kperf trace event IDs matched by the tests below.  The stack-header values
// are spelled as raw constants to match exactly what the kernel emits.
#define PERF_STK_KHDR   UINT32_C(0x25020014)
#define PERF_STK_UHDR   UINT32_C(0x25020018)
#define PERF_TMR_FIRE   KDBG_EVENTID(DBG_PERF, 3, 0)
#define PERF_TMR_HNDLR  KDBG_EVENTID(DBG_PERF, 3, 2)
#define PERF_TMR_PEND   KDBG_EVENTID(DBG_PERF, 3, 3)
#define PERF_TMR_SKIP   KDBG_EVENTID(DBG_PERF, 3, 4)
#define PERF_KPC_CONFIG KDBG_EVENTID(DBG_PERF, 6, 4)
#define PERF_KPC_REG    KDBG_EVENTID(DBG_PERF, 6, 5)
#define PERF_KPC_REG32  KDBG_EVENTID(DBG_PERF, 6, 7)
#define PERF_INSTR_DATA KDBG_EVENTID(DBG_PERF, 1, 17)
#define PERF_EVENT      KDBG_EVENTID(DBG_PERF, 0, 0)
#define PERF_DISPLABEL  KDBG_EVENTID(DBG_PERF, 1, 23)
#define PERF_DISPSAMPLE KDBG_EVENTID(DBG_PERF, 1, 10)

// Scheduler events used to track which threads are on-core on each CPU.
#define SCHED_DISPATCH KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_DISPATCH)
#define SCHED_SWITCH KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_SCHED)
#define SCHED_HANDOFF KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_STACK_HANDOFF)
#define SCHED_IDLE KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_IDLE)

#define MP_CPUS_CALL UINT32_C(0x1900004)

// Marker event emitted by start_tracing_with_timeout() to confirm tracing is
// live before arming the end-of-trace timeout.
#define DISPATCH_AFTER_EVENT UINT32_C(0xfefffffc)
#define TIMEOUT_SECS 10

// Period programmed into the kperf timer by these tests.
#define TIMER_PERIOD_NS (1 * NSEC_PER_MSEC)
66 
67 static void
start_tracing_with_timeout(ktrace_session_t s,unsigned int timeout_secs)68 start_tracing_with_timeout(ktrace_session_t s, unsigned int timeout_secs)
69 {
70 	// Only set the timeout after we've seen an event that was traced by us.
71 	// This helps set a reasonable timeout after we're guaranteed to get a
72 	// few events.
73 	dispatch_queue_t q = dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0);
74 
75 	ktrace_events_single(s, DISPATCH_AFTER_EVENT,
76 	    ^(__unused struct trace_point *tp)
77 	{
78 		T_LOG("arming timer to stop tracing after %d seconds", timeout_secs);
79 		dispatch_after(dispatch_time(DISPATCH_TIME_NOW,
80 		    timeout_secs * NSEC_PER_SEC), q, ^{
81 			T_LOG("ending tracing due to timeout");
82 			ktrace_end(s, 0);
83 		});
84 	});
85 	ktrace_set_collection_interval(s, 100);
86 
87 	T_ASSERT_POSIX_ZERO(ktrace_start(s, q), "start ktrace");
88 
89 	kdebug_trace(DISPATCH_AFTER_EVENT, 0, 0, 0, 0);
90 	T_LOG("trace point emitted");
91 }
92 
93 static void
configure_kperf_timer_samplers(uint64_t period_ns,uint32_t samplers)94 configure_kperf_timer_samplers(uint64_t period_ns, uint32_t samplers)
95 {
96 	T_SETUPBEGIN;
97 
98 	(void)kperf_action_count_set(1);
99 	T_QUIET;
100 	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1, samplers),
101 	    NULL);
102 	(void)kperf_timer_count_set(1);
103 	T_QUIET;
104 	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
105 	    kperf_ns_to_ticks(period_ns)), NULL);
106 	T_QUIET;
107 	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);
108 
109 	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
110 
111 	T_SETUPEND;
112 }
113 
114 static double
timestamp_secs(ktrace_session_t s,uint64_t timestamp)115 timestamp_secs(ktrace_session_t s, uint64_t timestamp)
116 {
117 	uint64_t ns = 0;
118 	T_QUIET;
119 	T_ASSERT_POSIX_ZERO(ktrace_convert_timestamp_to_nanoseconds(s, timestamp,
120 	    &ns), NULL);
121 	return (double)ns / NSEC_PER_SEC;
122 }
123 
124 #pragma mark - timers
125 
126 // Ensure that kperf is correctly sampling CPUs that are actively scheduling by
127 // bringing up threads and ensuring that threads on-core are sampled by each
128 // timer fire.
129 
T_DECL(kperf_sample_active_cpus,
    "make sure that kperf samples all active CPUs")
{
	start_controlling_ktrace();

	T_SETUPBEGIN;

	int ncpus = dt_ncpu();
	T_QUIET;
	T_ASSERT_LT(ncpus, MAX_CPUS,
	    "only supports up to %d CPUs", MAX_CPUS);
	T_LOG("found %d CPUs", ncpus);

	// Leave one CPU free for the main thread and trace collection.
	int nthreads = ncpus - 1;
	T_QUIET;
	T_ASSERT_LT(nthreads, MAX_THREADS,
	    "only supports up to %d threads", MAX_THREADS);

	static pthread_t threads[MAX_THREADS];

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	__block uint64_t nfires = 0;
	__block uint64_t nsamples = 0;
	static uint64_t idle_tids[MAX_CPUS] = { 0 };
	// Running sum of per-fire ratios of samples seen to samples expected;
	// averaged over nfires at completion to get the saturation percentage.
	__block double sum_saturation = 0;
	__block uint64_t last_nsamples = 0;

	// As a test debugging aid, take an additonal argument that specifies the
	// number of fires to stop tracing after.  This also turns on additional
	// logging of scheduler trace events.
	int stopafter = 0;
	if (argc > 0) {
		stopafter = atoi(argv[0]);
		if (stopafter < 0) {
			T_ASSERT_FAIL("argument must be positive");
		}
	}

	// Record the first and last timestamps seen, to report trace duration.
	static uint64_t first_timestamp = 0;
	static uint64_t last_timestamp = 0;
	ktrace_events_any(s, ^(struct trace_point *tp) {
		if (first_timestamp == 0) {
			first_timestamp = tp->timestamp;
		}
		last_timestamp = tp->timestamp;
	});

	ktrace_set_completion_handler(s, ^{
		T_LOG("stopping threads");

		running_threads = false;

		for (int i = 0; i < nthreads; i++) {
			T_QUIET;
			T_ASSERT_POSIX_ZERO(pthread_join(threads[i], NULL), NULL);
		}

		double saturation = sum_saturation / nfires * 100;

		T_LOG("over %.1f seconds, saw %" PRIu64 " timer fires, %" PRIu64
			" samples, %g samples/fire, %.2f%% saturation",
			timestamp_secs(s, last_timestamp - first_timestamp), nfires,
			nsamples, (double)nsamples / (double)nfires, saturation);
		T_ASSERT_GT(saturation, 95.0,
		    "saw reasonable percentage of full samples");

		T_END;
	});

	// Track which threads are running on each CPU.
	static uint64_t tids_on_cpu[MAX_CPUS] = { 0 };
	void (^switch_cb)(struct trace_point *, const char *name) =
	    ^(struct trace_point *tp, const char *name) {
		uint64_t new_thread = tp->arg2;

		// Only record the incoming thread if it's not the CPU's idle thread.
		if (idle_tids[tp->cpuid] != new_thread) {
			tids_on_cpu[tp->cpuid] = new_thread;
		}

		if (stopafter) {
			T_LOG("%.7g: %s on %d: %llx", timestamp_secs(s, tp->timestamp),
			    name, tp->cpuid, tp->arg2);
		}
	};

	ktrace_events_single(s, SCHED_SWITCH, ^(struct trace_point *tp) {
		switch_cb(tp, "switch");
	});
	ktrace_events_single(s, SCHED_HANDOFF, ^(struct trace_point *tp) {
		switch_cb(tp, "hndoff");
	});

	// Determine the thread IDs of the idle threads on each CPU.
	ktrace_events_single(s, SCHED_IDLE, ^(struct trace_point *tp) {
		if (tp->debugid & DBG_FUNC_END) {
			return;
		}
		tids_on_cpu[tp->cpuid] = 0;
		idle_tids[tp->cpuid] = tp->threadid;
		if (stopafter) {
			T_LOG("%.7g: idle on %d: %llx", timestamp_secs(s, tp->timestamp),
			    tp->cpuid, tp->threadid);
		}
	});

	// On each timer fire, go through all the cores and mark any threads
	// that should be sampled.

	__block int last_fire_cpu = -1;
	// sample_missing[i] is set when CPU i had a runnable thread at the last
	// fire but no handler sample has been seen for it yet.
	static bool sample_missing[MAX_CPUS] = { false };
	// Snapshot of tids_on_cpu at the last fire, for logging missed samples.
	static uint64_t tids_snap[MAX_CPUS] = { 0 };
	__block int nexpected = 0;
	__block int nextra = 0;
	__block int nidles = 0;

	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		T_QUIET; T_ASSERT_EQ((tp->debugid & DBG_FUNC_START), 0,
		    "no timer fire start events are allowed");
		int last_expected = nexpected;
		nfires++;

		nexpected = 0;
		for (int i = 0; i < ncpus; i++) {
			// Report CPUs whose expected sample from the previous fire
			// never showed up.
			if (sample_missing[i]) {
				T_LOG("missed sample on CPU %d for thread %#llx from "
				    "timer on CPU %d (expected %d samples)",
				    tp->cpuid, tids_snap[i], last_fire_cpu, last_expected);
				sample_missing[i] = false;
			}

			// A non-idle thread is on-core; expect this fire to sample it.
			if (tids_on_cpu[i] != 0) {
				tids_snap[i] = tids_on_cpu[i];
				sample_missing[i] = true;
				nexpected++;
			}
		}
		if (stopafter) {
			T_LOG("%.7g: FIRE on %d: %d extra, %d idles",
			    timestamp_secs(s, tp->timestamp), tp->cpuid, nextra, nidles);
		}

		// The first fire has no previous expectations to evaluate.
		if (nfires == 1) {
			return;
		}

		if (last_expected == 0) {
			sum_saturation += 1;
		} else {
			sum_saturation += (double)(nsamples - last_nsamples) /
			    last_expected;
		}
		last_nsamples = nsamples;
		nextra = 0;
		nidles = 0;

		T_QUIET;
		T_ASSERT_LT((int)tp->cpuid, ncpus,
		    "timer fire should not occur on an IOP");
		last_fire_cpu = (int)tp->cpuid;

		if (stopafter && (uint64_t)stopafter == nfires) {
			ktrace_end(s, 1);
		}
	});

	// On the timer handler for each CPU, unset the missing sample bitmap.

	ktrace_events_single(s, PERF_TMR_HNDLR, ^(struct trace_point *tp) {
		nsamples++;
		// NOTE(review): this looks like it should be `>= ncpus` -- a handler
		// with cpuid == ncpus would fall through and index one past the
		// per-CPU arrays below.  Confirm against the fire handler above,
		// which asserts cpuid < ncpus.
		if ((int)tp->cpuid > ncpus) {
		        // Skip IOPs; they're not scheduling any relevant threads.
		        return;
		}

		if (!sample_missing[tp->cpuid] && idle_tids[tp->cpuid] != 0) {
			T_LOG("sampled additional thread %llx on CPU %d", tp->threadid,
			    tp->cpuid);
			nextra++;
		}
		if (tp->threadid == idle_tids[tp->cpuid]) {
			T_LOG("sampled idle thread on CPU %d", tp->cpuid);
			nidles++;
		}
		sample_missing[tp->cpuid] = false;
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_KSTACK);

	T_SETUPEND;

	start_tracing_with_timeout(s, TIMEOUT_SECS);

	// Create threads to bring up all of the CPUs.

	dispatch_semaphore_t thread_spinning = dispatch_semaphore_create(0);

	for (int i = 0; i < nthreads; i++) {
		T_QUIET;
		T_ASSERT_POSIX_ZERO(
			pthread_create(&threads[i], NULL, &spinning_thread,
			&thread_spinning), NULL);
		// Wait for each thread to start spinning before creating the next.
		dispatch_semaphore_wait(thread_spinning, DISPATCH_TIME_FOREVER);
	}

	T_LOG("spun up %d thread%s", nthreads, nthreads == 1 ? "" : "s");

	dispatch_main();
}
341 
342 #define FIRES_THRESHOLD (5000)
343 
T_DECL(kperf_timer_fires_enough_times,
    "ensure the correct number of timers fire in a period of time")
{
	start_controlling_ktrace();

	dispatch_semaphore_t thread_spinning = dispatch_semaphore_create(0);

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	__block uint64_t nfires = 0;
	__block uint64_t first_fire_ns = 0;
	__block uint64_t last_fire_ns = 0;

	int ncpus = dt_ncpu();

	// Count fires and record the first and last fire times so the observed
	// fire rate can be compared against the programmed period.
	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		nfires++;
		if (first_fire_ns == 0) {
			ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
			    &first_fire_ns);
		}
		ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
		    &last_fire_ns);

		T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
		    "timer fire should not occur on an IOP");
		if (nfires >= FIRES_THRESHOLD) {
			ktrace_end(s, 1);
		}
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_KSTACK);

	// Keep one CPU busy so the timer always has something to sample.
	pthread_t thread;
	T_QUIET;
	T_ASSERT_POSIX_ZERO(pthread_create(&thread, NULL, &spinning_thread,
	    &thread_spinning), NULL);
	dispatch_semaphore_wait(thread_spinning, DISPATCH_TIME_FOREVER);

	ktrace_set_completion_handler(s, ^{
		running_threads = false;

		double duration_secs = (double)(last_fire_ns - first_fire_ns) /
		    NSEC_PER_SEC;
		T_LOG("stopping thread after %.2f seconds", duration_secs);

		T_QUIET; T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);

		T_LOG("saw %" PRIu64 " timer fires (%g fires/second)", nfires,
		    (double)nfires / (double)duration_secs);
		double expected_nfires = duration_secs * NSEC_PER_SEC / TIMER_PERIOD_NS;
		T_LOG("expecting %g timer fires", expected_nfires);
		// NOTE(review): this is the ratio of expected-to-observed fires
		// (below 100% means MORE fires were seen than expected), so the name
		// reads inverted; the two bounds below still constrain both
		// directions -- confirm intent before renaming.
		double nfires_seen_pct = expected_nfires / nfires * 100;
		T_ASSERT_GT(nfires_seen_pct, 95.0,
		    "saw reasonable number of missed timer fires");
		T_ASSERT_LT(nfires_seen_pct, 105.0,
			"saw reasonable number of extra timer fires");

		T_END;
	});

	start_tracing_with_timeout(s, TIMEOUT_SECS);

	dispatch_main();
}
411 
// kperf_timer_not_oversampling ensures that the profiling timer fires are
// spaced apart by the programmed timer period.  Otherwise, tools that rely on
// sample count as a proxy for CPU usage will over-estimate.

// Minimum acceptable time between fires: the programmed period less 5% slop.
#define FIRE_PERIOD_THRESHOLD_NS \
		(TIMER_PERIOD_NS - (uint64_t)(TIMER_PERIOD_NS * 0.05))
418 
// Header for a fixed-capacity circular queue.  Element storage is laid out
// immediately after this header (see cirq_get), so concrete queue types embed
// `struct cirq` as their first member (e.g. struct cirq_instval below).
struct cirq {
	unsigned int nslots;    // capacity, in elements
	unsigned int tail_slot; // next slot to be overwritten by cirq_push
	unsigned int slot_size; // size of each element, in bytes
};

// Initialize a cirq header for NSLOTS elements of TYPE.
#define CIRQ_INIT(TYPE, NSLOTS) \
	(struct cirq){ \
		.nslots = NSLOTS, .tail_slot = 0, .slot_size = sizeof(TYPE), \
	}
429 
430 static inline void *
cirq_get(struct cirq * cq,unsigned int i)431 cirq_get(struct cirq *cq, unsigned int i)
432 {
433 	return (char *)cq + sizeof(*cq) + (cq->slot_size * i);
434 }
435 
436 static void *
cirq_top(void * vcq)437 cirq_top(void *vcq)
438 {
439 	struct cirq *cq = vcq;
440 	unsigned int tail_slot = cq->tail_slot;
441 	unsigned int top_slot = (tail_slot > 0 ? tail_slot : cq->nslots) - 1;
442 	return cirq_get(cq, top_slot);
443 }
444 
445 static void *
cirq_push(void * vcq)446 cirq_push(void *vcq)
447 {
448 	struct cirq *cq = vcq;
449 	unsigned int tail_slot = cq->tail_slot;
450 	unsigned int next_slot = tail_slot == cq->nslots - 1 ? 0 : tail_slot + 1;
451 	cq->tail_slot = next_slot;
452 	return cirq_get(cq, tail_slot);
453 }
454 
455 static void
456 cirq_for(void *vcq, void (^iter)(void *elt))
457 {
458 	struct cirq *cq = vcq;
459 	for (unsigned int i = cq->tail_slot; i < cq->nslots; i++) {
460 		iter(cirq_get(cq, i));
461 	}
462 	for (unsigned int i = 0; i < cq->tail_slot; i++) {
463 		iter(cirq_get(cq, i));
464 	}
465 }
466 
// Number of most-recent entries retained in each history queue below.
#define HISTORY_LEN 5

// A value paired with the timestamp (in ns) at which it was observed.
struct instval {
	uint64_t iv_instant_ns;
	uint64_t iv_val;
};

// Circular queue of instvals; storage follows the header directly, as
// cirq_get() requires.
struct cirq_instval {
	struct cirq cq;
	struct instval elts[HISTORY_LEN];
};

// Circular queue of plain 64-bit values.
struct cirq_u64 {
	struct cirq cq;
	uint64_t elts[HISTORY_LEN];
};

// Per-CPU histories of decrementer (timer interrupt) latencies and kperf
// timer fire latencies, used to diagnose short fire periods.
struct cpu_oversample {
	struct cirq_instval timer_latencies;
	struct cirq_instval fire_latencies;
};
488 
// Dump the recorded history of decrementer latencies and timer fire latencies
// for one CPU, oldest entry first.  The index counts up from -HISTORY_LEN so
// the most recent entry is labeled -1.
static void
cpu_oversample_log(struct cpu_oversample *cpu, unsigned int cpuid)
{
	T_LOG("CPU %d timer latencies:", cpuid);
	__block int i = -HISTORY_LEN;
	cirq_for(&cpu->timer_latencies, ^(void *viv) {
		struct instval *iv = viv;
		T_LOG("\t%llu timer latency %d: %llu", iv->iv_instant_ns, i,
		    iv->iv_val);
		i++;
	});

	T_LOG("CPU %d fire latencies:", cpuid);
	i = -HISTORY_LEN;
	cirq_for(&cpu->fire_latencies, ^(void *viv) {
		struct instval *iv = viv;
		T_LOG("\t%llu fire latency %d: %llu", iv->iv_instant_ns, i, iv->iv_val);
		i++;
	});
}
509 
T_DECL(kperf_timer_not_oversampling,
    "ensure that time between fires is long enough")
{
	start_controlling_ktrace();

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	// Try not to perturb the system with more work.
	ktrace_set_collection_interval(s, 1000);
	__block uint64_t nfires = 0;
	__block uint64_t first_fire_ns = 0;
	__block uint64_t last_fire_ns = 0;
	__block unsigned int last_fire_cpuid = 0;

	int ncpus = dt_ncpu();
	T_QUIET; T_ASSERT_GT(ncpus, 0, "should see positive number of CPUs");

	// Per-CPU latency histories, used to diagnose any short fire periods.
	struct cpu_oversample *per_cpu = calloc((unsigned int)ncpus,
			sizeof(per_cpu[0]));
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NOTNULL(per_cpu, "allocated timer latency tracking");
	for (int i = 0; i < ncpus; i++) {
		per_cpu[i].timer_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
		per_cpu[i].fire_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
	}

	__block bool in_stackshot = false;
	__block uint64_t last_stackshot_ns = 0;

	// Stackshots are the primary source of interrupt latency on the system.
	ktrace_events_single(s, KDBG_EVENTID(DBG_BSD, DBG_BSD_EXCP_SC,
			SYS_stack_snapshot_with_config), ^(struct trace_point *tp) {
		bool start = tp->debugid & DBG_FUNC_START;
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		T_LOG("%llu: %s stackshot syscall from process %s",
				cur_ns, start ? "start" : "finish", tp->command);
		in_stackshot = start;
		if (!start) {
			last_stackshot_ns = cur_ns;
		}
	});

	// History of recent fire times across all CPUs, for diagnostics.
	struct cirq_u64 *fire_times = calloc(1, sizeof(*fire_times));
	T_ASSERT_NOTNULL(fire_times, "allocated fire time tracking");
	fire_times->cq = CIRQ_INIT(uint64_t, HISTORY_LEN);

	// Track the decrementer's latency values to find any unexpectedly long
	// interrupt latencies that could affect the firing cadence.
	ktrace_events_single(s, DECR_TRAP_LATENCY,
			^(struct trace_point *tp) {
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		// arg1 carries the negated latency; 0 - arg1 recovers its magnitude.
		uint64_t latency_ns = ns_from_abs(s, 0 - tp->arg1);
		struct instval *latency = cirq_push(&per_cpu[tp->cpuid].timer_latencies);
		latency->iv_instant_ns = cur_ns;
		latency->iv_val = latency_ns;
	});

	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
				"timer fire should not occur on an IOP");

		nfires++;
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		uint64_t *fire_ns = cirq_push(fire_times);
		*fire_ns = cur_ns;

		struct cpu_oversample *cur_cpu = &per_cpu[tp->cpuid];
		struct instval *last_timer_latency = cirq_top(
				&cur_cpu->timer_latencies);
		uint64_t timer_latency_ns = last_timer_latency->iv_val;

		if (first_fire_ns == 0) {
			// First fire: nothing to compare against yet.
			first_fire_ns = cur_ns;
		} else {
			struct cpu_oversample *last_cpu = &per_cpu[last_fire_cpuid];
			struct instval *last_latency = cirq_top(&last_cpu->fire_latencies);
			uint64_t last_fire_latency_ns = last_latency->iv_val;

			if (timer_latency_ns > TIMER_PERIOD_NS / 4) {
				T_LOG("%llu: long timer latency at fire: %llu", cur_ns,
						timer_latency_ns);
			}

			// Long interrupt latencies will cause the timer to miss its fire
			// time and report a fire past when it should have, making the next
			// period too short.  Keep track of the latency as a leeway
			// adjustment for the subsequent fire.
			uint64_t fire_period_ns = cur_ns - last_fire_ns;
			uint64_t fire_period_adj_ns = fire_period_ns +
			    last_fire_latency_ns + timer_latency_ns;
			// Within 5% is still a valid period -- adjust this for running in
			// potentially-noisy automation.
			uint64_t fire_period_pct_adj_ns = fire_period_adj_ns +
			    FIRE_PERIOD_THRESHOLD_NS / 20;
			if (fire_period_adj_ns < FIRE_PERIOD_THRESHOLD_NS &&
			    fire_period_pct_adj_ns >= FIRE_PERIOD_THRESHOLD_NS) {
				T_LOG("ignoring period of %llu within 5%% of expected %llu",
				    fire_period_adj_ns, fire_period_pct_adj_ns);
			}
			bool too_short = fire_period_pct_adj_ns < FIRE_PERIOD_THRESHOLD_NS;
			if (too_short) {
				T_LOG("%llu: period of timer fire %llu is %llu + %llu + %llu = "
						"%llu < %llu",
						cur_ns, nfires, fire_period_ns, last_fire_latency_ns,
						timer_latency_ns, fire_period_adj_ns,
						FIRE_PERIOD_THRESHOLD_NS);

				T_LOG("short profile timer fired on CPU %d", tp->cpuid);
				cpu_oversample_log(cur_cpu, tp->cpuid);

				if (cur_cpu == last_cpu) {
					T_LOG("fired back-to-back on CPU %d", tp->cpuid);
				} else {
					T_LOG("previous profile timer fired on CPU %d",
							last_fire_cpuid);
					cpu_oversample_log(last_cpu, last_fire_cpuid);
				}

				T_LOG("profile timer fires:");
				cirq_for(fire_times, ^(void *vu64) {
					T_LOG("\tfire: %llu", *(uint64_t *)vu64);
				});
				// Early fires may be missing the context needed to judge
				// their periods; only assert once every CPU has fired.
				if (nfires < (unsigned int)ncpus) {
					T_LOG("ignoring timer fire %llu as context may be missing",
							nfires);
				} else {
					if (in_stackshot) {
						T_LOG("skipping assertion because stackshot is "
								"happening");
					} else if (last_stackshot_ns != 0 &&
							cur_ns > last_stackshot_ns &&
							cur_ns - last_stackshot_ns < TIMER_PERIOD_NS) {
						T_LOG("skipping assertion because stackshot happened "
								"%" PRIu64 "ns ago",
								cur_ns - last_stackshot_ns);
					} else {
						T_ASSERT_FAIL("profiling period is shorter than "
								"expected with no stackshot interference");
					}
				}
			}

			struct instval *latency = cirq_push(&cur_cpu->fire_latencies);
			latency->iv_instant_ns = cur_ns;
			latency->iv_val = timer_latency_ns;

			// Snapshot this timer fire's interrupt latency, so the next one
			// can make an adjustment to the period.
			// NOTE(review): this assigns a block-local variable that goes out
			// of scope immediately; the cirq_push just above already records
			// the latency, so this statement looks like a dead store --
			// confirm it can be removed.
			last_fire_latency_ns = timer_latency_ns;
		}
		last_fire_ns = cur_ns;
		last_fire_cpuid = tp->cpuid;

		if (nfires >= FIRES_THRESHOLD) {
			ktrace_end(s, 1);
		}
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_TINFO);

	ktrace_set_completion_handler(s, ^{
		double duration_secs = (double)(last_fire_ns - first_fire_ns) /
		    NSEC_PER_SEC;
		T_LOG("stopping trace after %.2f seconds", duration_secs);

		T_PASS("saw %" PRIu64 " timer fires (%g fires/second) without "
			"oversampling", nfires, (double)nfires / (double)duration_secs);

		T_END;
	});

	start_tracing_with_timeout(s, 5);

	// Get all CPUs out of idle.
	uint64_t *counts = kpc_counterbuf_alloc();
	(void)kpc_get_cpu_counters(true,KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
	free(counts);

	dispatch_main();
}
690 
691 T_DECL(kperf_timer_stress, "repeatedly enable and disable timers")
692 {
693 	start_controlling_ktrace();
694 
695 	const int niters = 500;
696 	for (int i = 0; i < niters; i++) {
697 		configure_kperf_stacks_timer(-1, 1, true);
698 		T_QUIET;
699 		T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
700 		usleep(2000);
701 		kperf_reset();
702 	}
703 	T_LOG("configured kperf with a timer %d times", niters);
704 }
705 
706 #pragma mark - kdebug triggers
707 
// How long expect_kdebug_trigger() waits for samples before giving up.
#define KDEBUG_TRIGGER_TIMEOUT_NS (10 * NSEC_PER_SEC)

// An event that must NOT match any of the trigger filters under test; used
// as an end-of-test sentinel by expect_kdebug_trigger().
#define NON_TRIGGER_CLASS    UINT32_C(0xfd)
#define NON_TRIGGER_SUBCLASS UINT32_C(0xff)
#define NON_TRIGGER_CODE     UINT32_C(0xff)

#define NON_TRIGGER_EVENT \
	        (KDBG_EVENTID(NON_TRIGGER_CLASS, NON_TRIGGER_SUBCLASS, \
	        NON_TRIGGER_CODE))
717 
// Configure a kperf kdebug trigger from `filter_desc`, emit each of the
// `n_debugids` events, and expect one kernel and one user stack sample per
// triggering event.  Tracing ends when a sentinel non-matching event (emitted
// after all the triggers) is observed, or after a fail-safe timeout.
static void
expect_kdebug_trigger(const char *filter_desc, const uint32_t *debugids,
    unsigned int n_debugids)
{
	// Each triggering event increments these; each stack sample decrements.
	// A positive value at completion means samples were missed.
	__block int missing_kernel_stacks = 0;
	__block int missing_user_stacks = 0;
	ktrace_session_t s;
	kperf_kdebug_filter_t filter;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	ktrace_events_single(s, PERF_STK_KHDR, ^(struct trace_point *tp) {
		missing_kernel_stacks--;
		T_LOG("saw kernel stack with %" PRIu64 " frames, flags = %#"
		PRIx64, tp->arg2, tp->arg1);
	});
	ktrace_events_single(s, PERF_STK_UHDR, ^(struct trace_point *tp) {
		missing_user_stacks--;
		T_LOG("saw user stack with %" PRIu64 " frames, flags = %#"
		PRIx64, tp->arg2, tp->arg1);
	});

	for (unsigned int i = 0; i < n_debugids; i++) {
		ktrace_events_single(s, debugids[i], ^(struct trace_point *tp) {
			missing_kernel_stacks++;
			missing_user_stacks++;
			T_LOG("saw event with debugid 0x%" PRIx32, tp->debugid);
		});
	}

	// The sentinel doesn't match any filter; seeing it means all the
	// triggering events emitted before it have already been processed.
	ktrace_events_single(s, NON_TRIGGER_EVENT,
	    ^(__unused struct trace_point *tp)
	{
		ktrace_end(s, 0);
	});

	ktrace_set_completion_handler(s, ^{
		T_EXPECT_LE(missing_kernel_stacks, 0, NULL);
		T_EXPECT_LE(missing_user_stacks, 0, NULL);

		ktrace_session_destroy(s);
		T_END;
	});

	kperf_reset();

	// Action 1 samples both kernel and user stacks...
	(void)kperf_action_count_set(1);
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
	    KPERF_SAMPLER_KSTACK | KPERF_SAMPLER_USTACK), NULL);

	// ...and is triggered by kdebug events matching `filter_desc`.
	filter = kperf_kdebug_filter_create();
	T_ASSERT_NOTNULL(filter, NULL);

	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_action_set(1), NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_filter_add_desc(filter, filter_desc),
	    NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_filter_set(filter), NULL);
	kperf_kdebug_filter_destroy(filter);

	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), NULL);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Trace the triggering events.

	for (unsigned int i = 0; i < n_debugids; i++) {
		T_ASSERT_POSIX_SUCCESS(kdebug_trace(debugids[i], 0, 0, 0, 0), NULL);
	}

	T_ASSERT_POSIX_SUCCESS(kdebug_trace(NON_TRIGGER_EVENT, 0, 0, 0, 0), NULL);

	// Fail-safe: stop tracing even if the sentinel is never observed.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, KDEBUG_TRIGGER_TIMEOUT_NS),
	    dispatch_get_main_queue(), ^(void)
	{
		ktrace_end(s, 1);
	});
}
797 
// Classes/subclass/code used to build events that SHOULD match the kdebug
// trigger filters in the tests below.  TRIGGER_CLASS_END events are expected
// to match only when they carry DBG_FUNC_END (the "r" suffix in the filter
// descriptions).
#define TRIGGER_CLASS     UINT32_C(0xfe)
#define TRIGGER_CLASS_END UINT32_C(0xfd)
#define TRIGGER_SUBCLASS  UINT32_C(0xff)
#define TRIGGER_CODE      UINT32_C(0)
#define TRIGGER_DEBUGID \
	        (KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, TRIGGER_CODE))
804 
805 T_DECL(kperf_kdebug_trigger_classes,
806     "test that kdebug trigger samples on classes")
807 {
808 	start_controlling_ktrace();
809 
810 	const uint32_t class_debugids[] = {
811 		KDBG_EVENTID(TRIGGER_CLASS, 1, 1),
812 		KDBG_EVENTID(TRIGGER_CLASS, 2, 1),
813 		KDBG_EVENTID(TRIGGER_CLASS_END, 1, 1) | DBG_FUNC_END,
814 		KDBG_EVENTID(TRIGGER_CLASS_END, 2, 1) | DBG_FUNC_END,
815 	};
816 
817 	expect_kdebug_trigger("C0xfe,C0xfdr", class_debugids,
818 	    sizeof(class_debugids) / sizeof(class_debugids[0]));
819 	dispatch_main();
820 }
821 
822 T_DECL(kperf_kdebug_trigger_subclasses,
823     "test that kdebug trigger samples on subclasses")
824 {
825 	start_controlling_ktrace();
826 
827 	const uint32_t subclass_debugids[] = {
828 		KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, 0),
829 		KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, 1),
830 		KDBG_EVENTID(TRIGGER_CLASS_END, TRIGGER_SUBCLASS, 0) | DBG_FUNC_END,
831 		KDBG_EVENTID(TRIGGER_CLASS_END, TRIGGER_SUBCLASS, 1) | DBG_FUNC_END
832 	};
833 
834 	expect_kdebug_trigger("S0xfeff,S0xfdffr", subclass_debugids,
835 	    sizeof(subclass_debugids) / sizeof(subclass_debugids[0]));
836 	dispatch_main();
837 }
838 
839 T_DECL(kperf_kdebug_trigger_debugids,
840     "test that kdebug trigger samples on debugids")
841 {
842 	start_controlling_ktrace();
843 
844 	const uint32_t debugids[] = {
845 		TRIGGER_DEBUGID
846 	};
847 
848 	expect_kdebug_trigger("D0xfeff0000", debugids,
849 	    sizeof(debugids) / sizeof(debugids[0]));
850 	dispatch_main();
851 }
852 
853 // TODO Set a single function specifier filter, expect not to trigger of all
854 // events from that class.
855 
856 static void
reset_kperf(void)857 reset_kperf(void)
858 {
859 	(void)kperf_reset();
860 	(void)sysctlbyname("kperf.debug_level", NULL, NULL, &(int){ 0 },
861 	    sizeof(int));
862 }
863 
T_DECL(kperf_kdbg_callstacks,
    "test that the kdbg_callstacks samples on syscalls")
{
	start_controlling_ktrace();

	ktrace_session_t s;
	__block bool saw_user_stack = false;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	// Make sure BSD events are traced in order to trigger samples on syscalls.
	ktrace_events_class(s, DBG_BSD, ^void (__unused struct trace_point *tp) {});

	// Any single user stack is sufficient; end tracing on the first one.
	ktrace_events_single(s, PERF_STK_UHDR, ^(__unused struct trace_point *tp) {
		saw_user_stack = true;
		ktrace_end(s, 1);
	});

	ktrace_set_completion_handler(s, ^{
		ktrace_session_destroy(s);

		T_EXPECT_TRUE(saw_user_stack,
		"saw user stack after configuring kdbg_callstacks");
		T_END;
	});

	// kperf_kdbg_callstacks_set is deprecated, but its legacy behavior is
	// exactly what this test covers.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
	T_ASSERT_POSIX_SUCCESS(kperf_kdbg_callstacks_set(1), NULL);
#pragma clang diagnostic pop
	T_ATEND(reset_kperf);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Fail-safe timeout in case no user stack is ever sampled.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
	    dispatch_get_main_queue(), ^(void) {
		ktrace_end(s, 1);
	});

	dispatch_main();
}
907 
908 #pragma mark - PET
909 
910 #define STACKS_WAIT_DURATION_NS (3 * NSEC_PER_SEC)
911 
// Trace for STACKS_WAIT_DURATION_NS, expecting at least one user and one
// kernel stack sample.  `setup` (may be NULL) can register additional event
// handlers on the session before tracing starts; `complete` is invoked from
// the completion handler after the expectations have been checked.  The
// caller must have already configured kperf; sampling is enabled here.
static void
expect_stacks_traced(void (^setup)(ktrace_session_t s), void (^complete)(void))
{
	ktrace_session_t s;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);
	if (setup) {
		setup(s);
	}

	__block unsigned int user_stacks = 0;
	__block unsigned int kernel_stacks = 0;

	ktrace_events_single(s, PERF_STK_UHDR, ^(__unused struct trace_point *tp) {
		user_stacks++;
	});
	ktrace_events_single(s, PERF_STK_KHDR, ^(__unused struct trace_point *tp) {
		kernel_stacks++;
	});

	ktrace_set_completion_handler(s, ^(void) {
		ktrace_session_destroy(s);
		T_EXPECT_GT(user_stacks, 0U, NULL);
		T_EXPECT_GT(kernel_stacks, 0U, NULL);
		complete();
	});

	T_QUIET; T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), NULL);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Stop sampling and end the trace after the wait duration.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, STACKS_WAIT_DURATION_NS),
	    dispatch_get_main_queue(), ^(void)
	{
		kperf_reset();
		ktrace_end(s, 0);
	});
}
952 
T_DECL(kperf_pet, "test that PET mode samples kernel and user stacks")
{
	start_controlling_ktrace();

	// Arm a 10ms stacks timer and designate timer 0 as the PET timer.
	configure_kperf_stacks_timer(-1, 10, false);
	T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL);

	expect_stacks_traced(NULL, ^(void) {
		T_END;
	});

	dispatch_main();
}
966 
T_DECL(kperf_lightweight_pet,
    "test that lightweight PET mode samples kernel and user stacks",
    T_META_ASROOT(true))
{
	start_controlling_ktrace();

	int set = 1;

	// Same PET setup as kperf_pet, but with the lightweight variant enabled
	// via sysctl before designating the PET timer.
	configure_kperf_stacks_timer(-1, 10, false);
	T_ASSERT_POSIX_SUCCESS(sysctlbyname("kperf.lightweight_pet", NULL, NULL,
	    &set, sizeof(set)), NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL);

	__block uint64_t nfires = 0;

	expect_stacks_traced(^(ktrace_session_t s) {
		// Additionally check each fire event names timer 0 and carries the
		// PET flag in arg2.
		ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
			nfires++;
			T_QUIET;
			T_ASSERT_EQ(tp->arg1, (uint64_t)0,
					"timer fire should have timer ID of 0");
			T_QUIET;
			T_ASSERT_EQ(tp->arg2, (uint64_t)1,
					"timer fire should have PET bit set");
		});
	}, ^(void) {
		T_ASSERT_GT(nfires, (uint64_t)0, "timer fired at least once");
		T_END;
	});

	dispatch_main();
}
999 
1000 T_DECL(kperf_pet_stress, "repeatedly enable and disable PET mode")
1001 {
1002 	start_controlling_ktrace();
1003 
1004 	const int niters = 500;
1005 	for (int i = 0; i < niters; i++) {
1006 		configure_kperf_stacks_timer(-1, 1, true);
1007 		T_QUIET; T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL);
1008 		T_QUIET;
1009 		T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
1010 		usleep(2000);
1011 		kperf_reset();
1012 	}
1013 
1014 	T_PASS("configured kperf PET %d times", niters);
1015 }
1016 
1017 #pragma mark - PMCs
1018 
1019 T_DECL(kperf_pmc_config_only,
1020     "shouldn't show PMC config events unless requested")
1021 {
1022 	start_controlling_ktrace();
1023 
1024 	__block bool saw_kpc_config = false;
1025 	__block bool saw_kpc_reg = false;
1026 
1027 	ktrace_session_t s = ktrace_session_create();
1028 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
1029 	ktrace_set_collection_interval(s, 100);
1030 
1031 	ktrace_events_single(s, PERF_KPC_CONFIG,
1032 	    ^(__unused struct trace_point *tp) {
1033 		saw_kpc_config = true;
1034 	});
1035 	ktrace_events_single(s, PERF_KPC_REG,
1036 	    ^(__unused struct trace_point *tp) {
1037 		saw_kpc_reg = true;
1038 	});
1039 	ktrace_events_single(s, PERF_KPC_REG32,
1040 	    ^(__unused struct trace_point *tp) {
1041 		saw_kpc_reg = true;
1042 	});
1043 
1044 	ktrace_set_completion_handler(s, ^{
1045 		ktrace_session_destroy(s);
1046 		T_EXPECT_FALSE(saw_kpc_config,
1047 		"should see no KPC configs without sampler enabled");
1048 		T_EXPECT_FALSE(saw_kpc_reg,
1049 		"should see no KPC registers without sampler enabled");
1050 		T_END;
1051 	});
1052 
1053 	uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
1054 	uint64_t *config = calloc(nconfigs, sizeof(*config));
1055 	config[0] = 0x02;
1056 	int ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
1057 	T_ASSERT_POSIX_SUCCESS(ret, "configured kpc");
1058 	T_QUIET;
1059 	T_ASSERT_POSIX_SUCCESS(kpc_set_counting(KPC_CLASS_CONFIGURABLE_MASK),
1060 	    "kpc_set_counting");
1061 
1062 	(void)kperf_action_count_set(1);
1063 	T_ATEND(reset_kperf);
1064 	T_QUIET;
1065 	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1, KPERF_SAMPLER_PMC_CPU),
1066 	    NULL);
1067 
1068 	(void)kperf_timer_count_set(1);
1069 	T_QUIET;
1070 	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
1071 	    kperf_ns_to_ticks(TIMER_PERIOD_NS)), NULL);
1072 	T_QUIET;
1073 	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);
1074 
1075 	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
1076 
1077 	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);
1078 
1079 	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
1080 	    dispatch_get_main_queue(), ^(void) {
1081 		ktrace_end(s, 1);
1082 	});
1083 
1084 	dispatch_main();
1085 }
1086 
1087 static void
skip_if_monotonic_unsupported(void)1088 skip_if_monotonic_unsupported(void)
1089 {
1090 	int r;
1091 	int supported = 0;
1092 	size_t supported_size = sizeof(supported);
1093 
1094 	r = sysctlbyname("kern.monotonic.supported", &supported, &supported_size,
1095 	    NULL, 0);
1096 	if (r < 0) {
1097 		T_WITH_ERRNO;
1098 		T_SKIP("could not find \"kern.monotonic.supported\" sysctl");
1099 	}
1100 
1101 	if (!supported) {
1102 		T_SKIP("monotonic is not supported on this platform");
1103 	}
1104 }
1105 
1106 #define INSTRS_CYCLES_UPPER 500
1107 #define INSTRS_CYCLES_LOWER 50
1108 
1109 T_DECL(kperf_sample_instrs_cycles,
1110     "ensure instructions and cycles are sampled")
1111 {
1112 	T_SETUPBEGIN;
1113 
1114 	skip_if_monotonic_unsupported();
1115 	start_controlling_ktrace();
1116 
1117 	ktrace_session_t sess = ktrace_session_create();
1118 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");
1119 	ktrace_set_collection_interval(sess, 100);
1120 
1121 	__block uint64_t ninstrs_cycles = 0;
1122 	__block uint64_t nzeroes = 0;
1123 	ktrace_events_single(sess, PERF_INSTR_DATA,
1124 	    ^(__unused struct trace_point *tp) {
1125 		ninstrs_cycles++;
1126 		if (tp->arg1 == 0) {
1127 			T_LOG("%llx (%s)\n", tp->threadid, tp->command);
1128 			nzeroes++;
1129 		}
1130 		if (ninstrs_cycles >= INSTRS_CYCLES_UPPER) {
1131 			ktrace_end(sess, 1);
1132 		}
1133 	});
1134 
1135 	ktrace_set_collection_interval(sess, 200);
1136 
1137 	ktrace_set_completion_handler(sess, ^{
1138 		T_EXPECT_GE(ninstrs_cycles, (uint64_t)INSTRS_CYCLES_LOWER,
1139 		    "saw enough instructions and cycles events");
1140 		T_EXPECT_EQ(nzeroes, UINT64_C(0),
1141 		    "saw no events with 0 instructions");
1142 		T_END;
1143 	});
1144 
1145 	(void)kperf_action_count_set(1);
1146 	T_ATEND(reset_kperf);
1147 	T_QUIET;
1148 	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
1149 	    KPERF_SAMPLER_TH_INSTRS_CYCLES), NULL);
1150 
1151 	(void)kperf_timer_count_set(1);
1152 	T_QUIET;
1153 	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
1154 	    kperf_ns_to_ticks(TIMER_PERIOD_NS)), NULL);
1155 	T_QUIET;
1156 	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);
1157 
1158 	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
1159 
1160 	T_ASSERT_POSIX_ZERO(ktrace_start(sess, dispatch_get_main_queue()),
1161 	    NULL);
1162 
1163 	T_SETUPEND;
1164 
1165 	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
1166 	    dispatch_get_main_queue(), ^(void) {
1167 		ktrace_end(sess, 1);
1168 	});
1169 
1170 	dispatch_main();
1171 }
1172 
1173 #define UNCOMMON_EVENTID 0xfeedfac0
1174 #define LABEL "this_is_a_test_label_that_is_very_long_and_will_be_truncated" \
1175 		"_but_needs_some_more_length_for_good_measure"
1176 #define MAX_LABEL_LEN 64
1177 static_assert(sizeof(LABEL) > MAX_LABEL_LEN,
1178     "should be larger than the max length of a label");
1179 
1180 T_DECL(kperf_dispatch_labels, "ensure dispatch queue labels are sampled")
1181 {
1182 	T_SETUPBEGIN;
1183 	start_controlling_ktrace();
1184 	ktrace_session_t sess = ktrace_session_create();
1185 	ktrace_set_collection_interval(sess, 200);
1186 
1187 	/*
1188 	 * Expect to see the label that was sampled by kperf.
1189 	 */
1190 	__block uint64_t nlabels = 0;
1191 	char obs_label[MAX_LABEL_LEN + 1] = {};
1192 	char *obs_label_b = obs_label;
1193 	char exp_label[MAX_LABEL_LEN + 1] = {};
1194 	strlcpy(exp_label, LABEL, sizeof(exp_label));
1195 	char *exp_label_b = exp_label;
1196 
1197 	ktrace_events_single(sess, PERF_DISPLABEL, ^(struct trace_point *tp) {
1198 		nlabels++;
1199 		size_t argsize = ktrace_is_kernel_64_bit(sess) ? 8 : 4;
1200 		strncat(obs_label_b, (char *)&tp->arg1, argsize);
1201 		strncat(obs_label_b, (char *)&tp->arg2, argsize);
1202 		strncat(obs_label_b, (char *)&tp->arg3, argsize);
1203 		strncat(obs_label_b, (char *)&tp->arg4, argsize);
1204 		if (tp->debugid & DBG_FUNC_END) {
1205 			ktrace_end(sess, 1);
1206 		}
1207 	});
1208 	ktrace_events_single(sess, PERF_DISPSAMPLE, ^(struct trace_point *tp) {
1209 		if (tp->debugid & DBG_FUNC_END) {
1210 			T_LOG("ending sample event returned %d (%s)",
1211 			    (int)tp->arg1, strerror((int)tp->arg1));
1212 		}
1213 	});
1214 	ktrace_events_single(sess, UNCOMMON_EVENTID,
1215 	    ^(__unused struct trace_point *tp) {
1216 		T_LOG("saw triggering event");
1217 	});
1218 	ktrace_set_completion_handler(sess, ^{
1219 		T_EXPECT_GT(nlabels, 0ULL, "saw %" PRIu64
1220 		    " dispatch queue label events", nlabels);
1221 		T_EXPECT_EQ_STR(obs_label_b, exp_label_b, "label string is correct");
1222 		T_END;
1223 	});
1224 
1225 	/*
1226 	 * Set up kperf to log a dispatch queue label when an uncommon event ID
1227 	 * is traced.
1228 	 */
1229 	(void)sysctlbyname("kperf.debug_level", NULL, NULL, &(int){ 2 }, sizeof(int));
1230 	(void)kperf_action_count_set(1);
1231 	T_ATEND(reset_kperf);
1232 	T_QUIET;
1233 	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
1234 	    KPERF_SAMPLER_TH_DISPATCH), NULL);
1235 	(void)kperf_kdebug_action_set(1);
1236 	kperf_kdebug_filter_t kdfilt = kperf_kdebug_filter_create();
1237 	T_QUIET;
1238 	T_ASSERT_NOTNULL(kdfilt, "create kdebug filter for kperf");
1239 	T_QUIET;
1240 	T_ASSERT_POSIX_SUCCESS(
1241 	    kperf_kdebug_filter_add_debugid(kdfilt, UNCOMMON_EVENTID),
1242 	    "add debugid to filter on");
1243 	T_QUIET;
1244 	T_ASSERT_POSIX_SUCCESS(
1245 	    kperf_kdebug_filter_set(kdfilt), "set filter for kperf");
1246 	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
1247 
1248 	T_ASSERT_POSIX_ZERO(ktrace_start(sess, dispatch_get_main_queue()),
1249 	    NULL);
1250 
1251 	/*
1252 	 * Create the queue and emit the sampling event from it.
1253 	 */
1254 	dispatch_queue_t testq = dispatch_queue_create(LABEL, 0);
1255 	T_ASSERT_NOTNULL(testq, "created queue with label: " LABEL);
1256 	dispatch_async(testq, ^{
1257 		kdebug_trace(UNCOMMON_EVENTID, 0, 0, 0, 0);
1258 		sleep(10);
1259 	});
1260 
1261 	T_SETUPEND;
1262 
1263 	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 5 * NSEC_PER_SEC),
1264 	    dispatch_get_main_queue(), ^(void) {
1265 		ktrace_end(sess, 1);
1266 	});
1267 
1268 	dispatch_main();
1269 }
1270 
1271 T_HELPER_DECL(kperf_blessed_helper,
1272     "drop priviliges and attempt to configure kperf")
1273 {
1274 	drop_priv();
1275 	(void)kperf_action_count_set(1);
1276 	errno = 0;
1277 	int ret = kperf_lazy_wait_action_set(1);
1278 	int error = errno;
1279 	exit(ret != 0 ? (error == 0) ? EINVAL : error : 0);
1280 }
1281 
1282 static pid_t
spawn_bless_helper(void)1283 spawn_bless_helper(void)
1284 {
1285 	char test_path[MAXPATHLEN] = { 0 };
1286 	int ret = proc_pidpath(getpid(), test_path, MAXPATHLEN);
1287 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "query path for current process");
1288 	T_LOG("spawning helper tool at %s", test_path);
1289 	char *child_argv[] = { test_path, "-n", "kperf_blessed_helper", NULL };
1290 	pid_t child_pid = -1;
1291 	int error = dt_launch_tool(&child_pid, child_argv, true, NULL, NULL);
1292 	T_ASSERT_POSIX_ZERO(error, "spawned child with pid %d", child_pid);
1293 	T_QUIET; T_ASSERT_GT(child_pid, 0, "child pid is valid");
1294 	return child_pid;
1295 }
1296 
/*
 * Resume the helper child with SIGCONT and reap it, returning the raw
 * wait(2) status for the caller to decode.
 */
static int
test_child_process(pid_t child_pid)
{
	int status = 0;
	int rc = kill(child_pid, SIGCONT);
	T_ASSERT_POSIX_SUCCESS(rc, "continuing child process");
	rc = waitpid(child_pid, &status, 0);
	T_ASSERT_POSIX_SUCCESS(rc, "waited on child process");
	return status;
}
1307 
1308 T_DECL(kperf_unblessed_refusal,
1309     "check that an unblessed child cannot access ktrace")
1310 {
1311 	T_SETUPBEGIN;
1312 	pid_t child_pid = spawn_bless_helper();
1313 	T_SETUPEND;
1314 	int status = test_child_process(child_pid);
1315 	int exit_status = WEXITSTATUS(status);
1316 	T_EXPECT_NE(exit_status, 0, "child failed to access ktrace with %d (%s)",
1317 		exit_status, exit_status == 0 ? "ok" : strerror(exit_status));
1318 }
1319 
/*
 * A child blessed via kperf_bless_set() should be able to configure
 * kperf even after dropping privileges.
 */
T_DECL(kperf_blessed_ownership, "check that a blessed child can access ktrace")
{
	T_LOG("parent pid is %d\n", getpid());
	T_SETUPBEGIN;
	pid_t child_pid = spawn_bless_helper();
	T_SETUPEND;
	/* Bless the child before resuming it. */
	int ret = kperf_bless_set(child_pid);
	T_ASSERT_POSIX_SUCCESS(ret, "blessed child with pid %d", child_pid);
	int status = test_child_process(child_pid);
	/* The helper exits 0 on success, or an errno value on failure. */
	int exit_status = WEXITSTATUS(status);
	T_EXPECT_EQ(exit_status, 0, "child status was %d (%s)", exit_status,
	    exit_status == 0 ? "ok" : strerror(exit_status));
	/* Clear the blessing so later tests start from a clean state. */
	ret = kperf_bless_set(-1);
	T_ASSERT_POSIX_SUCCESS(ret, "removed blessing from child");
}
1335