xref: /xnu-10063.141.1/tests/ktrace/kperf_tests.c (revision d8b80295118ef25ac3a784134bcf95cd8e88109f)
1 // Copyright (c) 2017-2021 Apple Inc.  All rights reserved.
2 
3 #include <darwintest.h>
4 #include <darwintest_utils.h>
5 #include <dispatch/dispatch.h>
6 #include <inttypes.h>
7 #include <ktrace/session.h>
8 #include <ktrace/private.h>
9 #include <sys/kdebug.h>
10 #include <sys/syscall.h>
11 #include <kperf/kpc.h>
12 #include <kperf/kperf.h>
13 #include <kperfdata/kpdecode.h>
14 #include <os/assumes.h>
15 #include <stdint.h>
16 #include <sys/sysctl.h>
17 
18 #include "kperf_helpers.h"
19 #include "ktrace_helpers.h"
20 #include "ktrace_meta.h"
21 #include "../drop_priv.h"
22 
23 
24 
// Upper bounds on the static per-CPU and per-thread tracking arrays used by
// the tests below; dt_ncpu() is asserted to be below these limits.
#define MAX_CPUS    64
#define MAX_THREADS 64

// Spin flag polled by the worker threads; cleared by the completion handlers
// to let them exit.  NOTE(review): volatile is not a synchronization
// primitive — presumably adequate for this test's start/stop handshake, but
// an atomic_bool would be stricter; confirm intent.
volatile static bool running_threads = true;
29 
30 static void *
spinning_thread(void * semp)31 spinning_thread(void *semp)
32 {
33 	T_QUIET;
34 	T_ASSERT_NOTNULL(semp, "semaphore passed to thread should not be NULL");
35 	dispatch_semaphore_signal(*(dispatch_semaphore_t *)semp);
36 
37 	while (running_threads) {
38 		;
39 	}
40 	return NULL;
41 }
42 
// kperf trace-point identifiers matched by the tests below.
#define PERF_STK_KHDR   UINT32_C(0x25020014)  // kernel callstack header
#define PERF_STK_UHDR   UINT32_C(0x25020018)  // user callstack header
#define PERF_TMR_FIRE   KDBG_EVENTID(DBG_PERF, 3, 0)  // profiling timer fired
#define PERF_TMR_HNDLR  KDBG_EVENTID(DBG_PERF, 3, 2)  // per-CPU timer handler ran
#define PERF_TMR_PEND   KDBG_EVENTID(DBG_PERF, 3, 3)
#define PERF_TMR_SKIP   KDBG_EVENTID(DBG_PERF, 3, 4)
#define PERF_KPC_CONFIG KDBG_EVENTID(DBG_PERF, 6, 4)
#define PERF_KPC_REG    KDBG_EVENTID(DBG_PERF, 6, 5)
#define PERF_KPC_REG32  KDBG_EVENTID(DBG_PERF, 6, 7)
#define PERF_INSTR_DATA KDBG_EVENTID(DBG_PERF, 1, 17)
#define PERF_EVENT      KDBG_EVENTID(DBG_PERF, 0, 0)
#define PERF_DISPLABEL  KDBG_EVENTID(DBG_PERF, 1, 23)
#define PERF_DISPSAMPLE KDBG_EVENTID(DBG_PERF, 1, 10)

// Scheduler events used to track which thread is running on each CPU.
#define SCHED_DISPATCH KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_DISPATCH)
#define SCHED_SWITCH KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_SCHED)
#define SCHED_HANDOFF KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_STACK_HANDOFF)
#define SCHED_IDLE KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_IDLE)

#define MP_CPUS_CALL UINT32_C(0x1900004)

// Sentinel event emitted by start_tracing_with_timeout() to arm its timeout.
#define DISPATCH_AFTER_EVENT UINT32_C(0xfefffffc)
#define TIMEOUT_SECS 10

// Period programmed into kperf's profiling timer for these tests.
#define TIMER_PERIOD_NS (1 * NSEC_PER_MSEC)
68 
69 static void
start_tracing_with_timeout(ktrace_session_t s,unsigned int timeout_secs)70 start_tracing_with_timeout(ktrace_session_t s, unsigned int timeout_secs)
71 {
72 	// Only set the timeout after we've seen an event that was traced by us.
73 	// This helps set a reasonable timeout after we're guaranteed to get a
74 	// few events.
75 	dispatch_queue_t q = dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0);
76 
77 	ktrace_events_single(s, DISPATCH_AFTER_EVENT,
78 	    ^(__unused struct trace_point *tp)
79 	{
80 		T_LOG("arming timer to stop tracing after %d seconds", timeout_secs);
81 		dispatch_after(dispatch_time(DISPATCH_TIME_NOW,
82 		    timeout_secs * NSEC_PER_SEC), q, ^{
83 			T_LOG("ending tracing due to timeout");
84 			ktrace_end(s, 0);
85 		});
86 	});
87 	ktrace_set_collection_interval(s, 100);
88 
89 	T_ASSERT_POSIX_ZERO(ktrace_start(s, q), "start ktrace");
90 
91 	kdebug_trace(DISPATCH_AFTER_EVENT, 0, 0, 0, 0);
92 	T_LOG("trace point emitted");
93 }
94 
// Configure kperf with a single periodic timer (timer 0) whose fires trigger
// action 1 running the given sampler set, then start sampling.
//
// period_ns: timer period in nanoseconds (converted to mach ticks).
// samplers:  bitmask of KPERF_SAMPLER_* values to attach to action 1.
static void
configure_kperf_timer_samplers(uint64_t period_ns, uint32_t samplers)
{
	T_SETUPBEGIN;

	(void)kperf_action_count_set(1);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1, samplers),
	    NULL);
	(void)kperf_timer_count_set(1);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
	    kperf_ns_to_ticks(period_ns)), NULL);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);

	// Sampling begins here — timer fires show up in the trace from now on.
	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");

	T_SETUPEND;
}
115 
116 static double
timestamp_secs(ktrace_session_t s,uint64_t timestamp)117 timestamp_secs(ktrace_session_t s, uint64_t timestamp)
118 {
119 	uint64_t ns = 0;
120 	T_QUIET;
121 	T_ASSERT_POSIX_ZERO(ktrace_convert_timestamp_to_nanoseconds(s, timestamp,
122 	    &ns), NULL);
123 	return (double)ns / NSEC_PER_SEC;
124 }
125 
126 #pragma mark - timers
127 
128 // Ensure that kperf is correctly sampling CPUs that are actively scheduling by
129 // bringing up threads and ensuring that threads on-core are sampled by each
130 // timer fire.
131 
132 T_DECL(kperf_sample_active_cpus,
133     "make sure that kperf samples all active CPUs")
134 {
135 	start_controlling_ktrace();
136 
137 	T_SETUPBEGIN;
138 
139 	int ncpus = dt_ncpu();
140 	T_QUIET;
141 	T_ASSERT_LT(ncpus, MAX_CPUS,
142 	    "only supports up to %d CPUs", MAX_CPUS);
143 	T_LOG("found %d CPUs", ncpus);
144 
145 	int nthreads = ncpus - 1;
146 	T_QUIET;
147 	T_ASSERT_LT(nthreads, MAX_THREADS,
148 	    "only supports up to %d threads", MAX_THREADS);
149 
150 	static pthread_t threads[MAX_THREADS];
151 
152 	ktrace_session_t s = ktrace_session_create();
153 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
154 	ktrace_set_collection_interval(s, 100);
155 
156 	__block uint64_t nfires = 0;
157 	__block uint64_t nsamples = 0;
158 	static uint64_t idle_tids[MAX_CPUS] = { 0 };
159 	__block double sum_saturation = 0;
160 	__block uint64_t last_nsamples = 0;
161 
162 	// As a test debugging aid, take an additonal argument that specifies the
163 	// number of fires to stop tracing after.  This also turns on additional
164 	// logging of scheduler trace events.
165 	int stopafter = 0;
166 	if (argc > 0) {
167 		stopafter = atoi(argv[0]);
168 		if (stopafter < 0) {
169 			T_ASSERT_FAIL("argument must be positive");
170 		}
171 	}
172 
173 	static uint64_t first_timestamp = 0;
174 	static uint64_t last_timestamp = 0;
175 	ktrace_events_any(s, ^(struct trace_point *tp) {
176 		if (first_timestamp == 0) {
177 			first_timestamp = tp->timestamp;
178 		}
179 		last_timestamp = tp->timestamp;
180 	});
181 
182 	ktrace_set_completion_handler(s, ^{
183 		T_LOG("stopping threads");
184 
185 		running_threads = false;
186 
187 		for (int i = 0; i < nthreads; i++) {
188 		        T_QUIET;
189 		        T_ASSERT_POSIX_ZERO(pthread_join(threads[i], NULL), NULL);
190 		}
191 
192 		double saturation = sum_saturation / nfires * 100;
193 
194 		T_LOG("over %.1f seconds, saw %" PRIu64 " timer fires, %" PRIu64
195 			" samples, %g samples/fire, %.2f%% saturation",
196 			timestamp_secs(s, last_timestamp - first_timestamp), nfires,
197 			nsamples, (double)nsamples / (double)nfires, saturation);
198 		T_ASSERT_GT(saturation, 95.0,
199 		    "saw reasonable percentage of full samples");
200 
201 		T_END;
202 	});
203 
204 	// Track which threads are running on each CPU.
205 	static uint64_t tids_on_cpu[MAX_CPUS] = { 0 };
206 	void (^switch_cb)(struct trace_point *, const char *name) =
207 	    ^(struct trace_point *tp, const char *name) {
208 		uint64_t new_thread = tp->arg2;
209 
210 		if (idle_tids[tp->cpuid] != new_thread) {
211 			tids_on_cpu[tp->cpuid] = new_thread;
212 		}
213 
214 		if (stopafter) {
215 			T_LOG("%.7g: %s on %d: %llx", timestamp_secs(s, tp->timestamp),
216 			    name, tp->cpuid, tp->arg2);
217 		}
218 	};
219 
220 	ktrace_events_single(s, SCHED_SWITCH, ^(struct trace_point *tp) {
221 		switch_cb(tp, "switch");
222 	});
223 	ktrace_events_single(s, SCHED_HANDOFF, ^(struct trace_point *tp) {
224 		switch_cb(tp, "hndoff");
225 	});
226 
227 	// Determine the thread IDs of the idle threads on each CPU.
228 	ktrace_events_single(s, SCHED_IDLE, ^(struct trace_point *tp) {
229 		if (tp->debugid & DBG_FUNC_END) {
230 			return;
231 		}
232 		tids_on_cpu[tp->cpuid] = 0;
233 		idle_tids[tp->cpuid] = tp->threadid;
234 		if (stopafter) {
235 			T_LOG("%.7g: idle on %d: %llx", timestamp_secs(s, tp->timestamp),
236 			    tp->cpuid, tp->threadid);
237 		}
238 	});
239 
240 	// On each timer fire, go through all the cores and mark any threads
241 	// that should be sampled.
242 
243 	__block int last_fire_cpu = -1;
244 	static bool sample_missing[MAX_CPUS] = { false };
245 	static uint64_t tids_snap[MAX_CPUS] = { 0 };
246 	__block int nexpected = 0;
247 	__block int nextra = 0;
248 	__block int nidles = 0;
249 
250 	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
251 		T_QUIET; T_ASSERT_EQ((tp->debugid & DBG_FUNC_START), 0,
252 		    "no timer fire start events are allowed");
253 		int last_expected = nexpected;
254 		nfires++;
255 
256 		nexpected = 0;
257 		for (int i = 0; i < ncpus; i++) {
258 			if (sample_missing[i]) {
259 				T_LOG("missed sample on CPU %d for thread %#llx from "
260 				    "timer on CPU %d (expected %d samples)",
261 				    tp->cpuid, tids_snap[i], last_fire_cpu, last_expected);
262 				sample_missing[i] = false;
263 			}
264 
265 			if (tids_on_cpu[i] != 0) {
266 				tids_snap[i] = tids_on_cpu[i];
267 				sample_missing[i] = true;
268 				nexpected++;
269 			}
270 		}
271 		if (stopafter) {
272 			T_LOG("%.7g: FIRE on %d: %d extra, %d idles",
273 			    timestamp_secs(s, tp->timestamp), tp->cpuid, nextra, nidles);
274 		}
275 
276 		if (nfires == 1) {
277 			return;
278 		}
279 
280 		if (last_expected == 0) {
281 			sum_saturation += 1;
282 		} else {
283 			sum_saturation += (double)(nsamples - last_nsamples) /
284 			    last_expected;
285 		}
286 		last_nsamples = nsamples;
287 		nextra = 0;
288 		nidles = 0;
289 
290 		T_QUIET;
291 		T_ASSERT_LT((int)tp->cpuid, ncpus,
292 		    "timer fire should not occur on an IOP");
293 		last_fire_cpu = (int)tp->cpuid;
294 
295 		if (stopafter && (uint64_t)stopafter == nfires) {
296 			ktrace_end(s, 1);
297 		}
298 	});
299 
300 	// On the timer handler for each CPU, unset the missing sample bitmap.
301 
302 	ktrace_events_single(s, PERF_TMR_HNDLR, ^(struct trace_point *tp) {
303 		nsamples++;
304 		if ((int)tp->cpuid > ncpus) {
305 		        // Skip IOPs; they're not scheduling any relevant threads.
306 		        return;
307 		}
308 
309 		if (!sample_missing[tp->cpuid] && idle_tids[tp->cpuid] != 0) {
310 			T_LOG("sampled additional thread %llx on CPU %d", tp->threadid,
311 			    tp->cpuid);
312 			nextra++;
313 		}
314 		if (tp->threadid == idle_tids[tp->cpuid]) {
315 			T_LOG("sampled idle thread on CPU %d", tp->cpuid);
316 			nidles++;
317 		}
318 		sample_missing[tp->cpuid] = false;
319 	});
320 
321 	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_KSTACK);
322 
323 	T_SETUPEND;
324 
325 	start_tracing_with_timeout(s, TIMEOUT_SECS);
326 
327 	// Create threads to bring up all of the CPUs.
328 
329 	dispatch_semaphore_t thread_spinning = dispatch_semaphore_create(0);
330 
331 	for (int i = 0; i < nthreads; i++) {
332 		T_QUIET;
333 		T_ASSERT_POSIX_ZERO(
334 			pthread_create(&threads[i], NULL, &spinning_thread,
335 			&thread_spinning), NULL);
336 		dispatch_semaphore_wait(thread_spinning, DISPATCH_TIME_FOREVER);
337 	}
338 
339 	T_LOG("spun up %d thread%s", nthreads, nthreads == 1 ? "" : "s");
340 
341 	dispatch_main();
342 }
343 
// Number of timer fires to collect before ending the trace.
#define FIRES_THRESHOLD (5000)

// Verify that the profiling timer fires at roughly the programmed rate:
// count fires while one thread spins, then compare the observed rate against
// the rate implied by TIMER_PERIOD_NS (within -5%/+5%).
T_DECL(kperf_timer_fires_enough_times,
    "ensure the correct number of timers fire in a period of time")
{
	start_controlling_ktrace();

	dispatch_semaphore_t thread_spinning = dispatch_semaphore_create(0);

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	__block uint64_t nfires = 0;
	__block uint64_t first_fire_ns = 0;
	__block uint64_t last_fire_ns = 0;

	int ncpus = dt_ncpu();

	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		nfires++;
		// Record the first fire's time once; always update the last.
		if (first_fire_ns == 0) {
			ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
			    &first_fire_ns);
		}
		ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
		    &last_fire_ns);

		T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
		    "timer fire should not occur on an IOP");
		if (nfires >= FIRES_THRESHOLD) {
			ktrace_end(s, 1);
		}
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_KSTACK);

	// Keep one CPU busy so the timer has something to sample.
	pthread_t thread;
	T_QUIET;
	T_ASSERT_POSIX_ZERO(pthread_create(&thread, NULL, &spinning_thread,
	    &thread_spinning), NULL);
	dispatch_semaphore_wait(thread_spinning, DISPATCH_TIME_FOREVER);

	ktrace_set_completion_handler(s, ^{
		running_threads = false;

		double duration_secs = (double)(last_fire_ns - first_fire_ns) /
		    NSEC_PER_SEC;
		T_LOG("stopping thread after %.2f seconds", duration_secs);

		T_QUIET; T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);

		T_LOG("saw %" PRIu64 " timer fires (%g fires/second)", nfires,
		    (double)nfires / (double)duration_secs);
		double expected_nfires = duration_secs * NSEC_PER_SEC / TIMER_PERIOD_NS;
		T_LOG("expecting %g timer fires", expected_nfires);
		// NOTE(review): despite the name, this is expected/observed * 100 —
		// >100% means fires were missed, <100% means extra fires.
		double nfires_seen_pct = expected_nfires / nfires * 100;
		T_ASSERT_GT(nfires_seen_pct, 95.0,
		    "saw reasonable number of missed timer fires");
		T_ASSERT_LT(nfires_seen_pct, 105.0,
			"saw reasonable number of extra timer fires");

		T_END;
	});

	start_tracing_with_timeout(s, TIMEOUT_SECS);

	dispatch_main();
}
413 
// kperf_timer_not_oversampling ensures that the profiling timer fires are
// spaced apart by the programmed timer period.  Otherwise, tools that rely on
// sample count as a proxy for CPU usage will over-estimate.

// A fire period is valid if it's no more than 5% shorter than the programmed
// timer period.
#define FIRE_PERIOD_THRESHOLD_NS \
		(TIMER_PERIOD_NS - (uint64_t)(TIMER_PERIOD_NS * 0.05))

// Fixed-capacity circular queue header; element storage is laid out
// immediately after this struct (see cirq_instval / cirq_u64 below).
struct cirq {
	unsigned int nslots;    // capacity, in elements
	unsigned int tail_slot; // next slot to be written by cirq_push()
	unsigned int slot_size; // size of each element, in bytes
};

// Initialize a cirq header for NSLOTS trailing elements of TYPE.
#define CIRQ_INIT(TYPE, NSLOTS) \
	(struct cirq){ \
		.nslots = NSLOTS, .tail_slot = 0, .slot_size = sizeof(TYPE), \
	}
431 
432 static inline void *
cirq_get(struct cirq * cq,unsigned int i)433 cirq_get(struct cirq *cq, unsigned int i)
434 {
435 	return (char *)cq + sizeof(*cq) + (cq->slot_size * i);
436 }
437 
438 static void *
cirq_top(void * vcq)439 cirq_top(void *vcq)
440 {
441 	struct cirq *cq = vcq;
442 	unsigned int tail_slot = cq->tail_slot;
443 	unsigned int top_slot = (tail_slot > 0 ? tail_slot : cq->nslots) - 1;
444 	return cirq_get(cq, top_slot);
445 }
446 
447 static void *
cirq_push(void * vcq)448 cirq_push(void *vcq)
449 {
450 	struct cirq *cq = vcq;
451 	unsigned int tail_slot = cq->tail_slot;
452 	unsigned int next_slot = tail_slot == cq->nslots - 1 ? 0 : tail_slot + 1;
453 	cq->tail_slot = next_slot;
454 	return cirq_get(cq, tail_slot);
455 }
456 
457 static void
458 cirq_for(void *vcq, void (^iter)(void *elt))
459 {
460 	struct cirq *cq = vcq;
461 	for (unsigned int i = cq->tail_slot; i < cq->nslots; i++) {
462 		iter(cirq_get(cq, i));
463 	}
464 	for (unsigned int i = 0; i < cq->tail_slot; i++) {
465 		iter(cirq_get(cq, i));
466 	}
467 }
468 
// Number of historical entries kept per circular queue.
#define HISTORY_LEN 5

// A value paired with the instant (in trace-relative nanoseconds) it was
// observed.
struct instval {
	uint64_t iv_instant_ns;
	uint64_t iv_val;
};

// Circular queue of instvals: header plus inline storage, as cirq_get()
// expects the elements to follow the header directly.
struct cirq_instval {
	struct cirq cq;
	struct instval elts[HISTORY_LEN];
};

// Circular queue of raw 64-bit values, same layout convention.
struct cirq_u64 {
	struct cirq cq;
	uint64_t elts[HISTORY_LEN];
};

// Per-CPU history of decrementer (timer interrupt) latencies and kperf fire
// latencies, used to diagnose short fire periods.
struct cpu_oversample {
	struct cirq_instval timer_latencies;
	struct cirq_instval fire_latencies;
};
490 
491 static void
cpu_oversample_log(struct cpu_oversample * cpu,unsigned int cpuid)492 cpu_oversample_log(struct cpu_oversample *cpu, unsigned int cpuid)
493 {
494 	T_LOG("CPU %d timer latencies:", cpuid);
495 	__block int i = -HISTORY_LEN;
496 	cirq_for(&cpu->timer_latencies, ^(void *viv) {
497 		struct instval *iv = viv;
498 		T_LOG("\t%llu timer latency %d: %llu", iv->iv_instant_ns, i,
499 		    iv->iv_val);
500 		i++;
501 	});
502 
503 	T_LOG("CPU %d fire latencies:", cpuid);
504 	i = -HISTORY_LEN;
505 	cirq_for(&cpu->fire_latencies, ^(void *viv) {
506 		struct instval *iv = viv;
507 		T_LOG("\t%llu fire latency %d: %llu", iv->iv_instant_ns, i, iv->iv_val);
508 		i++;
509 	});
510 }
511 
// Ensure consecutive profiling timer fires are separated by at least the
// programmed period (minus 5% leeway and interrupt-latency adjustments).
// Short periods are tolerated when a stackshot — the main source of long
// interrupt latency — is in flight or just finished.
T_DECL(kperf_timer_not_oversampling,
    "ensure that time between fires is long enough")
{
	start_controlling_ktrace();

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	// Try not to perturb the system with more work.
	ktrace_set_collection_interval(s, 1000);
	__block uint64_t nfires = 0;
	__block uint64_t first_fire_ns = 0;
	__block uint64_t last_fire_ns = 0;
	__block unsigned int last_fire_cpuid = 0;

	int ncpus = dt_ncpu();
	T_QUIET; T_ASSERT_GT(ncpus, 0, "should see positive number of CPUs");

	// Per-CPU latency histories, indexed by cpuid.
	struct cpu_oversample *per_cpu = calloc((unsigned int)ncpus,
			sizeof(per_cpu[0]));
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NOTNULL(per_cpu, "allocated timer latency tracking");
	for (int i = 0; i < ncpus; i++) {
		per_cpu[i].timer_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
		per_cpu[i].fire_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
	}

	__block bool in_stackshot = false;
	__block uint64_t last_stackshot_ns = 0;

	// Stackshots are the primary source of interrupt latency on the system.
	// Track when one is in progress and when the last one finished.
	ktrace_events_single(s, KDBG_EVENTID(DBG_BSD, DBG_BSD_EXCP_SC,
			SYS_stack_snapshot_with_config), ^(struct trace_point *tp) {
		bool start = tp->debugid & DBG_FUNC_START;
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		T_LOG("%llu: %s stackshot syscall from process %s",
				cur_ns, start ? "start" : "finish", tp->command);
		in_stackshot = start;
		if (!start) {
			last_stackshot_ns = cur_ns;
		}
	});

	// History of recent fire times (any CPU), for diagnostics on failure.
	struct cirq_u64 *fire_times = calloc(1, sizeof(*fire_times));
	T_ASSERT_NOTNULL(fire_times, "allocated fire time tracking");
	fire_times->cq = CIRQ_INIT(uint64_t, HISTORY_LEN);

	// Track the decrementer's latency values to find any unexpectedly long
	// interrupt latencies that could affect the firing cadence.
	ktrace_events_single(s, DECR_TRAP_LATENCY,
			^(struct trace_point *tp) {
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		// arg1 carries the (negated) latency in mach time units.
		uint64_t latency_ns = ns_from_abs(s, 0 - tp->arg1);
		struct instval *latency = cirq_push(&per_cpu[tp->cpuid].timer_latencies);
		latency->iv_instant_ns = cur_ns;
		latency->iv_val = latency_ns;
	});

	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
				"timer fire should not occur on an IOP");

		nfires++;
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		uint64_t *fire_ns = cirq_push(fire_times);
		*fire_ns = cur_ns;

		struct cpu_oversample *cur_cpu = &per_cpu[tp->cpuid];
		struct instval *last_timer_latency = cirq_top(
				&cur_cpu->timer_latencies);
		uint64_t timer_latency_ns = last_timer_latency->iv_val;

		if (first_fire_ns == 0) {
			first_fire_ns = cur_ns;
		} else {
			struct cpu_oversample *last_cpu = &per_cpu[last_fire_cpuid];
			struct instval *last_latency = cirq_top(&last_cpu->fire_latencies);
			uint64_t last_fire_latency_ns = last_latency->iv_val;

			if (timer_latency_ns > TIMER_PERIOD_NS / 4) {
				T_LOG("%llu: long timer latency at fire: %llu", cur_ns,
						timer_latency_ns);
			}

			// Long interrupt latencies will cause the timer to miss its fire
			// time and report a fire past when it should have, making the next
			// period too short.  Keep track of the latency as a leeway
			// adjustment for the subsequent fire.
			uint64_t fire_period_ns = cur_ns - last_fire_ns;
			uint64_t fire_period_adj_ns = fire_period_ns +
			    last_fire_latency_ns + timer_latency_ns;
			// Within 5% is still a valid period -- adjust this for running in
			// potentially-noisy automation.
			uint64_t fire_period_pct_adj_ns = fire_period_adj_ns +
			    FIRE_PERIOD_THRESHOLD_NS / 20;
			if (fire_period_adj_ns < FIRE_PERIOD_THRESHOLD_NS &&
			    fire_period_pct_adj_ns >= FIRE_PERIOD_THRESHOLD_NS) {
				T_LOG("ignoring period of %llu within 5%% of expected %llu",
				    fire_period_adj_ns, fire_period_pct_adj_ns);
			}
			bool too_short = fire_period_pct_adj_ns < FIRE_PERIOD_THRESHOLD_NS;
			if (too_short) {
				// Log extensive context before deciding whether this is a
				// real failure or excusable (stackshot, missing context).
				T_LOG("%llu: period of timer fire %llu is %llu + %llu + %llu = "
						"%llu < %llu",
						cur_ns, nfires, fire_period_ns, last_fire_latency_ns,
						timer_latency_ns, fire_period_adj_ns,
						FIRE_PERIOD_THRESHOLD_NS);

				T_LOG("short profile timer fired on CPU %d", tp->cpuid);
				cpu_oversample_log(cur_cpu, tp->cpuid);

				if (cur_cpu == last_cpu) {
					T_LOG("fired back-to-back on CPU %d", tp->cpuid);
				} else {
					T_LOG("previous profile timer fired on CPU %d",
							last_fire_cpuid);
					cpu_oversample_log(last_cpu, last_fire_cpuid);
				}

				T_LOG("profile timer fires:");
				cirq_for(fire_times, ^(void *vu64) {
					T_LOG("\tfire: %llu", *(uint64_t *)vu64);
				});
				if (nfires < (unsigned int)ncpus) {
					T_LOG("ignoring timer fire %llu as context may be missing",
							nfires);
				} else {
					if (in_stackshot) {
						T_LOG("skipping assertion because stackshot is "
								"happening");
					} else if (last_stackshot_ns != 0 &&
							cur_ns > last_stackshot_ns &&
							cur_ns - last_stackshot_ns < TIMER_PERIOD_NS) {
						T_LOG("skipping assertion because stackshot happened "
								"%" PRIu64 "ns ago",
								cur_ns - last_stackshot_ns);
					} else {
						T_ASSERT_FAIL("profiling period is shorter than "
								"expected with no stackshot interference");
					}
				}
			}

			struct instval *latency = cirq_push(&cur_cpu->fire_latencies);
			latency->iv_instant_ns = cur_ns;
			latency->iv_val = timer_latency_ns;

			// Snapshot this timer fire's interrupt latency, so the next one
			// can make an adjustment to the period.
			// NOTE(review): this assigns to a local that immediately goes out
			// of scope; the value is already recorded via the cirq_push above,
			// so this looks like a dead store — confirm intent.
			last_fire_latency_ns = timer_latency_ns;
		}
		last_fire_ns = cur_ns;
		last_fire_cpuid = tp->cpuid;

		if (nfires >= FIRES_THRESHOLD) {
			ktrace_end(s, 1);
		}
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_TINFO);

	ktrace_set_completion_handler(s, ^{
		double duration_secs = (double)(last_fire_ns - first_fire_ns) /
		    NSEC_PER_SEC;
		T_LOG("stopping trace after %.2f seconds", duration_secs);

		T_PASS("saw %" PRIu64 " timer fires (%g fires/second) without "
			"oversampling", nfires, (double)nfires / (double)duration_secs);

		T_END;
	});

	start_tracing_with_timeout(s, 5);

	// Get all CPUs out of idle.
	uint64_t *counts = kpc_counterbuf_alloc();
	(void)kpc_get_cpu_counters(true, KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
	free(counts);

	dispatch_main();
}
692 
693 T_DECL(kperf_timer_stress, "repeatedly enable and disable timers")
694 {
695 	start_controlling_ktrace();
696 
697 	const int niters = 500;
698 	for (int i = 0; i < niters; i++) {
699 		configure_kperf_stacks_timer(-1, 1, true);
700 		T_QUIET;
701 		T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
702 		usleep(2000);
703 		kperf_reset();
704 	}
705 	T_LOG("configured kperf with a timer %d times", niters);
706 }
707 
#pragma mark - kdebug triggers

// How long to wait for triggered samples before giving up.
#define KDEBUG_TRIGGER_TIMEOUT_NS (10 * NSEC_PER_SEC)

// An event that must NOT match any trigger filter; used as an end-of-test
// sentinel by expect_kdebug_trigger().
#define NON_TRIGGER_CLASS    UINT32_C(0xfd)
#define NON_TRIGGER_SUBCLASS UINT32_C(0xff)
#define NON_TRIGGER_CODE     UINT32_C(0xff)

#define NON_TRIGGER_EVENT \
	        (KDBG_EVENTID(NON_TRIGGER_CLASS, NON_TRIGGER_SUBCLASS, \
	        NON_TRIGGER_CODE))
719 
// Shared driver for the kdebug-trigger tests: configure kperf to sample
// kernel and user stacks whenever an event matching `filter_desc` is traced,
// emit each of the `n_debugids` events, and expect at least one kernel and
// one user stack per triggering event.  Ends when NON_TRIGGER_EVENT is seen
// (or on timeout).  Calls T_END from the completion handler.
static void
expect_kdebug_trigger(const char *filter_desc, const uint32_t *debugids,
    unsigned int n_debugids)
{
	// Each triggering event increments these; each stack seen decrements.
	// At completion both must be <= 0 (every trigger produced stacks).
	__block int missing_kernel_stacks = 0;
	__block int missing_user_stacks = 0;
	ktrace_session_t s;
	kperf_kdebug_filter_t filter;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	ktrace_events_single(s, PERF_STK_KHDR, ^(struct trace_point *tp) {
		missing_kernel_stacks--;
		T_LOG("saw kernel stack with %" PRIu64 " frames, flags = %#"
		PRIx64, tp->arg2, tp->arg1);
	});
	ktrace_events_single(s, PERF_STK_UHDR, ^(struct trace_point *tp) {
		missing_user_stacks--;
		T_LOG("saw user stack with %" PRIu64 " frames, flags = %#"
		PRIx64, tp->arg2, tp->arg1);
	});

	for (unsigned int i = 0; i < n_debugids; i++) {
		ktrace_events_single(s, debugids[i], ^(struct trace_point *tp) {
			missing_kernel_stacks++;
			missing_user_stacks++;
			T_LOG("saw event with debugid 0x%" PRIx32, tp->debugid);
		});
	}

	// The sentinel event ends the trace once all triggers have been seen.
	ktrace_events_single(s, NON_TRIGGER_EVENT,
	    ^(__unused struct trace_point *tp)
	{
		ktrace_end(s, 0);
	});

	ktrace_set_completion_handler(s, ^{
		T_EXPECT_LE(missing_kernel_stacks, 0, NULL);
		T_EXPECT_LE(missing_user_stacks, 0, NULL);

		ktrace_session_destroy(s);
		T_END;
	});

	// Configure the kdebug trigger: action 1 samples both stacks whenever
	// an event matching the filter is traced.
	kperf_reset();

	(void)kperf_action_count_set(1);
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
	    KPERF_SAMPLER_KSTACK | KPERF_SAMPLER_USTACK), NULL);

	filter = kperf_kdebug_filter_create();
	T_ASSERT_NOTNULL(filter, NULL);

	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_action_set(1), NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_filter_add_desc(filter, filter_desc),
	    NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_filter_set(filter), NULL);
	kperf_kdebug_filter_destroy(filter);

	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), NULL);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Trace the triggering events.

	for (unsigned int i = 0; i < n_debugids; i++) {
		T_ASSERT_POSIX_SUCCESS(kdebug_trace(debugids[i], 0, 0, 0, 0), NULL);
	}

	T_ASSERT_POSIX_SUCCESS(kdebug_trace(NON_TRIGGER_EVENT, 0, 0, 0, 0), NULL);

	// Safety net: end the trace even if the sentinel is never observed.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, KDEBUG_TRIGGER_TIMEOUT_NS),
	    dispatch_get_main_queue(), ^(void)
	{
		ktrace_end(s, 1);
	});
}
799 
// Events used to trigger kdebug sampling: class 0xfe matches on any function
// qualifier, class 0xfd only on end events (the "r" suffix in filters).
#define TRIGGER_CLASS     UINT32_C(0xfe)
#define TRIGGER_CLASS_END UINT32_C(0xfd)
#define TRIGGER_SUBCLASS  UINT32_C(0xff)
#define TRIGGER_CODE      UINT32_C(0)
#define TRIGGER_DEBUGID \
	        (KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, TRIGGER_CODE))
806 
807 T_DECL(kperf_kdebug_trigger_classes,
808     "test that kdebug trigger samples on classes")
809 {
810 	start_controlling_ktrace();
811 
812 	const uint32_t class_debugids[] = {
813 		KDBG_EVENTID(TRIGGER_CLASS, 1, 1),
814 		KDBG_EVENTID(TRIGGER_CLASS, 2, 1),
815 		KDBG_EVENTID(TRIGGER_CLASS_END, 1, 1) | DBG_FUNC_END,
816 		KDBG_EVENTID(TRIGGER_CLASS_END, 2, 1) | DBG_FUNC_END,
817 	};
818 
819 	expect_kdebug_trigger("C0xfe,C0xfdr", class_debugids,
820 	    sizeof(class_debugids) / sizeof(class_debugids[0]));
821 	dispatch_main();
822 }
823 
824 T_DECL(kperf_kdebug_trigger_subclasses,
825     "test that kdebug trigger samples on subclasses")
826 {
827 	start_controlling_ktrace();
828 
829 	const uint32_t subclass_debugids[] = {
830 		KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, 0),
831 		KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, 1),
832 		KDBG_EVENTID(TRIGGER_CLASS_END, TRIGGER_SUBCLASS, 0) | DBG_FUNC_END,
833 		KDBG_EVENTID(TRIGGER_CLASS_END, TRIGGER_SUBCLASS, 1) | DBG_FUNC_END
834 	};
835 
836 	expect_kdebug_trigger("S0xfeff,S0xfdffr", subclass_debugids,
837 	    sizeof(subclass_debugids) / sizeof(subclass_debugids[0]));
838 	dispatch_main();
839 }
840 
841 T_DECL(kperf_kdebug_trigger_debugids,
842     "test that kdebug trigger samples on debugids")
843 {
844 	start_controlling_ktrace();
845 
846 	const uint32_t debugids[] = {
847 		TRIGGER_DEBUGID
848 	};
849 
850 	expect_kdebug_trigger("D0xfeff0000", debugids,
851 	    sizeof(debugids) / sizeof(debugids[0]));
852 	dispatch_main();
853 }
854 
// TODO Set a single function specifier filter, and expect it not to trigger
// on all events from that class.
857 
858 static void
reset_kperf(void)859 reset_kperf(void)
860 {
861 	(void)kperf_reset();
862 	(void)sysctlbyname("kperf.debug_level", NULL, NULL, &(int){ 0 },
863 	    sizeof(int));
864 }
865 
// Verify the legacy kdbg_callstacks knob: once enabled, syscalls (BSD-class
// events) should produce user callstack samples.
T_DECL(kperf_kdbg_callstacks,
    "test that the kdbg_callstacks samples on syscalls")
{
	start_controlling_ktrace();

	ktrace_session_t s;
	__block bool saw_user_stack = false;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	// Make sure BSD events are traced in order to trigger samples on syscalls.
	ktrace_events_class(s, DBG_BSD, ^void (__unused struct trace_point *tp) {});

	// One user stack is enough; stop tracing as soon as it appears.
	ktrace_events_single(s, PERF_STK_UHDR, ^(__unused struct trace_point *tp) {
		saw_user_stack = true;
		ktrace_end(s, 1);
	});

	ktrace_set_completion_handler(s, ^{
		ktrace_session_destroy(s);

		T_EXPECT_TRUE(saw_user_stack,
		"saw user stack after configuring kdbg_callstacks");
		T_END;
	});

	// kperf_kdbg_callstacks_set is deprecated but is exactly the interface
	// under test here.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
	T_ASSERT_POSIX_SUCCESS(kperf_kdbg_callstacks_set(1), NULL);
#pragma clang diagnostic pop
	T_ATEND(reset_kperf);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Safety net: give up after 10 seconds if no user stack is seen.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
	    dispatch_get_main_queue(), ^(void) {
		ktrace_end(s, 1);
	});

	dispatch_main();
}
909 
#pragma mark - PET

// How long to let PET sampling run before checking for stacks.
#define STACKS_WAIT_DURATION_NS (3 * NSEC_PER_SEC)

// Shared driver for the PET tests: start sampling (kperf must already be
// configured by the caller), optionally let `setup` install extra event
// handlers on the session, and after STACKS_WAIT_DURATION_NS expect to have
// seen at least one user and one kernel stack.  `complete` runs from the
// completion handler (after the session is destroyed) and should end the test.
static void
expect_stacks_traced(void (^setup)(ktrace_session_t s), void (^complete)(void))
{
	ktrace_session_t s;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);
	if (setup) {
		setup(s);
	}

	__block unsigned int user_stacks = 0;
	__block unsigned int kernel_stacks = 0;

	ktrace_events_single(s, PERF_STK_UHDR, ^(__unused struct trace_point *tp) {
		user_stacks++;
	});
	ktrace_events_single(s, PERF_STK_KHDR, ^(__unused struct trace_point *tp) {
		kernel_stacks++;
	});

	ktrace_set_completion_handler(s, ^(void) {
		ktrace_session_destroy(s);
		T_EXPECT_GT(user_stacks, 0U, NULL);
		T_EXPECT_GT(kernel_stacks, 0U, NULL);
		complete();
	});

	T_QUIET; T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), NULL);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Let sampling run, then tear down kperf and end the trace.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, STACKS_WAIT_DURATION_NS),
	    dispatch_get_main_queue(), ^(void)
	{
		kperf_reset();
		ktrace_end(s, 0);
	});
}
954 
955 T_DECL(kperf_pet, "test that PET mode samples kernel and user stacks")
956 {
957 	start_controlling_ktrace();
958 
959 	configure_kperf_stacks_timer(-1, 10, false);
960 	T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL);
961 
962 	expect_stacks_traced(NULL, ^(void) {
963 		T_END;
964 	});
965 
966 	dispatch_main();
967 }
968 
T_DECL(kperf_lightweight_pet,
    "test that lightweight PET mode samples kernel and user stacks",
    T_META_ASROOT(true))
{
	start_controlling_ktrace();

	/* Enable lightweight PET before arming the PET timer. */
	int enable = 1;

	configure_kperf_stacks_timer(-1, 10, false);
	T_ASSERT_POSIX_SUCCESS(sysctlbyname("kperf.lightweight_pet", NULL, NULL,
	    &enable, sizeof(enable)), NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL);

	/* Count PET timer fires so the test can require at least one. */
	__block uint64_t fire_count = 0;

	expect_stacks_traced(^(ktrace_session_t s) {
		ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
			fire_count++;
			T_QUIET;
			T_ASSERT_EQ(tp->arg1, (uint64_t)0,
			    "timer fire should have timer ID of 0");
			T_QUIET;
			T_ASSERT_EQ(tp->arg2, (uint64_t)1,
			    "timer fire should have PET bit set");
		});
	}, ^(void) {
		T_ASSERT_GT(fire_count, (uint64_t)0, "timer fired at least once");
		T_END;
	});

	dispatch_main();
}
1001 
1002 T_DECL(kperf_pet_stress, "repeatedly enable and disable PET mode")
1003 {
1004 	start_controlling_ktrace();
1005 
1006 	const int niters = 500;
1007 	for (int i = 0; i < niters; i++) {
1008 		configure_kperf_stacks_timer(-1, 1, true);
1009 		T_QUIET; T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL);
1010 		T_QUIET;
1011 		T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
1012 		usleep(2000);
1013 		kperf_reset();
1014 	}
1015 
1016 	T_PASS("configured kperf PET %d times", niters);
1017 }
1018 
1019 #pragma mark - PMCs
1020 
T_DECL(kperf_pmc_config_only,
    "shouldn't show PMC config events unless requested")
{
	start_controlling_ktrace();

	__block bool saw_kpc_config = false;
	__block bool saw_kpc_reg = false;

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	/* Watch for any KPC configuration or register events. */
	ktrace_events_single(s, PERF_KPC_CONFIG,
	    ^(__unused struct trace_point *tp) {
		saw_kpc_config = true;
	});
	ktrace_events_single(s, PERF_KPC_REG,
	    ^(__unused struct trace_point *tp) {
		saw_kpc_reg = true;
	});
	ktrace_events_single(s, PERF_KPC_REG32,
	    ^(__unused struct trace_point *tp) {
		saw_kpc_reg = true;
	});

	ktrace_set_completion_handler(s, ^{
		ktrace_session_destroy(s);
		T_EXPECT_FALSE(saw_kpc_config,
		"should see no KPC configs without sampler enabled");
		T_EXPECT_FALSE(saw_kpc_reg,
		"should see no KPC registers without sampler enabled");
		T_END;
	});

	/*
	 * Configure and start the counters directly -- this alone should not
	 * produce any KPC trace events.
	 */
	uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
	uint64_t *config = calloc(nconfigs, sizeof(*config));
	/* calloc can fail (or nconfigs could be 0); don't deref NULL below. */
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NOTNULL(config, "allocated %u KPC config words", nconfigs);
	config[0] = 0x02;
	int ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
	/* kpc_set_config copies the config; safe to free immediately. */
	free(config);
	T_ASSERT_POSIX_SUCCESS(ret, "configured kpc");
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kpc_set_counting(KPC_CLASS_CONFIGURABLE_MASK),
	    "kpc_set_counting");

	/* Sample PMCs on a timer, but without the KPC config sampler. */
	(void)kperf_action_count_set(1);
	T_ATEND(reset_kperf);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1, KPERF_SAMPLER_PMC_CPU),
	    NULL);

	(void)kperf_timer_count_set(1);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
	    kperf_ns_to_ticks(TIMER_PERIOD_NS)), NULL);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);

	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
	    dispatch_get_main_queue(), ^(void) {
		ktrace_end(s, 1);
	});

	dispatch_main();
}
1088 
/*
 * Skip the current test unless the kernel reports support for the monotonic
 * (PMC-backed) subsystem via the "kern.monotonic.supported" sysctl.
 */
static void
skip_if_monotonic_unsupported(void)
{
	int supported = 0;
	size_t size = sizeof(supported);
	int ret = sysctlbyname("kern.monotonic.supported", &supported, &size,
	    NULL, 0);

	/* A missing sysctl means the kernel has no monotonic subsystem. */
	if (ret < 0) {
		T_WITH_ERRNO;
		T_SKIP("could not find \"kern.monotonic.supported\" sysctl");
	}

	if (!supported) {
		T_SKIP("monotonic is not supported on this platform");
	}
}
1107 
1108 #define INSTRS_CYCLES_UPPER 500
1109 #define INSTRS_CYCLES_LOWER 50
1110 
T_DECL(kperf_sample_instrs_cycles,
    "ensure instructions and cycles are sampled")
{
	T_SETUPBEGIN;

	skip_if_monotonic_unsupported();
	start_controlling_ktrace();

	ktrace_session_t sess = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");
	/*
	 * Collection interval was previously set twice (100, then 200); only
	 * the last value took effect, so set it once here.
	 */
	ktrace_set_collection_interval(sess, 200);

	__block uint64_t ninstrs_cycles = 0;
	__block uint64_t nzeroes = 0;
	/* tp is used below, so it must not be marked __unused. */
	ktrace_events_single(sess, PERF_INSTR_DATA,
	    ^(struct trace_point *tp) {
		ninstrs_cycles++;
		/* arg1 carries the instruction count; zero is a sampling bug. */
		if (tp->arg1 == 0) {
			T_LOG("%llx (%s)\n", tp->threadid, tp->command);
			nzeroes++;
		}
		/* Stop early once enough samples have been collected. */
		if (ninstrs_cycles >= INSTRS_CYCLES_UPPER) {
			ktrace_end(sess, 1);
		}
	});

	ktrace_set_completion_handler(sess, ^{
		T_EXPECT_GE(ninstrs_cycles, (uint64_t)INSTRS_CYCLES_LOWER,
		    "saw enough instructions and cycles events");
		T_EXPECT_EQ(nzeroes, UINT64_C(0),
		    "saw no events with 0 instructions");
		T_END;
	});

	/* Sample per-thread instructions and cycles on a timer. */
	(void)kperf_action_count_set(1);
	T_ATEND(reset_kperf);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
	    KPERF_SAMPLER_TH_INSTRS_CYCLES), NULL);

	(void)kperf_timer_count_set(1);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
	    kperf_ns_to_ticks(TIMER_PERIOD_NS)), NULL);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);

	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");

	T_ASSERT_POSIX_ZERO(ktrace_start(sess, dispatch_get_main_queue()),
	    NULL);

	T_SETUPEND;

	/* Hard deadline in case the early-stop threshold is never reached. */
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
	    dispatch_get_main_queue(), ^(void) {
		ktrace_end(sess, 1);
	});

	dispatch_main();
}
1174 
/* An event ID unlikely to be emitted by anything else, used as a trigger. */
#define UNCOMMON_EVENTID 0xfeedfac0
/* A queue label deliberately longer than the sampler's copy limit. */
#define LABEL "this_is_a_test_label_that_is_very_long_and_will_be_truncated" \
		"_but_needs_some_more_length_for_good_measure"
/* Maximum number of label bytes kperf emits for a dispatch queue. */
#define MAX_LABEL_LEN 64
static_assert(sizeof(LABEL) > MAX_LABEL_LEN,
    "should be larger than the max length of a label");
1181 
T_DECL(kperf_dispatch_labels, "ensure dispatch queue labels are sampled")
{
	T_SETUPBEGIN;
	start_controlling_ktrace();
	ktrace_session_t sess = ktrace_session_create();
	ktrace_set_collection_interval(sess, 200);

	/*
	 * Expect to see the label that was sampled by kperf.
	 */
	__block uint64_t nlabels = 0;
	char obs_label[MAX_LABEL_LEN + 1] = {};
	char *obs_label_b = obs_label;
	char exp_label[MAX_LABEL_LEN + 1] = {};
	/* The expected label is LABEL truncated to the sampler's limit. */
	strlcpy(exp_label, LABEL, sizeof(exp_label));
	char *exp_label_b = exp_label;

	ktrace_events_single(sess, PERF_DISPLABEL, ^(struct trace_point *tp) {
		nlabels++;
		/*
		 * Each event packs label bytes into its four args; the number
		 * of bytes per arg depends on the kernel's pointer size.
		 */
		size_t argsize = ktrace_is_kernel_64_bit(sess) ? 8 : 4;
		strncat(obs_label_b, (char *)&tp->arg1, argsize);
		strncat(obs_label_b, (char *)&tp->arg2, argsize);
		strncat(obs_label_b, (char *)&tp->arg3, argsize);
		strncat(obs_label_b, (char *)&tp->arg4, argsize);
		/* The END event marks the last chunk of the label. */
		if (tp->debugid & DBG_FUNC_END) {
			ktrace_end(sess, 1);
		}
	});
	ktrace_events_single(sess, PERF_DISPSAMPLE, ^(struct trace_point *tp) {
		if (tp->debugid & DBG_FUNC_END) {
			T_LOG("ending sample event returned %d (%s)",
			    (int)tp->arg1, strerror((int)tp->arg1));
		}
	});
	ktrace_events_single(sess, UNCOMMON_EVENTID,
	    ^(__unused struct trace_point *tp) {
		T_LOG("saw triggering event");
	});
	ktrace_set_completion_handler(sess, ^{
		T_EXPECT_GT(nlabels, 0ULL, "saw %" PRIu64
		    " dispatch queue label events", nlabels);
		T_EXPECT_EQ_STR(obs_label_b, exp_label_b, "label string is correct");
		T_END;
	});

	/*
	 * Set up kperf to log a dispatch queue label when an uncommon event ID
	 * is traced.
	 */
	(void)sysctlbyname("kperf.debug_level", NULL, NULL, &(int){ 2 }, sizeof(int));
	(void)kperf_action_count_set(1);
	T_ATEND(reset_kperf);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
	    KPERF_SAMPLER_TH_DISPATCH), NULL);
	(void)kperf_kdebug_action_set(1);
	kperf_kdebug_filter_t kdfilt = kperf_kdebug_filter_create();
	T_QUIET;
	T_ASSERT_NOTNULL(kdfilt, "create kdebug filter for kperf");
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(
	    kperf_kdebug_filter_add_debugid(kdfilt, UNCOMMON_EVENTID),
	    "add debugid to filter on");
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(
	    kperf_kdebug_filter_set(kdfilt), "set filter for kperf");
	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");

	T_ASSERT_POSIX_ZERO(ktrace_start(sess, dispatch_get_main_queue()),
	    NULL);

	/*
	 * Create the queue and emit the sampling event from it.
	 */
	dispatch_queue_t testq = dispatch_queue_create(LABEL, 0);
	T_ASSERT_NOTNULL(testq, "created queue with label: " LABEL);
	dispatch_async(testq, ^{
		kdebug_trace(UNCOMMON_EVENTID, 0, 0, 0, 0);
		/* Keep the queue (and its label) alive while kperf samples it. */
		sleep(10);
	});

	T_SETUPEND;

	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 5 * NSEC_PER_SEC),
	    dispatch_get_main_queue(), ^(void) {
		ktrace_end(sess, 1);
	});

	dispatch_main();
}
1272 
1273 T_HELPER_DECL(kperf_blessed_helper,
1274     "drop priviliges and attempt to configure kperf")
1275 {
1276 	drop_priv();
1277 	(void)kperf_action_count_set(1);
1278 	errno = 0;
1279 	int ret = kperf_lazy_wait_action_set(1);
1280 	int error = errno;
1281 	exit(ret != 0 ? (error == 0) ? EINVAL : error : 0);
1282 }
1283 
1284 static pid_t
spawn_bless_helper(void)1285 spawn_bless_helper(void)
1286 {
1287 	char test_path[MAXPATHLEN] = { 0 };
1288 	int ret = proc_pidpath(getpid(), test_path, MAXPATHLEN);
1289 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "query path for current process");
1290 	T_LOG("spawning helper tool at %s", test_path);
1291 	char *child_argv[] = { test_path, "-n", "kperf_blessed_helper", NULL };
1292 	pid_t child_pid = -1;
1293 	int error = dt_launch_tool(&child_pid, child_argv, true, NULL, NULL);
1294 	T_ASSERT_POSIX_ZERO(error, "spawned child with pid %d", child_pid);
1295 	T_QUIET; T_ASSERT_GT(child_pid, 0, "child pid is valid");
1296 	return child_pid;
1297 }
1298 
/*
 * Resume a suspended helper child and wait for it to exit.  Returns the raw
 * status from waitpid(2) for the caller to decode.
 */
static int
test_child_process(pid_t child_pid)
{
	int status = 0;
	int rc = kill(child_pid, SIGCONT);
	T_ASSERT_POSIX_SUCCESS(rc, "continuing child process");
	rc = waitpid(child_pid, &status, 0);
	T_ASSERT_POSIX_SUCCESS(rc, "waited on child process");
	return status;
}
1309 
T_DECL(kperf_unblessed_refusal,
    "check that an unblessed child cannot access ktrace")
{
	T_SETUPBEGIN;
	pid_t child_pid = spawn_bless_helper();
	T_SETUPEND;
	/* No kperf_bless_set call -- the unprivileged child must fail. */
	int status = test_child_process(child_pid);
	int exit_status = WEXITSTATUS(status);
	/* The helper exits with the errno it saw, so non-zero means refused. */
	T_EXPECT_NE(exit_status, 0, "child failed to access ktrace with %d (%s)",
		exit_status, exit_status == 0 ? "ok" : strerror(exit_status));
}
1321 
T_DECL(kperf_blessed_ownership, "check that a blessed child can access ktrace")
{
	T_LOG("parent pid is %d\n", getpid());
	T_SETUPBEGIN;
	pid_t child_pid = spawn_bless_helper();
	T_SETUPEND;
	/* Bless the child before resuming so its kperf calls are allowed. */
	int ret = kperf_bless_set(child_pid);
	T_ASSERT_POSIX_SUCCESS(ret, "blessed child with pid %d", child_pid);
	int status = test_child_process(child_pid);
	int exit_status = WEXITSTATUS(status);
	/* Helper exits 0 when its kperf configuration attempt succeeded. */
	T_EXPECT_EQ(exit_status, 0, "child status was %d (%s)", exit_status,
	    exit_status == 0 ? "ok" : strerror(exit_status));
	/* Remove the blessing so it doesn't leak into later tests. */
	ret = kperf_bless_set(-1);
	T_ASSERT_POSIX_SUCCESS(ret, "removed blessing from child");
}
1337 
1338