// xref: /xnu-11215.1.10/tests/ktrace/kperf_tests.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 // Copyright (c) 2017-2021 Apple Inc.  All rights reserved.
2 
3 #include <darwintest.h>
4 #include <darwintest_utils.h>
5 #include <dispatch/dispatch.h>
6 #include <inttypes.h>
7 #include <ktrace/session.h>
8 #include <ktrace/private.h>
9 #include <sys/kdebug.h>
10 #include <sys/syscall.h>
11 #include <kperf/kpc.h>
12 #include <kperf/kperf.h>
13 #include <kperfdata/kpdecode.h>
14 #include <os/assumes.h>
15 #include <stdint.h>
16 #include <sys/sysctl.h>
17 
18 #include "kperf_helpers.h"
19 #include "ktrace_helpers.h"
20 #include "ktrace_meta.h"
21 #include "../drop_priv.h"
22 
23 
24 
25 #define MAX_CPUS    64
26 #define MAX_THREADS 64
27 
// Cleared by test completion handlers to ask the spinning threads to exit
// their busy loops.  NOTE(review): volatile is not a synchronization
// primitive; this relies on eventual visibility, which is adequate for a
// test-only spin flag but an atomic_bool would be stricter.
volatile static bool running_threads = true;
29 
30 static void *
spinning_thread(void * semp)31 spinning_thread(void *semp)
32 {
33 	T_QUIET;
34 	T_ASSERT_NOTNULL(semp, "semaphore passed to thread should not be NULL");
35 	dispatch_semaphore_signal(*(dispatch_semaphore_t *)semp);
36 
37 	while (running_threads) {
38 		;
39 	}
40 	return NULL;
41 }
42 
43 #define PERF_STK_KHDR   UINT32_C(0x25020014)
44 #define PERF_STK_UHDR   UINT32_C(0x25020018)
45 #define PERF_TMR_FIRE   KDBG_EVENTID(DBG_PERF, 3, 0)
46 #define PERF_TMR_HNDLR  KDBG_EVENTID(DBG_PERF, 3, 2)
47 #define PERF_TMR_PEND   KDBG_EVENTID(DBG_PERF, 3, 3)
48 #define PERF_TMR_SKIP   KDBG_EVENTID(DBG_PERF, 3, 4)
49 #define PERF_KPC_CONFIG KDBG_EVENTID(DBG_PERF, 6, 4)
50 #define PERF_KPC_REG    KDBG_EVENTID(DBG_PERF, 6, 5)
51 #define PERF_KPC_REG32  KDBG_EVENTID(DBG_PERF, 6, 7)
52 #define PERF_INSTR_DATA KDBG_EVENTID(DBG_PERF, 1, 17)
53 #define PERF_EVENT      KDBG_EVENTID(DBG_PERF, 0, 0)
54 #define PERF_DISPLABEL  KDBG_EVENTID(DBG_PERF, 1, 23)
55 #define PERF_DISPSAMPLE KDBG_EVENTID(DBG_PERF, 1, 10)
56 
57 #define SCHED_DISPATCH KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_DISPATCH)
58 #define SCHED_SWITCH KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_SCHED)
59 #define SCHED_HANDOFF KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_STACK_HANDOFF)
60 #define SCHED_IDLE KDBG_EVENTID(DBG_MACH, DBG_MACH_SCHED, MACH_IDLE)
61 
62 #define MP_CPUS_CALL UINT32_C(0x1900004)
63 
64 #define DISPATCH_AFTER_EVENT UINT32_C(0xfefffffc)
65 #define TIMEOUT_SECS 10
66 
67 #define TIMER_PERIOD_NS (1 * NSEC_PER_MSEC)
68 
// Start tracing on session `s` and arrange for it to end after
// `timeout_secs`.  The timeout is only armed once an event emitted by this
// process is observed in the trace, which guarantees the session collects
// at least some events before it is torn down.
static void
start_tracing_with_timeout(ktrace_session_t s, unsigned int timeout_secs)
{
	// Only set the timeout after we've seen an event that was traced by us.
	// This helps set a reasonable timeout after we're guaranteed to get a
	// few events.
	dispatch_queue_t q = dispatch_get_global_queue(QOS_CLASS_USER_INITIATED, 0);

	// Register the sentinel handler before starting the session so the
	// event cannot be missed.
	ktrace_events_single(s, DISPATCH_AFTER_EVENT,
	    ^(__unused struct trace_point *tp)
	{
		T_LOG("arming timer to stop tracing after %d seconds", timeout_secs);
		dispatch_after(dispatch_time(DISPATCH_TIME_NOW,
		    timeout_secs * NSEC_PER_SEC), q, ^{
			T_LOG("ending tracing due to timeout");
			ktrace_end(s, 0);
		});
	});
	ktrace_set_collection_interval(s, 100);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, q), "start ktrace");

	// Emit the sentinel; once the handler above sees it, the timeout starts
	// counting down.
	kdebug_trace(DISPATCH_AFTER_EVENT, 0, 0, 0, 0);
	T_LOG("trace point emitted");
}
94 
// Configure kperf with a single timer (timer 0) that fires every `period_ns`
// nanoseconds and runs action 1 with the given sampler mask, then start
// sampling.  Asserts on any configuration failure.
static void
configure_kperf_timer_samplers(uint64_t period_ns, uint32_t samplers)
{
	T_SETUPBEGIN;

	// Action 1 runs the requested samplers; timer 0 triggers action 1.
	(void)kperf_action_count_set(1);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1, samplers),
	    NULL);
	(void)kperf_timer_count_set(1);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
	    kperf_ns_to_ticks(period_ns)), NULL);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);

	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");

	T_SETUPEND;
}
115 
116 static double
timestamp_secs(ktrace_session_t s,uint64_t timestamp)117 timestamp_secs(ktrace_session_t s, uint64_t timestamp)
118 {
119 	uint64_t ns = 0;
120 	T_QUIET;
121 	T_ASSERT_POSIX_ZERO(ktrace_convert_timestamp_to_nanoseconds(s, timestamp,
122 	    &ns), NULL);
123 	return (double)ns / NSEC_PER_SEC;
124 }
125 
126 #pragma mark - timers
127 
128 // Ensure that kperf is correctly sampling CPUs that are actively scheduling by
129 // bringing up threads and ensuring that threads on-core are sampled by each
130 // timer fire.
131 
132 T_DECL(kperf_sample_active_cpus,
133     "make sure that kperf samples all active CPUs",
134 	T_META_TAG_VM_NOT_ELIGIBLE)
135 {
136 	start_controlling_ktrace();
137 
138 	T_SETUPBEGIN;
139 
140 	int ncpus = dt_ncpu();
141 	T_QUIET;
142 	T_ASSERT_LT(ncpus, MAX_CPUS,
143 	    "only supports up to %d CPUs", MAX_CPUS);
144 	T_LOG("found %d CPUs", ncpus);
145 
146 	int nthreads = ncpus - 1;
147 	T_QUIET;
148 	T_ASSERT_LT(nthreads, MAX_THREADS,
149 	    "only supports up to %d threads", MAX_THREADS);
150 
151 	static pthread_t threads[MAX_THREADS];
152 
153 	ktrace_session_t s = ktrace_session_create();
154 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
155 	ktrace_set_collection_interval(s, 100);
156 
157 	__block uint64_t nfires = 0;
158 	__block uint64_t nsamples = 0;
159 	static uint64_t idle_tids[MAX_CPUS] = { 0 };
160 	__block double sum_saturation = 0;
161 	__block uint64_t last_nsamples = 0;
162 
163 	// As a test debugging aid, take an additonal argument that specifies the
164 	// number of fires to stop tracing after.  This also turns on additional
165 	// logging of scheduler trace events.
166 	int stopafter = 0;
167 	if (argc > 0) {
168 		stopafter = atoi(argv[0]);
169 		if (stopafter < 0) {
170 			T_ASSERT_FAIL("argument must be positive");
171 		}
172 	}
173 
174 	static uint64_t first_timestamp = 0;
175 	static uint64_t last_timestamp = 0;
176 	ktrace_events_any(s, ^(struct trace_point *tp) {
177 		if (first_timestamp == 0) {
178 			first_timestamp = tp->timestamp;
179 		}
180 		last_timestamp = tp->timestamp;
181 	});
182 
183 	ktrace_set_completion_handler(s, ^{
184 		T_LOG("stopping threads");
185 
186 		running_threads = false;
187 
188 		for (int i = 0; i < nthreads; i++) {
189 		        T_QUIET;
190 		        T_ASSERT_POSIX_ZERO(pthread_join(threads[i], NULL), NULL);
191 		}
192 
193 		double saturation = sum_saturation / nfires * 100;
194 
195 		T_LOG("over %.1f seconds, saw %" PRIu64 " timer fires, %" PRIu64
196 			" samples, %g samples/fire, %.2f%% saturation",
197 			timestamp_secs(s, last_timestamp - first_timestamp), nfires,
198 			nsamples, (double)nsamples / (double)nfires, saturation);
199 		T_ASSERT_GT(saturation, 95.0,
200 		    "saw reasonable percentage of full samples");
201 
202 		T_END;
203 	});
204 
205 	// Track which threads are running on each CPU.
206 	static uint64_t tids_on_cpu[MAX_CPUS] = { 0 };
207 	void (^switch_cb)(struct trace_point *, const char *name) =
208 	    ^(struct trace_point *tp, const char *name) {
209 		uint64_t new_thread = tp->arg2;
210 
211 		if (idle_tids[tp->cpuid] != new_thread) {
212 			tids_on_cpu[tp->cpuid] = new_thread;
213 		}
214 
215 		if (stopafter) {
216 			T_LOG("%.7g: %s on %d: %llx", timestamp_secs(s, tp->timestamp),
217 			    name, tp->cpuid, tp->arg2);
218 		}
219 	};
220 
221 	ktrace_events_single(s, SCHED_SWITCH, ^(struct trace_point *tp) {
222 		switch_cb(tp, "switch");
223 	});
224 	ktrace_events_single(s, SCHED_HANDOFF, ^(struct trace_point *tp) {
225 		switch_cb(tp, "hndoff");
226 	});
227 
228 	// Determine the thread IDs of the idle threads on each CPU.
229 	ktrace_events_single(s, SCHED_IDLE, ^(struct trace_point *tp) {
230 		if (tp->debugid & DBG_FUNC_END) {
231 			return;
232 		}
233 		tids_on_cpu[tp->cpuid] = 0;
234 		idle_tids[tp->cpuid] = tp->threadid;
235 		if (stopafter) {
236 			T_LOG("%.7g: idle on %d: %llx", timestamp_secs(s, tp->timestamp),
237 			    tp->cpuid, tp->threadid);
238 		}
239 	});
240 
241 	// On each timer fire, go through all the cores and mark any threads
242 	// that should be sampled.
243 
244 	__block int last_fire_cpu = -1;
245 	static bool sample_missing[MAX_CPUS] = { false };
246 	static uint64_t tids_snap[MAX_CPUS] = { 0 };
247 	__block int nexpected = 0;
248 	__block int nextra = 0;
249 	__block int nidles = 0;
250 
251 	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
252 		T_QUIET; T_ASSERT_EQ((tp->debugid & DBG_FUNC_START), 0,
253 		    "no timer fire start events are allowed");
254 		int last_expected = nexpected;
255 		nfires++;
256 
257 		nexpected = 0;
258 		for (int i = 0; i < ncpus; i++) {
259 			if (sample_missing[i]) {
260 				T_LOG("missed sample on CPU %d for thread %#llx from "
261 				    "timer on CPU %d (expected %d samples)",
262 				    tp->cpuid, tids_snap[i], last_fire_cpu, last_expected);
263 				sample_missing[i] = false;
264 			}
265 
266 			if (tids_on_cpu[i] != 0) {
267 				tids_snap[i] = tids_on_cpu[i];
268 				sample_missing[i] = true;
269 				nexpected++;
270 			}
271 		}
272 		if (stopafter) {
273 			T_LOG("%.7g: FIRE on %d: %d extra, %d idles",
274 			    timestamp_secs(s, tp->timestamp), tp->cpuid, nextra, nidles);
275 		}
276 
277 		if (nfires == 1) {
278 			return;
279 		}
280 
281 		if (last_expected == 0) {
282 			sum_saturation += 1;
283 		} else {
284 			sum_saturation += (double)(nsamples - last_nsamples) /
285 			    last_expected;
286 		}
287 		last_nsamples = nsamples;
288 		nextra = 0;
289 		nidles = 0;
290 
291 		T_QUIET;
292 		T_ASSERT_LT((int)tp->cpuid, ncpus,
293 		    "timer fire should not occur on an IOP");
294 		last_fire_cpu = (int)tp->cpuid;
295 
296 		if (stopafter && (uint64_t)stopafter == nfires) {
297 			ktrace_end(s, 1);
298 		}
299 	});
300 
301 	// On the timer handler for each CPU, unset the missing sample bitmap.
302 
303 	ktrace_events_single(s, PERF_TMR_HNDLR, ^(struct trace_point *tp) {
304 		nsamples++;
305 		if ((int)tp->cpuid > ncpus) {
306 		        // Skip IOPs; they're not scheduling any relevant threads.
307 		        return;
308 		}
309 
310 		if (!sample_missing[tp->cpuid] && idle_tids[tp->cpuid] != 0) {
311 			T_LOG("sampled additional thread %llx on CPU %d", tp->threadid,
312 			    tp->cpuid);
313 			nextra++;
314 		}
315 		if (tp->threadid == idle_tids[tp->cpuid]) {
316 			T_LOG("sampled idle thread on CPU %d", tp->cpuid);
317 			nidles++;
318 		}
319 		sample_missing[tp->cpuid] = false;
320 	});
321 
322 	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_KSTACK);
323 
324 	T_SETUPEND;
325 
326 	start_tracing_with_timeout(s, TIMEOUT_SECS);
327 
328 	// Create threads to bring up all of the CPUs.
329 
330 	dispatch_semaphore_t thread_spinning = dispatch_semaphore_create(0);
331 
332 	for (int i = 0; i < nthreads; i++) {
333 		T_QUIET;
334 		T_ASSERT_POSIX_ZERO(
335 			pthread_create(&threads[i], NULL, &spinning_thread,
336 			&thread_spinning), NULL);
337 		dispatch_semaphore_wait(thread_spinning, DISPATCH_TIME_FOREVER);
338 	}
339 
340 	T_LOG("spun up %d thread%s", nthreads, nthreads == 1 ? "" : "s");
341 
342 	dispatch_main();
343 }
344 
345 #define FIRES_THRESHOLD (5000)
346 
// Program a 1ms kperf timer, keep one CPU busy with a spinner thread, and
// verify that the number of timer fires observed over the traced interval
// is within 5% of the count the timer period predicts.
T_DECL(kperf_timer_fires_enough_times,
    "ensure the correct number of timers fire in a period of time",
	T_META_TAG_VM_NOT_ELIGIBLE)
{
	start_controlling_ktrace();

	dispatch_semaphore_t thread_spinning = dispatch_semaphore_create(0);

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	__block uint64_t nfires = 0;
	__block uint64_t first_fire_ns = 0;
	__block uint64_t last_fire_ns = 0;

	int ncpus = dt_ncpu();

	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		nfires++;
		// Record the first and most recent fire times to bound the
		// measured interval.
		if (first_fire_ns == 0) {
			ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
			    &first_fire_ns);
		}
		ktrace_convert_timestamp_to_nanoseconds(s, tp->timestamp,
		    &last_fire_ns);

		T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
		    "timer fire should not occur on an IOP");
		if (nfires >= FIRES_THRESHOLD) {
			ktrace_end(s, 1);
		}
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_KSTACK);

	// Keep at least one CPU fully busy so the timer has work to observe.
	pthread_t thread;
	T_QUIET;
	T_ASSERT_POSIX_ZERO(pthread_create(&thread, NULL, &spinning_thread,
	    &thread_spinning), NULL);
	dispatch_semaphore_wait(thread_spinning, DISPATCH_TIME_FOREVER);

	ktrace_set_completion_handler(s, ^{
		running_threads = false;

		double duration_secs = (double)(last_fire_ns - first_fire_ns) /
		    NSEC_PER_SEC;
		T_LOG("stopping thread after %.2f seconds", duration_secs);

		T_QUIET; T_ASSERT_POSIX_ZERO(pthread_join(thread, NULL), NULL);

		T_LOG("saw %" PRIu64 " timer fires (%g fires/second)", nfires,
		    (double)nfires / (double)duration_secs);
		double expected_nfires = duration_secs * NSEC_PER_SEC / TIMER_PERIOD_NS;
		T_LOG("expecting %g timer fires", expected_nfires);
		// NOTE(review): this ratio is expected/seen, so values above 100%
		// mean fires were missed and values below 100% mean extra fires --
		// the inverse of what the variable name suggests.  The two bounds
		// below still constrain both directions.
		double nfires_seen_pct = expected_nfires / nfires * 100;
		T_ASSERT_GT(nfires_seen_pct, 95.0,
		    "saw reasonable number of missed timer fires");
		T_ASSERT_LT(nfires_seen_pct, 105.0,
			"saw reasonable number of extra timer fires");

		T_END;
	});

	start_tracing_with_timeout(s, TIMEOUT_SECS);

	dispatch_main();
}
415 
416 // kperf_timer_not_oversampling ensures that the profiling timer fires are
417 // spaced apart by the programmed timer period.  Otherwise, tools that rely on
418 // sample count as a proxy for CPU usage will over-estimate.
419 
420 #define FIRE_PERIOD_THRESHOLD_NS \
421 		(TIMER_PERIOD_NS - (uint64_t)(TIMER_PERIOD_NS * 0.05))
422 
// Header for a fixed-capacity circular queue.  The element storage is laid
// out immediately after this struct in memory (see struct cirq_instval and
// struct cirq_u64), so slot addresses can be computed with pointer
// arithmetic from the header.
struct cirq {
	unsigned int nslots;    // total number of slots
	unsigned int tail_slot; // index of the next slot to write
	unsigned int slot_size; // size of one element, in bytes
};

// Initialize a cirq header for NSLOTS elements of TYPE.
#define CIRQ_INIT(TYPE, NSLOTS) \
	(struct cirq){ \
		.nslots = NSLOTS, .tail_slot = 0, .slot_size = sizeof(TYPE), \
	}
433 
434 static inline void *
cirq_get(struct cirq * cq,unsigned int i)435 cirq_get(struct cirq *cq, unsigned int i)
436 {
437 	return (char *)cq + sizeof(*cq) + (cq->slot_size * i);
438 }
439 
440 static void *
cirq_top(void * vcq)441 cirq_top(void *vcq)
442 {
443 	struct cirq *cq = vcq;
444 	unsigned int tail_slot = cq->tail_slot;
445 	unsigned int top_slot = (tail_slot > 0 ? tail_slot : cq->nslots) - 1;
446 	return cirq_get(cq, top_slot);
447 }
448 
449 static void *
cirq_push(void * vcq)450 cirq_push(void *vcq)
451 {
452 	struct cirq *cq = vcq;
453 	unsigned int tail_slot = cq->tail_slot;
454 	unsigned int next_slot = tail_slot == cq->nslots - 1 ? 0 : tail_slot + 1;
455 	cq->tail_slot = next_slot;
456 	return cirq_get(cq, tail_slot);
457 }
458 
459 static void
460 cirq_for(void *vcq, void (^iter)(void *elt))
461 {
462 	struct cirq *cq = vcq;
463 	for (unsigned int i = cq->tail_slot; i < cq->nslots; i++) {
464 		iter(cirq_get(cq, i));
465 	}
466 	for (unsigned int i = 0; i < cq->tail_slot; i++) {
467 		iter(cirq_get(cq, i));
468 	}
469 }
470 
#define HISTORY_LEN 5

// A timestamped value: when it was observed and what was observed.
struct instval {
	uint64_t iv_instant_ns;
	uint64_t iv_val;
};

// cirq specializations: the cq header must come first so the elts storage
// directly follows it in memory, as cirq_get assumes.
struct cirq_instval {
	struct cirq cq;
	struct instval elts[HISTORY_LEN];
};

struct cirq_u64 {
	struct cirq cq;
	uint64_t elts[HISTORY_LEN];
};

// Per-CPU history of decrementer (timer interrupt) latencies and kperf
// timer-fire latencies, kept for diagnostics when a fire period is short.
struct cpu_oversample {
	struct cirq_instval timer_latencies;
	struct cirq_instval fire_latencies;
};
492 
493 static void
cpu_oversample_log(struct cpu_oversample * cpu,unsigned int cpuid)494 cpu_oversample_log(struct cpu_oversample *cpu, unsigned int cpuid)
495 {
496 	T_LOG("CPU %d timer latencies:", cpuid);
497 	__block int i = -HISTORY_LEN;
498 	cirq_for(&cpu->timer_latencies, ^(void *viv) {
499 		struct instval *iv = viv;
500 		T_LOG("\t%llu timer latency %d: %llu", iv->iv_instant_ns, i,
501 		    iv->iv_val);
502 		i++;
503 	});
504 
505 	T_LOG("CPU %d fire latencies:", cpuid);
506 	i = -HISTORY_LEN;
507 	cirq_for(&cpu->fire_latencies, ^(void *viv) {
508 		struct instval *iv = viv;
509 		T_LOG("\t%llu fire latency %d: %llu", iv->iv_instant_ns, i, iv->iv_val);
510 		i++;
511 	});
512 }
513 
// Program a 1ms kperf timer and verify that consecutive fires are never
// closer together than the period allows (with 5% slack), after adjusting
// for interrupt latency.  Stackshots are detected and excused, since they
// are the dominant source of long interrupt-disabled windows.
T_DECL(kperf_timer_not_oversampling,
    "ensure that time between fires is long enough",
	T_META_TAG_VM_NOT_ELIGIBLE)
{
	start_controlling_ktrace();

	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	// Try not to perturb the system with more work.
	ktrace_set_collection_interval(s, 1000);
	__block uint64_t nfires = 0;
	__block uint64_t first_fire_ns = 0;
	__block uint64_t last_fire_ns = 0;
	__block unsigned int last_fire_cpuid = 0;

	int ncpus = dt_ncpu();
	T_QUIET; T_ASSERT_GT(ncpus, 0, "should see positive number of CPUs");

	// Per-CPU ring buffers of recent latencies, reported on failure.
	struct cpu_oversample *per_cpu = calloc((unsigned int)ncpus,
			sizeof(per_cpu[0]));
	T_QUIET; T_WITH_ERRNO;
	T_ASSERT_NOTNULL(per_cpu, "allocated timer latency tracking");
	for (int i = 0; i < ncpus; i++) {
		per_cpu[i].timer_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
		per_cpu[i].fire_latencies.cq = CIRQ_INIT(struct instval, HISTORY_LEN);
	}

	__block bool in_stackshot = false;
	__block uint64_t last_stackshot_ns = 0;

	// Stackshots are the primary source of interrupt latency on the system.
	ktrace_events_single(s, KDBG_EVENTID(DBG_BSD, DBG_BSD_EXCP_SC,
			SYS_stack_snapshot_with_config), ^(struct trace_point *tp) {
		bool start = tp->debugid & DBG_FUNC_START;
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		T_LOG("%llu: %s stackshot syscall from process %s",
				cur_ns, start ? "start" : "finish", tp->command);
		in_stackshot = start;
		if (!start) {
			last_stackshot_ns = cur_ns;
		}
	});

	// Recent fire times, dumped when a short period is detected.
	struct cirq_u64 *fire_times = calloc(1, sizeof(*fire_times));
	T_ASSERT_NOTNULL(fire_times, "allocated fire time tracking");
	fire_times->cq = CIRQ_INIT(uint64_t, HISTORY_LEN);

	// Track the decrementer's latency values to find any unexpectedly long
	// interrupt latencies that could affect the firing cadence.
	ktrace_events_single(s, DECR_TRAP_LATENCY,
			^(struct trace_point *tp) {
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		// arg1 carries the negated latency; 0 - arg1 recovers it.
		uint64_t latency_ns = ns_from_abs(s, 0 - tp->arg1);
		struct instval *latency = cirq_push(&per_cpu[tp->cpuid].timer_latencies);
		latency->iv_instant_ns = cur_ns;
		latency->iv_val = latency_ns;
	});

	ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
		T_QUIET; T_ASSERT_LT((int)tp->cpuid, ncpus,
				"timer fire should not occur on an IOP");

		nfires++;
		uint64_t cur_ns = relns_from_abs(s, tp->timestamp);
		uint64_t *fire_ns = cirq_push(fire_times);
		*fire_ns = cur_ns;

		struct cpu_oversample *cur_cpu = &per_cpu[tp->cpuid];
		struct instval *last_timer_latency = cirq_top(
				&cur_cpu->timer_latencies);
		uint64_t timer_latency_ns = last_timer_latency->iv_val;

		if (first_fire_ns == 0) {
			first_fire_ns = cur_ns;
		} else {
			struct cpu_oversample *last_cpu = &per_cpu[last_fire_cpuid];
			struct instval *last_latency = cirq_top(&last_cpu->fire_latencies);
			uint64_t last_fire_latency_ns = last_latency->iv_val;

			if (timer_latency_ns > TIMER_PERIOD_NS / 4) {
				T_LOG("%llu: long timer latency at fire: %llu", cur_ns,
						timer_latency_ns);
			}

			// Long interrupt latencies will cause the timer to miss its fire
			// time and report a fire past when it should have, making the next
			// period too short.  Keep track of the latency as a leeway
			// adjustment for the subsequent fire.
			uint64_t fire_period_ns = cur_ns - last_fire_ns;
			uint64_t fire_period_adj_ns = fire_period_ns +
			    last_fire_latency_ns + timer_latency_ns;
			// Within 5% is still a valid period -- adjust this for running in
			// potentially-noisy automation.
			uint64_t fire_period_pct_adj_ns = fire_period_adj_ns +
			    FIRE_PERIOD_THRESHOLD_NS / 20;
			if (fire_period_adj_ns < FIRE_PERIOD_THRESHOLD_NS &&
			    fire_period_pct_adj_ns >= FIRE_PERIOD_THRESHOLD_NS) {
				T_LOG("ignoring period of %llu within 5%% of expected %llu",
				    fire_period_adj_ns, fire_period_pct_adj_ns);
			}
			bool too_short = fire_period_pct_adj_ns < FIRE_PERIOD_THRESHOLD_NS;
			if (too_short) {
				T_LOG("%llu: period of timer fire %llu is %llu + %llu + %llu = "
						"%llu < %llu",
						cur_ns, nfires, fire_period_ns, last_fire_latency_ns,
						timer_latency_ns, fire_period_adj_ns,
						FIRE_PERIOD_THRESHOLD_NS);

				T_LOG("short profile timer fired on CPU %d", tp->cpuid);
				cpu_oversample_log(cur_cpu, tp->cpuid);

				if (cur_cpu == last_cpu) {
					T_LOG("fired back-to-back on CPU %d", tp->cpuid);
				} else {
					T_LOG("previous profile timer fired on CPU %d",
							last_fire_cpuid);
					cpu_oversample_log(last_cpu, last_fire_cpuid);
				}

				T_LOG("profile timer fires:");
				cirq_for(fire_times, ^(void *vu64) {
					T_LOG("\tfire: %llu", *(uint64_t *)vu64);
				});
				// Only assert once every CPU has had a chance to record
				// context; otherwise the latency history may be empty.
				if (nfires < (unsigned int)ncpus) {
					T_LOG("ignoring timer fire %llu as context may be missing",
							nfires);
				} else {
					if (in_stackshot) {
						T_LOG("skipping assertion because stackshot is "
								"happening");
					} else if (last_stackshot_ns != 0 &&
							cur_ns > last_stackshot_ns &&
							cur_ns - last_stackshot_ns < TIMER_PERIOD_NS) {
						T_LOG("skipping assertion because stackshot happened "
								"%" PRIu64 "ns ago",
								cur_ns - last_stackshot_ns);
					} else {
						T_ASSERT_FAIL("profiling period is shorter than "
								"expected with no stackshot interference");
					}
				}
			}

			struct instval *latency = cirq_push(&cur_cpu->fire_latencies);
			latency->iv_instant_ns = cur_ns;
			latency->iv_val = timer_latency_ns;

			// Snapshot this timer fire's interrupt latency, so the next one
			// can make an adjustment to the period.
			// NOTE(review): this assignment is a dead store -- the local is
			// never read after this point; the next fire re-reads the value
			// from the fire_latencies cirq pushed just above.
			last_fire_latency_ns = timer_latency_ns;
		}
		last_fire_ns = cur_ns;
		last_fire_cpuid = tp->cpuid;

		if (nfires >= FIRES_THRESHOLD) {
			ktrace_end(s, 1);
		}
	});

	configure_kperf_timer_samplers(TIMER_PERIOD_NS, KPERF_SAMPLER_TINFO);

	ktrace_set_completion_handler(s, ^{
		double duration_secs = (double)(last_fire_ns - first_fire_ns) /
		    NSEC_PER_SEC;
		T_LOG("stopping trace after %.2f seconds", duration_secs);

		T_PASS("saw %" PRIu64 " timer fires (%g fires/second) without "
			"oversampling", nfires, (double)nfires / (double)duration_secs);

		T_END;
	});

	start_tracing_with_timeout(s, 5);

	// Get all CPUs out of idle.
	uint64_t *counts = kpc_counterbuf_alloc();
	(void)kpc_get_cpu_counters(true,KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
	free(counts);

	dispatch_main();
}
695 
696 T_DECL(kperf_timer_stress, "repeatedly enable and disable timers",
697 	T_META_TAG_VM_NOT_ELIGIBLE)
698 {
699 	start_controlling_ktrace();
700 
701 	const int niters = 500;
702 	for (int i = 0; i < niters; i++) {
703 		configure_kperf_stacks_timer(-1, 1, true);
704 		T_QUIET;
705 		T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
706 		usleep(2000);
707 		kperf_reset();
708 	}
709 	T_LOG("configured kperf with a timer %d times", niters);
710 }
711 
712 #pragma mark - kdebug triggers
713 
714 #define KDEBUG_TRIGGER_TIMEOUT_NS (10 * NSEC_PER_SEC)
715 
716 #define NON_TRIGGER_CLASS    UINT32_C(0xfd)
717 #define NON_TRIGGER_SUBCLASS UINT32_C(0xff)
718 #define NON_TRIGGER_CODE     UINT32_C(0xff)
719 
720 #define NON_TRIGGER_EVENT \
721 	        (KDBG_EVENTID(NON_TRIGGER_CLASS, NON_TRIGGER_SUBCLASS, \
722 	        NON_TRIGGER_CODE))
723 
// Configure a kperf kdebug-trigger filter from filter_desc, emit each of the
// given debugids, and expect a kernel and a user stack sample for every one.
// Each triggering event increments the missing-stack counters and each
// observed stack decrements them, so at completion both must be <= 0.  A
// final non-matching sentinel event ends tracing.
static void
expect_kdebug_trigger(const char *filter_desc, const uint32_t *debugids,
    unsigned int n_debugids)
{
	__block int missing_kernel_stacks = 0;
	__block int missing_user_stacks = 0;
	ktrace_session_t s;
	kperf_kdebug_filter_t filter;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	ktrace_events_single(s, PERF_STK_KHDR, ^(struct trace_point *tp) {
		missing_kernel_stacks--;
		T_LOG("saw kernel stack with %" PRIu64 " frames, flags = %#"
		PRIx64, tp->arg2, tp->arg1);
	});
	ktrace_events_single(s, PERF_STK_UHDR, ^(struct trace_point *tp) {
		missing_user_stacks--;
		T_LOG("saw user stack with %" PRIu64 " frames, flags = %#"
		PRIx64, tp->arg2, tp->arg1);
	});

	for (unsigned int i = 0; i < n_debugids; i++) {
		ktrace_events_single(s, debugids[i], ^(struct trace_point *tp) {
			missing_kernel_stacks++;
			missing_user_stacks++;
			T_LOG("saw event with debugid 0x%" PRIx32, tp->debugid);
		});
	}

	// The sentinel must not match the filter; seeing it means all the
	// triggering events (and their samples) are already in the trace.
	ktrace_events_single(s, NON_TRIGGER_EVENT,
	    ^(__unused struct trace_point *tp)
	{
		ktrace_end(s, 0);
	});

	ktrace_set_completion_handler(s, ^{
		T_EXPECT_LE(missing_kernel_stacks, 0, NULL);
		T_EXPECT_LE(missing_user_stacks, 0, NULL);

		ktrace_session_destroy(s);
		T_END;
	});

	kperf_reset();

	// Action 1 samples both kernel and user stacks; the kdebug filter below
	// triggers that action.
	(void)kperf_action_count_set(1);
	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
	    KPERF_SAMPLER_KSTACK | KPERF_SAMPLER_USTACK), NULL);

	filter = kperf_kdebug_filter_create();
	T_ASSERT_NOTNULL(filter, NULL);

	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_action_set(1), NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_filter_add_desc(filter, filter_desc),
	    NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_kdebug_filter_set(filter), NULL);
	kperf_kdebug_filter_destroy(filter);

	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), NULL);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Trace the triggering events.

	for (unsigned int i = 0; i < n_debugids; i++) {
		T_ASSERT_POSIX_SUCCESS(kdebug_trace(debugids[i], 0, 0, 0, 0), NULL);
	}

	T_ASSERT_POSIX_SUCCESS(kdebug_trace(NON_TRIGGER_EVENT, 0, 0, 0, 0), NULL);

	// Fail-safe: end tracing even if the sentinel is never observed.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, KDEBUG_TRIGGER_TIMEOUT_NS),
	    dispatch_get_main_queue(), ^(void)
	{
		ktrace_end(s, 1);
	});
}
803 
804 #define TRIGGER_CLASS     UINT32_C(0xfe)
805 #define TRIGGER_CLASS_END UINT32_C(0xfd)
806 #define TRIGGER_SUBCLASS  UINT32_C(0xff)
807 #define TRIGGER_CODE      UINT32_C(0)
808 #define TRIGGER_DEBUGID \
809 	        (KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, TRIGGER_CODE))
810 
811 T_DECL(kperf_kdebug_trigger_classes,
812     "test that kdebug trigger samples on classes",
813 	T_META_TAG_VM_PREFERRED)
814 {
815 	start_controlling_ktrace();
816 
817 	const uint32_t class_debugids[] = {
818 		KDBG_EVENTID(TRIGGER_CLASS, 1, 1),
819 		KDBG_EVENTID(TRIGGER_CLASS, 2, 1),
820 		KDBG_EVENTID(TRIGGER_CLASS_END, 1, 1) | DBG_FUNC_END,
821 		KDBG_EVENTID(TRIGGER_CLASS_END, 2, 1) | DBG_FUNC_END,
822 	};
823 
824 	expect_kdebug_trigger("C0xfe,C0xfdr", class_debugids,
825 	    sizeof(class_debugids) / sizeof(class_debugids[0]));
826 	dispatch_main();
827 }
828 
829 T_DECL(kperf_kdebug_trigger_subclasses,
830     "test that kdebug trigger samples on subclasses",
831 	T_META_TAG_VM_PREFERRED)
832 {
833 	start_controlling_ktrace();
834 
835 	const uint32_t subclass_debugids[] = {
836 		KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, 0),
837 		KDBG_EVENTID(TRIGGER_CLASS, TRIGGER_SUBCLASS, 1),
838 		KDBG_EVENTID(TRIGGER_CLASS_END, TRIGGER_SUBCLASS, 0) | DBG_FUNC_END,
839 		KDBG_EVENTID(TRIGGER_CLASS_END, TRIGGER_SUBCLASS, 1) | DBG_FUNC_END
840 	};
841 
842 	expect_kdebug_trigger("S0xfeff,S0xfdffr", subclass_debugids,
843 	    sizeof(subclass_debugids) / sizeof(subclass_debugids[0]));
844 	dispatch_main();
845 }
846 
847 T_DECL(kperf_kdebug_trigger_debugids,
848     "test that kdebug trigger samples on debugids",
849 	T_META_TAG_VM_PREFERRED)
850 {
851 	start_controlling_ktrace();
852 
853 	const uint32_t debugids[] = {
854 		TRIGGER_DEBUGID
855 	};
856 
857 	expect_kdebug_trigger("D0xfeff0000", debugids,
858 	    sizeof(debugids) / sizeof(debugids[0]));
859 	dispatch_main();
860 }
861 
862 // TODO Set a single function specifier filter, expect not to trigger of all
863 // events from that class.
864 
865 static void
reset_kperf(void)866 reset_kperf(void)
867 {
868 	(void)kperf_reset();
869 	(void)sysctlbyname("kperf.debug_level", NULL, NULL, &(int){ 0 },
870 	    sizeof(int));
871 }
872 
// Enable the legacy kdbg_callstacks mode and verify that a user stack
// sample shows up while BSD (syscall) events are being traced.
T_DECL(kperf_kdbg_callstacks,
    "test that the kdbg_callstacks samples on syscalls",
	T_META_TAG_VM_PREFERRED)
{
	start_controlling_ktrace();

	ktrace_session_t s;
	__block bool saw_user_stack = false;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);

	// Make sure BSD events are traced in order to trigger samples on syscalls.
	ktrace_events_class(s, DBG_BSD, ^void (__unused struct trace_point *tp) {});

	// End the session as soon as the first user stack is seen.
	ktrace_events_single(s, PERF_STK_UHDR, ^(__unused struct trace_point *tp) {
		saw_user_stack = true;
		ktrace_end(s, 1);
	});

	ktrace_set_completion_handler(s, ^{
		ktrace_session_destroy(s);

		T_EXPECT_TRUE(saw_user_stack,
		"saw user stack after configuring kdbg_callstacks");
		T_END;
	});

	// kperf_kdbg_callstacks_set is deprecated; this test exercises it
	// deliberately.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
	T_ASSERT_POSIX_SUCCESS(kperf_kdbg_callstacks_set(1), NULL);
#pragma clang diagnostic pop
	T_ATEND(reset_kperf);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Fail-safe timeout in case no user stack is ever sampled.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
	    dispatch_get_main_queue(), ^(void) {
		ktrace_end(s, 1);
	});

	dispatch_main();
}
917 
918 #pragma mark - PET
919 
920 #define STACKS_WAIT_DURATION_NS (3 * NSEC_PER_SEC)
921 
// Run a ktrace session (optionally customized by `setup`) and expect at
// least one user and one kernel stack sample within
// STACKS_WAIT_DURATION_NS.  Assumes the caller has already configured
// kperf; this starts sampling and resets kperf when the wait expires.
// Calls `complete` after the session is destroyed.
static void
expect_stacks_traced(void (^setup)(ktrace_session_t s), void (^complete)(void))
{
	ktrace_session_t s;

	s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
	ktrace_set_collection_interval(s, 100);
	if (setup) {
		setup(s);
	}

	__block unsigned int user_stacks = 0;
	__block unsigned int kernel_stacks = 0;

	ktrace_events_single(s, PERF_STK_UHDR, ^(__unused struct trace_point *tp) {
		user_stacks++;
	});
	ktrace_events_single(s, PERF_STK_KHDR, ^(__unused struct trace_point *tp) {
		kernel_stacks++;
	});

	ktrace_set_completion_handler(s, ^(void) {
		ktrace_session_destroy(s);
		T_EXPECT_GT(user_stacks, 0U, NULL);
		T_EXPECT_GT(kernel_stacks, 0U, NULL);
		complete();
	});

	T_QUIET; T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), NULL);

	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);

	// Let sampling run for the wait duration, then tear down kperf and end
	// the trace cleanly.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, STACKS_WAIT_DURATION_NS),
	    dispatch_get_main_queue(), ^(void)
	{
		kperf_reset();
		ktrace_end(s, 0);
	});
}
962 
// Enable PET (profile-every-thread) on timer 0 and expect both kernel and
// user stacks to be sampled.
T_DECL(kperf_pet, "test that PET mode samples kernel and user stacks",
	T_META_TAG_VM_PREFERRED)
{
	start_controlling_ktrace();

	// Configure a stacks timer, then flip timer 0 into PET mode.
	configure_kperf_stacks_timer(-1, 10, false);
	T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL);

	expect_stacks_traced(NULL, ^(void) {
		T_END;
	});

	dispatch_main();
}
977 
/*
 * Same as kperf_pet, but with the kperf.lightweight_pet sysctl enabled.
 * Additionally checks that timer fire events carry timer ID 0 and have their
 * PET bit set, and that the timer fired at least once.
 */
T_DECL(kperf_lightweight_pet,
    "test that lightweight PET mode samples kernel and user stacks",
    T_META_ASROOT(true),
	T_META_TAG_VM_PREFERRED)
{
	start_controlling_ktrace();

	int set = 1;

	/* enable lightweight PET before designating timer 0 as the PET timer */
	configure_kperf_stacks_timer(-1, 10, false);
	T_ASSERT_POSIX_SUCCESS(sysctlbyname("kperf.lightweight_pet", NULL, NULL,
	    &set, sizeof(set)), NULL);
	T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL);

	/* number of PERF_TMR_FIRE events observed during the session */
	__block uint64_t nfires = 0;

	expect_stacks_traced(^(ktrace_session_t s) {
		ktrace_events_single(s, PERF_TMR_FIRE, ^(struct trace_point *tp) {
			nfires++;
			T_QUIET;
			T_ASSERT_EQ(tp->arg1, (uint64_t)0,
					"timer fire should have timer ID of 0");
			T_QUIET;
			T_ASSERT_EQ(tp->arg2, (uint64_t)1,
					"timer fire should have PET bit set");
		});
	}, ^(void) {
		T_ASSERT_GT(nfires, (uint64_t)0, "timer fired at least once");
		T_END;
	});

	dispatch_main();
}
1011 
1012 T_DECL(kperf_pet_stress, "repeatedly enable and disable PET mode",
1013 	T_META_TAG_VM_NOT_ELIGIBLE)
1014 {
1015 	start_controlling_ktrace();
1016 
1017 	const int niters = 500;
1018 	for (int i = 0; i < niters; i++) {
1019 		configure_kperf_stacks_timer(-1, 1, true);
1020 		T_QUIET; T_ASSERT_POSIX_SUCCESS(kperf_timer_pet_set(0), NULL);
1021 		T_QUIET;
1022 		T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
1023 		usleep(2000);
1024 		kperf_reset();
1025 	}
1026 
1027 	T_PASS("configured kperf PET %d times", niters);
1028 }
1029 
1030 #pragma mark - PMCs
1031 
1032 T_DECL(kperf_pmc_config_only,
1033     "shouldn't show PMC config events unless requested",
1034 	T_META_TAG_VM_NOT_ELIGIBLE)
1035 {
1036 	start_controlling_ktrace();
1037 
1038 	__block bool saw_kpc_config = false;
1039 	__block bool saw_kpc_reg = false;
1040 
1041 	ktrace_session_t s = ktrace_session_create();
1042 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "ktrace_session_create");
1043 	ktrace_set_collection_interval(s, 100);
1044 
1045 	ktrace_events_single(s, PERF_KPC_CONFIG,
1046 	    ^(__unused struct trace_point *tp) {
1047 		saw_kpc_config = true;
1048 	});
1049 	ktrace_events_single(s, PERF_KPC_REG,
1050 	    ^(__unused struct trace_point *tp) {
1051 		saw_kpc_reg = true;
1052 	});
1053 	ktrace_events_single(s, PERF_KPC_REG32,
1054 	    ^(__unused struct trace_point *tp) {
1055 		saw_kpc_reg = true;
1056 	});
1057 
1058 	ktrace_set_completion_handler(s, ^{
1059 		ktrace_session_destroy(s);
1060 		T_EXPECT_FALSE(saw_kpc_config,
1061 		"should see no KPC configs without sampler enabled");
1062 		T_EXPECT_FALSE(saw_kpc_reg,
1063 		"should see no KPC registers without sampler enabled");
1064 		T_END;
1065 	});
1066 
1067 	uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
1068 	uint64_t *config = calloc(nconfigs, sizeof(*config));
1069 	config[0] = 0x02;
1070 	int ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, config);
1071 	T_ASSERT_POSIX_SUCCESS(ret, "configured kpc");
1072 	T_QUIET;
1073 	T_ASSERT_POSIX_SUCCESS(kpc_set_counting(KPC_CLASS_CONFIGURABLE_MASK),
1074 	    "kpc_set_counting");
1075 
1076 	(void)kperf_action_count_set(1);
1077 	T_ATEND(reset_kperf);
1078 	T_QUIET;
1079 	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1, KPERF_SAMPLER_PMC_CPU),
1080 	    NULL);
1081 
1082 	(void)kperf_timer_count_set(1);
1083 	T_QUIET;
1084 	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
1085 	    kperf_ns_to_ticks(TIMER_PERIOD_NS)), NULL);
1086 	T_QUIET;
1087 	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);
1088 
1089 	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
1090 
1091 	T_ASSERT_POSIX_ZERO(ktrace_start(s, dispatch_get_main_queue()), NULL);
1092 
1093 	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
1094 	    dispatch_get_main_queue(), ^(void) {
1095 		ktrace_end(s, 1);
1096 	});
1097 
1098 	dispatch_main();
1099 }
1100 
1101 static void
skip_if_monotonic_unsupported(void)1102 skip_if_monotonic_unsupported(void)
1103 {
1104 	int r;
1105 	int supported = 0;
1106 	size_t supported_size = sizeof(supported);
1107 
1108 	r = sysctlbyname("kern.monotonic.supported", &supported, &supported_size,
1109 	    NULL, 0);
1110 	if (r < 0) {
1111 		T_WITH_ERRNO;
1112 		T_SKIP("could not find \"kern.monotonic.supported\" sysctl");
1113 	}
1114 
1115 	if (!supported) {
1116 		T_SKIP("monotonic is not supported on this platform");
1117 	}
1118 }
1119 
1120 #define INSTRS_CYCLES_UPPER 500
1121 #define INSTRS_CYCLES_LOWER 50
1122 
1123 T_DECL(kperf_sample_instrs_cycles,
1124     "ensure instructions and cycles are sampled",
1125 	T_META_TAG_VM_NOT_ELIGIBLE)
1126 {
1127 	T_SETUPBEGIN;
1128 
1129 	skip_if_monotonic_unsupported();
1130 	start_controlling_ktrace();
1131 
1132 	ktrace_session_t sess = ktrace_session_create();
1133 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");
1134 	ktrace_set_collection_interval(sess, 100);
1135 
1136 	__block uint64_t ninstrs_cycles = 0;
1137 	__block uint64_t nzeroes = 0;
1138 	ktrace_events_single(sess, PERF_INSTR_DATA,
1139 	    ^(__unused struct trace_point *tp) {
1140 		ninstrs_cycles++;
1141 		if (tp->arg1 == 0) {
1142 			T_LOG("%llx (%s)\n", tp->threadid, tp->command);
1143 			nzeroes++;
1144 		}
1145 		if (ninstrs_cycles >= INSTRS_CYCLES_UPPER) {
1146 			ktrace_end(sess, 1);
1147 		}
1148 	});
1149 
1150 	ktrace_set_collection_interval(sess, 200);
1151 
1152 	ktrace_set_completion_handler(sess, ^{
1153 		T_EXPECT_GE(ninstrs_cycles, (uint64_t)INSTRS_CYCLES_LOWER,
1154 		    "saw enough instructions and cycles events");
1155 		T_EXPECT_EQ(nzeroes, UINT64_C(0),
1156 		    "saw no events with 0 instructions");
1157 		T_END;
1158 	});
1159 
1160 	(void)kperf_action_count_set(1);
1161 	T_ATEND(reset_kperf);
1162 	T_QUIET;
1163 	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
1164 	    KPERF_SAMPLER_TH_INSTRS_CYCLES), NULL);
1165 
1166 	(void)kperf_timer_count_set(1);
1167 	T_QUIET;
1168 	T_ASSERT_POSIX_SUCCESS(kperf_timer_period_set(0,
1169 	    kperf_ns_to_ticks(TIMER_PERIOD_NS)), NULL);
1170 	T_QUIET;
1171 	T_ASSERT_POSIX_SUCCESS(kperf_timer_action_set(0, 1), NULL);
1172 
1173 	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
1174 
1175 	T_ASSERT_POSIX_ZERO(ktrace_start(sess, dispatch_get_main_queue()),
1176 	    NULL);
1177 
1178 	T_SETUPEND;
1179 
1180 	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 10 * NSEC_PER_SEC),
1181 	    dispatch_get_main_queue(), ^(void) {
1182 		ktrace_end(sess, 1);
1183 	});
1184 
1185 	dispatch_main();
1186 }
1187 
1188 #define UNCOMMON_EVENTID 0xfeedfac0
1189 #define LABEL "this_is_a_test_label_that_is_very_long_and_will_be_truncated" \
1190 		"_but_needs_some_more_length_for_good_measure"
1191 #define MAX_LABEL_LEN 64
1192 static_assert(sizeof(LABEL) > MAX_LABEL_LEN,
1193     "should be larger than the max length of a label");
1194 
1195 T_DECL(kperf_dispatch_labels, "ensure dispatch queue labels are sampled",
1196 	T_META_TAG_VM_PREFERRED)
1197 {
1198 	T_SETUPBEGIN;
1199 	start_controlling_ktrace();
1200 	ktrace_session_t sess = ktrace_session_create();
1201 	ktrace_set_collection_interval(sess, 200);
1202 
1203 	/*
1204 	 * Expect to see the label that was sampled by kperf.
1205 	 */
1206 	__block uint64_t nlabels = 0;
1207 	char obs_label[MAX_LABEL_LEN + 1] = {};
1208 	char *obs_label_b = obs_label;
1209 	char exp_label[MAX_LABEL_LEN + 1] = {};
1210 	strlcpy(exp_label, LABEL, sizeof(exp_label));
1211 	char *exp_label_b = exp_label;
1212 
1213 	ktrace_events_single(sess, PERF_DISPLABEL, ^(struct trace_point *tp) {
1214 		nlabels++;
1215 		size_t argsize = ktrace_is_kernel_64_bit(sess) ? 8 : 4;
1216 		strncat(obs_label_b, (char *)&tp->arg1, argsize);
1217 		strncat(obs_label_b, (char *)&tp->arg2, argsize);
1218 		strncat(obs_label_b, (char *)&tp->arg3, argsize);
1219 		strncat(obs_label_b, (char *)&tp->arg4, argsize);
1220 		if (tp->debugid & DBG_FUNC_END) {
1221 			ktrace_end(sess, 1);
1222 		}
1223 	});
1224 	ktrace_events_single(sess, PERF_DISPSAMPLE, ^(struct trace_point *tp) {
1225 		if (tp->debugid & DBG_FUNC_END) {
1226 			T_LOG("ending sample event returned %d (%s)",
1227 			    (int)tp->arg1, strerror((int)tp->arg1));
1228 		}
1229 	});
1230 	ktrace_events_single(sess, UNCOMMON_EVENTID,
1231 	    ^(__unused struct trace_point *tp) {
1232 		T_LOG("saw triggering event");
1233 	});
1234 	ktrace_set_completion_handler(sess, ^{
1235 		T_EXPECT_GT(nlabels, 0ULL, "saw %" PRIu64
1236 		    " dispatch queue label events", nlabels);
1237 		T_EXPECT_EQ_STR(obs_label_b, exp_label_b, "label string is correct");
1238 		T_END;
1239 	});
1240 
1241 	/*
1242 	 * Set up kperf to log a dispatch queue label when an uncommon event ID
1243 	 * is traced.
1244 	 */
1245 	(void)sysctlbyname("kperf.debug_level", NULL, NULL, &(int){ 2 }, sizeof(int));
1246 	(void)kperf_action_count_set(1);
1247 	T_ATEND(reset_kperf);
1248 	T_QUIET;
1249 	T_ASSERT_POSIX_SUCCESS(kperf_action_samplers_set(1,
1250 	    KPERF_SAMPLER_TH_DISPATCH), NULL);
1251 	(void)kperf_kdebug_action_set(1);
1252 	kperf_kdebug_filter_t kdfilt = kperf_kdebug_filter_create();
1253 	T_QUIET;
1254 	T_ASSERT_NOTNULL(kdfilt, "create kdebug filter for kperf");
1255 	T_QUIET;
1256 	T_ASSERT_POSIX_SUCCESS(
1257 	    kperf_kdebug_filter_add_debugid(kdfilt, UNCOMMON_EVENTID),
1258 	    "add debugid to filter on");
1259 	T_QUIET;
1260 	T_ASSERT_POSIX_SUCCESS(
1261 	    kperf_kdebug_filter_set(kdfilt), "set filter for kperf");
1262 	T_ASSERT_POSIX_SUCCESS(kperf_sample_set(1), "start kperf sampling");
1263 
1264 	T_ASSERT_POSIX_ZERO(ktrace_start(sess, dispatch_get_main_queue()),
1265 	    NULL);
1266 
1267 	/*
1268 	 * Create the queue and emit the sampling event from it.
1269 	 */
1270 	dispatch_queue_t testq = dispatch_queue_create(LABEL, 0);
1271 	T_ASSERT_NOTNULL(testq, "created queue with label: " LABEL);
1272 	dispatch_async(testq, ^{
1273 		kdebug_trace(UNCOMMON_EVENTID, 0, 0, 0, 0);
1274 		sleep(10);
1275 	});
1276 
1277 	T_SETUPEND;
1278 
1279 	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, 5 * NSEC_PER_SEC),
1280 	    dispatch_get_main_queue(), ^(void) {
1281 		ktrace_end(sess, 1);
1282 	});
1283 
1284 	dispatch_main();
1285 }
1286 
1287 T_HELPER_DECL(kperf_blessed_helper,
1288     "drop priviliges and attempt to configure kperf")
1289 {
1290 	drop_priv();
1291 	(void)kperf_action_count_set(1);
1292 	errno = 0;
1293 	int ret = kperf_lazy_wait_action_set(1);
1294 	int error = errno;
1295 	exit(ret != 0 ? (error == 0) ? EINVAL : error : 0);
1296 }
1297 
1298 static pid_t
spawn_bless_helper(void)1299 spawn_bless_helper(void)
1300 {
1301 	char test_path[MAXPATHLEN] = { 0 };
1302 	int ret = proc_pidpath(getpid(), test_path, MAXPATHLEN);
1303 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "query path for current process");
1304 	T_LOG("spawning helper tool at %s", test_path);
1305 	char *child_argv[] = { test_path, "-n", "kperf_blessed_helper", NULL };
1306 	pid_t child_pid = -1;
1307 	int error = dt_launch_tool(&child_pid, child_argv, true, NULL, NULL);
1308 	T_ASSERT_POSIX_ZERO(error, "spawned child with pid %d", child_pid);
1309 	T_QUIET; T_ASSERT_GT(child_pid, 0, "child pid is valid");
1310 	return child_pid;
1311 }
1312 
/*
 * Wake the stopped helper child with SIGCONT, wait for it to exit, and
 * return its raw wait(2) status for the caller to inspect.
 */
static int
test_child_process(pid_t child_pid)
{
	T_ASSERT_POSIX_SUCCESS(kill(child_pid, SIGCONT),
	    "continuing child process");

	int status = 0;
	T_ASSERT_POSIX_SUCCESS(waitpid(child_pid, &status, 0),
	    "waited on child process");
	return status;
}
1323 
1324 T_DECL(kperf_unblessed_refusal,
1325     "check that an unblessed child cannot access ktrace",
1326 	T_META_TAG_VM_PREFERRED)
1327 {
1328 	T_SETUPBEGIN;
1329 	pid_t child_pid = spawn_bless_helper();
1330 	T_SETUPEND;
1331 	int status = test_child_process(child_pid);
1332 	int exit_status = WEXITSTATUS(status);
1333 	T_EXPECT_NE(exit_status, 0, "child failed to access ktrace with %d (%s)",
1334 		exit_status, exit_status == 0 ? "ok" : strerror(exit_status));
1335 }
1336 
1337 T_DECL(kperf_blessed_ownership, "check that a blessed child can access ktrace",
1338 	T_META_TAG_VM_PREFERRED)
1339 {
1340 	T_LOG("parent pid is %d\n", getpid());
1341 	T_SETUPBEGIN;
1342 	pid_t child_pid = spawn_bless_helper();
1343 	T_SETUPEND;
1344 	int ret = kperf_bless_set(child_pid);
1345 	T_ASSERT_POSIX_SUCCESS(ret, "blessed child with pid %d", child_pid);
1346 	int status = test_child_process(child_pid);
1347 	int exit_status = WEXITSTATUS(status);
1348 	T_EXPECT_EQ(exit_status, 0, "child status was %d (%s)", exit_status,
1349 	    exit_status == 0 ? "ok" : strerror(exit_status));
1350 	ret = kperf_bless_set(-1);
1351 	T_ASSERT_POSIX_SUCCESS(ret, "removed blessing from child");
1352 }
1353 
1354