xref: /xnu-8020.101.4/tests/ktrace/kpc_tests.c (revision e7776783b89a353188416a9a346c6cdb4928faad)
1 // Copyright (c) 2018-2020 Apple Inc.  All rights reserved.
2 
3 #include <darwintest.h>
4 #include <ktrace/config.h>
5 #include <ktrace/session.h>
6 #include <inttypes.h>
7 #include <libproc.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <sys/resource.h>
11 #include <sys/sysctl.h>
12 
13 #include <kperf/kpc.h>
14 #include <kperf/kperf.h>
15 
16 #include "ktrace_helpers.h"
17 #include "kperf_helpers.h"
18 #include "ktrace_meta.h"
/*
 * Describes the PMC topology of the machine being tested: how many logical
 * CPUs there are and how many fixed and configurable counters each has.
 * Filled in by prepare_kpc().
 */
struct machine {
	unsigned int ncpus;   /* logical CPUs, from hw.logicalcpu_max */
	unsigned int nfixed;  /* fixed-function counters per CPU */
	unsigned int nconfig; /* configurable counters per CPU */
};
24 
/*
 * 64-bit absolute value: the argument is reinterpreted as a signed 64-bit
 * integer and negated when negative.
 * NOTE(review): ABSV64(INT64_MIN) would overflow -- assumed unreachable for
 * the skid values this file applies it to; confirm if reused elsewhere.
 */
#ifndef ABSV64
#define ABSV64(n)	((((int64_t)(n)) < 0) ? -((int64_t)(n)) : ((int64_t)(n)))
#endif
28 
/*
 * Skip the running test when this kernel/platform does not expose PMCs,
 * as reported by the "kern.monotonic.supported" sysctl.
 */
static void
skip_if_unsupported(void)
{
	int supported = 0;
	size_t supported_size = sizeof(supported);

	int ret = sysctlbyname("kern.monotonic.supported", &supported,
	    &supported_size, NULL, 0);
	if (ret < 0) {
		T_WITH_ERRNO;
		T_SKIP("could not find \"kern.monotonic.supported\" sysctl");
	}
	if (supported == 0) {
		T_SKIP("PMCs are not supported on this platform");
	}
}
47 
48 static struct rusage_info_v4 pre_ru = {};
49 
50 static void
start_kpc(void)51 start_kpc(void)
52 {
53 	T_SETUPBEGIN;
54 
55 	kpc_classmask_t classes = KPC_CLASS_FIXED_MASK |
56 	    KPC_CLASS_CONFIGURABLE_MASK;
57 	int ret = kpc_set_counting(classes);
58 	T_ASSERT_POSIX_SUCCESS(ret, "started counting");
59 
60 	ret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4, (rusage_info_t *)&pre_ru);
61 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "got rusage information");
62 
63 	kpc_classmask_t classes_on = kpc_get_counting();
64 	T_QUIET;
65 	T_ASSERT_EQ(classes, classes_on, "classes counting is correct");
66 
67 	T_SETUPEND;
68 }
69 
static void kpc_reset_atend(void);

/*
 * Event selector for core cycles.  On ARM this is the raw PMU event 0x02;
 * on Intel, event 0x3c with extra selector flags (0x10000 | 0x20000 --
 * presumably user/kernel-mode bits in the kpc config encoding; confirm
 * against the kpc x86 config layout).
 */
#if defined(__arm__) || defined(__arm64__)
#define CYCLES_EVENT 0x02
#else // defined(__arm__) || defined(__arm64__)
#define CYCLES_EVENT (0x10000 | 0x20000 | 0x3c)
#endif // !defined(__arm__) && !defined(__arm64__)
77 
/*
 * Discover the machine's PMC topology into `mch`, take ownership of all the
 * counters, and (when `config` is true) program every configurable counter
 * with `event`.  An `event` of 0 also registers kpc_reset_atend() to clear
 * the counter periods when the test exits.
 */
static void
prepare_kpc(struct machine *mch, bool config, uint64_t event)
{
	T_SETUPBEGIN;

	if (!event) {
		T_ATEND(kpc_reset_atend);
	}

	size_t ncpus_sz = sizeof(mch->ncpus);
	int ret = sysctlbyname("hw.logicalcpu_max", &mch->ncpus, &ncpus_sz,
	    NULL, 0);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(ret, "sysctlbyname(hw.logicalcpu_max)");
	T_QUIET;
	T_ASSERT_GT(mch->ncpus, 0, "must have some number of CPUs");

	/* Force ownership of all counters away from other clients. */
	ret = kpc_force_all_ctrs_set(1);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_set(1)");

	int forcing = 0;
	ret = kpc_force_all_ctrs_get(&forcing);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_get");
	T_QUIET; T_ASSERT_EQ(forcing, 1, "counters must be forced");

	mch->nfixed = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
	mch->nconfig = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);

	T_LOG("machine: ncpus = %d, nfixed = %d, nconfig = %d", mch->ncpus,
	    mch->nfixed, mch->nconfig);

	if (config) {
		uint32_t nconfigs = kpc_get_config_count(
		    KPC_CLASS_CONFIGURABLE_MASK);
		uint64_t *configs = calloc(nconfigs, sizeof(*configs));
		T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");

		for (unsigned int i = 0; i < nconfigs; i++) {
			configs[i] = event;
		}

		ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, configs);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_config");
		/* Fix: the config buffer was previously leaked. */
		free(configs);
	}

	T_SETUPEND;
}
125 
126 static void
kpc_reset_atend(void)127 kpc_reset_atend(void)
128 {
129 	struct machine mch = {};
130 	prepare_kpc(&mch, true, 0);
131 	uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
132 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
133 
134 	int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
135 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
136 	free(periods);
137 }
138 
/*
 * Thread body: busy-wait until the int pointed to by `arg` becomes non-zero.
 * Used to keep CPUs busy so cycle counters advance; always returns NULL.
 */
static void *
spin(void *arg)
{
	volatile int *done = arg;

	while (*done == 0) {
		;
	}
	return NULL;
}
148 
149 static pthread_t *
start_threads(const struct machine * mch,void * (* func)(void *),void * arg)150 start_threads(const struct machine *mch, void *(*func)(void *), void *arg)
151 {
152 	T_SETUPBEGIN;
153 
154 	pthread_t *threads = calloc((unsigned int)mch->ncpus,
155 	    sizeof(*threads));
156 	T_QUIET; T_ASSERT_NOTNULL(threads, "allocated array of threads");
157 	for (unsigned int i = 0; i < mch->ncpus; i++) {
158 		int error = pthread_create(&threads[i], NULL, func, arg);
159 		T_QUIET; T_ASSERT_POSIX_ZERO(error, "pthread_create");
160 	}
161 
162 	T_SETUPEND;
163 
164 	return threads;
165 }
166 
167 static void
end_threads(const struct machine * mch,pthread_t * threads)168 end_threads(const struct machine *mch, pthread_t *threads)
169 {
170 	for (unsigned int i = 0; i < mch->ncpus; i++) {
171 		int error = pthread_join(threads[i], NULL);
172 		T_QUIET; T_ASSERT_POSIX_ZERO(error, "joined thread %d", i);
173 	}
174 	free(threads);
175 }
176 
/*
 * Per-counter statistics accumulated over repeated samples of a PMC.
 */
struct tally {
	uint64_t firstvalue; /* value seen on the first check */
	uint64_t lastvalue;  /* most recent value */
	uint64_t nchecks;    /* number of samples folded in */
	uint64_t nzero;      /* samples that read exactly zero */
	uint64_t nstuck;     /* samples equal to the previous one */
	uint64_t ndecrease;  /* samples lower than the previous one */
};

/*
 * Fold one snapshot of `counts` (ncpus x nctrs values, CPU-major) into the
 * parallel `tallies` array, flagging zero, stuck, and decreasing readings.
 */
static void
check_counters(unsigned int ncpus, unsigned int nctrs, struct tally *tallies,
		uint64_t *counts)
{
	for (unsigned int cpu = 0; cpu < ncpus; cpu++) {
		for (unsigned int pmc = 0; pmc < nctrs; pmc++) {
			unsigned int idx = cpu * nctrs + pmc;
			struct tally *cur = &tallies[idx];
			uint64_t value = counts[idx];

			if (value == 0) {
				cur->nzero++;
			}
			/* Equal and decreasing are mutually exclusive. */
			if (value == cur->lastvalue) {
				cur->nstuck++;
			} else if (value < cur->lastvalue) {
				cur->ndecrease++;
			}
			cur->lastvalue = value;
			if (cur->nchecks == 0) {
				cur->firstvalue = value;
			}
			cur->nchecks++;
		}
	}
}
213 
214 static void
check_tally(unsigned int ncpus,unsigned int nctrs,struct tally * tallies)215 check_tally(unsigned int ncpus, unsigned int nctrs, struct tally *tallies)
216 {
217 	uint64_t nstuck = 0;
218 	uint64_t nchecks = 0;
219 	uint64_t nzero = 0;
220 	uint64_t ndecrease = 0;
221 
222 	for (unsigned int i = 0; i < ncpus; i++) {
223 		for (unsigned int j = 0; j < nctrs; j++) {
224 			unsigned int ctr = i * nctrs + j;
225 			struct tally *tly = &tallies[ctr];
226 
227 			T_LOG("CPU %2u PMC %u: nchecks = %llu, last value = %llx, "
228 				"delta = %llu, nstuck = %llu", i, j,
229 			    tly->nchecks, tly->lastvalue, tly->lastvalue - tly->firstvalue,
230 			    tly->nstuck);
231 
232 			nchecks += tly->nchecks;
233 			nstuck += tly->nstuck;
234 			nzero += tly->nzero;
235 			ndecrease += tly->ndecrease;
236 		}
237 	}
238 
239 	T_EXPECT_GT(nchecks, 0ULL, "checked 0x%" PRIx64 " counter values", nchecks);
240 	T_EXPECT_EQ(nzero, 0ULL, "found 0x%" PRIx64 " zero values", nzero);
241 	T_EXPECT_EQ(nstuck, 0ULL, "found 0x%" PRIx64 " stuck values", nstuck);
242 	T_EXPECT_EQ(ndecrease, 0ULL,
243 	    "found 0x%" PRIx64 " decreasing values", ndecrease);
244 }
245 
246 #define TESTDUR_NS (5 * NSEC_PER_SEC)
247 
T_DECL(kpc_cpu_direct_configurable,
    "test that configurable counters return monotonically increasing values")
{
	skip_if_unsupported();

	struct machine mch = {};
	prepare_kpc(&mch, true, CYCLES_EVENT);

	/* Keep every CPU busy so the cycle counters keep advancing. */
	int until = 0;
	pthread_t *threads = start_threads(&mch, spin, &until);
	start_kpc();

	T_SETUPBEGIN;

	uint64_t startns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
	uint64_t *counts = kpc_counterbuf_alloc();
	T_QUIET; T_ASSERT_NOTNULL(counts, "allocated space for counter values");
	memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
	struct tally *tly = calloc(mch.ncpus * mch.nconfig, sizeof(*tly));
	T_QUIET; T_ASSERT_NOTNULL(tly, "allocated space for tallies");

	T_SETUPEND;

	/*
	 * Sample the configurable counters on every CPU roughly every 10ms
	 * for the test duration, tallying each snapshot.
	 */
	int n = 0;
	while (clock_gettime_nsec_np(CLOCK_MONOTONIC) - startns < TESTDUR_NS) {
		int ret = kpc_get_cpu_counters(true,
		    KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_get_cpu_counters");

		check_counters(mch.ncpus, mch.nconfig, tly, counts);

		usleep(10000);
		n++;
		/* Progress heartbeat every 100 iterations. */
		if (n % 100 == 0) {
			T_LOG("checked 100 times");
		}
	}

	/* Expect no zero, stuck, or decreasing counter values were seen. */
	check_tally(mch.ncpus, mch.nconfig, tly);

	until = 1;
	end_threads(&mch, threads);
}
291 
T_DECL(kpc_thread_direct_instrs_cycles,
    "test that fixed thread counters return monotonically increasing values")
{
	int err;
	uint32_t ctrs_cnt;
	uint64_t *ctrs_a;
	uint64_t *ctrs_b;

	skip_if_unsupported();

	T_SETUPBEGIN;

	ctrs_cnt = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
	if (ctrs_cnt == 0) {
		T_SKIP("no fixed counters available");
	}
	T_LOG("device has %" PRIu32 " fixed counters", ctrs_cnt);

	/* Own the counters and enable per-thread fixed-counter accumulation. */
	T_QUIET; T_ASSERT_POSIX_SUCCESS(kpc_force_all_ctrs_set(1), NULL);
	T_ASSERT_POSIX_SUCCESS(kpc_set_counting(KPC_CLASS_FIXED_MASK),
	    "kpc_set_counting");
	T_ASSERT_POSIX_SUCCESS(kpc_set_thread_counting(KPC_CLASS_FIXED_MASK),
	    "kpc_set_thread_counting");

	T_SETUPEND;

	/* First sample: every fixed counter should already be non-zero. */
	ctrs_a = malloc(ctrs_cnt * sizeof(uint64_t));
	T_QUIET; T_ASSERT_NOTNULL(ctrs_a, NULL);

	err = kpc_get_thread_counters(0, ctrs_cnt, ctrs_a);
	T_ASSERT_POSIX_SUCCESS(err, "kpc_get_thread_counters");

	for (uint32_t i = 0; i < ctrs_cnt; i++) {
		T_LOG("checking counter %d with value %" PRIu64 " > 0", i, ctrs_a[i]);
		T_QUIET;
		T_EXPECT_GT(ctrs_a[i], UINT64_C(0), "counter %d is non-zero", i);
	}

	/* Second sample: each counter must have strictly increased. */
	ctrs_b = malloc(ctrs_cnt * sizeof(uint64_t));
	T_QUIET; T_ASSERT_NOTNULL(ctrs_b, NULL);

	err = kpc_get_thread_counters(0, ctrs_cnt, ctrs_b);
	T_ASSERT_POSIX_SUCCESS(err, "kpc_get_thread_counters");

	for (uint32_t i = 0; i < ctrs_cnt; i++) {
		T_LOG("checking counter %d with value %" PRIu64
		    " > previous value %" PRIu64, i, ctrs_b[i], ctrs_a[i]);
		T_QUIET;
		T_EXPECT_GT(ctrs_b[i], UINT64_C(0), "counter %d is non-zero", i);
		T_QUIET; T_EXPECT_LT(ctrs_a[i], ctrs_b[i],
		    "counter %d is increasing", i);
	}

	free(ctrs_a);
	free(ctrs_b);
}
348 
/* Overall duration of the PMI tests. */
#define PMI_TEST_DURATION_NS (15 * NSEC_PER_SEC)
/* How often the timer reads all CPU counters during the PMI test. */
#define PERIODIC_CPU_COUNT_MS (250)
/* The test duration is split into this many slices for coverage checks. */
#define NTIMESLICES (72)
/* Request a PMI every 50 million counted events. */
#define PMI_PERIOD (50ULL * 1000 * 1000)
/* Synthetic kdebug event used to mark the end of tracing. */
#define END_EVENT KDBG_EVENTID(0xfe, 0xfe, 0)

/* Per-CPU bookkeeping for PMI and scheduler events, bucketed by time slice. */
struct cpu {
	uint64_t prev_count, max_skid;
	unsigned int scheduled_outside_slice;
	unsigned int pmi_timeslices[NTIMESLICES];
	unsigned int scheduled_timeslices[NTIMESLICES];
};
361 
T_DECL(kpc_pmi_configurable,
    "test that PMIs don't interfere with sampling counters in kperf")
{
	skip_if_unsupported();

	start_controlling_ktrace();
	struct machine mch = {};
	prepare_kpc(&mch, true, CYCLES_EVENT);

	T_SETUPBEGIN;

	/* Only the first configurable counter takes PMIs (non-zero period). */
	uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
	periods[0] = PMI_PERIOD;

	int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
	free(periods);

	/* Route that counter's PMIs to kperf action 1. */
	int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
	actions[0] = 1;
	ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
	free(actions);

	/* Action 1 samples thread info and a kernel callstack on each PMI. */
	(void)kperf_action_count_set(1);
	ret = kperf_action_samplers_set(1,
	    KPERF_SAMPLER_TINFO | KPERF_SAMPLER_KSTACK);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");

	ktrace_config_t ktconfig = ktrace_config_create_current();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(ktconfig, "create current config");
	ret = ktrace_config_print_description(ktconfig, stdout);
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "print config description");

	struct cpu *cpus = calloc(mch.ncpus, sizeof(*cpus));
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(cpus, "allocate CPUs array");

	__block unsigned int sample_count = 0;
	__block unsigned int pmi_count = 0;
	__block unsigned int callstack_count = 0;
	__block uint64_t first_ns = 0;
	__block uint64_t last_ns = 0;

	ktrace_session_t sess = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");

	/*
	 * For each PMI event: check the configured event matches, track skid
	 * against the programmed period, and bucket it into a time slice for
	 * the per-CPU coverage check in the completion handler.
	 */
	ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
		if (tp->debugid & DBG_FUNC_END) {
			return;
		}

		uint64_t cur_ns = 0;
		int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
		    tp->timestamp, &cur_ns);
		T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");

		/* arg1 packs config (low 32), counter (next 16), flags (top 16). */
		uint64_t desc = tp->arg1;
		uint64_t config = desc & UINT32_MAX;
		T_QUIET; T_EXPECT_EQ(config & UINT8_MAX,
				(uint64_t)CYCLES_EVENT & UINT8_MAX,
				"PMI argument matches configuration");
		__unused uint64_t counter = (desc >> 32) & UINT16_MAX;
		__unused uint64_t flags = desc >> 48;

		uint64_t count = tp->arg2;
		if (first_ns == 0) {
			first_ns = cur_ns;
		}
		struct cpu *cpu = &cpus[tp->cpuid];

		/*
		 * Skid is the unsigned difference from the programmed period;
		 * NOTE(review): if delta < PMI_PERIOD this wraps to a huge
		 * value -- ABSV64 below recovers the magnitude when reporting.
		 */
		if (cpu->prev_count != 0) {
			uint64_t delta = count - cpu->prev_count;
			uint64_t skid = delta - PMI_PERIOD;
			if (skid > cpu->max_skid) {
				cpu->max_skid = skid;
			}
		}
		cpu->prev_count = count;

		__unused uint64_t pc = tp->arg3;

		/* Bucket this PMI into one of NTIMESLICES slices. */
		double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
		    NTIMESLICES;
		if (slice < NTIMESLICES) {
			cpu->pmi_timeslices[(unsigned int)slice] += 1;
		}

		pmi_count++;
	});

	/*
	 * Track scheduler activity per CPU per slice, so slices without PMIs
	 * can be excused when nothing ran on that CPU.
	 */
	void (^sched_handler)(struct trace_point *tp) =
	    ^(struct trace_point *tp) {
		uint64_t cur_ns = 0;
		int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
		    tp->timestamp, &cur_ns);
		T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
		if (first_ns == 0) {
			first_ns = cur_ns;
		}

		struct cpu *cpu = &cpus[tp->cpuid];
		double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
		    NTIMESLICES;
		if (slice < NTIMESLICES) {
			cpu->scheduled_timeslices[(unsigned int)slice] += 1;
		} else {
			cpu->scheduled_outside_slice += 1;
		}
	};
	ktrace_events_single(sess, MACH_SCHED, sched_handler);
	ktrace_events_single(sess, MACH_STACK_HANDOFF, sched_handler);

	/* Count kperf samples and captured kernel callstacks. */
	ktrace_events_single(sess, PERF_SAMPLE, ^(struct trace_point * tp) {
		if (tp->debugid & DBG_FUNC_START) {
			sample_count++;
		}
	});
	ktrace_events_single(sess, PERF_STK_KHDR,
	    ^(struct trace_point * __unused tp) {
		callstack_count++;
	});

	/* The END_EVENT marker stops the trace after the timeout fires. */
	ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp) {
		int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
		    tp->timestamp, &last_ns);
		T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");

		ktrace_end(sess, 1);
	});

	uint64_t *counts = kpc_counterbuf_alloc();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(counts,
			"allocated counter values array");
	memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
	struct tally *tly = calloc(mch.ncpus * (mch.nconfig + mch.nfixed),
			sizeof(*tly));
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(tly, "allocated tallies array");

	/*
	 * Periodically read all CPU counters while PMIs are firing, to check
	 * that direct reads still work and values stay monotonic.
	 */
	dispatch_source_t cpu_count_timer = dispatch_source_create(
			DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_get_main_queue());
    dispatch_source_set_timer(cpu_count_timer, dispatch_time(DISPATCH_TIME_NOW,
        PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC),
        PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC, 0);
    dispatch_source_set_cancel_handler(cpu_count_timer, ^{
        dispatch_release(cpu_count_timer);
    });

    __block uint64_t first_check_ns = 0;
    __block uint64_t last_check_ns = 0;

    dispatch_source_set_event_handler(cpu_count_timer, ^{
		int cret = kpc_get_cpu_counters(true,
		    KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(cret, "kpc_get_cpu_counters");

		if (!first_check_ns) {
			first_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
		} else {
			last_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
		}
		check_counters(mch.ncpus, mch.nfixed + mch.nconfig, tly, counts);
	});
	/* Subscribe to all DBG_PERF events so they are retained in the trace. */
	ktrace_events_class(sess, DBG_PERF, ^(struct trace_point * __unused tp) {});

	/* Keep all CPUs busy so PMIs fire everywhere. */
	int stop = 0;
	(void)start_threads(&mch, spin, &stop);

	/* After the trace ends: validate tallies, PMI/sample counts, and
	 * per-CPU slice coverage, then end the test. */
	ktrace_set_completion_handler(sess, ^{
		dispatch_cancel(cpu_count_timer);

		check_tally(mch.ncpus, mch.nfixed + mch.nconfig, tly);

		struct rusage_info_v4 post_ru = {};
		int ruret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4,
				(rusage_info_t *)&post_ru);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ruret, "got rusage information");
		T_LOG("saw %llu cycles in process",
				post_ru.ri_cycles - pre_ru.ri_cycles);
		uint64_t total_cycles = 0;

		T_LOG("saw pmis = %u, samples = %u, stacks = %u", pmi_count, sample_count,
		    callstack_count);
		// Allow some slop in case the trace is cut-off midway through a
		// sample.
		const unsigned int cutoff_leeway = 32;
		T_EXPECT_GE(sample_count + cutoff_leeway, pmi_count,
		    "saw as many samples as PMIs");
		T_EXPECT_GE(callstack_count + cutoff_leeway, pmi_count,
		    "saw as many stacks as PMIs");

		/* Render per-CPU slice coverage: '*' = PMI seen, '.' = CPU idle,
		 * '-' = scheduled but no PMI. */
		unsigned int cpu_sample_count = 0;
		char sample_slices[NTIMESLICES + 1];
		sample_slices[NTIMESLICES] = '\0';
		for (unsigned int i = 0; i < mch.ncpus; i++) {
			memset(sample_slices, '-', sizeof(sample_slices) - 1);

			struct cpu *cpu = &cpus[i];
			unsigned int pmi_slice_count = 0, no_sched_slice_count = 0,
					cpu_pmi_count = 0, last_contiguous = 0;
			bool seen_empty = false;
			for (unsigned int j = 0; j < NTIMESLICES; j++) {
				unsigned int slice_pmi_count = cpu->pmi_timeslices[j];
				unsigned int slice_sched_count = cpu->scheduled_timeslices[j];
				cpu_pmi_count += slice_pmi_count;
				if (slice_pmi_count > 0) {
					pmi_slice_count++;
					sample_slices[j] = '*';
				} else if (slice_sched_count == 0) {
					no_sched_slice_count++;
					sample_slices[j] = '.';
				} else {
					seen_empty = true;
				}
				if (!seen_empty) {
					last_contiguous = j;
				}
			}
			/* First configurable counter for CPU i in the tally array. */
			unsigned int ctr = i * (mch.nfixed + mch.nconfig) + mch.nfixed;
			uint64_t delta = tly[ctr].lastvalue - tly[ctr].firstvalue;
			T_LOG("%g GHz", (double)delta / (last_check_ns - first_check_ns));
			total_cycles += delta;
			uint64_t abs_max_skid = (uint64_t)ABSV64(cpu->max_skid);
			T_LOG("CPU %2u: %4up:%4un/%u, %6u/%llu, max skid = %llu (%.4f%%), "
					"last contiguous = %u, scheduled outside = %u", i,
					pmi_slice_count, no_sched_slice_count, NTIMESLICES,
					sample_count, delta / PMI_PERIOD, abs_max_skid,
					(double)abs_max_skid / PMI_PERIOD * 100, last_contiguous,
					cpu->scheduled_outside_slice);
			T_LOG("%s", sample_slices);
			if (cpu_pmi_count > 0) {
				cpu_sample_count++;
			}
			T_EXPECT_EQ(last_contiguous, NTIMESLICES - 1,
					"CPU %2u: saw samples in each time slice", i);
		}
		T_LOG("kpc reported %llu total cycles", total_cycles);
		T_LOG("saw %u sample events, across %u/%u cpus", sample_count,
				cpu_sample_count, mch.ncpus);
		T_EXPECT_EQ(cpu_sample_count, mch.ncpus,
				"should see PMIs on every CPU");
		T_END;
	});

	int dbglvl = 3;
	ret = sysctlbyname("kperf.debug_level", NULL, NULL, &dbglvl,
	    sizeof(dbglvl));
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "set kperf debug level");
	ret = kperf_sample_set(1);
	T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");

	start_kpc();

	int error = ktrace_start(sess, dispatch_get_main_queue());
	T_ASSERT_POSIX_ZERO(error, "started tracing");

	/* Emit the end marker after the test duration elapses. */
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
			dispatch_get_main_queue(), ^{
		T_LOG("ending tracing after timeout");
		kdebug_trace(END_EVENT, 0, 0, 0, 0);
	});

	dispatch_activate(cpu_count_timer);

	T_SETUPEND;

	/* Never returns; the completion handler calls T_END. */
	dispatch_main();
}
630 
/* Compile-time predicate for T_META_ENABLED on arm64-only tests. */
#if defined(__arm64__)
#define IS_ARM64 true
#else // defined(__arm64__)
#define IS_ARM64 false
#endif // !defined(__arm64__)
636 
637 T_DECL(kpc_pmu_config, "ensure PMU can be configured", T_META_ENABLED(IS_ARM64))
638 {
639 	T_SETUPBEGIN;
640 	int ret = kpc_force_all_ctrs_set(1);
641 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret,
642 			"force all counters to allow raw PMU configuration");
643 	uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_RAWPMU_MASK);
644 	T_LOG("found %u raw PMU configuration words", nconfigs);
645 	uint64_t *configs = calloc(nconfigs, sizeof(*configs));
646 	T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
647 	T_SETUPEND;
648 
649 	ret = kpc_set_config(KPC_CLASS_RAWPMU_MASK, configs);
650 	T_ASSERT_POSIX_SUCCESS(ret, "should set PMU configuration");
651 }
652 
/*
 * Event selector used to generate frequent PMIs for PC capture.
 * NOTE(review): presumably a data-TLB miss event on Apple PMUs, per the
 * test name -- confirm against the PMU event tables.
 */
#define DTLB_MISS_EVENT (0xc1)

T_DECL(pmi_pc_capture, "ensure PC capture works for PMCs 5, 6, and 7",
    T_META_REQUIRES_SYSCTL_EQ("kpc.pc_capture_supported", 1))
{
	start_controlling_ktrace();
	struct machine mch = {};
	prepare_kpc(&mch, true, DTLB_MISS_EVENT);

	T_SETUPBEGIN;

	uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
	for (unsigned int i = 0; i < mch.nconfig; i++) {
		/*
		 * Offset the periods so the PMIs don't alias to the same PC capture.
		 * Since there's only one PC capture register, they will clobber each
		 * other.
		 */
		periods[i] = PMI_PERIOD / 1000 + (i * 1000);
	}

	int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
	free(periods);

	/* Route PMIs from every configurable counter to kperf action 1. */
	int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
	for (unsigned int i = 0; i < mch.nconfig; i++) {
		actions[i] = 1;
	}
	ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
	free(actions);

	(void)kperf_action_count_set(1);
	ret = kperf_action_samplers_set(1, KPERF_SAMPLER_TINFO);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");

	ktrace_session_t sess = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");

	/* Per-counter tallies for PMCs 5-7: total PMIs and PMIs with PC capture.
	 * Accessed through pointers so the blocks below can mutate them. */
	uint64_t pc_captured_arr[3] = {};
	uint64_t *pc_captured = pc_captured_arr;
	uint64_t pmi_event_arr[3] = {};
	uint64_t *pmi_event = pmi_event_arr;
	ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
		if (tp->debugid & DBG_FUNC_END) {
			return;
		}

		uint64_t desc = tp->arg1;

/* arg1 layout: counter number in bits 32-39, flags in bits 48-63. */
#define KPC_DESC_COUNTER(DESC) (((DESC) >> 32) & 0xff)
#define KPC_DESC_FLAGS(DESC) ((DESC) >> 48)
#define KPC_FLAG_PC_CAPTURED (0x08)

		uint64_t counter = KPC_DESC_COUNTER(desc);
		uint64_t flags = KPC_DESC_FLAGS(desc);
		/* Only PMCs 5, 6, and 7 support PC capture. */
		if (counter >= 5 && counter <= 7) {
			pmi_event[counter - 5]++;
			if (flags & KPC_FLAG_PC_CAPTURED) {
				pc_captured[counter - 5]++;
			}
		}
	});

	ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp __unused) {
		ktrace_config_t config = ktrace_config_create_current();
		ktrace_config_print_description(config, stdout);
		ktrace_config_destroy(config);
		T_LOG("saw ending event");
		ktrace_end(sess, 1);
	});

	/* After tracing: each of PMCs 5-7 must have captured at least one PC. */
	ktrace_set_completion_handler(sess, ^{
		ktrace_session_destroy(sess);
		for (unsigned int i = 0; i < 3; i++) {
			T_LOG("PMC%u: saw %llu/%llu (%g%%) PMIs with PC capture", i + 5,
			    pc_captured[i], pmi_event[i],
			    (double)pc_captured[i] / (double)pmi_event[i] * 100.0);
			T_EXPECT_GT(pc_captured[i], 0ULL, "saw PC capture for counter %u",
			    i + 5);
		}
		T_END;
	});

	ret = kperf_sample_set(1);
	T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");

	start_kpc();

	int error = ktrace_start(sess, dispatch_get_main_queue());
	T_ASSERT_POSIX_ZERO(error, "started tracing");

	/* Emit the end marker after the test duration elapses. */
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
			dispatch_get_main_queue(), ^{
		T_LOG("ending tracing after timeout");
		kdebug_trace(END_EVENT, 0, 0, 0, 0);
	});

	T_SETUPEND;

	/* Never returns; the completion handler calls T_END. */
	dispatch_main();
}
757 
758