xref: /xnu-10002.61.3/tests/ktrace/kpc_tests.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
1 // Copyright (c) 2018-2022 Apple Inc.  All rights reserved.
2 
3 #include <darwintest.h>
4 #include <ktrace/config.h>
5 #include <ktrace/session.h>
6 #include <inttypes.h>
7 #include <libproc.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <sys/resource.h>
11 #include <sys/sysctl.h>
12 
13 #include <kperf/kpc.h>
14 #include <kperf/kperf.h>
15 
16 #include "ktrace_helpers.h"
17 #include "kperf_helpers.h"
18 #include "ktrace_meta.h"
19 #include "test_utils.h"
20 
// Topology of the machine under test: logical CPU count and how many fixed
// and configurable PMCs each CPU exposes.  Filled in by prepare_kpc().
struct machine {
	unsigned int ncpus;   // from the hw.logicalcpu_max sysctl
	unsigned int nfixed;  // fixed counters per CPU (KPC_CLASS_FIXED_MASK)
	unsigned int nconfig; // configurable counters per CPU (KPC_CLASS_CONFIGURABLE_MASK)
};
26 
#ifndef ABSV64
// 64-bit absolute value.  Evaluates n more than once, so only pass
// side-effect-free expressions.  NOTE(review): negating INT64_MIN would
// overflow (UB); callers here feed it skid deltas presumed well in range.
#define ABSV64(n)	((((int64_t)(n)) < 0) ? -((int64_t)(n)) : ((int64_t)(n)))
#endif
30 
// Skip the current test unless the kernel's monotonic (PMC) subsystem exists
// and reports that performance counters are usable on this hardware.
static void
skip_if_unsupported(void)
{
	int supported = 0;
	size_t size = sizeof(supported);

	int err = sysctlbyname("kern.monotonic.supported", &supported, &size,
	    NULL, 0);
	if (err < 0) {
		T_WITH_ERRNO;
		T_SKIP("could not find \"kern.monotonic.supported\" sysctl");
	}
	if (supported == 0) {
		T_SKIP("PMCs are not supported on this platform");
	}
}
49 
// rusage snapshot captured when counting starts (see start_kpc()); the PMI
// test diffs against it to report cycles attributed to this process.
static struct rusage_info_v4 pre_ru = {};
51 
52 static void
start_kpc(void)53 start_kpc(void)
54 {
55 	T_SETUPBEGIN;
56 
57 	kpc_classmask_t classes = KPC_CLASS_FIXED_MASK |
58 	    KPC_CLASS_CONFIGURABLE_MASK;
59 	int ret = kpc_set_counting(classes);
60 	T_ASSERT_POSIX_SUCCESS(ret, "started counting");
61 
62 	ret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4, (rusage_info_t *)&pre_ru);
63 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "got rusage information");
64 
65 	kpc_classmask_t classes_on = kpc_get_counting();
66 	T_QUIET;
67 	T_ASSERT_EQ(classes, classes_on, "classes counting is correct");
68 
69 	T_SETUPEND;
70 }
71 
// Resets configurable-counter periods at test exit (registered via T_ATEND).
static void kpc_reset_atend(void);

#if defined(__arm64__)
#define CYCLES_EVENT 0x02
#else // defined(__arm64__)
// NOTE(review): presumably event 0x3c (cycles) with user/kernel qualifier
// bits on Intel -- confirm against the kpc x86 config-word layout.
#define CYCLES_EVENT (0x10000 | 0x20000 | 0x3c)
#endif // defined(__arm64__)
79 
80 static void
prepare_kpc(struct machine * mch,bool config,uint64_t event)81 prepare_kpc(struct machine *mch, bool config, uint64_t event)
82 {
83 	T_SETUPBEGIN;
84 
85 	if (!event) {
86 		T_ATEND(kpc_reset_atend);
87 	}
88 
89 	size_t ncpus_sz = sizeof(mch->ncpus);
90 	int ret = sysctlbyname("hw.logicalcpu_max", &mch->ncpus, &ncpus_sz,
91 	    NULL, 0);
92 	T_QUIET;
93 	T_ASSERT_POSIX_SUCCESS(ret, "sysctlbyname(hw.logicalcpu_max)");
94 	T_QUIET;
95 	T_ASSERT_GT(mch->ncpus, 0, "must have some number of CPUs");
96 
97 	ret = kpc_force_all_ctrs_set(1);
98 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_set(1)");
99 
100 	int forcing = 0;
101 	ret = kpc_force_all_ctrs_get(&forcing);
102 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_get");
103 	T_QUIET; T_ASSERT_EQ(forcing, 1, "counters must be forced");
104 
105 	mch->nfixed = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
106 	mch->nconfig = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
107 
108 	T_LOG("machine: ncpus = %d, nfixed = %d, nconfig = %d", mch->ncpus,
109 	    mch->nfixed, mch->nconfig);
110 
111 	if (config) {
112 		uint32_t nconfigs = kpc_get_config_count(
113 		    KPC_CLASS_CONFIGURABLE_MASK);
114 		uint64_t *configs = calloc(nconfigs, sizeof(*configs));
115 		T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
116 
117 		for (unsigned int i = 0; i < nconfigs; i++) {
118 			configs[i] = event;
119 		}
120 
121 		ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, configs);
122 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_config");
123 	}
124 
125 	T_SETUPEND;
126 }
127 
128 static void
kpc_reset_atend(void)129 kpc_reset_atend(void)
130 {
131 	struct machine mch = {};
132 	prepare_kpc(&mch, true, 0);
133 	uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
134 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
135 
136 	int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
137 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
138 	free(periods);
139 }
140 
// Thread body: busy-wait until the int pointed to by arg becomes non-zero.
// The volatile read keeps the compiler from hoisting the load out of the loop.
static void *
spin(void *arg)
{
	for (;;) {
		if (*(volatile int *)arg != 0) {
			break;
		}
	}

	return NULL;
}
150 
151 static pthread_t *
start_threads(const struct machine * mch,void * (* func)(void *),void * arg)152 start_threads(const struct machine *mch, void *(*func)(void *), void *arg)
153 {
154 	T_SETUPBEGIN;
155 
156 	pthread_t *threads = calloc((unsigned int)mch->ncpus,
157 	    sizeof(*threads));
158 	T_QUIET; T_ASSERT_NOTNULL(threads, "allocated array of threads");
159 	for (unsigned int i = 0; i < mch->ncpus; i++) {
160 		int error = pthread_create(&threads[i], NULL, func, arg);
161 		T_QUIET; T_ASSERT_POSIX_ZERO(error, "pthread_create");
162 	}
163 
164 	T_SETUPEND;
165 
166 	return threads;
167 }
168 
169 static void
end_threads(const struct machine * mch,pthread_t * threads)170 end_threads(const struct machine *mch, pthread_t *threads)
171 {
172 	for (unsigned int i = 0; i < mch->ncpus; i++) {
173 		int error = pthread_join(threads[i], NULL);
174 		T_QUIET; T_ASSERT_POSIX_ZERO(error, "joined thread %d", i);
175 	}
176 	free(threads);
177 }
178 
// Running statistics for one counter on one CPU.
struct tally {
	uint64_t firstvalue; // value observed on the first check
	uint64_t lastvalue;  // most recently observed value
	uint64_t nchecks;    // number of times this counter was sampled
	uint64_t nzero;      // samples that were exactly zero
	uint64_t nstuck;     // samples equal to the previous sample
	uint64_t ndecrease;  // samples lower than the previous sample
};

// Fold one snapshot of per-CPU counter values (counts, laid out as
// ncpus x nctrs) into the matching running tallies.
static void
check_counters(unsigned int ncpus, unsigned int nctrs, struct tally *tallies,
		uint64_t *counts)
{
	for (unsigned int cpu = 0; cpu < ncpus; cpu++) {
		for (unsigned int pmc = 0; pmc < nctrs; pmc++) {
			unsigned int idx = cpu * nctrs + pmc;
			struct tally *cur = &tallies[idx];
			uint64_t value = counts[idx];

			if (value == 0) {
				cur->nzero++;
			}
			// Stuck and decreasing are mutually exclusive.
			if (cur->lastvalue == value) {
				cur->nstuck++;
			} else if (cur->lastvalue > value) {
				cur->ndecrease++;
			}
			cur->lastvalue = value;
			if (cur->nchecks == 0) {
				cur->firstvalue = value;
			}
			cur->nchecks++;
		}
	}
}
215 
216 static void
check_tally(unsigned int ncpus,unsigned int nctrs,struct tally * tallies)217 check_tally(unsigned int ncpus, unsigned int nctrs, struct tally *tallies)
218 {
219 	uint64_t nstuck = 0;
220 	uint64_t nchecks = 0;
221 	uint64_t nzero = 0;
222 	uint64_t ndecrease = 0;
223 
224 	for (unsigned int i = 0; i < ncpus; i++) {
225 		for (unsigned int j = 0; j < nctrs; j++) {
226 			unsigned int ctr = i * nctrs + j;
227 			struct tally *tly = &tallies[ctr];
228 
229 			T_LOG("CPU %2u PMC %u: nchecks = %llu, last value = %llx, "
230 				"delta = %llu, nstuck = %llu", i, j,
231 			    tly->nchecks, tly->lastvalue, tly->lastvalue - tly->firstvalue,
232 			    tly->nstuck);
233 
234 			nchecks += tly->nchecks;
235 			nstuck += tly->nstuck;
236 			nzero += tly->nzero;
237 			ndecrease += tly->ndecrease;
238 		}
239 	}
240 
241 	T_EXPECT_GT(nchecks, 0ULL, "checked 0x%" PRIx64 " counter values", nchecks);
242 	T_EXPECT_EQ(nzero, 0ULL, "found 0x%" PRIx64 " zero values", nzero);
243 	T_EXPECT_EQ(nstuck, 0ULL, "found 0x%" PRIx64 " stuck values", nstuck);
244 	T_EXPECT_EQ(ndecrease, 0ULL,
245 	    "found 0x%" PRIx64 " decreasing values", ndecrease);
246 }
247 
// How long kpc_cpu_direct_configurable keeps sampling the counters.
#define TESTDUR_NS (5 * NSEC_PER_SEC)
249 
250 T_DECL(kpc_cpu_direct_configurable,
251     "test that configurable counters return monotonically increasing values",
252     XNU_T_META_SOC_SPECIFIC,
253     T_META_BOOTARGS_SET("enable_skstb=1"))
254 {
255 	skip_if_unsupported();
256 
257 	struct machine mch = {};
258 	prepare_kpc(&mch, true, CYCLES_EVENT);
259 
260 	int until = 0;
261 	pthread_t *threads = start_threads(&mch, spin, &until);
262 	start_kpc();
263 
264 	T_SETUPBEGIN;
265 
266 	uint64_t startns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
267 	uint64_t *counts = kpc_counterbuf_alloc();
268 	T_QUIET; T_ASSERT_NOTNULL(counts, "allocated space for counter values");
269 	memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
270 	struct tally *tly = calloc(mch.ncpus * mch.nconfig, sizeof(*tly));
271 	T_QUIET; T_ASSERT_NOTNULL(tly, "allocated space for tallies");
272 
273 	T_SETUPEND;
274 
275 	int n = 0;
276 	while (clock_gettime_nsec_np(CLOCK_MONOTONIC) - startns < TESTDUR_NS) {
277 		int ret = kpc_get_cpu_counters(true,
278 		    KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
279 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_get_cpu_counters");
280 
281 		check_counters(mch.ncpus, mch.nconfig, tly, counts);
282 
283 		usleep(10000);
284 		n++;
285 		if (n % 100 == 0) {
286 			T_LOG("checked 100 times");
287 		}
288 	}
289 
290 	check_tally(mch.ncpus, mch.nconfig, tly);
291 
292 	until = 1;
293 	end_threads(&mch, threads);
294 }
295 
296 T_DECL(kpc_thread_direct_instrs_cycles,
297     "test that fixed thread counters return monotonically increasing values",
298     XNU_T_META_SOC_SPECIFIC)
299 {
300 	int err;
301 	uint32_t ctrs_cnt;
302 	uint64_t *ctrs_a;
303 	uint64_t *ctrs_b;
304 
305 	skip_if_unsupported();
306 
307 	T_SETUPBEGIN;
308 
309 	ctrs_cnt = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
310 	if (ctrs_cnt == 0) {
311 		T_SKIP("no fixed counters available");
312 	}
313 	T_LOG("device has %" PRIu32 " fixed counters", ctrs_cnt);
314 
315 	T_QUIET; T_ASSERT_POSIX_SUCCESS(kpc_force_all_ctrs_set(1), NULL);
316 	T_ASSERT_POSIX_SUCCESS(kpc_set_counting(KPC_CLASS_FIXED_MASK),
317 	    "kpc_set_counting");
318 	T_ASSERT_POSIX_SUCCESS(kpc_set_thread_counting(KPC_CLASS_FIXED_MASK),
319 	    "kpc_set_thread_counting");
320 
321 	T_SETUPEND;
322 
323 	ctrs_a = malloc(ctrs_cnt * sizeof(uint64_t));
324 	T_QUIET; T_ASSERT_NOTNULL(ctrs_a, NULL);
325 
326 	err = kpc_get_thread_counters(0, ctrs_cnt, ctrs_a);
327 	T_ASSERT_POSIX_SUCCESS(err, "kpc_get_thread_counters");
328 
329 	for (uint32_t i = 0; i < ctrs_cnt; i++) {
330 		T_LOG("checking counter %d with value %" PRIu64 " > 0", i, ctrs_a[i]);
331 		T_QUIET;
332 		T_EXPECT_GT(ctrs_a[i], UINT64_C(0), "counter %d is non-zero", i);
333 	}
334 
335 	ctrs_b = malloc(ctrs_cnt * sizeof(uint64_t));
336 	T_QUIET; T_ASSERT_NOTNULL(ctrs_b, NULL);
337 
338 	err = kpc_get_thread_counters(0, ctrs_cnt, ctrs_b);
339 	T_ASSERT_POSIX_SUCCESS(err, "kpc_get_thread_counters");
340 
341 	for (uint32_t i = 0; i < ctrs_cnt; i++) {
342 		T_LOG("checking counter %d with value %" PRIu64
343 		    " > previous value %" PRIu64, i, ctrs_b[i], ctrs_a[i]);
344 		T_QUIET;
345 		T_EXPECT_GT(ctrs_b[i], UINT64_C(0), "counter %d is non-zero", i);
346 		T_QUIET; T_EXPECT_LT(ctrs_a[i], ctrs_b[i],
347 		    "counter %d is increasing", i);
348 	}
349 
350 	free(ctrs_a);
351 	free(ctrs_b);
352 }
353 
// PMI test tuning: total trace duration, cadence of userspace counter reads,
// number of histogram buckets across the run, PMI period in counted events,
// and the kdebug event emitted to end tracing.
#define PMI_TEST_DURATION_NS (15 * NSEC_PER_SEC)
#define PERIODIC_CPU_COUNT_MS (250)
#define NTIMESLICES (72)
#define PMI_PERIOD (50ULL * 1000 * 1000)
#define END_EVENT KDBG_EVENTID(0xfe, 0xfe, 0)
359 
// Per-CPU bookkeeping for the PMI test.
struct cpu {
	uint64_t prev_count, max_skid;        // last PMI counter value; worst delta vs. PMI_PERIOD
	unsigned int scheduled_outside_slice; // sched events after the final timeslice
	unsigned int pmi_timeslices[NTIMESLICES];       // PMIs seen per timeslice
	unsigned int scheduled_timeslices[NTIMESLICES]; // sched events seen per timeslice
};
366 
/*
 * Program a cycle-count PMI on one configurable counter, trace the PMIs and
 * kperf samples they trigger, and simultaneously read the counters directly
 * from userspace -- verifying that PMIs fire on every CPU in every timeslice
 * and that direct reads keep working while PMIs are enabled.
 */
T_DECL(kpc_pmi_configurable,
    "test that PMIs don't interfere with sampling counters in kperf",
    XNU_T_META_SOC_SPECIFIC,
    T_META_BOOTARGS_SET("enable_skstb=1"))
{
	skip_if_unsupported();

	start_controlling_ktrace();
	struct machine mch = {};
	prepare_kpc(&mch, true, CYCLES_EVENT);

	T_SETUPBEGIN;

	// Only the first configurable counter gets a PMI period; the rest stay 0.
	uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
	periods[0] = PMI_PERIOD;

	int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
	free(periods);

	// ...and only that counter triggers kperf action 1 on overflow.
	// NOTE(review): this calloc result is not NULL-checked, unlike periods.
	int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
	actions[0] = 1;
	ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
	free(actions);

	// Action 1 samples thread info and a kernel callstack.
	(void)kperf_action_count_set(1);
	ret = kperf_action_samplers_set(1,
	    KPERF_SAMPLER_TINFO | KPERF_SAMPLER_KSTACK);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");

	ktrace_config_t ktconfig = ktrace_config_create_current();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(ktconfig, "create current config");
	ret = ktrace_config_print_description(ktconfig, stdout);
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "print config description");

	struct cpu *cpus = calloc(mch.ncpus, sizeof(*cpus));
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(cpus, "allocate CPUs array");

	__block unsigned int sample_count = 0;
	__block unsigned int pmi_count = 0;
	__block unsigned int callstack_count = 0;
	__block uint64_t first_ns = 0;
	__block uint64_t last_ns = 0;

	ktrace_session_t sess = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");

	// PMI events: validate the configured event, track per-CPU skid, and
	// bucket each PMI into the timeslice histogram.
	ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
		if (tp->debugid & DBG_FUNC_END) {
			return;
		}

		uint64_t cur_ns = 0;
		int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
		    tp->timestamp, &cur_ns);
		T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");

		// arg1 packs: config word (low 32 bits), counter number (next 16),
		// flags (top 16) -- see the KPC_DESC_* macros in pmi_pc_capture.
		uint64_t desc = tp->arg1;
		uint64_t config = desc & UINT32_MAX;
		T_QUIET; T_EXPECT_EQ(config & UINT8_MAX,
				(uint64_t)CYCLES_EVENT & UINT8_MAX,
				"PMI argument matches configuration");
		__unused uint64_t counter = (desc >> 32) & UINT16_MAX;
		__unused uint64_t flags = desc >> 48;

		uint64_t count = tp->arg2;
		if (first_ns == 0) {
			first_ns = cur_ns;
		}
		struct cpu *cpu = &cpus[tp->cpuid];

		if (cpu->prev_count != 0) {
			uint64_t delta = count - cpu->prev_count;
			// NOTE(review): if delta < PMI_PERIOD this subtraction wraps to
			// a huge unsigned value, and max_skid can be dominated by one
			// such PMI; ABSV64 is only applied at report time -- confirm
			// this is the intended skid definition.
			uint64_t skid = delta - PMI_PERIOD;
			if (skid > cpu->max_skid) {
				cpu->max_skid = skid;
			}
		}
		cpu->prev_count = count;

		__unused uint64_t pc = tp->arg3;

		// Bucket by elapsed fraction of the nominal test duration.
		double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
		    NTIMESLICES;
		if (slice < NTIMESLICES) {
			cpu->pmi_timeslices[(unsigned int)slice] += 1;
		}

		pmi_count++;
	});

	// Scheduler events record which timeslices each CPU was actually busy
	// in, so quiet slices without PMIs can be excused below.
	void (^sched_handler)(struct trace_point *tp) =
	    ^(struct trace_point *tp) {
		uint64_t cur_ns = 0;
		int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
		    tp->timestamp, &cur_ns);
		T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
		if (first_ns == 0) {
			first_ns = cur_ns;
		}

		struct cpu *cpu = &cpus[tp->cpuid];
		double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
		    NTIMESLICES;
		if (slice < NTIMESLICES) {
			cpu->scheduled_timeslices[(unsigned int)slice] += 1;
		} else {
			cpu->scheduled_outside_slice += 1;
		}
	};
	ktrace_events_single(sess, MACH_SCHED, sched_handler);
	ktrace_events_single(sess, MACH_STACK_HANDOFF, sched_handler);

	// Count kperf samples and callstack headers to compare against PMIs.
	ktrace_events_single(sess, PERF_SAMPLE, ^(struct trace_point * tp) {
		if (tp->debugid & DBG_FUNC_START) {
			sample_count++;
		}
	});
	ktrace_events_single(sess, PERF_STK_KHDR,
	    ^(struct trace_point * __unused tp) {
		callstack_count++;
	});

	// END_EVENT is emitted by the timeout below; it stops the session.
	ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp) {
		int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
		    tp->timestamp, &last_ns);
		T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");

		ktrace_end(sess, 1);
	});

	uint64_t *counts = kpc_counterbuf_alloc();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(counts,
			"allocated counter values array");
	memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
	struct tally *tly = calloc(mch.ncpus * (mch.nconfig + mch.nfixed),
			sizeof(*tly));
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(tly, "allocated tallies array");

	// Periodically read the counters directly while PMIs fire, to check
	// that the two access paths coexist.
	dispatch_source_t cpu_count_timer = dispatch_source_create(
			DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_get_main_queue());
	dispatch_source_set_timer(cpu_count_timer, dispatch_time(DISPATCH_TIME_NOW,
	    PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC),
	    PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC, 0);
	dispatch_source_set_cancel_handler(cpu_count_timer, ^{
	    dispatch_release(cpu_count_timer);
	});

	__block uint64_t first_check_ns = 0;
	__block uint64_t last_check_ns = 0;

	dispatch_source_set_event_handler(cpu_count_timer, ^{
		int cret = kpc_get_cpu_counters(true,
		    KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(cret, "kpc_get_cpu_counters");

		// Remember the first and most recent read times for the GHz math.
		if (!first_check_ns) {
			first_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
		} else {
			last_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
		}
		check_counters(mch.ncpus, mch.nfixed + mch.nconfig, tly, counts);
	});
	// Subscribe to the whole DBG_PERF class so those events are traced.
	ktrace_events_class(sess, DBG_PERF, ^(struct trace_point * __unused tp) {});

	// Spin on every CPU for the duration.  NOTE(review): `stop` is never
	// set and the thread array is discarded -- the spinners run until the
	// test process exits.
	int stop = 0;
	(void)start_threads(&mch, spin, &stop);

	ktrace_set_completion_handler(sess, ^{
		dispatch_cancel(cpu_count_timer);

		check_tally(mch.ncpus, mch.nfixed + mch.nconfig, tly);

		struct rusage_info_v4 post_ru = {};
		int ruret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4,
				(rusage_info_t *)&post_ru);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ruret, "got rusage information");
		T_LOG("saw %llu cycles in process",
				post_ru.ri_cycles - pre_ru.ri_cycles);
		uint64_t total_cycles = 0;

		T_LOG("saw pmis = %u, samples = %u, stacks = %u", pmi_count, sample_count,
		    callstack_count);
		// Allow some slop in case the trace is cut-off midway through a
		// sample.
		const unsigned int cutoff_leeway = 32;
		T_EXPECT_GE(sample_count + cutoff_leeway, pmi_count,
		    "saw as many samples as PMIs");
		T_EXPECT_GE(callstack_count + cutoff_leeway, pmi_count,
		    "saw as many stacks as PMIs");

		// Build a per-CPU timeline string: '*' = PMI in slice, '.' = CPU
		// idle in slice (excused), '-' = scheduled but no PMI.
		unsigned int cpu_sample_count = 0;
		char sample_slices[NTIMESLICES + 1];
		sample_slices[NTIMESLICES] = '\0';
		for (unsigned int i = 0; i < mch.ncpus; i++) {
			memset(sample_slices, '-', sizeof(sample_slices) - 1);

			struct cpu *cpu = &cpus[i];
			unsigned int pmi_slice_count = 0, no_sched_slice_count = 0,
					cpu_pmi_count = 0, last_contiguous = 0;
			bool seen_empty = false;
			for (unsigned int j = 0; j < NTIMESLICES; j++) {
				unsigned int slice_pmi_count = cpu->pmi_timeslices[j];
				unsigned int slice_sched_count = cpu->scheduled_timeslices[j];
				cpu_pmi_count += slice_pmi_count;
				if (slice_pmi_count > 0) {
					pmi_slice_count++;
					sample_slices[j] = '*';
				} else if (slice_sched_count == 0) {
					no_sched_slice_count++;
					sample_slices[j] = '.';
				} else {
					seen_empty = true;
				}
				if (!seen_empty) {
					last_contiguous = j;
				}
			}
			// First configurable counter for this CPU in the tally layout.
			unsigned int ctr = i * (mch.nfixed + mch.nconfig) + mch.nfixed;
			uint64_t delta = tly[ctr].lastvalue - tly[ctr].firstvalue;
			T_LOG("%g GHz", (double)delta / (last_check_ns - first_check_ns));
			total_cycles += delta;
			uint64_t abs_max_skid = (uint64_t)ABSV64(cpu->max_skid);
			T_LOG("CPU %2u: %4up:%4un/%u, %6u/%llu, max skid = %llu (%.4f%%), "
					"last contiguous = %u, scheduled outside = %u", i,
					pmi_slice_count, no_sched_slice_count, NTIMESLICES,
					sample_count, delta / PMI_PERIOD, abs_max_skid,
					(double)abs_max_skid / PMI_PERIOD * 100, last_contiguous,
					cpu->scheduled_outside_slice);
			T_LOG("%s", sample_slices);
			if (cpu_pmi_count > 0) {
				cpu_sample_count++;
			}
			T_EXPECT_EQ(last_contiguous, NTIMESLICES - 1,
					"CPU %2u: saw samples in each time slice", i);
		}
		T_LOG("kpc reported %llu total cycles", total_cycles);
		T_LOG("saw %u sample events, across %u/%u cpus", sample_count,
				cpu_sample_count, mch.ncpus);
		T_EXPECT_EQ(cpu_sample_count, mch.ncpus,
				"should see PMIs on every CPU");
		T_END;
	});

	int dbglvl = 3;
	ret = sysctlbyname("kperf.debug_level", NULL, NULL, &dbglvl,
	    sizeof(dbglvl));
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "set kperf debug level");
	ret = kperf_sample_set(1);
	T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");

	start_kpc();

	int error = ktrace_start(sess, dispatch_get_main_queue());
	T_ASSERT_POSIX_ZERO(error, "started tracing");

	// After the nominal duration, emit END_EVENT; its handler ends tracing.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
			dispatch_get_main_queue(), ^{
		T_LOG("ending tracing after timeout");
		kdebug_trace(END_EVENT, 0, 0, 0, 0);
	});

	dispatch_activate(cpu_count_timer);

	T_SETUPEND;

	// Never returns; the completion handler calls T_END.
	dispatch_main();
}
637 
// Compile-time arch flag, used to gate ARM-only tests via T_META_ENABLED.
#if defined(__arm64__)
#define IS_ARM64 true
#else // defined(__arm64__)
#define IS_ARM64 false
#endif // defined(__arm64__)
643 
644 T_DECL(kpc_pmu_config, "ensure PMU can be configured",
645     XNU_T_META_SOC_SPECIFIC,
646     T_META_ENABLED(IS_ARM64))
647 {
648 	T_SETUPBEGIN;
649 	int ret = kpc_force_all_ctrs_set(1);
650 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret,
651 			"force all counters to allow raw PMU configuration");
652 	uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_RAWPMU_MASK);
653 	T_LOG("found %u raw PMU configuration words", nconfigs);
654 	uint64_t *configs = calloc(nconfigs, sizeof(*configs));
655 	T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
656 	T_SETUPEND;
657 
658 	ret = kpc_set_config(KPC_CLASS_RAWPMU_MASK, configs);
659 	T_ASSERT_POSIX_SUCCESS(ret, "should set PMU configuration");
660 }
661 
// Raw event number used to generate PMIs for the PC-capture test.
// NOTE(review): presumably data-TLB misses per the name -- confirm against
// the SoC's PMC event list.
#define DTLB_MISS_EVENT (0xc1)
663 
/*
 * Arm PMIs on all configurable counters and verify that PMIs delivered on
 * PMCs 5-7 carry the PC-capture flag at least once each, i.e. the single
 * PC-capture register is being serviced for those counters.
 */
T_DECL(pmi_pc_capture, "ensure PC capture works for PMCs 5, 6, and 7",
    XNU_T_META_SOC_SPECIFIC,
    T_META_REQUIRES_SYSCTL_EQ("kpc.pc_capture_supported", 1))
{
	start_controlling_ktrace();
	struct machine mch = {};
	prepare_kpc(&mch, true, DTLB_MISS_EVENT);

	T_SETUPBEGIN;

	uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
	for (unsigned int i = 0; i < mch.nconfig; i++) {
		/*
		 * Offset the periods so the PMIs don't alias to the same PC capture.
		 * Since there's only one PC capture register, they will clobber each
		 * other.
		 */
		periods[i] = PMI_PERIOD / 1000 + (i * 1000);
	}

	int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
	free(periods);

	// Every configurable counter fires kperf action 1 on overflow.
	// NOTE(review): this calloc result is not NULL-checked, unlike periods.
	int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
	for (unsigned int i = 0; i < mch.nconfig; i++) {
		actions[i] = 1;
	}
	ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
	free(actions);

	(void)kperf_action_count_set(1);
	ret = kperf_action_samplers_set(1, KPERF_SAMPLER_TINFO);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");

	ktrace_session_t sess = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");

	// Index 0..2 maps to PMCs 5..7; pointer aliases let the blocks below
	// mutate the stack arrays.
	uint64_t pc_captured_arr[3] = {};
	uint64_t *pc_captured = pc_captured_arr;
	uint64_t pmi_event_arr[3] = {};
	uint64_t *pmi_event = pmi_event_arr;
	ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
		if (tp->debugid & DBG_FUNC_END) {
			return;
		}

		uint64_t desc = tp->arg1;

// arg1 layout: counter number in bits 32-39, flags in the top 16 bits.
#define KPC_DESC_COUNTER(DESC) (((DESC) >> 32) & 0xff)
#define KPC_DESC_FLAGS(DESC) ((DESC) >> 48)
#define KPC_FLAG_PC_CAPTURED (0x08)

		uint64_t counter = KPC_DESC_COUNTER(desc);
		uint64_t flags = KPC_DESC_FLAGS(desc);
		if (counter >= 5 && counter <= 7) {
			pmi_event[counter - 5]++;
			if (flags & KPC_FLAG_PC_CAPTURED) {
				pc_captured[counter - 5]++;
			}
		}
	});

	ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp __unused) {
		ktrace_config_t config = ktrace_config_create_current();
		ktrace_config_print_description(config, stdout);
		ktrace_config_destroy(config);
		T_LOG("saw ending event");
		ktrace_end(sess, 1);
	});

	ktrace_set_completion_handler(sess, ^{
		ktrace_session_destroy(sess);
		for (unsigned int i = 0; i < 3; i++) {
			// NOTE(review): if a PMC saw no PMIs, pmi_event[i] is 0 and the
			// percentage below is NaN; the expectation then fails anyway.
			T_LOG("PMC%u: saw %llu/%llu (%g%%) PMIs with PC capture", i + 5,
			    pc_captured[i], pmi_event[i],
			    (double)pc_captured[i] / (double)pmi_event[i] * 100.0);
			T_EXPECT_GT(pc_captured[i], 0ULL, "saw PC capture for counter %u",
			    i + 5);
		}
		T_END;
	});

	ret = kperf_sample_set(1);
	T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");

	start_kpc();

	int error = ktrace_start(sess, dispatch_get_main_queue());
	T_ASSERT_POSIX_ZERO(error, "started tracing");

	// After the duration, emit END_EVENT; its handler stops the session.
	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
			dispatch_get_main_queue(), ^{
		T_LOG("ending tracing after timeout");
		kdebug_trace(END_EVENT, 0, 0, 0, 0);
	});

	T_SETUPEND;

	// Never returns; the completion handler calls T_END.
	dispatch_main();
}
767