xref: /xnu-12377.41.6/tests/cpu_counters/kpc_tests.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 // Copyright (c) 2018-2023 Apple Inc.  All rights reserved.
2 
3 #include <darwintest.h>
4 #include <ktrace/config.h>
5 #include <ktrace/session.h>
6 #include <inttypes.h>
7 #include <libproc.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <sys/resource.h>
11 #include <sys/sysctl.h>
12 
13 #include <kperf/kpc.h>
14 #include <kperf/kperf.h>
15 #include <kperfdata/kpep.h>
16 
17 #include "ktrace_helpers.h"
18 #include "kperf_helpers.h"
19 #include "test_utils.h"
20 
/*
 * File-wide darwintest metadata: the test namespace, the Radar component and
 * version, and the owner.
 * NOTE(review): going by the macro names, these tests run as root and the
 * harness leak check is switched off -- confirm against darwintest.
 */
T_GLOBAL_META(
	T_META_NAMESPACE("xnu.cpu_counters"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("cpu counters"),
	T_META_OWNER("mwidmann"),
	T_META_ASROOT(true),
	T_META_CHECK_LEAKS(false));
28 
/*
 * Counter topology of the machine under test, filled in by prepare_kpc().
 */
struct machine {
	/* Logical CPU count, read from the hw.logicalcpu_max sysctl. */
	unsigned int ncpus;
	/* Counter count reported for KPC_CLASS_FIXED_MASK. */
	unsigned int nfixed;
	/* Counter count reported for KPC_CLASS_CONFIGURABLE_MASK. */
	unsigned int nconfig;
	/* First non-zero configuration word generated for the chosen event. */
	uint64_t selector;
};
35 
/*
 * Absolute value of `n` after converting it to a signed 64-bit integer.
 * `n` is evaluated more than once, so pass an expression without side
 * effects.  Only defined here when nothing earlier already defines it.
 */
#ifndef ABSV64
#define ABSV64(n) ((((int64_t)(n)) < 0) ? -((int64_t)(n)) : ((int64_t)(n)))
#endif
39 
40 static void
skip_if_unsupported(void)41 skip_if_unsupported(void)
42 {
43 	int r;
44 	int supported = 0;
45 	size_t supported_size = sizeof(supported);
46 
47 	r = sysctlbyname("kern.monotonic.supported", &supported, &supported_size,
48 	    NULL, 0);
49 	if (r < 0) {
50 		T_WITH_ERRNO;
51 		T_SKIP("could not find \"kern.monotonic.supported\" sysctl");
52 	}
53 
54 	if (!supported) {
55 		T_SKIP("PMCs are not supported on this platform");
56 	}
57 }
58 
59 static struct rusage_info_v4 pre_ru = {};
60 
61 static void
start_kpc(void)62 start_kpc(void)
63 {
64 	T_SETUPBEGIN;
65 
66 	kpc_classmask_t classes = KPC_CLASS_FIXED_MASK |
67 	    KPC_CLASS_CONFIGURABLE_MASK;
68 	int ret = kpc_set_counting(classes);
69 	T_ASSERT_POSIX_SUCCESS(ret, "started counting");
70 
71 	ret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4, (rusage_info_t *)&pre_ru);
72 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "got rusage information");
73 
74 	kpc_classmask_t classes_on = kpc_get_counting();
75 	T_QUIET;
76 	T_ASSERT_EQ(classes, classes_on, "classes counting is correct");
77 
78 	T_SETUPEND;
79 }
80 
81 static void kpc_reset_atend(void);
82 
83 static void
_assert_kpep_ok(int kpep_err,const char * fmt,...)84 _assert_kpep_ok(int kpep_err, const char *fmt, ...)
85 {
86 	char msg[1024] = "";
87 	va_list args;
88 	va_start(args, fmt);
89 	vsnprintf(msg, sizeof(msg), fmt, args);
90 	va_end(args);
91 	T_QUIET;
92 	T_ASSERT_EQ(kpep_err, KPEP_ERR_NONE, "%s: %s", msg, kpep_strerror(kpep_err));
93 }
94 
95 static void
prepare_kpc(struct machine * mch,unsigned int n,const char * event_name,uint64_t period)96 prepare_kpc(struct machine *mch, unsigned int n, const char *event_name,
97     uint64_t period)
98 {
99 	T_SETUPBEGIN;
100 
101 	T_ATEND(kpc_reset_atend);
102 
103 	kpep_db_t db = NULL;
104 	int ret = kpep_db_create(NULL, &db);
105 	_assert_kpep_ok(ret, "get kpep database");
106 	kpep_config_t config = NULL;
107 	ret = kpep_config_create(db, &config);
108 	_assert_kpep_ok(ret, "creating event configuration");
109 	ret = kpep_config_force_counters(config);
110 	_assert_kpep_ok(ret, "forcing counters with configuration");
111 	kpep_event_t event = NULL;
112 	ret = kpep_db_event(db, event_name, &event);
113 	_assert_kpep_ok(ret, "finding event named %s", event_name);
114 
115 	size_t ncpus_sz = sizeof(mch->ncpus);
116 	ret = sysctlbyname("hw.logicalcpu_max", &mch->ncpus, &ncpus_sz,
117 	    NULL, 0);
118 	T_QUIET;
119 	T_ASSERT_POSIX_SUCCESS(ret, "sysctlbyname(hw.logicalcpu_max)");
120 	T_QUIET;
121 	T_ASSERT_GT(mch->ncpus, 0, "must have some number of CPUs");
122 
123 	ret = kpc_force_all_ctrs_set(1);
124 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_set(1)");
125 
126 	int forcing = 0;
127 	ret = kpc_force_all_ctrs_get(&forcing);
128 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_force_all_ctrs_get");
129 	T_QUIET; T_ASSERT_EQ(forcing, 1, "counters must be forced");
130 
131 	mch->nfixed = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
132 	mch->nconfig = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK);
133 
134 	T_LOG("machine: ncpus = %d, nfixed = %d, nconfig = %d", mch->ncpus,
135 	    mch->nfixed, mch->nconfig);
136 
137 	uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
138 	for (uint32_t i = 0; i < nconfigs; i++) {
139 		if (period != 0 && (n == 0 || i == 0)) {
140 			ret = kpep_config_add_event_trigger(config, &event, 0,
141 			    period + i * 1000, NULL);
142 		} else {
143 			ret = kpep_config_add_event(config, &event, 0, NULL);
144 		}
145 		if (ret == KPEP_ERR_CONFIG_CONFLICT) {
146 			T_LOG("configured %d counters with %s", i, event_name);
147 			break;
148 		}
149 		_assert_kpep_ok(ret, "adding %d event %s to configuration", i,
150 		    event_name);
151 	}
152 
153 	uint64_t *configs = calloc(nconfigs, sizeof(*configs));
154 	T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
155 	ret = kpep_config_kpc(config, configs, nconfigs * sizeof(*configs));
156 	_assert_kpep_ok(ret, "get kpc configuration");
157 	for (uint32_t i = 0; i < nconfigs; i++) {
158 		if (configs[i] != 0) {
159 			mch->selector = configs[i];
160 			break;
161 		}
162 	}
163 	T_QUIET; T_ASSERT_NE(mch->selector, 0ULL, "found event selector to check");
164 	ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, configs);
165 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_config");
166 
167 	ret = kpep_config_kpc_periods(config, configs, nconfigs * sizeof(*configs));
168 	_assert_kpep_ok(ret, "get kpc periods");
169 	ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, configs);
170 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
171 
172 	free(configs);
173 
174 	T_SETUPEND;
175 }
176 
177 static void
kpc_reset_atend(void)178 kpc_reset_atend(void)
179 {
180 	uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK);
181 	uint64_t *configs = calloc(nconfigs, sizeof(*configs));
182 	T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
183 
184 	int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, configs);
185 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
186 	ret = kpc_set_config(KPC_CLASS_CONFIGURABLE_MASK, configs);
187 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_config");
188 
189 	free(configs);
190 }
191 
/*
 * Thread body: busy-wait until the int that `arg` points at becomes non-zero,
 * then return NULL.  The flag is re-read through a volatile pointer on every
 * pass.
 */
static void *
spin(void *arg)
{
	volatile int *stop_flag = arg;

	for (;;) {
		if (*stop_flag != 0) {
			return NULL;
		}
	}
}
201 
202 static pthread_t *
start_threads(const struct machine * mch,void * (* func)(void *),void * arg)203 start_threads(const struct machine *mch, void *(*func)(void *), void *arg)
204 {
205 	T_SETUPBEGIN;
206 
207 	pthread_t *threads = calloc((unsigned int)mch->ncpus,
208 	    sizeof(*threads));
209 	T_QUIET; T_ASSERT_NOTNULL(threads, "allocated array of threads");
210 	for (unsigned int i = 0; i < mch->ncpus; i++) {
211 		int error = pthread_create(&threads[i], NULL, func, arg);
212 		T_QUIET; T_ASSERT_POSIX_ZERO(error, "pthread_create");
213 	}
214 
215 	T_SETUPEND;
216 
217 	return threads;
218 }
219 
220 static void
end_threads(const struct machine * mch,pthread_t * threads)221 end_threads(const struct machine *mch, pthread_t *threads)
222 {
223 	for (unsigned int i = 0; i < mch->ncpus; i++) {
224 		int error = pthread_join(threads[i], NULL);
225 		T_QUIET; T_ASSERT_POSIX_ZERO(error, "joined thread %d", i);
226 	}
227 	free(threads);
228 }
229 
/*
 * Running statistics for one counter on one CPU, updated by check_counters().
 */
struct tally {
	uint64_t firstvalue;    /* value seen on the first check */
	uint64_t lastvalue;     /* value seen on the most recent check */
	uint64_t nchecks;       /* number of checks folded in so far */
	uint64_t nzero;         /* checks where the value was 0 */
	uint64_t nstuck;        /* checks where the value equalled lastvalue */
	uint64_t ndecrease;     /* checks where the value was below lastvalue */
};

/*
 * Fold one snapshot of counter values into the per-counter tallies.
 *
 * Both arrays hold ncpus * nctrs entries, laid out CPU-major: the entry for
 * counter j on CPU i lives at index i * nctrs + j.  Each value is compared
 * with the tally's previous lastvalue, which is whatever the caller
 * initialized it to on the first check.
 */
static void
check_counters(unsigned int ncpus, unsigned int nctrs, struct tally *tallies,
    uint64_t *counts)
{
	for (unsigned int cpu = 0; cpu < ncpus; cpu++) {
		for (unsigned int pmc = 0; pmc < nctrs; pmc++) {
			const unsigned int idx = cpu * nctrs + pmc;
			struct tally *t = &tallies[idx];
			const uint64_t now = counts[idx];
			const uint64_t before = t->lastvalue;

			/* Each comparison yields 0 or 1, so add it directly. */
			t->nzero += (now == 0);
			t->nstuck += (before == now);
			t->ndecrease += (before > now);

			if (t->nchecks++ == 0) {
				t->firstvalue = now;
			}
			t->lastvalue = now;
		}
	}
}
266 
267 static void
check_tally(unsigned int ncpus,unsigned int nctrs,struct tally * tallies)268 check_tally(unsigned int ncpus, unsigned int nctrs, struct tally *tallies)
269 {
270 	uint64_t nstuck = 0;
271 	uint64_t nchecks = 0;
272 	uint64_t nzero = 0;
273 	uint64_t ndecrease = 0;
274 
275 	for (unsigned int i = 0; i < ncpus; i++) {
276 		for (unsigned int j = 0; j < nctrs; j++) {
277 			unsigned int ctr = i * nctrs + j;
278 			struct tally *tly = &tallies[ctr];
279 
280 			T_LOG("CPU %2u PMC %u: nchecks = %llu, last value = %llx, "
281 				"delta = %llu, nstuck = %llu", i, j,
282 			    tly->nchecks, tly->lastvalue, tly->lastvalue - tly->firstvalue,
283 			    tly->nstuck);
284 
285 			nchecks += tly->nchecks;
286 			nstuck += tly->nstuck;
287 			nzero += tly->nzero;
288 			ndecrease += tly->ndecrease;
289 		}
290 	}
291 
292 	T_EXPECT_GT(nchecks, 0ULL, "checked 0x%" PRIx64 " counter values", nchecks);
293 	T_EXPECT_EQ(nzero, 0ULL, "found 0x%" PRIx64 " zero values", nzero);
294 	T_EXPECT_EQ(nstuck, 0ULL, "found 0x%" PRIx64 " stuck values", nstuck);
295 	T_EXPECT_EQ(ndecrease, 0ULL,
296 	    "found 0x%" PRIx64 " decreasing values", ndecrease);
297 }
298 
299 #define TESTDUR_NS (5 * NSEC_PER_SEC)
300 
301 T_DECL(kpc_cpu_direct_configurable,
302     "test that configurable counters return monotonically increasing values",
303     XNU_T_META_SOC_SPECIFIC,
304     T_META_BOOTARGS_SET("enable_skstb=1"),
305     T_META_TAG_VM_NOT_ELIGIBLE,
306     T_META_ENABLED(false) /* rdar://134505531 */)
307 {
308 	skip_if_unsupported();
309 
310 	struct machine mch = {};
311 	prepare_kpc(&mch, 0, "CORE_ACTIVE_CYCLE", 0);
312 
313 	int until = 0;
314 	pthread_t *threads = start_threads(&mch, spin, &until);
315 	start_kpc();
316 
317 	T_SETUPBEGIN;
318 
319 	uint64_t startns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
320 	uint64_t *counts = kpc_counterbuf_alloc();
321 	T_QUIET; T_ASSERT_NOTNULL(counts, "allocated space for counter values");
322 	memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
323 	struct tally *tly = calloc(mch.ncpus * mch.nconfig, sizeof(*tly));
324 	T_QUIET; T_ASSERT_NOTNULL(tly, "allocated space for tallies");
325 
326 	T_SETUPEND;
327 
328 	int n = 0;
329 	while (clock_gettime_nsec_np(CLOCK_MONOTONIC) - startns < TESTDUR_NS) {
330 		int ret = kpc_get_cpu_counters(true,
331 		    KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
332 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_get_cpu_counters");
333 
334 		check_counters(mch.ncpus, mch.nconfig, tly, counts);
335 
336 		usleep(10000);
337 		n++;
338 		if (n % 100 == 0) {
339 			T_LOG("checked 100 times");
340 		}
341 	}
342 
343 	check_tally(mch.ncpus, mch.nconfig, tly);
344 
345 	until = 1;
346 	end_threads(&mch, threads);
347 }
348 
349 T_DECL(kpc_thread_direct_instrs_cycles,
350     "test that fixed thread counters return monotonically increasing values",
351     XNU_T_META_SOC_SPECIFIC, T_META_TAG_VM_NOT_ELIGIBLE)
352 {
353 	int err;
354 	uint32_t ctrs_cnt;
355 	uint64_t *ctrs_a;
356 	uint64_t *ctrs_b;
357 
358 	skip_if_unsupported();
359 
360 	T_SETUPBEGIN;
361 
362 	ctrs_cnt = kpc_get_counter_count(KPC_CLASS_FIXED_MASK);
363 	if (ctrs_cnt == 0) {
364 		T_SKIP("no fixed counters available");
365 	}
366 	T_LOG("device has %" PRIu32 " fixed counters", ctrs_cnt);
367 
368 	T_QUIET; T_ASSERT_POSIX_SUCCESS(kpc_force_all_ctrs_set(1), NULL);
369 	T_ASSERT_POSIX_SUCCESS(kpc_set_counting(KPC_CLASS_FIXED_MASK),
370 	    "kpc_set_counting");
371 	T_ASSERT_POSIX_SUCCESS(kpc_set_thread_counting(KPC_CLASS_FIXED_MASK),
372 	    "kpc_set_thread_counting");
373 
374 	T_SETUPEND;
375 
376 	ctrs_a = malloc(ctrs_cnt * sizeof(uint64_t));
377 	T_QUIET; T_ASSERT_NOTNULL(ctrs_a, NULL);
378 
379 	err = kpc_get_thread_counters(0, ctrs_cnt, ctrs_a);
380 	T_ASSERT_POSIX_SUCCESS(err, "kpc_get_thread_counters");
381 
382 	for (uint32_t i = 0; i < ctrs_cnt; i++) {
383 		T_LOG("checking counter %d with value %" PRIu64 " > 0", i, ctrs_a[i]);
384 		T_QUIET;
385 		T_EXPECT_GT(ctrs_a[i], UINT64_C(0), "counter %d is non-zero", i);
386 	}
387 
388 	ctrs_b = malloc(ctrs_cnt * sizeof(uint64_t));
389 	T_QUIET; T_ASSERT_NOTNULL(ctrs_b, NULL);
390 
391 	err = kpc_get_thread_counters(0, ctrs_cnt, ctrs_b);
392 	T_ASSERT_POSIX_SUCCESS(err, "kpc_get_thread_counters");
393 
394 	for (uint32_t i = 0; i < ctrs_cnt; i++) {
395 		T_LOG("checking counter %d with value %" PRIu64
396 		    " > previous value %" PRIu64, i, ctrs_b[i], ctrs_a[i]);
397 		T_QUIET;
398 		T_EXPECT_GT(ctrs_b[i], UINT64_C(0), "counter %d is non-zero", i);
399 		T_QUIET; T_EXPECT_LT(ctrs_a[i], ctrs_b[i],
400 		    "counter %d is increasing", i);
401 	}
402 
403 	free(ctrs_a);
404 	free(ctrs_b);
405 }
406 
/* Delay before the PMI tests emit END_EVENT; also the span cut into slices. */
#define PMI_TEST_DURATION_NS (15 * NSEC_PER_SEC)
/* Interval of the timer that reads the CPU counters in kpc_pmi_configurable. */
#define PERIODIC_CPU_COUNT_MS (250)
/* Number of equal time slices PMI_TEST_DURATION_NS is divided into. */
#define NTIMESLICES (72)
/* Base PMI period handed to prepare_kpc() by the PMI tests. */
#define PMI_PERIOD (50ULL * 1000 * 1000)
/* kdebug event the tests emit themselves to mark the end of the trace. */
#define END_EVENT KDBG_EVENTID(0xfe, 0xfe, 0)

/*
 * Per-CPU bookkeeping for kpc_pmi_configurable.
 */
struct cpu {
	/*
	 * prev_count: counter value carried by the previous PMI on this CPU.
	 * max_skid:   largest (count delta - PMI_PERIOD) seen so far, computed
	 *             and compared as unsigned values.
	 */
	uint64_t prev_count, max_skid;
	/* Scheduling events that fell beyond the last time slice. */
	unsigned int scheduled_outside_slice;
	/* PMIs seen in each time slice. */
	unsigned int pmi_timeslices[NTIMESLICES];
	/* Scheduling events seen in each time slice. */
	unsigned int scheduled_timeslices[NTIMESLICES];
};
419 
420 T_DECL(kpc_pmi_configurable,
421     "test that PMIs don't interfere with sampling counters in kperf",
422     XNU_T_META_SOC_SPECIFIC,
423     T_META_BOOTARGS_SET("enable_skstb=1"),
424     T_META_TAG_VM_NOT_ELIGIBLE,
425     T_META_ENABLED(false) /* rdar://134505531 */)
426 {
427 	skip_if_unsupported();
428 
429 	start_controlling_ktrace();
430 	struct machine mch = {};
431 	prepare_kpc(&mch, 1, "CORE_ACTIVE_CYCLE", PMI_PERIOD);
432 
433 	T_SETUPBEGIN;
434 
435 	int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
436 	actions[0] = 1;
437 	int ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
438 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
439 	free(actions);
440 
441 	(void)kperf_action_count_set(1);
442 	ret = kperf_action_samplers_set(1,
443 	    KPERF_SAMPLER_TINFO | KPERF_SAMPLER_KSTACK);
444 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");
445 
446 	ktrace_config_t ktconfig = ktrace_config_create_current();
447 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(ktconfig, "create current config");
448 	ret = ktrace_config_print_description(ktconfig, stdout);
449 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "print config description");
450 
451 	struct cpu *cpus = calloc(mch.ncpus, sizeof(*cpus));
452 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(cpus, "allocate CPUs array");
453 
454 	__block unsigned int sample_count = 0;
455 	__block unsigned int pmi_count = 0;
456 	__block unsigned int callstack_count = 0;
457 	__block uint64_t first_ns = 0;
458 	__block uint64_t last_ns = 0;
459 
460 	ktrace_session_t sess = ktrace_session_create();
461 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");
462 
463 	ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
464 		if (tp->debugid & DBG_FUNC_END) {
465 			return;
466 		}
467 
468 		uint64_t cur_ns = 0;
469 		int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
470 		    tp->timestamp, &cur_ns);
471 		T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
472 
473 		uint64_t desc = tp->arg1;
474 		uint64_t config = desc & UINT32_MAX;
475 		T_QUIET; T_EXPECT_EQ(config & UINT16_MAX,
476 				mch.selector & UINT16_MAX,
477 				"PMI argument matches configuration");
478 		__unused uint64_t counter = (desc >> 32) & UINT16_MAX;
479 		__unused uint64_t flags = desc >> 48;
480 
481 		uint64_t count = tp->arg2;
482 		if (first_ns == 0) {
483 			first_ns = cur_ns;
484 		}
485 		struct cpu *cpu = &cpus[tp->cpuid];
486 
487 		if (cpu->prev_count != 0) {
488 			uint64_t delta = count - cpu->prev_count;
489 			uint64_t skid = delta - PMI_PERIOD;
490 			if (skid > cpu->max_skid) {
491 				cpu->max_skid = skid;
492 			}
493 		}
494 		cpu->prev_count = count;
495 
496 		__unused uint64_t pc = tp->arg3;
497 
498 		double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
499 		    NTIMESLICES;
500 		if (slice < NTIMESLICES) {
501 			cpu->pmi_timeslices[(unsigned int)slice] += 1;
502 		}
503 
504 		pmi_count++;
505 	});
506 
507 	void (^sched_handler)(struct trace_point *tp) =
508 	    ^(struct trace_point *tp) {
509 		uint64_t cur_ns = 0;
510 		int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
511 		    tp->timestamp, &cur_ns);
512 		T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
513 		if (first_ns == 0) {
514 			first_ns = cur_ns;
515 		}
516 
517 		struct cpu *cpu = &cpus[tp->cpuid];
518 		double slice = (double)(cur_ns - first_ns) / PMI_TEST_DURATION_NS *
519 		    NTIMESLICES;
520 		if (slice < NTIMESLICES) {
521 			cpu->scheduled_timeslices[(unsigned int)slice] += 1;
522 		} else {
523 			cpu->scheduled_outside_slice += 1;
524 		}
525 	};
526 	ktrace_events_single(sess, MACH_SCHED, sched_handler);
527 	ktrace_events_single(sess, MACH_STACK_HANDOFF, sched_handler);
528 
529 	ktrace_events_single(sess, PERF_SAMPLE, ^(struct trace_point * tp) {
530 		if (tp->debugid & DBG_FUNC_START) {
531 			sample_count++;
532 		}
533 	});
534 	ktrace_events_single(sess, PERF_STK_KHDR,
535 	    ^(struct trace_point * __unused tp) {
536 		callstack_count++;
537 	});
538 
539 	ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp) {
540 		int cret = ktrace_convert_timestamp_to_nanoseconds(sess,
541 		    tp->timestamp, &last_ns);
542 		T_QUIET; T_ASSERT_POSIX_ZERO(cret, "convert timestamp");
543 
544 		ktrace_end(sess, 1);
545 	});
546 
547 	uint64_t *counts = kpc_counterbuf_alloc();
548 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(counts,
549 			"allocated counter values array");
550 	memset(counts, 0, sizeof(*counts) * mch.ncpus * (mch.nfixed + mch.nconfig));
551 	struct tally *tly = calloc(mch.ncpus * (mch.nconfig + mch.nfixed),
552 			sizeof(*tly));
553 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(tly, "allocated tallies array");
554 
555 	dispatch_source_t cpu_count_timer = dispatch_source_create(
556 			DISPATCH_SOURCE_TYPE_TIMER, 0, 0, dispatch_get_main_queue());
557     dispatch_source_set_timer(cpu_count_timer, dispatch_time(DISPATCH_TIME_NOW,
558         PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC),
559         PERIODIC_CPU_COUNT_MS * NSEC_PER_MSEC, 0);
560     dispatch_source_set_cancel_handler(cpu_count_timer, ^{
561         dispatch_release(cpu_count_timer);
562     });
563 
564     __block uint64_t first_check_ns = 0;
565     __block uint64_t last_check_ns = 0;
566 
567     dispatch_source_set_event_handler(cpu_count_timer, ^{
568 		int cret = kpc_get_cpu_counters(true,
569 		    KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK, NULL, counts);
570 		T_QUIET; T_ASSERT_POSIX_SUCCESS(cret, "kpc_get_cpu_counters");
571 
572 		if (!first_check_ns) {
573 			first_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
574 		} else {
575 			last_check_ns = clock_gettime_nsec_np(CLOCK_MONOTONIC);
576 		}
577 		check_counters(mch.ncpus, mch.nfixed + mch.nconfig, tly, counts);
578 	});
579 	ktrace_events_class(sess, DBG_PERF, ^(struct trace_point * __unused tp) {});
580 
581 	int stop = 0;
582 	(void)start_threads(&mch, spin, &stop);
583 
584 	ktrace_set_completion_handler(sess, ^{
585 		dispatch_cancel(cpu_count_timer);
586 
587 		check_tally(mch.ncpus, mch.nfixed + mch.nconfig, tly);
588 
589 		struct rusage_info_v4 post_ru = {};
590 		int ruret = proc_pid_rusage(getpid(), RUSAGE_INFO_V4,
591 				(rusage_info_t *)&post_ru);
592 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ruret, "got rusage information");
593 		T_LOG("saw %llu cycles in process",
594 				post_ru.ri_cycles - pre_ru.ri_cycles);
595 		uint64_t total_cycles = 0;
596 
597 		T_LOG("saw pmis = %u, samples = %u, stacks = %u", pmi_count, sample_count,
598 		    callstack_count);
599 		// Allow some slop in case the trace is cut-off midway through a
600 		// sample.
601 		const unsigned int cutoff_leeway = 32;
602 		T_EXPECT_GE(sample_count + cutoff_leeway, pmi_count,
603 		    "saw as many samples as PMIs");
604 		T_EXPECT_GE(callstack_count + cutoff_leeway, pmi_count,
605 		    "saw as many stacks as PMIs");
606 
607 		unsigned int cpu_sample_count = 0;
608 		char sample_slices[NTIMESLICES + 1];
609 		sample_slices[NTIMESLICES] = '\0';
610 		for (unsigned int i = 0; i < mch.ncpus; i++) {
611 			memset(sample_slices, '-', sizeof(sample_slices) - 1);
612 
613 			struct cpu *cpu = &cpus[i];
614 			unsigned int pmi_slice_count = 0, no_sched_slice_count = 0,
615 					cpu_pmi_count = 0, last_contiguous = 0;
616 			bool seen_empty = false;
617 			for (unsigned int j = 0; j < NTIMESLICES; j++) {
618 				unsigned int slice_pmi_count = cpu->pmi_timeslices[j];
619 				unsigned int slice_sched_count = cpu->scheduled_timeslices[j];
620 				cpu_pmi_count += slice_pmi_count;
621 				if (slice_pmi_count > 0) {
622 					pmi_slice_count++;
623 					sample_slices[j] = '*';
624 				} else if (slice_sched_count == 0) {
625 					no_sched_slice_count++;
626 					sample_slices[j] = '.';
627 				} else {
628 					seen_empty = true;
629 				}
630 				if (!seen_empty) {
631 					last_contiguous = j;
632 				}
633 			}
634 			unsigned int ctr = i * (mch.nfixed + mch.nconfig) + mch.nfixed;
635 			uint64_t delta = tly[ctr].lastvalue - tly[ctr].firstvalue;
636 			T_LOG("%g GHz", (double)delta / (last_check_ns - first_check_ns));
637 			total_cycles += delta;
638 			uint64_t abs_max_skid = (uint64_t)ABSV64(cpu->max_skid);
639 			T_LOG("CPU %2u: %4up:%4un/%u, %6u/%llu, max skid = %llu (%.4f%%), "
640 					"last contiguous = %u, scheduled outside = %u", i,
641 					pmi_slice_count, no_sched_slice_count, NTIMESLICES,
642 					sample_count, delta / PMI_PERIOD, abs_max_skid,
643 					(double)abs_max_skid / PMI_PERIOD * 100, last_contiguous,
644 					cpu->scheduled_outside_slice);
645 			T_LOG("%s", sample_slices);
646 			if (cpu_pmi_count > 0) {
647 				cpu_sample_count++;
648 			}
649 			T_EXPECT_EQ(last_contiguous, NTIMESLICES - 1,
650 					"CPU %2u: saw samples in each time slice", i);
651 		}
652 		T_LOG("kpc reported %llu total cycles", total_cycles);
653 		T_LOG("saw %u sample events, across %u/%u cpus", sample_count,
654 				cpu_sample_count, mch.ncpus);
655 		T_EXPECT_EQ(cpu_sample_count, mch.ncpus,
656 				"should see PMIs on every CPU");
657 		T_END;
658 	});
659 
660 	int dbglvl = 3;
661 	ret = sysctlbyname("kperf.debug_level", NULL, NULL, &dbglvl,
662 	    sizeof(dbglvl));
663 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "set kperf debug level");
664 	ret = kperf_sample_set(1);
665 	T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");
666 
667 	start_kpc();
668 
669 	int error = ktrace_start(sess, dispatch_get_main_queue());
670 	T_ASSERT_POSIX_ZERO(error, "started tracing");
671 
672 	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
673 			dispatch_get_main_queue(), ^{
674 		T_LOG("ending tracing after timeout");
675 		kdebug_trace(END_EVENT, 0, 0, 0, 0);
676 	});
677 
678 	dispatch_activate(cpu_count_timer);
679 
680 	T_SETUPEND;
681 
682 	dispatch_main();
683 }
684 
685 #if defined(__arm64__)
686 #define IS_ARM64 true
687 #else // defined(__arm64__)
688 #define IS_ARM64 false
689 #endif // !defined(__arm64__)
690 
691 T_DECL(kpc_pmu_config, "ensure PMU can be configured",
692     XNU_T_META_SOC_SPECIFIC,
693     T_META_ENABLED(IS_ARM64), T_META_TAG_VM_NOT_ELIGIBLE)
694 {
695 	T_SETUPBEGIN;
696 	int ret = kpc_force_all_ctrs_set(1);
697 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret,
698 			"force all counters to allow raw PMU configuration");
699 	uint32_t nconfigs = kpc_get_config_count(KPC_CLASS_RAWPMU_MASK);
700 	T_LOG("found %u raw PMU configuration words", nconfigs);
701 	uint64_t *configs = calloc(nconfigs, sizeof(*configs));
702 	T_QUIET; T_ASSERT_NOTNULL(configs, "allocated config words");
703 	T_SETUPEND;
704 
705 	ret = kpc_set_config(KPC_CLASS_RAWPMU_MASK, configs);
706 	T_ASSERT_POSIX_SUCCESS(ret, "should set PMU configuration");
707 }
708 
709 T_DECL(pmi_pc_capture, "ensure PC capture works for PMCs 5, 6, and 7",
710     XNU_T_META_SOC_SPECIFIC,
711     T_META_REQUIRES_SYSCTL_EQ("kpc.pc_capture_supported", 1), T_META_TAG_VM_NOT_ELIGIBLE)
712 {
713 	start_controlling_ktrace();
714 	struct machine mch = {};
715 	prepare_kpc(&mch, 0, "INST_BRANCH_TAKEN", PMI_PERIOD);
716 
717 	T_SETUPBEGIN;
718 
719 	uint64_t *periods = calloc(mch.nconfig, sizeof(*periods));
720 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(periods, "allocate periods array");
721 	for (unsigned int i = 0; i < mch.nconfig; i++) {
722 		/*
723 		 * Offset the periods so the PMIs don't alias to the same PC capture.
724 		 * Since there's only one PC capture register, they will clobber each
725 		 * other.
726 		 */
727 		periods[i] = PMI_PERIOD / 1000 + (i * 1000);
728 	}
729 
730 	int ret = kpc_set_period(KPC_CLASS_CONFIGURABLE_MASK, periods);
731 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_period");
732 	free(periods);
733 
734 	int32_t *actions = calloc(mch.nconfig, sizeof(*actions));
735 	for (unsigned int i = 0; i < mch.nconfig; i++) {
736 		actions[i] = 1;
737 	}
738 	ret = kpc_set_actionid(KPC_CLASS_CONFIGURABLE_MASK, actions);
739 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kpc_set_actionid");
740 	free(actions);
741 
742 	(void)kperf_action_count_set(1);
743 	ret = kperf_action_samplers_set(1, KPERF_SAMPLER_TINFO);
744 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kperf_action_samplers_set");
745 
746 	ktrace_session_t sess = ktrace_session_create();
747 	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(sess, "ktrace_session_create");
748 
749 	uint64_t pc_captured_arr[3] = {};
750 	uint64_t *pc_captured = pc_captured_arr;
751 	uint64_t pmi_event_arr[3] = {};
752 	uint64_t *pmi_event = pmi_event_arr;
753 	ktrace_events_single(sess, PERF_KPC_PMI, ^(struct trace_point *tp) {
754 		if (tp->debugid & DBG_FUNC_END) {
755 			return;
756 		}
757 
758 		uint64_t desc = tp->arg1;
759 
760 #define KPC_DESC_COUNTER(DESC) (((DESC) >> 32) & 0xffff)
761 #define KPC_DESC_CONFIG(DESC) ((DESC) & 0xffff)
762 #define KPC_DESC_FLAGS(DESC) ((DESC) >> 48)
763 #define KPC_FLAG_PC_CAPTURED (0x08)
764 
765 		uint64_t counter = KPC_DESC_COUNTER(desc);
766 		uint64_t flags = KPC_DESC_FLAGS(desc);
767 		if (counter >= 5 && counter <= 7) {
768 			pmi_event[counter - 5]++;
769 			if (flags & KPC_FLAG_PC_CAPTURED) {
770 				pc_captured[counter - 5]++;
771 			}
772 		}
773 		T_QUIET;
774 		T_ASSERT_EQ(KPC_DESC_CONFIG(desc), mch.selector,
775 		    "correct counter configuration");
776 	});
777 
778 	ktrace_events_single(sess, END_EVENT, ^(struct trace_point *tp __unused) {
779 		ktrace_config_t config = ktrace_config_create_current();
780 		ktrace_config_print_description(config, stdout);
781 		ktrace_config_destroy(config);
782 		T_LOG("saw ending event");
783 		ktrace_end(sess, 1);
784 	});
785 
786 	ktrace_set_completion_handler(sess, ^{
787 		ktrace_session_destroy(sess);
788 		for (unsigned int i = 0; i < 3; i++) {
789 			T_LOG("PMC%u: saw %llu/%llu (%g%%) PMIs with PC capture", i + 5,
790 			    pc_captured[i], pmi_event[i],
791 			    (double)pc_captured[i] / (double)pmi_event[i] * 100.0);
792 			T_EXPECT_GT(pc_captured[i], 0ULL, "saw PC capture for counter %u",
793 			    i + 5);
794 		}
795 		T_END;
796 	});
797 
798 	ret = kperf_sample_set(1);
799 	T_ASSERT_POSIX_SUCCESS(ret, "kperf_sample_set");
800 
801 	start_kpc();
802 
803 	int error = ktrace_start(sess, dispatch_get_main_queue());
804 	T_ASSERT_POSIX_ZERO(error, "started tracing");
805 
806 	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, PMI_TEST_DURATION_NS),
807 			dispatch_get_main_queue(), ^{
808 		T_LOG("ending tracing after timeout");
809 		kdebug_trace(END_EVENT, 0, 0, 0, 0);
810 	});
811 
812 	T_SETUPEND;
813 
814 	dispatch_main();
815 }
816