xref: /xnu-8796.121.2/osfmk/arm64/perfmon_arm64.c (revision c54f35ca767986246321eb901baf8f5ff7923f6a)
1 // Copyright (c) 2020 Apple Inc. All rights reserved.
2 //
3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 //
5 // This file contains Original Code and/or Modifications of Original Code
6 // as defined in and that are subject to the Apple Public Source License
7 // Version 2.0 (the 'License'). You may not use this file except in
8 // compliance with the License. The rights granted to you under the License
9 // may not be used to create, or enable the creation or redistribution of,
10 // unlawful or unlicensed copies of an Apple operating system, or to
11 // circumvent, violate, or enable the circumvention or violation of, any
12 // terms of an Apple operating system software license agreement.
13 //
14 // Please obtain a copy of the License at
15 // http://www.opensource.apple.com/apsl/ and read it before using this file.
16 //
17 // The Original Code and all software distributed under the License are
18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 // Please see the License for the specific language governing rights and
23 // limitations under the License.
24 //
25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26 
27 #if KERNEL
28 
29 #include <arm64/perfmon_arm64_regs.h>
30 #include <kern/perfmon.h>
31 #include <kern/sched_prim.h>
32 #include <kern/startup.h>
33 #include <machine/machine_perfmon.h>
34 #include <machine/machine_routines.h>
35 #include <os/atomic.h>
36 #include <pexpert/arm64/board_config.h>
37 
38 #endif // KERNEL
39 
40 #include <stdatomic.h>
41 #include <stddef.h>
42 #include <string.h>
43 #include <sys/perfmon_private.h>
44 
// Stringification helpers: _STR quotes its argument; STR expands macros first.
#define _STR(N) #N
#define STR(N) _STR(N)
// Element count of a true array (not valid on pointers/decayed parameters).
#define ARRAYLEN(A) (sizeof(A) / sizeof(A[0]))

// Expand the CPMU X-macro lists into arrays of register name strings.
#define REG(N) #N,
#define PMC(N) "PMC" #N,
const perfmon_name_t cpmu_reg_names[] = { CPMU_REGS };
const size_t cpmu_reg_count = ARRAYLEN(cpmu_reg_names);

const perfmon_name_t cpmu_attr_names[] = { CPMU_ATTR_REGS };
const size_t cpmu_attr_count = ARRAYLEN(cpmu_attr_names);
56 
#if HAS_UPMU

// Expand the UPMU (uncore PMU) X-macro lists into register name strings.
#define PIOREG(N, O) #N,
#define UPMC(N, O) "UPMC" #N,
const perfmon_name_t upmu_reg_names[] = { UPMU_REGS };
const size_t upmu_reg_count = ARRAYLEN(upmu_reg_names);

const perfmon_name_t upmu_attr_names[] = { UPMU_ATTR_REGS };
const size_t upmu_attr_count = ARRAYLEN(upmu_attr_names);

#if KERNEL

// Per-cluster base addresses of the CPM and ACC implementation-defined
// register blocks, captured once during startup and read-only afterwards.
SECURITY_READ_ONLY_LATE(static uintptr_t) cpm_impl[MAX_CPU_CLUSTERS] = {};
SECURITY_READ_ONLY_LATE(static uintptr_t) acc_impl[MAX_CPU_CLUSTERS] = {};

#endif // KERNEL

SECURITY_READ_ONLY_LATE(static unsigned int) cluster_count = 1;
// One bit per CPU that receives UPMU PMIs (the first CPU of each cluster).
SECURITY_READ_ONLY_LATE(static uint64_t) upmu_cpu_pmi_mask = 0;

// Number of UPMU units: one per cluster on some SoCs, otherwise one global.
#if UPMU_PER_CLUSTER
#define UPMU_UNIT_COUNT (cluster_count)
#else // UPMU_PER_CLUSTER
#define UPMU_UNIT_COUNT (1)
#endif // !UPMU_PER_CLUSTER

#endif // HAS_UPMU

// The first two CPMU counters (PMC0/PMC1) are fixed-function.
const unsigned short cpmu_fixed_count = 2;
86 
__startup_func
static void
perfmon_machine_startup(void)
{
	// Register the core PMU (CPMU) layout with the generic perfmon layer.
	struct perfmon_source *cpmu_source = perfmon_source_reserve(perfmon_cpmu);
	cpmu_source->ps_layout = (struct perfmon_layout){
		.pl_counter_count = CPMU_PMC_COUNT,
		.pl_fixed_offset = 0,
		.pl_fixed_count = cpmu_fixed_count,
		.pl_unit_count = (unsigned short)ml_get_cpu_count(),
		.pl_reg_count = cpmu_reg_count,
		.pl_attr_count = cpmu_attr_count,
	};
	cpmu_source->ps_register_names = cpmu_reg_names;
	cpmu_source->ps_attribute_names = cpmu_attr_names;

#if HAS_UPMU
	bool upmu_mapped = true;

#if KERNEL
	// Capture each cluster's implementation-defined register bases from
	// the CPU topology.
	const ml_topology_info_t *topo_info = ml_get_topology_info();
	cluster_count = topo_info->num_clusters;

	for (unsigned int c = 0; c < cluster_count; c++) {
		ml_topology_cluster_t *cluster = &topo_info->clusters[c];
		// Route UPMU PMIs to the first CPU of each cluster.
		upmu_cpu_pmi_mask |= 1ULL << cluster->first_cpu_id;
		cpm_impl[c] = (uintptr_t)cluster->cpm_IMPL_regs;
		acc_impl[c] = (uintptr_t)cluster->acc_IMPL_regs;
		if (cpm_impl[c] == 0 || acc_impl[c] == 0) {
			upmu_mapped = false;
			break;
		}
	}
#endif // KERNEL

	// If any cluster lacks mapped registers, don't advertise a UPMU source.
	if (!upmu_mapped) {
		return;
	}
	struct perfmon_source *upmu_source = perfmon_source_reserve(perfmon_upmu);
	upmu_source->ps_layout = (struct perfmon_layout){
		.pl_counter_count = UPMU_PMC_COUNT,
		.pl_fixed_offset = 0,
		.pl_fixed_count = 0,
		.pl_unit_count = (unsigned short)UPMU_UNIT_COUNT,
		.pl_reg_count = upmu_reg_count,
		.pl_attr_count = upmu_attr_count,
	};
	upmu_source->ps_register_names = upmu_reg_names;
	upmu_source->ps_attribute_names = upmu_attr_names;
#endif // HAS_UPMU
}
138 
#if KERNEL

STARTUP(PERCPU, STARTUP_RANK_MIDDLE, perfmon_machine_startup);

// Cross-call target: read every CPMU register on the current CPU into this
// CPU's slot of the caller-provided array (offset cpu_number() * reg count).
static void
perfmon_cpmu_sample_regs_xcall(void *regs_arg)
{
	uint64_t *regs = regs_arg;

	// Re-expand the CPMU X-macros as MSR reads of the _EL1 system
	// registers, producing a value per register in declaration order.
#undef REG
#define REG_EL1(N) N##_EL1
#define REG(N) __builtin_arm_rsr64(STR(REG_EL1(N))),
#undef PMC
#define PMC_EL1(N) PMC##N
#define PMC(N) __builtin_arm_rsr64(STR(PMC_EL1(N))),

	const uint64_t cpmu_regs[] = { CPMU_REGS };
	memcpy(&regs[cpu_number() * cpmu_reg_count], cpmu_regs, sizeof(cpmu_regs));
}
158 
#if HAS_UPMU
#undef PIOREG
#undef UPMC

#if UPMU_PER_CLUSTER
// When each cluster has its own UPMU, expand the X-macros to the PIO offset
// of each register within a cluster's CPM register block.
#define PIOREG(N, O) O,
#define UPMC(N, O) O,
const uintptr_t upmu_reg_cpm_offs[] = { UPMU_REGS };
const uintptr_t upmu_attr_cpm_offs[] = { UPMU_ATTR_REGS };
#endif // UPMU_PER_CLUSTER
#endif // HAS_UPMU
170 
// Sample all registers for the given PMU kind into `regs`.
// CPMU: broadcast an xcall so each CPU reads its own registers.
// UPMU: read remotely via PIO (per-cluster units) or via MSRs (global unit).
void
perfmon_machine_sample_regs(enum perfmon_kind kind, uint64_t *regs,
    size_t __unused regs_len)
{
	if (kind == perfmon_cpmu) {
		boolean_t include_self = TRUE;
		cpu_broadcast_xcall_simple(include_self,
		    perfmon_cpmu_sample_regs_xcall, regs);
#if HAS_UPMU
	} else if (kind == perfmon_upmu) {
#if UPMU_PER_CLUSTER
		// Read the registers remotely through PIO when each cluster has its own
		// UPMU.
		for (unsigned int c = 0; c < UPMU_UNIT_COUNT; c++) {
			for (size_t r = 0; r < upmu_reg_count; r++) {
				regs[c * upmu_reg_count + r] =
				    *(uint64_t *)(cpm_impl[c] + upmu_reg_cpm_offs[r]);
			}
		}
#else // UPMU_PER_CLUSTER
#define PIOREG(N, O) REG(N)
#define UPMC_EL1(N) UPMC##N
#define UPMC(N, O) __builtin_arm_rsr64(STR(UPMC_EL1(N))),
		// Use direct MSR reads when the UPMU is global -- PIO access is not
		// consistent across all registers.
		const uint64_t upmu_regs[] = { UPMU_REGS };
		assert(regs_len == ARRAYLEN(upmu_regs));
		memcpy(regs, upmu_regs, sizeof(upmu_regs));
#endif // !UPMU_PER_CLUSTER
#endif // HAS_UPMU
	} else {
		panic("perfmon: unexpected kind: %d", kind);
	}
}
205 
206 #endif // KERNEL
207 
// Re-expand REG to produce one enumerator per CPMU attribute register,
// giving a dense 0..MAX index into pcr_attr_regs below.
#undef REG
#define REG(N) CPMU_##N,

enum perfmon_cpmu_attr_reg {
	CPMU_ATTR_REGS
	CPMU_ATTR_REG_MAX,
};

// Shadow copy of the CPMU configuration that gets programmed on every CPU.
struct perfmon_cpmu_regs {
	uint64_t pcr_pmcr0;     // control: enables and PMI routing
	uint64_t pcr_pmesr[2];  // event selection registers
	uint64_t pcr_attr_regs[CPMU_ATTR_REG_MAX];      // attribute values
};

struct perfmon_cpmu_regs cpmu_reg_state;
223 
// Reset the shadow CPMU state to its default configuration: fixed counters
// enabled with their PMIs, interrupts generated via AIC or FIQ per the SoC.
static void
perfmon_cpmu_regs_init(struct perfmon_cpmu_regs *cpmu_regs)
{
	bzero(cpmu_regs, sizeof(*cpmu_regs));
	const uint64_t fixed_enable = 0x3;      // enable PMC0/PMC1
	const uint64_t __unused intgen_fiq = 0x400;
	const uint64_t __unused intgen_aic = 0x100;
	const uint64_t fixed_pmi_enable = 0x3000;       // PMIs for PMC0/PMC1
	// Note: the #if selects which interrupt-generation constant joins the
	// expression; only one branch is compiled in.
	cpmu_regs->pcr_pmcr0 = fixed_enable | fixed_pmi_enable |
#if CPMU_AIC_PMI
	    intgen_aic;
#else // CPMU_AIC_PMI
	    intgen_fiq;
#endif // !CPMU_AIC_PMI
}
239 
#if HAS_UPMU

// Re-expand PIOREG to produce one enumerator per UPMU attribute register.
#undef PIOREG
#define PIOREG(N, O) UPMU_##N,

enum perfmon_upmu_attr_reg {
	UPMU_ATTR_REGS
	UPMU_ATTR_REG_MAX,
};

// Each UPMESR covers 8 counters, so a second one is needed past 8 PMCs.
#if UPMU_PMC_COUNT > 8
#define UPMU_ESR_COUNT 2
#else // UPMU_PMC_COUNT > 8
#define UPMU_ESR_COUNT 1
#endif // UPMU_PMC_COUNT <= 8

// Shadow copy of the UPMU configuration programmed on each unit.
struct perfmon_upmu_regs {
	uint64_t pur_upmcr0;    // control: counter enables, PMI enables, intgen
	uint64_t pur_upmesr[UPMU_ESR_COUNT];    // event selection
	// UPMPCM is handled by Monotonic.
	uint64_t pur_attr_regs[UPMU_ATTR_REG_MAX];
};

struct perfmon_upmu_regs upmu_reg_state;
264 
265 static void
perfmon_upmu_regs_init(struct perfmon_upmu_regs * upmu_regs)266 perfmon_upmu_regs_init(struct perfmon_upmu_regs *upmu_regs)
267 {
268 	bzero(upmu_regs, sizeof(*upmu_regs));
269 
270 	uint64_t pmi_enable = 0xff000
271 #if UPMU_PMC_COUNT > 8
272 	    | 0xff00000
273 #endif // UPMU_PMC_COUNT > 8
274 	;
275 	uint64_t intgen_fiq = 0x100;
276 	upmu_regs->pur_upmcr0 = pmi_enable | intgen_fiq;
277 }
278 
279 #endif // HAS_UPMU
280 
#if KERNEL

// Cross-call target: program the CPMU on the current CPU from the shadow
// state.  Attribute registers are only written on debugger-enabled systems.
static void
perfmon_cpmu_configure_xcall(void *cpmu_regs_arg)
{
	struct perfmon_cpmu_regs *cpmu_regs = cpmu_regs_arg;
	__builtin_arm_wsr64("PMCR0_EL1", cpmu_regs->pcr_pmcr0);
	__builtin_arm_wsr64("PMESR0_EL1", cpmu_regs->pcr_pmesr[0]);
	__builtin_arm_wsr64("PMESR1_EL1", cpmu_regs->pcr_pmesr[1]);

	if (!PE_i_can_has_debugger(NULL)) {
		return;
	}

	// __COUNTER__ trick: each REG expansion below yields the next dense
	// case label 0, 1, 2, ... matching the attribute-register enum order.
	enum { REG_COUNTER_BASE = __COUNTER__ };
#define REG_COUNTER (__COUNTER__ - REG_COUNTER_BASE - 1)

	for (size_t i = 0; i < cpmu_attr_count; i++) {
		uint64_t attr_value = cpmu_regs->pcr_attr_regs[i];
		switch (i) {
#undef REG
#define REG(N) \
	        case REG_COUNTER: \
	                __builtin_arm_wsr64(STR(REG_EL1(N)), attr_value); \
	                break;

			CPMU_ATTR_REGS

		default:
			panic("perfmon: unexpected CPMU attribute ID: %zu", i);
			break;
		}
	}
}

#endif // KERNEL
317 
318 #if HAS_UPMU
319 
320 #if KERNEL
321 
// Write the UPMU attribute registers for one cluster from the shadow state.
// Only permitted on debugger-enabled systems.  Per-cluster UPMUs are written
// through PIO into the cluster's CPM block; a global UPMU uses MSR writes.
static void
perfmon_upmu_apply_attrs(struct perfmon_upmu_regs *upmu_regs,
    unsigned int __unused cluster_id)
{
#if KERNEL
	// NOTE(review): this inner #if KERNEL sits inside an outer #if KERNEL
	// guard (see the surrounding region), so the #else leg below appears
	// unreachable -- confirm whether this function is also built for a
	// non-kernel configuration elsewhere.
	if (!PE_i_can_has_debugger(NULL)) {
		return;
	}

	for (size_t i = 0; i < upmu_attr_count; i++) {
		uint64_t attr_value = upmu_regs->pur_attr_regs[i];

#if UPMU_PER_CLUSTER
		// PIO write at the register's offset within this cluster's CPM block.
		uint64_t *attr_addr =
		    (uint64_t *)(cpm_impl[cluster_id] + upmu_attr_cpm_offs[i]);
		*attr_addr = attr_value;
#else // UPMU_PER_CLUSTER
		// __COUNTER__ trick: generate a dense case label per attribute
		// register, in the order they appear in UPMU_ATTR_REGS.
		enum { PIOREG_COUNTER_BASE = __COUNTER__ };
#define PIOREG_COUNTER (__COUNTER__ - PIOREG_COUNTER_BASE - 1)

		switch (i) {
#undef PIOREG
#define PIOREG(N, O) \
	        case PIOREG_COUNTER: \
	                __builtin_arm_wsr64(STR(REG_EL1(N)), attr_value); \
	                break;

			UPMU_ATTR_REGS

		default:
			panic("perfmon: unexpected UPMU attribute ID: %zu", i);
			break;
		}
#endif // !UPMU_PER_CLUSTER
	}
#else // KERNEL
#pragma unused(cluster_id, upmu_regs)
#endif // KERNEL
}
361 
362 static void
perfmon_upmu_configure(struct perfmon_upmu_regs * upmu_regs)363 perfmon_upmu_configure(struct perfmon_upmu_regs *upmu_regs)
364 {
365 #if !UPMU_PER_CLUSTER
366 	__builtin_arm_wsr64("UPMCR0_EL1", upmu_regs->pur_upmcr0);
367 	__builtin_arm_wsr64("UPMESR0_EL1", upmu_regs->pur_upmesr[0]);
368 #if UPMU_PMC_COUNT > 8
369 	__builtin_arm_wsr64("UPMESR1_EL1", upmu_regs->pur_upmesr[1]);
370 #endif // UPMU_PMC_COUNT > 8
371 #endif // !UPMU_PER_CLUSTER
372 
373 	for (unsigned int cluster = 0; cluster < cluster_count; cluster++) {
374 #if UPMU_PER_CLUSTER
375 #undef PIOREG
376 #define PIOREG(N, O) ((uint64_t *)(cpm_impl[cluster] + O))
377 		*UPMCR0 = upmu_regs->pur_upmcr0;
378 		*UPMESR0 = upmu_regs->pur_upmesr[0];
379 #if UPMU_PMC_COUNT > 8
380 		*UPMESR1 = upmu_regs->pur_upmesr[1];
381 #endif // UPMU_PMC_COUNT > 8
382 #endif // UPMU_PER_CLUSTER
383 		perfmon_upmu_apply_attrs(&upmu_reg_state, cluster);
384 	}
385 }
386 
387 #endif // KERNEL
388 
389 #endif // HAS_UPMU
390 
391 static void
perfmon_set_attrs(uint64_t * attr_regs,size_t __unused attr_regs_len,perfmon_config_t config)392 perfmon_set_attrs(uint64_t *attr_regs, size_t __unused attr_regs_len,
393     perfmon_config_t config)
394 {
395 	for (size_t attr = 0; attr < config->pc_spec.ps_attr_count; attr++) {
396 		unsigned short id = config->pc_attr_ids[attr];
397 		assert(id < attr_regs_len);
398 		attr_regs[id] = config->pc_spec.ps_attrs[attr].pa_value;
399 	}
400 }
401 
// Build the shadow register state for the given PMU kind from a (pre-
// validated) configuration and program the hardware.  Always returns 0.
int
perfmon_machine_configure(enum perfmon_kind kind, const perfmon_config_t config)
{
	if (kind == perfmon_cpmu) {
		perfmon_cpmu_regs_init(&cpmu_reg_state);

		// Only configurable counters (beyond the fixed ones) take events.
		for (unsigned int pmc = cpmu_fixed_count; pmc < CPMU_PMC_COUNT; pmc++) {
			if ((config->pc_counters_used & (1ULL << pmc)) == 0) {
				continue;
			}
			struct perfmon_counter *counter = &config->pc_counters[pmc];
			uint64_t event = counter->pc_number &
#if CPMU_16BIT_EVENTS
			    0xffff;
#else // CPMU_16BIT_EVENTS
			    0xff;
#endif // !CPMU_16BIT_EVENTS

			// Counter-enable bits: low bits for PMC0-7, offset by 32 above.
			unsigned int enable_offset = pmc > 7 ? 32 : 0;
			cpmu_reg_state.pcr_pmcr0 |= 1ULL << (enable_offset + pmc);

			// PMI-enable bits start at bit 12 for PMC0-7; higher counters
			// land in a second bank (offset 44 - 7 places PMC8 at bit 45).
			unsigned int pmi_offset = pmc > 7 ? 44 - 7 : 12;
			cpmu_reg_state.pcr_pmcr0 |= 1ULL << (pmi_offset + pmc);

			// PMESR0 selects events for PMC2-5, PMESR1 for PMC6-9.
			unsigned int pmesr_index = pmc > 5 ? 1 : 0;
			unsigned int pmesr_shift = pmc > 5 ? pmc - 6 :
			    pmc - cpmu_fixed_count;
			// 8-bits for each event.
#if CPMU_16BIT_EVENTS
			pmesr_shift *= 16;
#else // CPMU_16BIT_EVENTS
			pmesr_shift *= 8;
#endif // !CPMU_16BIT_EVENTS
			uint64_t pmesr_bits = event << pmesr_shift;
			cpmu_reg_state.pcr_pmesr[pmesr_index] |= pmesr_bits;
		}
		perfmon_set_attrs(cpmu_reg_state.pcr_attr_regs,
		    ARRAYLEN(cpmu_reg_state.pcr_attr_regs), config);

#if KERNEL
		// Program every CPU's CPMU from the shared shadow state.
		boolean_t include_self = TRUE;
		cpu_broadcast_xcall_simple(include_self, perfmon_cpmu_configure_xcall,
		    &cpmu_reg_state);
#endif // KERNEL
#if HAS_UPMU
	} else if (kind == perfmon_upmu) {
		perfmon_upmu_regs_init(&upmu_reg_state);

		for (unsigned short pmc = 0; pmc < UPMU_PMC_COUNT; pmc++) {
			struct perfmon_counter *counter = &config->pc_counters[pmc];
			if ((config->pc_counters_used & (1ULL << pmc)) == 0) {
				continue;
			}

			upmu_reg_state.pur_upmcr0 |= 1 << pmc;

			uint64_t event = counter->pc_number & 0xff;
			unsigned int upmesr_index = pmc >= 8 ? 1 : 0;
			// NOTE(review): unlike the CPMU path above, this shift is not
			// scaled by the 8-bit event field width -- confirm against the
			// UPMESR register layout that fields here really are 1 bit
			// apart, or whether a "*= 8" is missing.
			unsigned int upmesr_shift = pmc >= 8 ? pmc - 8 : pmc;
			uint64_t upmesr_bits = event << upmesr_shift;
			upmu_reg_state.pur_upmesr[upmesr_index] |= upmesr_bits;
		}
		perfmon_set_attrs(upmu_reg_state.pur_attr_regs,
		    ARRAYLEN(upmu_reg_state.pur_attr_regs), config);

#if KERNEL
		perfmon_upmu_configure(&upmu_reg_state);
#endif // KERNEL
#endif // HAS_UPMU
	} else {
		panic("perfmon: unexpected kind: %d", kind);
	}
	return 0;
}
476 
// Restore the given PMU kind to its default (released) configuration by
// re-initializing the shadow state and reprogramming the hardware.
void
perfmon_machine_reset(enum perfmon_kind kind)
{
	if (kind == perfmon_cpmu) {
		perfmon_cpmu_regs_init(&cpmu_reg_state);
#if KERNEL
		boolean_t include_self = TRUE;
		cpu_broadcast_xcall_simple(include_self, perfmon_cpmu_configure_xcall,
		    &cpmu_reg_state);
#endif // KERNEL
#if HAS_UPMU
	} else if (kind == perfmon_upmu) {
#if KERNEL
		perfmon_upmu_regs_init(&upmu_reg_state);
		perfmon_upmu_configure(&upmu_reg_state);
#endif // KERNEL
#endif // HAS_UPMU
	} else {
		panic("perfmon: unexpected kind: %d", kind);
	}
}
498