1 // Copyright (c) 2020 Apple Inc. All rights reserved.
2 //
3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 //
5 // This file contains Original Code and/or Modifications of Original Code
6 // as defined in and that are subject to the Apple Public Source License
7 // Version 2.0 (the 'License'). You may not use this file except in
8 // compliance with the License. The rights granted to you under the License
9 // may not be used to create, or enable the creation or redistribution of,
10 // unlawful or unlicensed copies of an Apple operating system, or to
11 // circumvent, violate, or enable the circumvention or violation of, any
12 // terms of an Apple operating system software license agreement.
13 //
14 // Please obtain a copy of the License at
15 // http://www.opensource.apple.com/apsl/ and read it before using this file.
16 //
17 // The Original Code and all software distributed under the License are
18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 // Please see the License for the specific language governing rights and
23 // limitations under the License.
24 //
25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26
27 #if KERNEL
28
29 #include <arm64/perfmon_arm64_regs.h>
30 #include <kern/perfmon.h>
31 #include <kern/sched_prim.h>
32 #include <kern/startup.h>
33 #include <machine/machine_perfmon.h>
34 #include <machine/machine_routines.h>
35 #include <os/atomic.h>
36 #include <pexpert/arm64/board_config.h>
37
38 #endif // KERNEL
39
40 #include <stdatomic.h>
41 #include <stddef.h>
42 #include <string.h>
43 #include <sys/perfmon_private.h>
44
// Stringification helpers and static-array sizing.
#define _STR(N) #N
#define STR(N) _STR(N)
#define ARRAYLEN(A) (sizeof(A) / sizeof(A[0]))

// Expand the CPMU X-macro register lists into human-readable name tables,
// exported to the perfmon core (one entry per register/counter).
#define REG(N) #N,
#define PMC(N) "PMC" #N,
const perfmon_name_t cpmu_reg_names[] = { CPMU_REGS };
const size_t cpmu_reg_count = ARRAYLEN(cpmu_reg_names);

const perfmon_name_t cpmu_attr_names[] = { CPMU_ATTR_REGS };
const size_t cpmu_attr_count = ARRAYLEN(cpmu_attr_names);
56
#if HAS_UPMU

// Expand the UPMU X-macro lists -- (name, PIO offset) pairs -- into name
// tables for the perfmon core.
#define PIOREG(N, O) #N,
#define UPMC(N, O) "UPMC" #N,
const perfmon_name_t upmu_reg_names[] = { UPMU_REGS };
const size_t upmu_reg_count = ARRAYLEN(upmu_reg_names);

const perfmon_name_t upmu_attr_names[] = { UPMU_ATTR_REGS };
const size_t upmu_attr_count = ARRAYLEN(upmu_attr_names);

#if KERNEL

// Per-cluster base addresses of the CPM and ACC implementation-defined
// register blocks, captured once from the topology at startup.
SECURITY_READ_ONLY_LATE(static uintptr_t) cpm_impl[MAX_CPU_CLUSTERS] = {};
SECURITY_READ_ONLY_LATE(static uintptr_t) acc_impl[MAX_CPU_CLUSTERS] = {};

#endif // KERNEL

SECURITY_READ_ONLY_LATE(static unsigned int) cluster_count = 1;
// One bit per CPU expected to receive UPMU PMIs (the first CPU of each
// cluster, per the startup loop below).
SECURITY_READ_ONLY_LATE(static uint64_t) upmu_cpu_pmi_mask = 0;

#if UPMU_PER_CLUSTER
#define UPMU_UNIT_COUNT (cluster_count)
#else // UPMU_PER_CLUSTER
#define UPMU_UNIT_COUNT (1)
#endif // !UPMU_PER_CLUSTER

#endif // HAS_UPMU

// The CPMU's first two counters are fixed-function (not event-programmable);
// presumably cycles and instructions -- confirm against the CPMU spec.
const unsigned short cpmu_fixed_count = 2;
86
87 __startup_func
88 static void
perfmon_machine_startup(void)89 perfmon_machine_startup(void)
90 {
91 struct perfmon_source *cpmu_source = perfmon_source_reserve(perfmon_cpmu);
92 cpmu_source->ps_layout = (struct perfmon_layout){
93 .pl_counter_count = CPMU_PMC_COUNT,
94 .pl_fixed_offset = 0,
95 .pl_fixed_count = cpmu_fixed_count,
96 .pl_unit_count = (unsigned short)ml_get_cpu_count(),
97 .pl_reg_count = cpmu_reg_count,
98 .pl_attr_count = cpmu_attr_count,
99 };
100 cpmu_source->ps_register_names = cpmu_reg_names;
101 cpmu_source->ps_attribute_names = cpmu_attr_names;
102
103 #if HAS_UPMU
104 bool upmu_mapped = true;
105
106 #if KERNEL
107 const ml_topology_info_t *topo_info = ml_get_topology_info();
108 cluster_count = topo_info->num_clusters;
109
110 for (unsigned int c = 0; c < cluster_count; c++) {
111 ml_topology_cluster_t *cluster = &topo_info->clusters[c];
112 upmu_cpu_pmi_mask |= 1ULL << cluster->first_cpu_id;
113 cpm_impl[c] = (uintptr_t)cluster->cpm_IMPL_regs;
114 acc_impl[c] = (uintptr_t)cluster->acc_IMPL_regs;
115 if (cpm_impl[c] == 0 || acc_impl[c] == 0) {
116 upmu_mapped = false;
117 break;
118 }
119 }
120 #endif // KERNEL
121
122 if (!upmu_mapped) {
123 return;
124 }
125 struct perfmon_source *upmu_source = perfmon_source_reserve(perfmon_upmu);
126 upmu_source->ps_layout = (struct perfmon_layout){
127 .pl_counter_count = UPMU_PMC_COUNT,
128 .pl_fixed_offset = 0,
129 .pl_fixed_count = 0,
130 .pl_unit_count = (unsigned short)UPMU_UNIT_COUNT,
131 .pl_reg_count = upmu_reg_count,
132 .pl_attr_count = upmu_attr_count,
133 };
134 upmu_source->ps_register_names = upmu_reg_names;
135 upmu_source->ps_attribute_names = upmu_attr_names;
136 #endif // HAS_UPMU
137 }
138
#if KERNEL

// Register the machine layouts with the perfmon core once per-CPU data is up.
STARTUP(PERCPU, STARTUP_RANK_MIDDLE, perfmon_machine_startup);
142
143 static void
perfmon_cpmu_sample_regs_xcall(void * regs_arg)144 perfmon_cpmu_sample_regs_xcall(void *regs_arg)
145 {
146 uint64_t *regs = regs_arg;
147
148 #undef REG
149 #define REG_EL1(N) N##_EL1
150 #define REG(N) __builtin_arm_rsr64(STR(REG_EL1(N))),
151 #undef PMC
152 #define PMC_EL1(N) PMC##N
153 #define PMC(N) __builtin_arm_rsr64(STR(PMC_EL1(N))),
154
155 const uint64_t cpmu_regs[] = { CPMU_REGS };
156 memcpy(®s[cpu_number() * cpmu_reg_count], cpmu_regs, sizeof(cpmu_regs));
157 }
158
#if HAS_UPMU
#undef PIOREG
#undef UPMC

#if UPMU_PER_CLUSTER
// Re-expand the UPMU lists as PIO offsets, so per-cluster UPMU registers can
// be read/written remotely through the CPM mapping.
#define PIOREG(N, O) O,
#define UPMC(N, O) O,
const uintptr_t upmu_reg_cpm_offs[] = { UPMU_REGS };
const uintptr_t upmu_attr_cpm_offs[] = { UPMU_ATTR_REGS };
#endif // UPMU_PER_CLUSTER
#endif // HAS_UPMU
170
171 void
perfmon_machine_sample_regs(enum perfmon_kind kind,uint64_t * regs,size_t __unused regs_len)172 perfmon_machine_sample_regs(enum perfmon_kind kind, uint64_t *regs,
173 size_t __unused regs_len)
174 {
175 if (kind == perfmon_cpmu) {
176 boolean_t include_self = TRUE;
177 cpu_broadcast_xcall_simple(include_self,
178 perfmon_cpmu_sample_regs_xcall, regs);
179 #if HAS_UPMU
180 } else if (kind == perfmon_upmu) {
181 #if UPMU_PER_CLUSTER
182 // Read the registers remotely through PIO when each cluster has its own
183 // UPMU.
184 for (unsigned int c = 0; c < UPMU_UNIT_COUNT; c++) {
185 for (size_t r = 0; r < upmu_reg_count; r++) {
186 regs[c * upmu_reg_count + r] =
187 *(uint64_t *)(cpm_impl[c] + upmu_reg_cpm_offs[r]);
188 }
189 }
190 #else // UPMU_PER_CLUSTER
191 #define PIOREG(N, O) REG(N)
192 #define UPMC_EL1(N) UPMC##N
193 #define UPMC(N, O) __builtin_arm_rsr64(STR(UPMC_EL1(N))),
194 // Use direct MSR reads when the UPMU is global -- PIO access is not
195 // consistent across all registers.
196 const uint64_t upmu_regs[] = { UPMU_REGS };
197 assert(regs_len == ARRAYLEN(upmu_regs));
198 memcpy(regs, upmu_regs, sizeof(upmu_regs));
199 #endif // !UPMU_PER_CLUSTER
200 #endif // HAS_UPMU
201 } else {
202 panic("perfmon: unexpected kind: %d", kind);
203 }
204 }
205
206 #endif // KERNEL
207
// Re-expand the attribute list as enumerators, giving each CPMU attribute
// register a dense index (CPMU_<name>) used to index pcr_attr_regs.
#undef REG
#define REG(N) CPMU_##N,

enum perfmon_cpmu_attr_reg {
	CPMU_ATTR_REGS
	CPMU_ATTR_REG_MAX,
};

// Shadow copy of the desired CPMU configuration, broadcast to every CPU by
// perfmon_cpmu_configure_xcall().
struct perfmon_cpmu_regs {
	uint64_t pcr_pmcr0;     // counter/PMI enables and interrupt routing
	uint64_t pcr_pmesr[2];  // event selections for the configurable PMCs
	uint64_t pcr_attr_regs[CPMU_ATTR_REG_MAX]; // attribute register values
};

struct perfmon_cpmu_regs cpmu_reg_state;
223
224 static void
perfmon_cpmu_regs_init(struct perfmon_cpmu_regs * cpmu_regs)225 perfmon_cpmu_regs_init(struct perfmon_cpmu_regs *cpmu_regs)
226 {
227 bzero(cpmu_regs, sizeof(*cpmu_regs));
228 const uint64_t fixed_enable = 0x3;
229 const uint64_t __unused intgen_fiq = 0x400;
230 const uint64_t __unused intgen_aic = 0x100;
231 const uint64_t fixed_pmi_enable = 0x3000;
232 cpmu_regs->pcr_pmcr0 = fixed_enable | fixed_pmi_enable |
233 #if CPMU_AIC_PMI
234 intgen_aic;
235 #else // CPMU_AIC_PMI
236 intgen_fiq;
237 #endif // !CPMU_AIC_PMI
238 }
239
#if HAS_UPMU

// Re-expand the attribute list as enumerators, giving each UPMU attribute
// register a dense index (UPMU_<name>) used to index pur_attr_regs.
#undef PIOREG
#define PIOREG(N, O) UPMU_##N,

enum perfmon_upmu_attr_reg {
	UPMU_ATTR_REGS
	UPMU_ATTR_REG_MAX,
};

// A second event selection register is needed when there are more than 8
// uncore counters.
#if UPMU_PMC_COUNT > 8
#define UPMU_ESR_COUNT 2
#else // UPMU_PMC_COUNT > 8
#define UPMU_ESR_COUNT 1
#endif // UPMU_PMC_COUNT <= 8

// Shadow copy of the desired UPMU configuration, applied to each unit by
// perfmon_upmu_configure().
struct perfmon_upmu_regs {
	uint64_t pur_upmcr0;                 // counter/PMI enables, intr routing
	uint64_t pur_upmesr[UPMU_ESR_COUNT]; // event selections
	// UPMPCM is handled by Monotonic.
	uint64_t pur_attr_regs[UPMU_ATTR_REG_MAX]; // attribute register values
};

struct perfmon_upmu_regs upmu_reg_state;
264
265 static void
perfmon_upmu_regs_init(struct perfmon_upmu_regs * upmu_regs)266 perfmon_upmu_regs_init(struct perfmon_upmu_regs *upmu_regs)
267 {
268 bzero(upmu_regs, sizeof(*upmu_regs));
269
270 uint64_t pmi_enable = 0xff000
271 #if UPMU_PMC_COUNT > 8
272 | 0xff00000
273 #endif // UPMU_PMC_COUNT > 8
274 ;
275 uint64_t intgen_fiq = 0x100;
276 upmu_regs->pur_upmcr0 = pmi_enable | intgen_fiq;
277 }
278
279 #endif // HAS_UPMU
280
281 #if KERNEL
282
283 static void
perfmon_cpmu_configure_xcall(void * cpmu_regs_arg)284 perfmon_cpmu_configure_xcall(void *cpmu_regs_arg)
285 {
286 struct perfmon_cpmu_regs *cpmu_regs = cpmu_regs_arg;
287 __builtin_arm_wsr64("PMCR0_EL1", cpmu_regs->pcr_pmcr0);
288 __builtin_arm_wsr64("PMESR0_EL1", cpmu_regs->pcr_pmesr[0]);
289 __builtin_arm_wsr64("PMESR1_EL1", cpmu_regs->pcr_pmesr[1]);
290
291 if (!PE_i_can_has_debugger(NULL)) {
292 return;
293 }
294
295 enum { REG_COUNTER_BASE = __COUNTER__ };
296 #define REG_COUNTER (__COUNTER__ - REG_COUNTER_BASE - 1)
297
298 for (size_t i = 0; i < cpmu_attr_count; i++) {
299 uint64_t attr_value = cpmu_regs->pcr_attr_regs[i];
300 switch (i) {
301 #undef REG
302 #define REG(N) \
303 case REG_COUNTER: \
304 __builtin_arm_wsr64(STR(REG_EL1(N)), attr_value); \
305 break;
306
307 CPMU_ATTR_REGS
308
309 default:
310 panic("perfmon: unexpected CPMU attribute ID: %zu", i);
311 break;
312 }
313 }
314 }
315
316 #endif // KERNEL
317
318 #if HAS_UPMU
319
320 #if KERNEL
321
322 static void
perfmon_upmu_apply_attrs(struct perfmon_upmu_regs * upmu_regs,unsigned int __unused cluster_id)323 perfmon_upmu_apply_attrs(struct perfmon_upmu_regs *upmu_regs,
324 unsigned int __unused cluster_id)
325 {
326 #if KERNEL
327 if (!PE_i_can_has_debugger(NULL)) {
328 return;
329 }
330
331 for (size_t i = 0; i < upmu_attr_count; i++) {
332 uint64_t attr_value = upmu_regs->pur_attr_regs[i];
333
334 #if UPMU_PER_CLUSTER
335 uint64_t *attr_addr =
336 (uint64_t *)(cpm_impl[cluster_id] + upmu_attr_cpm_offs[i]);
337 *attr_addr = attr_value;
338 #else // UPMU_PER_CLUSTER
339 enum { PIOREG_COUNTER_BASE = __COUNTER__ };
340 #define PIOREG_COUNTER (__COUNTER__ - PIOREG_COUNTER_BASE - 1)
341
342 switch (i) {
343 #undef PIOREG
344 #define PIOREG(N, O) \
345 case PIOREG_COUNTER: \
346 __builtin_arm_wsr64(STR(REG_EL1(N)), attr_value); \
347 break;
348
349 UPMU_ATTR_REGS
350
351 default:
352 panic("perfmon: unexpected UPMU attribute ID: %zu", i);
353 break;
354 }
355 #endif // !UPMU_PER_CLUSTER
356 }
357 #else // KERNEL
358 #pragma unused(cluster_id, upmu_regs)
359 #endif // KERNEL
360 }
361
362 static void
perfmon_upmu_configure(struct perfmon_upmu_regs * upmu_regs)363 perfmon_upmu_configure(struct perfmon_upmu_regs *upmu_regs)
364 {
365 #if !UPMU_PER_CLUSTER
366 __builtin_arm_wsr64("UPMCR0_EL1", upmu_regs->pur_upmcr0);
367 __builtin_arm_wsr64("UPMESR0_EL1", upmu_regs->pur_upmesr[0]);
368 #if UPMU_PMC_COUNT > 8
369 __builtin_arm_wsr64("UPMESR1_EL1", upmu_regs->pur_upmesr[1]);
370 #endif // UPMU_PMC_COUNT > 8
371 #endif // !UPMU_PER_CLUSTER
372
373 for (unsigned int cluster = 0; cluster < cluster_count; cluster++) {
374 #if UPMU_PER_CLUSTER
375 #undef PIOREG
376 #define PIOREG(N, O) ((uint64_t *)(cpm_impl[cluster] + O))
377 *UPMCR0 = upmu_regs->pur_upmcr0;
378 *UPMESR0 = upmu_regs->pur_upmesr[0];
379 #if UPMU_PMC_COUNT > 8
380 *UPMESR1 = upmu_regs->pur_upmesr[1];
381 #endif // UPMU_PMC_COUNT > 8
382 #endif // UPMU_PER_CLUSTER
383 perfmon_upmu_apply_attrs(&upmu_reg_state, cluster);
384 }
385 }
386
387 #endif // KERNEL
388
389 #endif // HAS_UPMU
390
391 static void
perfmon_set_attrs(uint64_t * attr_regs,size_t __unused attr_regs_len,perfmon_config_t config)392 perfmon_set_attrs(uint64_t *attr_regs, size_t __unused attr_regs_len,
393 perfmon_config_t config)
394 {
395 for (size_t attr = 0; attr < config->pc_spec.ps_attr_count; attr++) {
396 unsigned short id = config->pc_attr_ids[attr];
397 assert(id < attr_regs_len);
398 attr_regs[id] = config->pc_spec.ps_attrs[attr].pa_value;
399 }
400 }
401
402 int
perfmon_machine_configure(enum perfmon_kind kind,const perfmon_config_t config)403 perfmon_machine_configure(enum perfmon_kind kind, const perfmon_config_t config)
404 {
405 if (kind == perfmon_cpmu) {
406 perfmon_cpmu_regs_init(&cpmu_reg_state);
407
408 for (unsigned int pmc = cpmu_fixed_count; pmc < CPMU_PMC_COUNT; pmc++) {
409 if ((config->pc_counters_used & (1ULL << pmc)) == 0) {
410 continue;
411 }
412 struct perfmon_counter *counter = &config->pc_counters[pmc];
413 uint64_t event = counter->pc_number &
414 #if CPMU_16BIT_EVENTS
415 0xffff;
416 #else // CPMU_16BIT_EVENTS
417 0xff;
418 #endif // !CPMU_16BIT_EVENTS
419
420 unsigned int enable_offset = pmc > 7 ? 32 : 0;
421 cpmu_reg_state.pcr_pmcr0 |= 1ULL << (enable_offset + pmc);
422
423 unsigned int pmi_offset = pmc > 7 ? 44 - 7 : 12;
424 cpmu_reg_state.pcr_pmcr0 |= 1ULL << (pmi_offset + pmc);
425
426 unsigned int pmesr_index = pmc > 5 ? 1 : 0;
427 unsigned int pmesr_shift = pmc > 5 ? pmc - 6 :
428 pmc - cpmu_fixed_count;
429 // 8-bits for each event.
430 #if CPMU_16BIT_EVENTS
431 pmesr_shift *= 16;
432 #else // CPMU_16BIT_EVENTS
433 pmesr_shift *= 8;
434 #endif // !CPMU_16BIT_EVENTS
435 uint64_t pmesr_bits = event << pmesr_shift;
436 cpmu_reg_state.pcr_pmesr[pmesr_index] |= pmesr_bits;
437 }
438 perfmon_set_attrs(cpmu_reg_state.pcr_attr_regs,
439 ARRAYLEN(cpmu_reg_state.pcr_attr_regs), config);
440
441 #if KERNEL
442 boolean_t include_self = TRUE;
443 cpu_broadcast_xcall_simple(include_self, perfmon_cpmu_configure_xcall,
444 &cpmu_reg_state);
445 #endif // KERNEL
446 #if HAS_UPMU
447 } else if (kind == perfmon_upmu) {
448 perfmon_upmu_regs_init(&upmu_reg_state);
449
450 for (unsigned short pmc = 0; pmc < UPMU_PMC_COUNT; pmc++) {
451 struct perfmon_counter *counter = &config->pc_counters[pmc];
452 if ((config->pc_counters_used & (1ULL << pmc)) == 0) {
453 continue;
454 }
455
456 upmu_reg_state.pur_upmcr0 |= 1 << pmc;
457
458 uint64_t event = counter->pc_number & 0xff;
459 unsigned int upmesr_index = pmc >= 8 ? 1 : 0;
460 unsigned int upmesr_shift = pmc >= 8 ? pmc - 8 : pmc;
461 uint64_t upmesr_bits = event << upmesr_shift;
462 upmu_reg_state.pur_upmesr[upmesr_index] |= upmesr_bits;
463 }
464 perfmon_set_attrs(upmu_reg_state.pur_attr_regs,
465 ARRAYLEN(upmu_reg_state.pur_attr_regs), config);
466
467 #if KERNEL
468 perfmon_upmu_configure(&upmu_reg_state);
469 #endif // KERNEL
470 #endif // HAS_UPMU
471 } else {
472 panic("perfmon: unexpected kind: %d", kind);
473 }
474 return 0;
475 }
476
477 void
perfmon_machine_reset(enum perfmon_kind kind)478 perfmon_machine_reset(enum perfmon_kind kind)
479 {
480 if (kind == perfmon_cpmu) {
481 perfmon_cpmu_regs_init(&cpmu_reg_state);
482 #if KERNEL
483 boolean_t include_self = TRUE;
484 cpu_broadcast_xcall_simple(include_self, perfmon_cpmu_configure_xcall,
485 &cpmu_reg_state);
486 #endif // KERNEL
487 #if HAS_UPMU
488 } else if (kind == perfmon_upmu) {
489 #if KERNEL
490 perfmon_upmu_regs_init(&upmu_reg_state);
491 perfmon_upmu_configure(&upmu_reg_state);
492 #endif // KERNEL
493 #endif // HAS_PMU
494 } else {
495 panic("perfmon: unexpected kind: %d", kind);
496 }
497 }
498