1 // Copyright (c) 2021 Apple Inc. All rights reserved.
2 //
3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 //
5 // This file contains Original Code and/or Modifications of Original Code
6 // as defined in and that are subject to the Apple Public Source License
7 // Version 2.0 (the 'License'). You may not use this file except in
8 // compliance with the License. The rights granted to you under the License
9 // may not be used to create, or enable the creation or redistribution of,
10 // unlawful or unlicensed copies of an Apple operating system, or to
11 // circumvent, violate, or enable the circumvention or violation of, any
12 // terms of an Apple operating system software license agreement.
13 //
14 // Please obtain a copy of the License at
15 // http://www.opensource.apple.com/apsl/ and read it before using this file.
16 //
17 // The Original Code and all software distributed under the License are
18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 // Please see the License for the specific language governing rights and
23 // limitations under the License.
24 //
25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26
27 #include <kern/assert.h>
28 #include <kern/kalloc.h>
29 #include <pexpert/pexpert.h>
30 #include <sys/kdebug.h>
31 #include <sys/_types/_size_t.h>
32 #include <kern/monotonic.h>
33 #include <kern/percpu.h>
34 #include <kern/processor.h>
35 #include <kern/recount.h>
36 #include <kern/startup.h>
37 #include <kern/task.h>
38 #include <kern/thread.h>
39 #include <kern/work_interval.h>
40 #include <mach/mach_time.h>
41 #include <mach/mach_types.h>
42 #include <machine/config.h>
43 #include <machine/machine_routines.h>
44 #include <os/atomic_private.h>
45 #include <stdbool.h>
46 #include <stdint.h>
47
48 // Recount's machine-independent implementation and interfaces for the kernel
49 // at-large.
50
51 #define PRECISE_USER_KERNEL_PMCS PRECISE_USER_KERNEL_TIME
52
53 // On non-release kernels, allow precise PMC (instructions, cycles) updates to
54 // be disabled for performance characterization.
55 #if PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)
56 #define PRECISE_USER_KERNEL_PMC_TUNABLE 1
57
58 TUNABLE(bool, no_precise_pmcs, "-no-precise-pmcs", false);
#endif // PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)
60
61 #if !PRECISE_USER_KERNEL_TIME
62 #define PRECISE_TIME_FATAL_FUNC OS_NORETURN
63 #define PRECISE_TIME_ONLY_FUNC OS_UNUSED
64 #else // !PRECISE_USER_KERNEL_TIME
65 #define PRECISE_TIME_FATAL_FUNC
66 #define PRECISE_TIME_ONLY_FUNC
67 #endif // PRECISE_USER_KERNEL_TIME
68
69 #if !PRECISE_USER_KERNEL_PMCS
70 #define PRECISE_PMCS_ONLY_FUNC OS_UNUSED
#else // !PRECISE_USER_KERNEL_PMCS
72 #define PRECISE_PMCS_ONLY_FUNC
73 #endif // PRECISE_USER_KERNEL_PMCS
74
75 #if HAS_CPU_DPE_COUNTER
76 // Only certain platforms have DPE counters.
77 #define RECOUNT_ENERGY CONFIG_PERVASIVE_ENERGY
78 #else // HAS_CPU_DPE_COUNTER
79 #define RECOUNT_ENERGY 0
80 #endif // !HAS_CPU_DPE_COUNTER
81
82 // Topography helpers.
83 size_t recount_topo_count(recount_topo_t topo);
84 static bool recount_topo_matches_cpu_kind(recount_topo_t topo,
85 recount_cpu_kind_t kind, size_t idx);
86 static size_t recount_topo_index(recount_topo_t topo, processor_t processor);
87 static size_t recount_convert_topo_index(recount_topo_t from, recount_topo_t to,
88 size_t i);
89
// Prevent counter updates before the system is ready.  `recount_startup` sets
// this to true as its final step.
__security_const_late bool _recount_started = false;

// Lookup table that matches CPU numbers (indices) to their track index.
// `recount_startup` fills it in on `__AMP__` builds; entries are otherwise
// left at their zero initializer.
__security_const_late uint8_t _topo_cpu_kinds[MAX_CPUS] = { 0 };
95
96 // Allocation metadata and zones.
97
98 // Keep static strings for `zone_create`.
99 static const char *_usage_zone_names[RCT_TOPO_COUNT] = {
100 [RCT_TOPO_CPU] = "recount_usage_cpu",
101 [RCT_TOPO_CPU_KIND] = "recount_usage_cpu_kind",
102 };
103
104 static const char *_track_zone_names[RCT_TOPO_COUNT] = {
105 [RCT_TOPO_CPU] = "recount_track_cpu",
106 [RCT_TOPO_CPU_KIND] = "recount_track_cpu_kind",
107 };
108
109 static const bool _topo_allocates[RCT_TOPO_COUNT] = {
110 [RCT_TOPO_SYSTEM] = false,
111 [RCT_TOPO_CPU] = true,
112 [RCT_TOPO_CPU_KIND] = true,
113 };
114
115 // Fixed-size zones for allocations.
116 __security_const_late zone_t _recount_usage_zones[RCT_TOPO_COUNT] = { };
117 __security_const_late zone_t _recount_track_zones[RCT_TOPO_COUNT] = { };
118
119 __startup_func
120 static void
recount_startup(void)121 recount_startup(void)
122 {
123 #if __AMP__
124 unsigned int cpu_count = ml_get_cpu_count();
125 const ml_topology_info_t *topo_info = ml_get_topology_info();
126 for (unsigned int i = 0; i < cpu_count; i++) {
127 cluster_type_t type = topo_info->cpus[i].cluster_type;
128 uint8_t cluster_i = (type == CLUSTER_TYPE_P) ? RCT_CPU_PERFORMANCE :
129 RCT_CPU_EFFICIENCY;
130 _topo_cpu_kinds[i] = cluster_i;
131 }
132 #endif // __AMP__
133
134 for (unsigned int i = 0; i < RCT_TOPO_COUNT; i++) {
135 if (_topo_allocates[i]) {
136 const char *usage_name = _usage_zone_names[i];
137 assert(usage_name != NULL);
138 _recount_usage_zones[i] = zone_create(usage_name,
139 sizeof(struct recount_usage) * recount_topo_count(i),
140 0);
141
142 const char *track_name = _track_zone_names[i];
143 assert(track_name != NULL);
144 _recount_track_zones[i] = zone_create(track_name,
145 sizeof(struct recount_track) * recount_topo_count(i),
146 0);
147 }
148 }
149
150 _recount_started = true;
151 }
152
153 STARTUP(PERCPU, STARTUP_RANK_LAST, recount_startup);
154
#pragma mark - tracks

// Each plan names the topology its tracks (or usages) are indexed by; the
// code below reads it back through `rpl_topo`.

// Per-CPU plans.
RECOUNT_PLAN_DEFINE(recount_work_interval_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_plan, RCT_TOPO_CPU);

// Per-CPU-kind plans.
RECOUNT_PLAN_DEFINE(recount_thread_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_task_terminated_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_coalition_plan, RCT_TOPO_CPU_KIND);

// System-wide plan.
RECOUNT_PLAN_DEFINE(recount_processor_plan, RCT_TOPO_SYSTEM);
163
164 OS_ALWAYS_INLINE
165 static inline uint64_t
recount_timestamp_speculative(void)166 recount_timestamp_speculative(void)
167 {
168 #if __arm__ || __arm64__
169 return ml_get_speculative_timebase();
170 #else // __arm__ || __arm64__
171 return mach_absolute_time();
172 #endif // !__arm__ && !__arm64__
173 }
174
175 OS_ALWAYS_INLINE
176 void
recount_snapshot_speculative(struct recount_snap * snap)177 recount_snapshot_speculative(struct recount_snap *snap)
178 {
179 snap->rsn_time_mach = recount_timestamp_speculative();
180 #if CONFIG_PERVASIVE_CPI
181 mt_cur_cpu_cycles_instrs_speculative(&snap->rsn_cycles, &snap->rsn_insns);
182 #endif // CONFIG_PERVASIVE_CPI
183 }
184
// Fill `snap` with the current time and counter values.
//
// On ARM an instruction synchronization barrier is issued before sampling;
// the sampling itself is delegated to `recount_snapshot_speculative`.
void
recount_snapshot(struct recount_snap *snap)
{
#if __arm__ || __arm64__
	__builtin_arm_isb(ISB_SY);
#endif // __arm__ || __arm64__

	recount_snapshot_speculative(snap);
}
193
194 static struct recount_snap *
recount_get_snap(processor_t processor)195 recount_get_snap(processor_t processor)
196 {
197 return &processor->pr_recount.rpr_snap;
198 }
199
200 static struct recount_snap *
recount_get_interrupt_snap(processor_t processor)201 recount_get_interrupt_snap(processor_t processor)
202 {
203 return &processor->pr_recount.rpr_interrupt_snap;
204 }
205
206 // A simple sequence lock implementation.
207
208 static void
_seqlock_shared_lock_slowpath(const uint32_t * lck,uint32_t gen)209 _seqlock_shared_lock_slowpath(const uint32_t *lck, uint32_t gen)
210 {
211 disable_preemption();
212 do {
213 gen = hw_wait_while_equals32((uint32_t *)(uintptr_t)lck, gen);
214 } while (__improbable((gen & 1) != 0));
215 os_atomic_thread_fence(acquire);
216 enable_preemption();
217 }
218
219 static uintptr_t
_seqlock_shared_lock(const uint32_t * lck)220 _seqlock_shared_lock(const uint32_t *lck)
221 {
222 uint32_t gen = os_atomic_load(lck, acquire);
223 if (__improbable((gen & 1) != 0)) {
224 _seqlock_shared_lock_slowpath(lck, gen);
225 }
226 return gen;
227 }
228
229 static bool
_seqlock_shared_try_unlock(const uint32_t * lck,uintptr_t on_enter)230 _seqlock_shared_try_unlock(const uint32_t *lck, uintptr_t on_enter)
231 {
232 return os_atomic_load(lck, acquire) == on_enter;
233 }
234
235 static void
_seqlock_excl_lock_relaxed(uint32_t * lck)236 _seqlock_excl_lock_relaxed(uint32_t *lck)
237 {
238 __assert_only uintptr_t new = os_atomic_inc(lck, relaxed);
239 assert3u((new & 1), ==, 1);
240 }
241
242 static void
_seqlock_excl_commit(void)243 _seqlock_excl_commit(void)
244 {
245 os_atomic_thread_fence(release);
246 }
247
248 static void
_seqlock_excl_unlock_relaxed(uint32_t * lck)249 _seqlock_excl_unlock_relaxed(uint32_t *lck)
250 {
251 __assert_only uint32_t new = os_atomic_inc(lck, relaxed);
252 assert3u((new & 1), ==, 0);
253 }
254
255 static struct recount_track *
recount_update_start(struct recount_track * tracks,recount_topo_t topo,processor_t processor)256 recount_update_start(struct recount_track *tracks, recount_topo_t topo,
257 processor_t processor)
258 {
259 struct recount_track *track = &tracks[recount_topo_index(topo, processor)];
260 _seqlock_excl_lock_relaxed(&track->rt_sync);
261 return track;
262 }
263
#if RECOUNT_ENERGY

// Select the track in `tracks` that `processor` maps to under `topo`, without
// taking the track's sequence lock.
static struct recount_track *
recount_update_single_start(struct recount_track *tracks, recount_topo_t topo,
    processor_t processor)
{
	const size_t track_i = recount_topo_index(topo, processor);
	return &tracks[track_i];
}

#endif // RECOUNT_ENERGY
274
// Commit point for track updates: forwards to the sequence lock's release
// fence.
static void
recount_update_commit(void)
{
	_seqlock_excl_commit();
}
280
281 static void
recount_update_end(struct recount_track * track)282 recount_update_end(struct recount_track *track)
283 {
284 _seqlock_excl_unlock_relaxed(&track->rt_sync);
285 }
286
287 static const struct recount_usage *
recount_read_start(const struct recount_track * track,uintptr_t * on_enter)288 recount_read_start(const struct recount_track *track, uintptr_t *on_enter)
289 {
290 const struct recount_usage *stats = &track->rt_usage;
291 *on_enter = _seqlock_shared_lock(&track->rt_sync);
292 return stats;
293 }
294
295 static bool
recount_try_read_end(const struct recount_track * track,uintptr_t on_enter)296 recount_try_read_end(const struct recount_track *track, uintptr_t on_enter)
297 {
298 return _seqlock_shared_try_unlock(&track->rt_sync, on_enter);
299 }
300
301 static void
recount_read_track(struct recount_usage * stats,const struct recount_track * track)302 recount_read_track(struct recount_usage *stats,
303 const struct recount_track *track)
304 {
305 uintptr_t on_enter = 0;
306 do {
307 const struct recount_usage *vol_stats =
308 recount_read_start(track, &on_enter);
309 *stats = *vol_stats;
310 } while (!recount_try_read_end(track, on_enter));
311 }
312
313 static void
recount_metrics_add(struct recount_metrics * sum,const struct recount_metrics * to_add)314 recount_metrics_add(struct recount_metrics *sum, const struct recount_metrics *to_add)
315 {
316 sum->rm_time_mach += to_add->rm_time_mach;
317 #if CONFIG_PERVASIVE_CPI
318 sum->rm_instructions += to_add->rm_instructions;
319 sum->rm_cycles += to_add->rm_cycles;
320 #endif // CONFIG_PERVASIVE_CPI
321 }
322
323 static void
recount_usage_add(struct recount_usage * sum,const struct recount_usage * to_add)324 recount_usage_add(struct recount_usage *sum, const struct recount_usage *to_add)
325 {
326 for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
327 recount_metrics_add(&sum->ru_metrics[i], &to_add->ru_metrics[i]);
328 }
329 #if CONFIG_PERVASIVE_ENERGY
330 sum->ru_energy_nj += to_add->ru_energy_nj;
331 #endif // CONFIG_PERVASIVE_CPI
332 }
333
334 OS_ALWAYS_INLINE
335 static inline void
recount_usage_add_snap(struct recount_usage * usage,recount_level_t level,struct recount_snap * snap)336 recount_usage_add_snap(struct recount_usage *usage, recount_level_t level,
337 struct recount_snap *snap)
338 {
339 struct recount_metrics *metrics = &usage->ru_metrics[level];
340
341 metrics->rm_time_mach += snap->rsn_time_mach;
342 #if CONFIG_PERVASIVE_CPI
343 metrics->rm_cycles += snap->rsn_cycles;
344 metrics->rm_instructions += snap->rsn_insns;
345 #else // CONFIG_PERVASIVE_CPI
346 #pragma unused(usage)
347 #endif // !CONFIG_PERVASIVE_CPI
348 }
349
350 static void
recount_rollup(recount_plan_t plan,const struct recount_track * tracks,recount_topo_t to_topo,struct recount_usage * stats)351 recount_rollup(recount_plan_t plan, const struct recount_track *tracks,
352 recount_topo_t to_topo, struct recount_usage *stats)
353 {
354 recount_topo_t from_topo = plan->rpl_topo;
355 size_t topo_count = recount_topo_count(from_topo);
356 struct recount_usage tmp = { 0 };
357 for (size_t i = 0; i < topo_count; i++) {
358 recount_read_track(&tmp, &tracks[i]);
359 size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
360 recount_usage_add(&stats[to_i], &tmp);
361 }
362 }
363
364 // This function must be run when counters cannot increment for the track, like from the current thread.
365 static void
recount_rollup_unsafe(recount_plan_t plan,struct recount_track * tracks,recount_topo_t to_topo,struct recount_usage * stats)366 recount_rollup_unsafe(recount_plan_t plan, struct recount_track *tracks,
367 recount_topo_t to_topo, struct recount_usage *stats)
368 {
369 recount_topo_t from_topo = plan->rpl_topo;
370 size_t topo_count = recount_topo_count(from_topo);
371 for (size_t i = 0; i < topo_count; i++) {
372 size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
373 recount_usage_add(&stats[to_i], &tracks[i].rt_usage);
374 }
375 }
376
377 void
recount_sum(recount_plan_t plan,const struct recount_track * tracks,struct recount_usage * sum)378 recount_sum(recount_plan_t plan, const struct recount_track *tracks,
379 struct recount_usage *sum)
380 {
381 recount_rollup(plan, tracks, RCT_TOPO_SYSTEM, sum);
382 }
383
384 void
recount_sum_unsafe(recount_plan_t plan,const struct recount_track * tracks,struct recount_usage * sum)385 recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
386 struct recount_usage *sum)
387 {
388 recount_topo_t topo = plan->rpl_topo;
389 size_t topo_count = recount_topo_count(topo);
390 for (size_t i = 0; i < topo_count; i++) {
391 recount_usage_add(sum, &tracks[i].rt_usage);
392 }
393 }
394
395 void
recount_sum_and_isolate_cpu_kind(recount_plan_t plan,struct recount_track * tracks,recount_cpu_kind_t kind,struct recount_usage * sum,struct recount_usage * only_kind)396 recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
397 struct recount_track *tracks, recount_cpu_kind_t kind,
398 struct recount_usage *sum, struct recount_usage *only_kind)
399 {
400 size_t topo_count = recount_topo_count(plan->rpl_topo);
401 struct recount_usage tmp = { 0 };
402 for (size_t i = 0; i < topo_count; i++) {
403 recount_read_track(&tmp, &tracks[i]);
404 recount_usage_add(sum, &tmp);
405 if (recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
406 recount_usage_add(only_kind, &tmp);
407 }
408 }
409 }
410
411 static void
recount_sum_usage(recount_plan_t plan,const struct recount_usage * usages,struct recount_usage * sum)412 recount_sum_usage(recount_plan_t plan, const struct recount_usage *usages,
413 struct recount_usage *sum)
414 {
415 const size_t topo_count = recount_topo_count(plan->rpl_topo);
416 for (size_t i = 0; i < topo_count; i++) {
417 recount_usage_add(sum, &usages[i]);
418 }
419 }
420
421 void
recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,struct recount_usage * usage,recount_cpu_kind_t kind,struct recount_usage * sum,struct recount_usage * only_kind)422 recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
423 struct recount_usage *usage, recount_cpu_kind_t kind,
424 struct recount_usage *sum, struct recount_usage *only_kind)
425 {
426 const size_t topo_count = recount_topo_count(plan->rpl_topo);
427 for (size_t i = 0; i < topo_count; i++) {
428 recount_usage_add(sum, &usage[i]);
429 if (only_kind && recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
430 recount_usage_add(only_kind, &usage[i]);
431 }
432 }
433 }
434
435 void
recount_sum_perf_levels(recount_plan_t plan,struct recount_track * tracks,struct recount_usage * sums)436 recount_sum_perf_levels(recount_plan_t plan, struct recount_track *tracks,
437 struct recount_usage *sums)
438 {
439 recount_rollup(plan, tracks, RCT_TOPO_CPU_KIND, sums);
440 }
441
442 struct recount_times_mach
recount_usage_times_mach(struct recount_usage * usage)443 recount_usage_times_mach(struct recount_usage *usage)
444 {
445 return (struct recount_times_mach){
446 .rtm_user = usage->ru_metrics[RCT_LVL_USER].rm_time_mach,
447 .rtm_system = recount_usage_system_time_mach(usage),
448 };
449 }
450
451 uint64_t
recount_usage_system_time_mach(struct recount_usage * usage)452 recount_usage_system_time_mach(struct recount_usage *usage)
453 {
454 uint64_t system_time = usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach;
455 #if RECOUNT_SECURE_METRICS
456 system_time += usage->ru_metrics[RCT_LVL_SECURE].rm_time_mach;
457 #endif // RECOUNT_SECURE_METRICS
458 return system_time;
459 }
460
461 uint64_t
recount_usage_time_mach(struct recount_usage * usage)462 recount_usage_time_mach(struct recount_usage *usage)
463 {
464 uint64_t time = 0;
465 for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
466 time += usage->ru_metrics[i].rm_time_mach;
467 }
468 return time;
469 }
470
// Total cycles recorded in `usage` across all levels, or 0 when
// CONFIG_CPU_COUNTERS is disabled.
//
// NOTE(review): the rest of this file guards `rm_cycles` with
// CONFIG_PERVASIVE_CPI rather than CONFIG_CPU_COUNTERS -- confirm the two
// options are always enabled together.
uint64_t
recount_usage_cycles(struct recount_usage *usage)
{
#if CONFIG_CPU_COUNTERS
	const struct recount_metrics *metrics = usage->ru_metrics;
	uint64_t total = 0;

	unsigned int lvl = 0;
	while (lvl < RCT_LVL_COUNT) {
		total += metrics[lvl].rm_cycles;
		lvl++;
	}
	return total;
#else // CONFIG_CPU_COUNTERS
#pragma unused(usage)
	return 0;
#endif // !CONFIG_CPU_COUNTERS
}
484
// Total instructions recorded in `usage` across all levels, or 0 when
// CONFIG_CPU_COUNTERS is disabled.
//
// NOTE(review): the rest of this file guards `rm_instructions` with
// CONFIG_PERVASIVE_CPI rather than CONFIG_CPU_COUNTERS -- confirm the two
// options are always enabled together.
uint64_t
recount_usage_instructions(struct recount_usage *usage)
{
#if CONFIG_CPU_COUNTERS
	const struct recount_metrics *metrics = usage->ru_metrics;
	uint64_t total = 0;

	unsigned int lvl = 0;
	while (lvl < RCT_LVL_COUNT) {
		total += metrics[lvl].rm_instructions;
		lvl++;
	}
	return total;
#else // CONFIG_CPU_COUNTERS
#pragma unused(usage)
	return 0;
#endif // !CONFIG_CPU_COUNTERS
}
498
499 // Plan-specific helpers.
500
501 void
recount_coalition_rollup_task(struct recount_coalition * co,struct recount_task * tk)502 recount_coalition_rollup_task(struct recount_coalition *co,
503 struct recount_task *tk)
504 {
505 recount_rollup(&recount_task_plan, tk->rtk_lifetime,
506 recount_coalition_plan.rpl_topo, co->rco_exited);
507 }
508
509 void
recount_task_rollup_thread(struct recount_task * tk,const struct recount_thread * th)510 recount_task_rollup_thread(struct recount_task *tk,
511 const struct recount_thread *th)
512 {
513 recount_rollup(&recount_thread_plan, th->rth_lifetime,
514 recount_task_terminated_plan.rpl_topo, tk->rtk_terminated);
515 }
516
517 #pragma mark - scheduler
518
519 // `result = lhs - rhs` for snapshots.
520 OS_ALWAYS_INLINE
521 static void
recount_snap_diff(struct recount_snap * result,const struct recount_snap * lhs,const struct recount_snap * rhs)522 recount_snap_diff(struct recount_snap *result,
523 const struct recount_snap *lhs, const struct recount_snap *rhs)
524 {
525 assert3u(lhs->rsn_time_mach, >=, rhs->rsn_time_mach);
526 result->rsn_time_mach = lhs->rsn_time_mach - rhs->rsn_time_mach;
527 #if CONFIG_PERVASIVE_CPI
528 assert3u(lhs->rsn_insns, >=, rhs->rsn_insns);
529 assert3u(lhs->rsn_cycles, >=, rhs->rsn_cycles);
530 result->rsn_cycles = lhs->rsn_cycles - rhs->rsn_cycles;
531 result->rsn_insns = lhs->rsn_insns - rhs->rsn_insns;
532 #endif // CONFIG_PERVASIVE_CPI
533 }
534
535 static void
_fix_time_precision(struct recount_usage * usage)536 _fix_time_precision(struct recount_usage *usage)
537 {
538 #if PRECISE_USER_KERNEL_TIME
539 #pragma unused(usage)
540 #else // PRECISE_USER_KERNEL_TIME
541 // Attribute all time to user, as the system is only acting "on behalf
542 // of" user processes -- a bit sketchy.
543 usage->ru_metrics[RCT_LVL_USER].rm_time_mach +=
544 recount_usage_system_time_mach(usage);
545 usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach = 0;
546 #endif // !PRECISE_USER_KERNEL_TIME
547 }
548
549 void
recount_current_thread_usage(struct recount_usage * usage)550 recount_current_thread_usage(struct recount_usage *usage)
551 {
552 assert(ml_get_interrupts_enabled() == FALSE);
553 thread_t thread = current_thread();
554 struct recount_snap snap = { 0 };
555 recount_snapshot(&snap);
556 recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
557 usage);
558 struct recount_snap *last = recount_get_snap(current_processor());
559 struct recount_snap diff = { 0 };
560 recount_snap_diff(&diff, &snap, last);
561 recount_usage_add_snap(usage, RCT_LVL_KERNEL, &diff);
562 _fix_time_precision(usage);
563 }
564
565 void
recount_current_thread_usage_perf_only(struct recount_usage * usage,struct recount_usage * usage_perf_only)566 recount_current_thread_usage_perf_only(struct recount_usage *usage,
567 struct recount_usage *usage_perf_only)
568 {
569 struct recount_usage usage_perf_levels[RCT_CPU_KIND_COUNT] = { 0 };
570 recount_current_thread_perf_level_usage(usage_perf_levels);
571 recount_sum_usage(&recount_thread_plan, usage_perf_levels, usage);
572 *usage_perf_only = usage_perf_levels[RCT_CPU_PERFORMANCE];
573 _fix_time_precision(usage);
574 _fix_time_precision(usage_perf_only);
575 }
576
577 void
recount_thread_perf_level_usage(struct thread * thread,struct recount_usage * usage_levels)578 recount_thread_perf_level_usage(struct thread *thread,
579 struct recount_usage *usage_levels)
580 {
581 recount_rollup(&recount_thread_plan, thread->th_recount.rth_lifetime,
582 RCT_TOPO_CPU_KIND, usage_levels);
583 size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
584 for (size_t i = 0; i < topo_count; i++) {
585 _fix_time_precision(&usage_levels[i]);
586 }
587 }
588
589 void
recount_current_thread_perf_level_usage(struct recount_usage * usage_levels)590 recount_current_thread_perf_level_usage(struct recount_usage *usage_levels)
591 {
592 assert(ml_get_interrupts_enabled() == FALSE);
593 processor_t processor = current_processor();
594 thread_t thread = current_thread();
595 struct recount_snap snap = { 0 };
596 recount_snapshot(&snap);
597 recount_rollup_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
598 RCT_TOPO_CPU_KIND, usage_levels);
599 struct recount_snap *last = recount_get_snap(processor);
600 struct recount_snap diff = { 0 };
601 recount_snap_diff(&diff, &snap, last);
602 size_t cur_i = recount_topo_index(RCT_TOPO_CPU_KIND, processor);
603 struct recount_usage *cur_usage = &usage_levels[cur_i];
604 recount_usage_add_snap(cur_usage, RCT_LVL_KERNEL, &diff);
605 size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
606 for (size_t i = 0; i < topo_count; i++) {
607 _fix_time_precision(&usage_levels[i]);
608 }
609 }
610
// Energy recorded across the current thread's lifetime tracks (the
// `ru_energy_nj` fields), or 0 when RECOUNT_ENERGY is disabled.
//
// With RECOUNT_ENERGY, must be called with interrupts disabled (asserted).
uint64_t
recount_current_thread_energy_nj(void)
{
#if RECOUNT_ENERGY
	assert(ml_get_interrupts_enabled() == FALSE);

	thread_t self = current_thread();
	const size_t track_count = recount_topo_count(recount_thread_plan.rpl_topo);
	uint64_t total_nj = 0;

	size_t track_i = 0;
	while (track_i < track_count) {
		total_nj += self->th_recount.rth_lifetime[track_i].rt_usage.ru_energy_nj;
		track_i++;
	}
	return total_nj;
#else // RECOUNT_ENERGY
	return 0;
#endif // !RECOUNT_ENERGY
}
627
628 static void
_times_add_usage(struct recount_times_mach * times,struct recount_usage * usage)629 _times_add_usage(struct recount_times_mach *times, struct recount_usage *usage)
630 {
631 times->rtm_user += usage->ru_metrics[RCT_LVL_USER].rm_time_mach;
632 #if PRECISE_USER_KERNEL_TIME
633 times->rtm_system += recount_usage_system_time_mach(usage);
634 #else // PRECISE_USER_KERNEL_TIME
635 times->rtm_user += recount_usage_system_time_mach(usage);
636 #endif // !PRECISE_USER_KERNEL_TIME
637 }
638
639 struct recount_times_mach
recount_thread_times(struct thread * thread)640 recount_thread_times(struct thread *thread)
641 {
642 size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
643 struct recount_times_mach times = { 0 };
644 for (size_t i = 0; i < topo_count; i++) {
645 _times_add_usage(×, &thread->th_recount.rth_lifetime[i].rt_usage);
646 }
647 return times;
648 }
649
650 uint64_t
recount_thread_time_mach(struct thread * thread)651 recount_thread_time_mach(struct thread *thread)
652 {
653 struct recount_times_mach times = recount_thread_times(thread);
654 return times.rtm_user + times.rtm_system;
655 }
656
657 static uint64_t
_time_since_last_snapshot(void)658 _time_since_last_snapshot(void)
659 {
660 struct recount_snap *last = recount_get_snap(current_processor());
661 uint64_t cur_time = mach_absolute_time();
662 return cur_time - last->rsn_time_mach;
663 }
664
665 uint64_t
recount_current_thread_time_mach(void)666 recount_current_thread_time_mach(void)
667 {
668 assert(ml_get_interrupts_enabled() == FALSE);
669 uint64_t previous_time = recount_thread_time_mach(current_thread());
670 return previous_time + _time_since_last_snapshot();
671 }
672
673 struct recount_times_mach
recount_current_thread_times(void)674 recount_current_thread_times(void)
675 {
676 assert(ml_get_interrupts_enabled() == FALSE);
677 struct recount_times_mach times = recount_thread_times(
678 current_thread());
679 #if PRECISE_USER_KERNEL_TIME
680 // This code is executing in the kernel, so the time since the last snapshot
681 // (with precise user/kernel time) is since entering the kernel.
682 times.rtm_system += _time_since_last_snapshot();
683 #else // PRECISE_USER_KERNEL_TIME
684 times.rtm_user += _time_since_last_snapshot();
685 #endif // !PRECISE_USER_KERNEL_TIME
686 return times;
687 }
688
689 void
recount_thread_usage(thread_t thread,struct recount_usage * usage)690 recount_thread_usage(thread_t thread, struct recount_usage *usage)
691 {
692 recount_sum(&recount_thread_plan, thread->th_recount.rth_lifetime, usage);
693 _fix_time_precision(usage);
694 }
695
696 uint64_t
recount_current_thread_interrupt_time_mach(void)697 recount_current_thread_interrupt_time_mach(void)
698 {
699 thread_t thread = current_thread();
700 return thread->th_recount.rth_interrupt_time_mach;
701 }
702
703 void
recount_work_interval_usage(struct work_interval * work_interval,struct recount_usage * usage)704 recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage)
705 {
706 recount_sum(&recount_work_interval_plan, work_interval_get_recount_tracks(work_interval), usage);
707 _fix_time_precision(usage);
708 }
709
710 struct recount_times_mach
recount_work_interval_times(struct work_interval * work_interval)711 recount_work_interval_times(struct work_interval *work_interval)
712 {
713 size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
714 struct recount_times_mach times = { 0 };
715 for (size_t i = 0; i < topo_count; i++) {
716 _times_add_usage(×, &work_interval_get_recount_tracks(work_interval)[i].rt_usage);
717 }
718 return times;
719 }
720
// Energy recorded across `work_interval`'s tracks (the `ru_energy_nj`
// fields), or 0 when RECOUNT_ENERGY is disabled.
uint64_t
recount_work_interval_energy_nj(struct work_interval *work_interval)
{
#if RECOUNT_ENERGY
	const size_t track_count =
	    recount_topo_count(recount_work_interval_plan.rpl_topo);
	uint64_t total_nj = 0;

	size_t track_i = 0;
	while (track_i < track_count) {
		total_nj += work_interval_get_recount_tracks(
			work_interval)[track_i].rt_usage.ru_energy_nj;
		track_i++;
	}
	return total_nj;
#else // RECOUNT_ENERGY
#pragma unused(work_interval)
	return 0;
#endif // !RECOUNT_ENERGY
}
736
737 void
recount_current_task_usage(struct recount_usage * usage)738 recount_current_task_usage(struct recount_usage *usage)
739 {
740 task_t task = current_task();
741 struct recount_track *tracks = task->tk_recount.rtk_lifetime;
742 recount_sum(&recount_task_plan, tracks, usage);
743 _fix_time_precision(usage);
744 }
745
746 void
recount_current_task_usage_perf_only(struct recount_usage * usage,struct recount_usage * usage_perf_only)747 recount_current_task_usage_perf_only(struct recount_usage *usage,
748 struct recount_usage *usage_perf_only)
749 {
750 task_t task = current_task();
751 struct recount_track *tracks = task->tk_recount.rtk_lifetime;
752 recount_sum_and_isolate_cpu_kind(&recount_task_plan,
753 tracks, RCT_CPU_PERFORMANCE, usage, usage_perf_only);
754 _fix_time_precision(usage);
755 _fix_time_precision(usage_perf_only);
756 }
757
758 void
recount_task_times_perf_only(struct task * task,struct recount_times_mach * sum,struct recount_times_mach * sum_perf_only)759 recount_task_times_perf_only(struct task *task,
760 struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only)
761 {
762 const recount_topo_t topo = recount_task_plan.rpl_topo;
763 const size_t topo_count = recount_topo_count(topo);
764 struct recount_track *tracks = task->tk_recount.rtk_lifetime;
765 for (size_t i = 0; i < topo_count; i++) {
766 struct recount_usage *usage = &tracks[i].rt_usage;
767 _times_add_usage(sum, usage);
768 if (recount_topo_matches_cpu_kind(topo, RCT_CPU_PERFORMANCE, i)) {
769 _times_add_usage(sum_perf_only, usage);
770 }
771 }
772 }
773
774 void
recount_task_terminated_usage(task_t task,struct recount_usage * usage)775 recount_task_terminated_usage(task_t task, struct recount_usage *usage)
776 {
777 recount_sum_usage(&recount_task_terminated_plan,
778 task->tk_recount.rtk_terminated, usage);
779 _fix_time_precision(usage);
780 }
781
782 struct recount_times_mach
recount_task_terminated_times(struct task * task)783 recount_task_terminated_times(struct task *task)
784 {
785 size_t topo_count = recount_topo_count(recount_task_terminated_plan.rpl_topo);
786 struct recount_times_mach times = { 0 };
787 for (size_t i = 0; i < topo_count; i++) {
788 _times_add_usage(×, &task->tk_recount.rtk_terminated[i]);
789 }
790 return times;
791 }
792
793 void
recount_task_terminated_usage_perf_only(task_t task,struct recount_usage * usage,struct recount_usage * perf_only)794 recount_task_terminated_usage_perf_only(task_t task,
795 struct recount_usage *usage, struct recount_usage *perf_only)
796 {
797 recount_sum_usage_and_isolate_cpu_kind(&recount_task_terminated_plan,
798 task->tk_recount.rtk_terminated, RCT_CPU_PERFORMANCE, usage, perf_only);
799 _fix_time_precision(usage);
800 _fix_time_precision(perf_only);
801 }
802
803 void
recount_task_usage_perf_only(task_t task,struct recount_usage * sum,struct recount_usage * sum_perf_only)804 recount_task_usage_perf_only(task_t task, struct recount_usage *sum,
805 struct recount_usage *sum_perf_only)
806 {
807 recount_sum_and_isolate_cpu_kind(&recount_task_plan,
808 task->tk_recount.rtk_lifetime, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
809 _fix_time_precision(sum);
810 _fix_time_precision(sum_perf_only);
811 }
812
813 void
recount_task_usage(task_t task,struct recount_usage * usage)814 recount_task_usage(task_t task, struct recount_usage *usage)
815 {
816 recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, usage);
817 _fix_time_precision(usage);
818 }
819
820 struct recount_times_mach
recount_task_times(struct task * task)821 recount_task_times(struct task *task)
822 {
823 size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
824 struct recount_times_mach times = { 0 };
825 for (size_t i = 0; i < topo_count; i++) {
826 _times_add_usage(×, &task->tk_recount.rtk_lifetime[i].rt_usage);
827 }
828 return times;
829 }
830
// Energy recorded across `task`'s lifetime tracks (the `ru_energy_nj`
// fields), or 0 when RECOUNT_ENERGY is disabled.
uint64_t
recount_task_energy_nj(struct task *task)
{
#if RECOUNT_ENERGY
	const size_t track_count = recount_topo_count(recount_task_plan.rpl_topo);
	uint64_t total_nj = 0;

	size_t track_i = 0;
	while (track_i < track_count) {
		total_nj += task->tk_recount.rtk_lifetime[track_i].rt_usage.ru_energy_nj;
		track_i++;
	}
	return total_nj;
#else // RECOUNT_ENERGY
#pragma unused(task)
	return 0;
#endif // !RECOUNT_ENERGY
}
846
847 void
recount_coalition_usage_perf_only(struct recount_coalition * coal,struct recount_usage * sum,struct recount_usage * sum_perf_only)848 recount_coalition_usage_perf_only(struct recount_coalition *coal,
849 struct recount_usage *sum, struct recount_usage *sum_perf_only)
850 {
851 recount_sum_usage_and_isolate_cpu_kind(&recount_coalition_plan,
852 coal->rco_exited, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
853 _fix_time_precision(sum);
854 _fix_time_precision(sum_perf_only);
855 }
856
857 OS_ALWAYS_INLINE
858 static void
recount_absorb_snap(struct recount_snap * to_add,thread_t thread,task_t task,processor_t processor,recount_level_t level)859 recount_absorb_snap(struct recount_snap *to_add, thread_t thread, task_t task,
860 processor_t processor, recount_level_t level)
861 {
862 // Idle threads do not attribute their usage back to the task or processor,
863 // as the time is not spent "running."
864 //
865 // The processor-level metrics include idle time, instead, as the idle time
866 // needs to be read as up-to-date from `recount_processor_usage`.
867
868 const bool was_idle = (thread->options & TH_OPT_IDLE_THREAD) != 0;
869
870 struct recount_track *wi_tracks_array = NULL;
871 if (!was_idle) {
872 wi_tracks_array = work_interval_get_recount_tracks(
873 thread->th_work_interval);
874 }
875 bool absorb_work_interval = wi_tracks_array != NULL;
876
877 struct recount_track *th_track = recount_update_start(
878 thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
879 processor);
880 struct recount_track *wi_track = NULL;
881 if (absorb_work_interval) {
882 wi_track = recount_update_start(wi_tracks_array,
883 recount_work_interval_plan.rpl_topo, processor);
884 }
885 struct recount_track *tk_track = was_idle ? NULL : recount_update_start(
886 task->tk_recount.rtk_lifetime, recount_task_plan.rpl_topo, processor);
887 struct recount_track *pr_track = was_idle ? NULL : recount_update_start(
888 &processor->pr_recount.rpr_active, recount_processor_plan.rpl_topo,
889 processor);
890 recount_update_commit();
891
892 recount_usage_add_snap(&th_track->rt_usage, level, to_add);
893 if (!was_idle) {
894 if (absorb_work_interval) {
895 recount_usage_add_snap(&wi_track->rt_usage, level, to_add);
896 }
897 recount_usage_add_snap(&tk_track->rt_usage, level, to_add);
898 recount_usage_add_snap(&pr_track->rt_usage, level, to_add);
899 }
900
901 recount_update_commit();
902 recount_update_end(th_track);
903 if (!was_idle) {
904 if (absorb_work_interval) {
905 recount_update_end(wi_track);
906 }
907 recount_update_end(tk_track);
908 recount_update_end(pr_track);
909 }
910 }
911
912 void
recount_switch_thread(struct recount_snap * cur,struct thread * off_thread,struct task * off_task)913 recount_switch_thread(struct recount_snap *cur, struct thread *off_thread,
914 struct task *off_task)
915 {
916 assert(ml_get_interrupts_enabled() == FALSE);
917
918 if (__improbable(!_recount_started)) {
919 return;
920 }
921
922 processor_t processor = current_processor();
923
924 struct recount_snap *last = recount_get_snap(processor);
925 struct recount_snap diff = { 0 };
926 recount_snap_diff(&diff, cur, last);
927 recount_absorb_snap(&diff, off_thread, off_task, processor,
928 #if RECOUNT_THREAD_BASED_LEVEL
929 off_thread->th_recount.rth_current_level
930 #else // RECOUNT_THREAD_BASED_LEVEL
931 RCT_LVL_KERNEL
932 #endif // !RECOUNT_THREAD_BASED_LEVEL
933 );
934 memcpy(last, cur, sizeof(*last));
935 }
936
// Add `energy_nj` to the `ru_energy_nj` of the track selected for the current
// processor in the thread's lifetime tracks and, unless the thread is an idle
// thread, also in its work interval tracks (when it has any), the task's
// lifetime tracks, and the processor's active track.
//
// Asserts that interrupts are disabled.  Does nothing until
// `_recount_started` is set, and compiles to nothing when `RECOUNT_ENERGY` is
// not enabled.
void
recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj)
{
#if RECOUNT_ENERGY
	assert(ml_get_interrupts_enabled() == FALSE);
	if (__improbable(!_recount_started)) {
		return;
	}

	const bool running = (off_thread->options & TH_OPT_IDLE_THREAD) == 0;
	// The work interval tracks are looked up even for idle threads, but are
	// only charged when the thread is not idle.
	struct recount_track *wi_tracks = work_interval_get_recount_tracks(
		off_thread->th_work_interval);
	const bool charge_work_interval = running && (wi_tracks != NULL);
	processor_t processor = current_processor();

	// Locate every track first: thread, work interval, task, processor.
	struct recount_track *thread_track = recount_update_single_start(
		off_thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
		processor);
	struct recount_track *interval_track = NULL;
	struct recount_track *task_track = NULL;
	struct recount_track *cpu_track = NULL;
	if (charge_work_interval) {
		interval_track = recount_update_single_start(wi_tracks,
		    recount_work_interval_plan.rpl_topo, processor);
	}
	if (running) {
		task_track = recount_update_single_start(
			off_task->tk_recount.rtk_lifetime, recount_task_plan.rpl_topo,
			processor);
		cpu_track = recount_update_single_start(
			&processor->pr_recount.rpr_active,
			recount_processor_plan.rpl_topo, processor);
	}

	// Then charge the energy to each of them.
	thread_track->rt_usage.ru_energy_nj += energy_nj;
	if (charge_work_interval) {
		interval_track->rt_usage.ru_energy_nj += energy_nj;
	}
	if (running) {
		task_track->rt_usage.ru_energy_nj += energy_nj;
		cpu_track->rt_usage.ru_energy_nj += energy_nj;
	}
#else // RECOUNT_ENERGY
#pragma unused(off_thread, off_task, energy_nj)
#endif // !RECOUNT_ENERGY
}
977
// kdebug event ID emitted by `recount_log_switch_thread`.
#define MT_KDBG_IC_CPU_CSWITCH \
	KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES, 1)

// kdebug event ID emitted by `recount_log_switch_thread_on`.
#define MT_KDBG_IC_CPU_CSWITCH_ON \
	KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES_ON_CPU, 1)
983
// Emit the `MT_KDBG_IC_CPU_CSWITCH` kdebug event carrying the instruction and
// cycle counts of `snap`, but only when that event ID is explicitly enabled.
//
// Compiles to nothing when `CONFIG_PERVASIVE_CPI` is not enabled.
void
recount_log_switch_thread(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (!kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH)) {
		return;
	}
	// In Monotonic's event hierarchy for backwards-compatibility.
	KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH, snap->rsn_insns, snap->rsn_cycles);
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // !CONFIG_PERVASIVE_CPI
}
996
// Emit the `MT_KDBG_IC_CPU_CSWITCH_ON` kdebug event carrying instruction and
// cycle counts, but only when that event ID is explicitly enabled.
//
// - Parameter snap: The snapshot to log, or NULL to log the current
//   processor's snapshot from `recount_get_snap`.
//
// Compiles to nothing when `CONFIG_PERVASIVE_CPI` is not enabled.
void
recount_log_switch_thread_on(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (!kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH_ON)) {
		return;
	}

	const struct recount_snap *source = snap;
	if (source == NULL) {
		source = recount_get_snap(current_processor());
	}
	// In Monotonic's event hierarchy for backwards-compatibility.
	KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH_ON, source->rsn_insns,
	    source->rsn_cycles);
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // !CONFIG_PERVASIVE_CPI
}
1012
1013 OS_ALWAYS_INLINE
1014 PRECISE_TIME_ONLY_FUNC
1015 static void
recount_precise_transition_diff(struct recount_snap * diff,struct recount_snap * last,struct recount_snap * cur)1016 recount_precise_transition_diff(struct recount_snap *diff,
1017 struct recount_snap *last, struct recount_snap *cur)
1018 {
1019 #if PRECISE_USER_KERNEL_PMCS
1020 #if PRECISE_USER_KERNEL_PMC_TUNABLE
1021 // The full `recount_snapshot_speculative` shouldn't get PMCs with a tunable
1022 // in this configuration.
1023 if (__improbable(no_precise_pmcs)) {
1024 cur->rsn_time_mach = recount_timestamp_speculative();
1025 diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
1026 } else
1027 #endif // PRECISE_USER_KERNEL_PMC_TUNABLE
1028 {
1029 recount_snapshot_speculative(cur);
1030 recount_snap_diff(diff, cur, last);
1031 }
1032 #else // PRECISE_USER_KERNEL_PMCS
1033 cur->rsn_time_mach = recount_timestamp_speculative();
1034 diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
1035 #endif // !PRECISE_USER_KERNEL_PMCS
1036 }
1037
1038 #if MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL
1039
1040 PRECISE_TIME_ONLY_FUNC
1041 static void
recount_assert_level(thread_t thread,recount_level_t old)1042 recount_assert_level(thread_t thread, recount_level_t old)
1043 {
1044 assert3u(thread->th_recount.rth_current_level, ==, old);
1045 }
1046
1047 #else // MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL
1048
1049 PRECISE_TIME_ONLY_FUNC
1050 static void
recount_assert_level(thread_t __unused thread,recount_level_t __unused old)1051 recount_assert_level(thread_t __unused thread,
1052 recount_level_t __unused old)
1053 {
1054 }
1055
1056 #endif // !(MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL)
1057
1058 /// Called when entering or exiting the kernel to maintain system vs. user counts, extremely performance sensitive.
1059 ///
1060 /// Must be called with interrupts disabled.
1061 ///
1062 /// - Parameter from: What level is being switched from.
1063 /// - Parameter to: What level is being switched to.
1064 ///
1065 /// - Returns: The value of Mach time that was sampled inside this function.
1066 PRECISE_TIME_FATAL_FUNC
1067 static uint64_t
recount_transition(recount_level_t from,recount_level_t to)1068 recount_transition(recount_level_t from, recount_level_t to)
1069 {
1070 #if PRECISE_USER_KERNEL_TIME
1071 // Omit interrupts-disabled assertion for performance reasons.
1072 processor_t processor = current_processor();
1073 thread_t thread = processor->active_thread;
1074 if (thread) {
1075 task_t task = get_thread_ro_unchecked(thread)->tro_task;
1076
1077 recount_assert_level(thread, from);
1078 #if RECOUNT_THREAD_BASED_LEVEL
1079 thread->th_recount.rth_current_level = to;
1080 #else // RECOUNT_THREAD_BASED_LEVEL
1081 #pragma unused(to)
1082 #endif // !RECOUNT_THREAD_BASED_LEVEL
1083 struct recount_snap *last = recount_get_snap(processor);
1084 struct recount_snap diff = { 0 };
1085 struct recount_snap cur = { 0 };
1086 recount_precise_transition_diff(&diff, last, &cur);
1087 recount_absorb_snap(&diff, thread, task, processor, from);
1088 memcpy(last, &cur, sizeof(*last));
1089
1090 return cur.rsn_time_mach;
1091 } else {
1092 return 0;
1093 }
1094 #else // PRECISE_USER_KERNEL_TIME
1095 #pragma unused(from, to)
1096 panic("recount: kernel transition called with precise time off");
1097 #endif // !PRECISE_USER_KERNEL_TIME
1098 }
1099
1100 PRECISE_TIME_FATAL_FUNC
1101 void
recount_leave_user(void)1102 recount_leave_user(void)
1103 {
1104 recount_transition(RCT_LVL_USER, RCT_LVL_KERNEL);
1105 }
1106
1107 PRECISE_TIME_FATAL_FUNC
1108 void
recount_enter_user(void)1109 recount_enter_user(void)
1110 {
1111 recount_transition(RCT_LVL_KERNEL, RCT_LVL_USER);
1112 }
1113
// Take a speculative snapshot into the current processor's interrupt
// snapshot (the one returned by `recount_get_interrupt_snap`).
void
recount_enter_interrupt(void)
{
	struct recount_snap *intr_snap =
	    recount_get_interrupt_snap(current_processor());

	recount_snapshot_speculative(intr_snap);
}
1121
1122 void
recount_leave_interrupt(void)1123 recount_leave_interrupt(void)
1124 {
1125 processor_t processor = current_processor();
1126 thread_t thread = processor->active_thread;
1127 struct recount_snap *last = recount_get_snap(processor);
1128 uint64_t last_time = last->rsn_time_mach;
1129 recount_snapshot_speculative(last);
1130 processor->pr_recount.rpr_interrupt_time_mach +=
1131 last->rsn_time_mach - last_time;
1132 thread->th_recount.rth_interrupt_time_mach +=
1133 last->rsn_time_mach - last_time;
1134 }
1135
1136 #if __x86_64__
1137
1138 void
recount_enter_intel_interrupt(x86_saved_state_t * state)1139 recount_enter_intel_interrupt(x86_saved_state_t *state)
1140 {
1141 // The low bits of `%cs` being set indicate interrupt was delivered while
1142 // executing in user space.
1143 bool from_user = (is_saved_state64(state) ? state->ss_64.isf.cs :
1144 state->ss_32.cs) & 0x03;
1145 uint64_t timestamp = recount_transition(
1146 from_user ? RCT_LVL_USER : RCT_LVL_KERNEL, RCT_LVL_KERNEL);
1147 current_cpu_datap()->cpu_int_event_time = timestamp;
1148 }
1149
1150 void
recount_leave_intel_interrupt(void)1151 recount_leave_intel_interrupt(void)
1152 {
1153 recount_transition(RCT_LVL_KERNEL, RCT_LVL_KERNEL);
1154 current_cpu_datap()->cpu_int_event_time = 0;
1155 }
1156
1157 #endif // __x86_64__
1158
1159 #if RECOUNT_SECURE_METRICS
1160
1161 PRECISE_TIME_FATAL_FUNC
1162 void
recount_leave_secure(void)1163 recount_leave_secure(void)
1164 {
1165 boolean_t intrs_en = ml_set_interrupts_enabled(FALSE);
1166 recount_transition(RCT_LVL_SECURE, RCT_LVL_KERNEL);
1167 ml_set_interrupts_enabled(intrs_en);
1168 }
1169
1170 PRECISE_TIME_FATAL_FUNC
1171 void
recount_enter_secure(void)1172 recount_enter_secure(void)
1173 {
1174 boolean_t intrs_en = ml_set_interrupts_enabled(FALSE);
1175 recount_transition(RCT_LVL_KERNEL, RCT_LVL_SECURE);
1176 ml_set_interrupts_enabled(intrs_en);
1177 }
1178
1179 #endif // RECOUNT_SECURE_METRICS
1180
// Flag stored in the top bit of `rpr_state_last_abs_time` while the processor
// is idle; the remaining bits hold the Mach time of the last state change.
#define RCT_PR_IDLING (1ULL << 63)
1183
1184 void
recount_processor_idle(struct recount_processor * pr,struct recount_snap * snap)1185 recount_processor_idle(struct recount_processor *pr, struct recount_snap *snap)
1186 {
1187 __assert_only uint64_t state_time = os_atomic_load_wide(
1188 &pr->rpr_state_last_abs_time, relaxed);
1189 assert((state_time & RCT_PR_IDLING) == 0);
1190 assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
1191 uint64_t new_state_stamp = RCT_PR_IDLING | snap->rsn_time_mach;
1192 os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
1193 relaxed);
1194 }
1195
1196 OS_PURE OS_ALWAYS_INLINE
1197 static inline uint64_t
_state_time(uint64_t state_stamp)1198 _state_time(uint64_t state_stamp)
1199 {
1200 return state_stamp & ~(RCT_PR_IDLING);
1201 }
1202
1203 void
recount_processor_init(processor_t processor)1204 recount_processor_init(processor_t processor)
1205 {
1206 #if __AMP__
1207 processor->pr_recount.rpr_cpu_kind_index =
1208 processor->processor_set->pset_cluster_type == PSET_AMP_P ?
1209 RCT_CPU_PERFORMANCE : RCT_CPU_EFFICIENCY;
1210 #else // __AMP__
1211 #pragma unused(processor)
1212 #endif // !__AMP__
1213 }
1214
1215 void
recount_processor_run(struct recount_processor * pr,struct recount_snap * snap)1216 recount_processor_run(struct recount_processor *pr, struct recount_snap *snap)
1217 {
1218 uint64_t state = os_atomic_load_wide(&pr->rpr_state_last_abs_time, relaxed);
1219 assert(state == 0 || (state & RCT_PR_IDLING) == RCT_PR_IDLING);
1220 assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
1221 uint64_t new_state_stamp = snap->rsn_time_mach;
1222 pr->rpr_idle_time_mach += snap->rsn_time_mach - _state_time(state);
1223 os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
1224 relaxed);
1225 }
1226
1227 void
recount_processor_online(processor_t processor,struct recount_snap * cur)1228 recount_processor_online(processor_t processor, struct recount_snap *cur)
1229 {
1230 recount_processor_run(&processor->pr_recount, cur);
1231 struct recount_snap *pr_snap = recount_get_snap(processor);
1232 memcpy(pr_snap, cur, sizeof(*pr_snap));
1233 }
1234
1235 void
recount_processor_usage(struct recount_processor * pr,struct recount_usage * usage,uint64_t * idle_time_out)1236 recount_processor_usage(struct recount_processor *pr,
1237 struct recount_usage *usage, uint64_t *idle_time_out)
1238 {
1239 recount_sum(&recount_processor_plan, &pr->rpr_active, usage);
1240 _fix_time_precision(usage);
1241
1242 uint64_t idle_time = pr->rpr_idle_time_mach;
1243 uint64_t idle_stamp = os_atomic_load_wide(&pr->rpr_state_last_abs_time,
1244 relaxed);
1245 bool idle = (idle_stamp & RCT_PR_IDLING) == RCT_PR_IDLING;
1246 if (idle) {
1247 // Since processors can idle for some time without an update, make sure
1248 // the idle time is up-to-date with respect to the caller.
1249 idle_time += mach_absolute_time() - _state_time(idle_stamp);
1250 }
1251 *idle_time_out = idle_time;
1252 }
1253
1254 uint64_t
recount_current_processor_interrupt_time_mach(void)1255 recount_current_processor_interrupt_time_mach(void)
1256 {
1257 assert(!preemption_enabled());
1258 return current_processor()->pr_recount.rpr_interrupt_time_mach;
1259 }
1260
1261 bool
recount_task_thread_perf_level_usage(struct task * task,uint64_t tid,struct recount_usage * usage_levels)1262 recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
1263 struct recount_usage *usage_levels)
1264 {
1265 thread_t thread = task_findtid(task, tid);
1266 if (thread != THREAD_NULL) {
1267 if (thread == current_thread()) {
1268 boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
1269 recount_current_thread_perf_level_usage(usage_levels);
1270 ml_set_interrupts_enabled(interrupt_state);
1271 } else {
1272 recount_thread_perf_level_usage(thread, usage_levels);
1273 }
1274 }
1275 return thread != THREAD_NULL;
1276 }
1277
1278 #pragma mark - utilities
1279
1280 // For rolling up counts, convert an index from one topography to another.
1281 static size_t
recount_convert_topo_index(recount_topo_t from,recount_topo_t to,size_t i)1282 recount_convert_topo_index(recount_topo_t from, recount_topo_t to, size_t i)
1283 {
1284 if (from == to) {
1285 return i;
1286 } else if (to == RCT_TOPO_SYSTEM) {
1287 return 0;
1288 } else if (from == RCT_TOPO_CPU) {
1289 assertf(to == RCT_TOPO_CPU_KIND,
1290 "recount: cannot convert from CPU topography to %d", to);
1291 return _topo_cpu_kinds[i];
1292 } else {
1293 panic("recount: unexpected rollup request from %d to %d", from, to);
1294 }
1295 }
1296
1297 // Get the track index of the provided processor and topography.
1298 OS_ALWAYS_INLINE
1299 static size_t
recount_topo_index(recount_topo_t topo,processor_t processor)1300 recount_topo_index(recount_topo_t topo, processor_t processor)
1301 {
1302 switch (topo) {
1303 case RCT_TOPO_SYSTEM:
1304 return 0;
1305 case RCT_TOPO_CPU:
1306 return processor->cpu_id;
1307 case RCT_TOPO_CPU_KIND:
1308 #if __AMP__
1309 return processor->pr_recount.rpr_cpu_kind_index;
1310 #else // __AMP__
1311 return 0;
1312 #endif // !__AMP__
1313 default:
1314 panic("recount: invalid topology %u to index", topo);
1315 }
1316 }
1317
1318 // Return the number of tracks needed for a given topography.
1319 size_t
recount_topo_count(recount_topo_t topo)1320 recount_topo_count(recount_topo_t topo)
1321 {
1322 // Allow the compiler to reason about at least the system and CPU kind
1323 // counts.
1324 switch (topo) {
1325 case RCT_TOPO_SYSTEM:
1326 return 1;
1327
1328 case RCT_TOPO_CPU_KIND:
1329 #if __AMP__
1330 return 2;
1331 #else // __AMP__
1332 return 1;
1333 #endif // !__AMP__
1334
1335 case RCT_TOPO_CPU:
1336 #if __arm__ || __arm64__
1337 return ml_get_cpu_count();
1338 #else // __arm__ || __arm64__
1339 return ml_early_cpu_max_number() + 1;
1340 #endif // !__arm__ && !__arm64__
1341
1342 default:
1343 panic("recount: invalid topography %d", topo);
1344 }
1345 }
1346
1347 static bool
recount_topo_matches_cpu_kind(recount_topo_t topo,recount_cpu_kind_t kind,size_t idx)1348 recount_topo_matches_cpu_kind(recount_topo_t topo, recount_cpu_kind_t kind,
1349 size_t idx)
1350 {
1351 #if !__AMP__
1352 #pragma unused(kind, idx)
1353 #endif // !__AMP__
1354 switch (topo) {
1355 case RCT_TOPO_SYSTEM:
1356 return true;
1357
1358 case RCT_TOPO_CPU_KIND:
1359 #if __AMP__
1360 return kind == idx;
1361 #else // __AMP__
1362 return false;
1363 #endif // !__AMP__
1364
1365 case RCT_TOPO_CPU: {
1366 #if __AMP__
1367 return _topo_cpu_kinds[idx] == kind;
1368 #else // __AMP__
1369 return false;
1370 #endif // !__AMP__
1371 }
1372
1373 default:
1374 panic("recount: unexpected topography %d", topo);
1375 }
1376 }
1377
1378 struct recount_track *
recount_tracks_create(recount_plan_t plan)1379 recount_tracks_create(recount_plan_t plan)
1380 {
1381 assert(_topo_allocates[plan->rpl_topo]);
1382 return zalloc_flags(_recount_track_zones[plan->rpl_topo],
1383 Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT));
1384 }
1385
1386 static void
recount_tracks_copy(recount_plan_t plan,struct recount_track * dst,struct recount_track * src)1387 recount_tracks_copy(recount_plan_t plan, struct recount_track *dst,
1388 struct recount_track *src)
1389 {
1390 size_t topo_count = recount_topo_count(plan->rpl_topo);
1391 for (size_t i = 0; i < topo_count; i++) {
1392 recount_read_track(&dst[i].rt_usage, &src[i]);
1393 }
1394 }
1395
1396 void
recount_tracks_destroy(recount_plan_t plan,struct recount_track * tracks)1397 recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks)
1398 {
1399 assert(_topo_allocates[plan->rpl_topo]);
1400 zfree(_recount_track_zones[plan->rpl_topo], tracks);
1401 }
1402
1403 void
recount_thread_init(struct recount_thread * th)1404 recount_thread_init(struct recount_thread *th)
1405 {
1406 th->rth_lifetime = recount_tracks_create(&recount_thread_plan);
1407 }
1408
1409 void
recount_thread_copy(struct recount_thread * dst,struct recount_thread * src)1410 recount_thread_copy(struct recount_thread *dst, struct recount_thread *src)
1411 {
1412 recount_tracks_copy(&recount_thread_plan, dst->rth_lifetime,
1413 src->rth_lifetime);
1414 }
1415
1416 void
recount_task_copy(struct recount_task * dst,const struct recount_task * src)1417 recount_task_copy(struct recount_task *dst, const struct recount_task *src)
1418 {
1419 recount_tracks_copy(&recount_task_plan, dst->rtk_lifetime,
1420 src->rtk_lifetime);
1421 }
1422
1423 void
recount_thread_deinit(struct recount_thread * th)1424 recount_thread_deinit(struct recount_thread *th)
1425 {
1426 recount_tracks_destroy(&recount_thread_plan, th->rth_lifetime);
1427 }
1428
1429 void
recount_task_init(struct recount_task * tk)1430 recount_task_init(struct recount_task *tk)
1431 {
1432 tk->rtk_lifetime = recount_tracks_create(&recount_task_plan);
1433 tk->rtk_terminated = recount_usage_alloc(
1434 recount_task_terminated_plan.rpl_topo);
1435 }
1436
1437 void
recount_task_deinit(struct recount_task * tk)1438 recount_task_deinit(struct recount_task *tk)
1439 {
1440 recount_tracks_destroy(&recount_task_plan, tk->rtk_lifetime);
1441 recount_usage_free(recount_task_terminated_plan.rpl_topo,
1442 tk->rtk_terminated);
1443 }
1444
1445 void
recount_coalition_init(struct recount_coalition * co)1446 recount_coalition_init(struct recount_coalition *co)
1447 {
1448 co->rco_exited = recount_usage_alloc(recount_coalition_plan.rpl_topo);
1449 }
1450
1451 void
recount_coalition_deinit(struct recount_coalition * co)1452 recount_coalition_deinit(struct recount_coalition *co)
1453 {
1454 recount_usage_free(recount_coalition_plan.rpl_topo, co->rco_exited);
1455 }
1456
1457 void
recount_work_interval_init(struct recount_work_interval * wi)1458 recount_work_interval_init(struct recount_work_interval *wi)
1459 {
1460 wi->rwi_current_instance = recount_tracks_create(&recount_work_interval_plan);
1461 }
1462
1463 void
recount_work_interval_deinit(struct recount_work_interval * wi)1464 recount_work_interval_deinit(struct recount_work_interval *wi)
1465 {
1466 recount_tracks_destroy(&recount_work_interval_plan, wi->rwi_current_instance);
1467 }
1468
1469 struct recount_usage *
recount_usage_alloc(recount_topo_t topo)1470 recount_usage_alloc(recount_topo_t topo)
1471 {
1472 assert(_topo_allocates[topo]);
1473 return zalloc_flags(_recount_usage_zones[topo],
1474 Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT));
1475 }
1476
1477 void
recount_usage_free(recount_topo_t topo,struct recount_usage * usage)1478 recount_usage_free(recount_topo_t topo, struct recount_usage *usage)
1479 {
1480 assert(_topo_allocates[topo]);
1481 zfree(_recount_usage_zones[topo], usage);
1482 }
1483