xref: /xnu-10002.1.13/osfmk/kern/recount.c (revision 1031c584a5e37aff177559b9f69dbd3c8c3fd30a)
1 // Copyright (c) 2021 Apple Inc.  All rights reserved.
2 //
3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 //
5 // This file contains Original Code and/or Modifications of Original Code
6 // as defined in and that are subject to the Apple Public Source License
7 // Version 2.0 (the 'License'). You may not use this file except in
8 // compliance with the License. The rights granted to you under the License
9 // may not be used to create, or enable the creation or redistribution of,
10 // unlawful or unlicensed copies of an Apple operating system, or to
11 // circumvent, violate, or enable the circumvention or violation of, any
12 // terms of an Apple operating system software license agreement.
13 //
14 // Please obtain a copy of the License at
15 // http://www.opensource.apple.com/apsl/ and read it before using this file.
16 //
17 // The Original Code and all software distributed under the License are
18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 // Please see the License for the specific language governing rights and
23 // limitations under the License.
24 //
25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26 
27 #include <kern/assert.h>
28 #include <kern/kalloc.h>
29 #include <pexpert/pexpert.h>
30 #include <sys/kdebug.h>
31 #include <sys/_types/_size_t.h>
32 #if MONOTONIC
33 #include <kern/monotonic.h>
34 #endif // MONOTONIC
35 #include <kern/percpu.h>
36 #include <kern/processor.h>
37 #include <kern/recount.h>
38 #include <kern/startup.h>
39 #include <kern/task.h>
40 #include <kern/thread.h>
41 #include <kern/work_interval.h>
42 #include <mach/mach_time.h>
43 #include <mach/mach_types.h>
44 #include <machine/config.h>
45 #include <machine/machine_routines.h>
46 #include <os/atomic_private.h>
47 #include <stdbool.h>
48 #include <stdint.h>
49 
50 // Recount's machine-independent implementation and interfaces for the kernel
51 // at-large.
52 
53 #define PRECISE_USER_KERNEL_PMCS PRECISE_USER_KERNEL_TIME
54 
55 // On non-release kernels, allow precise PMC (instructions, cycles) updates to
56 // be disabled for performance characterization.
57 #if PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)
58 #define PRECISE_USER_KERNEL_PMC_TUNABLE 1
59 
60 TUNABLE(bool, no_precise_pmcs, "-no-precise-pmcs", false);
61 #endif // PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)
62 
63 #if !PRECISE_USER_KERNEL_TIME
64 #define PRECISE_TIME_FATAL_FUNC OS_NORETURN
65 #define PRECISE_TIME_ONLY_FUNC OS_UNUSED
66 #else // !PRECISE_USER_KERNEL_TIME
67 #define PRECISE_TIME_FATAL_FUNC
68 #define PRECISE_TIME_ONLY_FUNC
69 #endif // PRECISE_USER_KERNEL_TIME
70 
71 #if !PRECISE_USER_KERNEL_PMCS
72 #define PRECISE_PMCS_ONLY_FUNC OS_UNUSED
73 #else // !PRECISE_USER_KERNEL_PMCS
74 #define PRECISE_PMCS_ONLY_FUNC
75 #endif // PRECISE_USER_KERNEL_PMCS
76 
77 #if HAS_CPU_DPE_COUNTER
78 // Only certain platforms have DPE counters.
79 #define RECOUNT_ENERGY CONFIG_PERVASIVE_ENERGY
80 #else // HAS_CPU_DPE_COUNTER
81 #define RECOUNT_ENERGY 0
82 #endif // !HAS_CPU_DPE_COUNTER
83 
84 // Topography helpers.
85 size_t recount_topo_count(recount_topo_t topo);
86 static bool recount_topo_matches_cpu_kind(recount_topo_t topo,
87     recount_cpu_kind_t kind, size_t idx);
88 static size_t recount_topo_index(recount_topo_t topo, processor_t processor);
89 static size_t recount_convert_topo_index(recount_topo_t from, recount_topo_t to,
90     size_t i);
91 
// Prevent counter updates before the system is ready.
//
// Set to true at the end of `recount_startup`; `recount_switch_thread` returns
// early while it is still false.
__security_const_late bool recount_started = false;

// Lookup table that matches CPU numbers (indices) to their track index.
//
// Filled in by `recount_startup` on AMP systems with RCT_CPU_PERFORMANCE or
// RCT_CPU_EFFICIENCY for each CPU; otherwise every entry stays 0.
__security_const_late uint8_t _topo_cpu_kinds[MAX_CPUS] = { 0 };
97 
98 __startup_func
99 static void
recount_startup(void)100 recount_startup(void)
101 {
102 #if __AMP__
103 	unsigned int cpu_count = ml_get_cpu_count();
104 	const ml_topology_info_t *topo_info = ml_get_topology_info();
105 	for (unsigned int i = 0; i < cpu_count; i++) {
106 		cluster_type_t type = topo_info->cpus[i].cluster_type;
107 		uint8_t cluster_i = (type == CLUSTER_TYPE_P) ? RCT_CPU_PERFORMANCE :
108 		    RCT_CPU_EFFICIENCY;
109 		_topo_cpu_kinds[i] = cluster_i;
110 	}
111 #endif // __AMP__
112 
113 	recount_started = true;
114 }
115 
116 STARTUP(PERCPU, STARTUP_RANK_LAST, recount_startup);
117 
118 #pragma mark - tracks
119 
// One plan per kind of tracked entity.  The second argument is the topology
// the plan's tracks are indexed by; it is read back in this file through
// `rpl_topo`.
RECOUNT_PLAN_DEFINE(recount_thread_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_work_interval_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_terminated_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_coalition_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_processor_plan, RCT_TOPO_SYSTEM);
126 
127 OS_ALWAYS_INLINE
128 static inline uint64_t
recount_timestamp_speculative(void)129 recount_timestamp_speculative(void)
130 {
131 #if __arm__ || __arm64__
132 	return ml_get_speculative_timebase();
133 #else // __arm__ || __arm64__
134 	return mach_absolute_time();
135 #endif // !__arm__ && !__arm64__
136 }
137 
138 OS_ALWAYS_INLINE
139 void
recount_snapshot_speculative(struct recount_snap * snap)140 recount_snapshot_speculative(struct recount_snap *snap)
141 {
142 	snap->rsn_time_mach = recount_timestamp_speculative();
143 #if CONFIG_PERVASIVE_CPI
144 	mt_cur_cpu_cycles_instrs_speculative(&snap->rsn_cycles, &snap->rsn_insns);
145 #endif // CONFIG_PERVASIVE_CPI
146 }
147 
// Fill `snap` with the current time and counters.  On ARM, an ISB is issued
// first, and then the speculative snapshot is taken.
void
recount_snapshot(struct recount_snap *snap)
{
#if __arm__ || __arm64__
	// Issue the barrier before the counters are sampled.
	__builtin_arm_isb(ISB_SY);
#endif // __arm__ || __arm64__

	recount_snapshot_speculative(snap);
}
156 
157 static struct recount_snap *
recount_get_snap(processor_t processor)158 recount_get_snap(processor_t processor)
159 {
160 	return &processor->pr_recount.rpr_snap;
161 }
162 
163 // A simple sequence lock implementation.
164 
165 static void
_seqlock_shared_lock_slowpath(const uint32_t * lck,uint32_t gen)166 _seqlock_shared_lock_slowpath(const uint32_t *lck, uint32_t gen)
167 {
168 	disable_preemption();
169 	do {
170 		gen = hw_wait_while_equals32((uint32_t *)(uintptr_t)lck, gen);
171 	} while (__improbable((gen & 1) != 0));
172 	os_atomic_thread_fence(acquire);
173 	enable_preemption();
174 }
175 
176 static uintptr_t
_seqlock_shared_lock(const uint32_t * lck)177 _seqlock_shared_lock(const uint32_t *lck)
178 {
179 	uint32_t gen = os_atomic_load(lck, acquire);
180 	if (__improbable((gen & 1) != 0)) {
181 		_seqlock_shared_lock_slowpath(lck, gen);
182 	}
183 	return gen;
184 }
185 
186 static bool
_seqlock_shared_try_unlock(const uint32_t * lck,uintptr_t on_enter)187 _seqlock_shared_try_unlock(const uint32_t *lck, uintptr_t on_enter)
188 {
189 	return os_atomic_load(lck, acquire) == on_enter;
190 }
191 
192 static void
_seqlock_excl_lock_relaxed(uint32_t * lck)193 _seqlock_excl_lock_relaxed(uint32_t *lck)
194 {
195 	__assert_only uintptr_t new = os_atomic_inc(lck, relaxed);
196 	assert3u((new & 1), ==, 1);
197 }
198 
199 static void
_seqlock_excl_commit(void)200 _seqlock_excl_commit(void)
201 {
202 	os_atomic_thread_fence(release);
203 }
204 
205 static void
_seqlock_excl_unlock_relaxed(uint32_t * lck)206 _seqlock_excl_unlock_relaxed(uint32_t *lck)
207 {
208 	__assert_only uint32_t new = os_atomic_inc(lck, relaxed);
209 	assert3u((new & 1), ==, 0);
210 }
211 
212 static struct recount_track *
recount_update_start(struct recount_track * tracks,recount_topo_t topo,processor_t processor)213 recount_update_start(struct recount_track *tracks, recount_topo_t topo,
214     processor_t processor)
215 {
216 	struct recount_track *track = &tracks[recount_topo_index(topo, processor)];
217 	_seqlock_excl_lock_relaxed(&track->rt_sync);
218 	return track;
219 }
220 
221 #if RECOUNT_ENERGY
222 
223 static struct recount_track *
recount_update_single_start(struct recount_track * tracks,recount_topo_t topo,processor_t processor)224 recount_update_single_start(struct recount_track *tracks, recount_topo_t topo,
225     processor_t processor)
226 {
227 	return &tracks[recount_topo_index(topo, processor)];
228 }
229 
230 #endif // RECOUNT_ENERGY
231 
// Writer-side fence for track updates; forwards to the sequence lock's commit.
static void
recount_update_commit(void)
{
	_seqlock_excl_commit();
}
237 
238 static void
recount_update_end(struct recount_track * track)239 recount_update_end(struct recount_track *track)
240 {
241 	_seqlock_excl_unlock_relaxed(&track->rt_sync);
242 }
243 
244 static const struct recount_usage *
recount_read_start(const struct recount_track * track,uintptr_t * on_enter)245 recount_read_start(const struct recount_track *track, uintptr_t *on_enter)
246 {
247 	const struct recount_usage *stats = &track->rt_usage;
248 	*on_enter = _seqlock_shared_lock(&track->rt_sync);
249 	return stats;
250 }
251 
252 static bool
recount_try_read_end(const struct recount_track * track,uintptr_t on_enter)253 recount_try_read_end(const struct recount_track *track, uintptr_t on_enter)
254 {
255 	return _seqlock_shared_try_unlock(&track->rt_sync, on_enter);
256 }
257 
258 static void
recount_read_track(struct recount_usage * stats,const struct recount_track * track)259 recount_read_track(struct recount_usage *stats,
260     const struct recount_track *track)
261 {
262 	uintptr_t on_enter = 0;
263 	do {
264 		const struct recount_usage *vol_stats =
265 		    recount_read_start(track, &on_enter);
266 		*stats = *vol_stats;
267 	} while (!recount_try_read_end(track, on_enter));
268 }
269 
270 static void
recount_usage_add(struct recount_usage * sum,const struct recount_usage * to_add)271 recount_usage_add(struct recount_usage *sum, const struct recount_usage *to_add)
272 {
273 	sum->ru_user_time_mach += to_add->ru_user_time_mach;
274 	sum->ru_system_time_mach += to_add->ru_system_time_mach;
275 #if CONFIG_PERVASIVE_CPI
276 	sum->ru_cycles += to_add->ru_cycles;
277 	sum->ru_instructions += to_add->ru_instructions;
278 #endif // CONFIG_PERVASIVE_CPI
279 #if CONFIG_PERVASIVE_ENERGY
280 	sum->ru_energy_nj += to_add->ru_energy_nj;
281 #endif // CONFIG_PERVASIVE_CPI
282 }
283 
284 OS_ALWAYS_INLINE
285 static inline void
recount_usage_add_snap(struct recount_usage * usage,uint64_t * add_time,struct recount_snap * snap)286 recount_usage_add_snap(struct recount_usage *usage, uint64_t *add_time,
287     struct recount_snap *snap)
288 {
289 	*add_time += snap->rsn_time_mach;
290 #if CONFIG_PERVASIVE_CPI
291 	usage->ru_cycles += snap->rsn_cycles;
292 	usage->ru_instructions += snap->rsn_insns;
293 #else // CONFIG_PERVASIVE_CPI
294 #pragma unused(usage)
295 #endif // !CONFIG_PERVASIVE_CPI
296 }
297 
298 static void
recount_rollup(recount_plan_t plan,const struct recount_track * tracks,recount_topo_t to_topo,struct recount_usage * stats)299 recount_rollup(recount_plan_t plan, const struct recount_track *tracks,
300     recount_topo_t to_topo, struct recount_usage *stats)
301 {
302 	recount_topo_t from_topo = plan->rpl_topo;
303 	size_t topo_count = recount_topo_count(from_topo);
304 	struct recount_usage tmp = { 0 };
305 	for (size_t i = 0; i < topo_count; i++) {
306 		recount_read_track(&tmp, &tracks[i]);
307 		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
308 		recount_usage_add(&stats[to_i], &tmp);
309 	}
310 }
311 
312 // This function must be run when counters cannot increment for the track, like from the current thread.
313 static void
recount_rollup_unsafe(recount_plan_t plan,struct recount_track * tracks,recount_topo_t to_topo,struct recount_usage * stats)314 recount_rollup_unsafe(recount_plan_t plan, struct recount_track *tracks,
315     recount_topo_t to_topo, struct recount_usage *stats)
316 {
317 	recount_topo_t from_topo = plan->rpl_topo;
318 	size_t topo_count = recount_topo_count(from_topo);
319 	for (size_t i = 0; i < topo_count; i++) {
320 		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
321 		recount_usage_add(&stats[to_i], &tracks[i].rt_usage);
322 	}
323 }
324 
325 void
recount_sum(recount_plan_t plan,const struct recount_track * tracks,struct recount_usage * sum)326 recount_sum(recount_plan_t plan, const struct recount_track *tracks,
327     struct recount_usage *sum)
328 {
329 	recount_rollup(plan, tracks, RCT_TOPO_SYSTEM, sum);
330 }
331 
332 void
recount_sum_unsafe(recount_plan_t plan,const struct recount_track * tracks,struct recount_usage * sum)333 recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
334     struct recount_usage *sum)
335 {
336 	recount_topo_t topo = plan->rpl_topo;
337 	size_t topo_count = recount_topo_count(topo);
338 	for (size_t i = 0; i < topo_count; i++) {
339 		recount_usage_add(sum, &tracks[i].rt_usage);
340 	}
341 }
342 
343 void
recount_sum_and_isolate_cpu_kind(recount_plan_t plan,struct recount_track * tracks,recount_cpu_kind_t kind,struct recount_usage * sum,struct recount_usage * only_kind)344 recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
345     struct recount_track *tracks, recount_cpu_kind_t kind,
346     struct recount_usage *sum, struct recount_usage *only_kind)
347 {
348 	size_t topo_count = recount_topo_count(plan->rpl_topo);
349 	struct recount_usage tmp = { 0 };
350 	for (size_t i = 0; i < topo_count; i++) {
351 		recount_read_track(&tmp, &tracks[i]);
352 		recount_usage_add(sum, &tmp);
353 		if (recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
354 			recount_usage_add(only_kind, &tmp);
355 		}
356 	}
357 }
358 
359 static void
recount_sum_usage(recount_plan_t plan,const struct recount_usage * usages,struct recount_usage * sum)360 recount_sum_usage(recount_plan_t plan, const struct recount_usage *usages,
361     struct recount_usage *sum)
362 {
363 	const size_t topo_count = recount_topo_count(plan->rpl_topo);
364 	for (size_t i = 0; i < topo_count; i++) {
365 		recount_usage_add(sum, &usages[i]);
366 	}
367 }
368 
369 void
recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,struct recount_usage * usage,recount_cpu_kind_t kind,struct recount_usage * sum,struct recount_usage * only_kind)370 recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
371     struct recount_usage *usage, recount_cpu_kind_t kind,
372     struct recount_usage *sum, struct recount_usage *only_kind)
373 {
374 	const size_t topo_count = recount_topo_count(plan->rpl_topo);
375 	for (size_t i = 0; i < topo_count; i++) {
376 		recount_usage_add(sum, &usage[i]);
377 		if (only_kind && recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
378 			recount_usage_add(only_kind, &usage[i]);
379 		}
380 	}
381 }
382 
383 void
recount_sum_perf_levels(recount_plan_t plan,struct recount_track * tracks,struct recount_usage * sums)384 recount_sum_perf_levels(recount_plan_t plan, struct recount_track *tracks,
385     struct recount_usage *sums)
386 {
387 	recount_rollup(plan, tracks, RCT_TOPO_CPU_KIND, sums);
388 }
389 
390 // Plan-specific helpers.
391 
392 void
recount_coalition_rollup_task(struct recount_coalition * co,struct recount_task * tk)393 recount_coalition_rollup_task(struct recount_coalition *co,
394     struct recount_task *tk)
395 {
396 	recount_rollup(&recount_task_plan, tk->rtk_lifetime,
397 	    recount_coalition_plan.rpl_topo, co->rco_exited);
398 }
399 
400 void
recount_task_rollup_thread(struct recount_task * tk,const struct recount_thread * th)401 recount_task_rollup_thread(struct recount_task *tk,
402     const struct recount_thread *th)
403 {
404 	recount_rollup(&recount_thread_plan, th->rth_lifetime,
405 	    recount_task_terminated_plan.rpl_topo, tk->rtk_terminated);
406 }
407 
408 #pragma mark - scheduler
409 
410 // `result = lhs - rhs` for snapshots.
411 OS_ALWAYS_INLINE
412 static void
recount_snap_diff(struct recount_snap * result,const struct recount_snap * lhs,const struct recount_snap * rhs)413 recount_snap_diff(struct recount_snap *result,
414     const struct recount_snap *lhs, const struct recount_snap *rhs)
415 {
416 	assert3u(lhs->rsn_time_mach, >=, rhs->rsn_time_mach);
417 	result->rsn_time_mach = lhs->rsn_time_mach - rhs->rsn_time_mach;
418 #if CONFIG_PERVASIVE_CPI
419 	assert3u(lhs->rsn_insns, >=, rhs->rsn_insns);
420 	assert3u(lhs->rsn_cycles, >=, rhs->rsn_cycles);
421 	result->rsn_cycles = lhs->rsn_cycles - rhs->rsn_cycles;
422 	result->rsn_insns = lhs->rsn_insns - rhs->rsn_insns;
423 #endif // CONFIG_PERVASIVE_CPI
424 }
425 
426 void
recount_update_snap(struct recount_snap * cur)427 recount_update_snap(struct recount_snap *cur)
428 {
429 	struct recount_snap *this_snap = recount_get_snap(current_processor());
430 	this_snap->rsn_time_mach = cur->rsn_time_mach;
431 #if CONFIG_PERVASIVE_CPI
432 	this_snap->rsn_cycles = cur->rsn_cycles;
433 	this_snap->rsn_insns = cur->rsn_insns;
434 #endif // CONFIG_PERVASIVE_CPI
435 }
436 
437 static void
_fix_time_precision(struct recount_usage * usage)438 _fix_time_precision(struct recount_usage *usage)
439 {
440 #if PRECISE_USER_KERNEL_TIME
441 #pragma unused(usage)
442 #else // PRECISE_USER_KERNEL_TIME
443 	// Attribute all time to user, as the system is only acting "on behalf
444 	// of" user processes -- a bit sketchy.
445 	usage->ru_user_time_mach += usage->ru_system_time_mach;
446 	usage->ru_system_time_mach = 0;
447 #endif // !PRECISE_USER_KERNEL_TIME
448 }
449 
450 void
recount_current_thread_usage(struct recount_usage * usage)451 recount_current_thread_usage(struct recount_usage *usage)
452 {
453 	assert(ml_get_interrupts_enabled() == FALSE);
454 	thread_t thread = current_thread();
455 	struct recount_snap snap = { 0 };
456 	recount_snapshot(&snap);
457 	recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
458 	    usage);
459 	struct recount_snap *last = recount_get_snap(current_processor());
460 	struct recount_snap diff = { 0 };
461 	recount_snap_diff(&diff, &snap, last);
462 	recount_usage_add_snap(usage, &usage->ru_system_time_mach, &diff);
463 	_fix_time_precision(usage);
464 }
465 
466 void
recount_current_thread_usage_perf_only(struct recount_usage * usage,struct recount_usage * usage_perf_only)467 recount_current_thread_usage_perf_only(struct recount_usage *usage,
468     struct recount_usage *usage_perf_only)
469 {
470 	struct recount_usage usage_perf_levels[RCT_CPU_KIND_COUNT] = { 0 };
471 	recount_current_thread_perf_level_usage(usage_perf_levels);
472 	recount_sum_usage(&recount_thread_plan, usage_perf_levels, usage);
473 	*usage_perf_only = usage_perf_levels[RCT_CPU_PERFORMANCE];
474 	_fix_time_precision(usage);
475 	_fix_time_precision(usage_perf_only);
476 }
477 
478 void
recount_thread_perf_level_usage(struct thread * thread,struct recount_usage * usage_levels)479 recount_thread_perf_level_usage(struct thread *thread,
480     struct recount_usage *usage_levels)
481 {
482 	recount_rollup(&recount_thread_plan, thread->th_recount.rth_lifetime,
483 	    RCT_TOPO_CPU_KIND, usage_levels);
484 	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
485 	for (size_t i = 0; i < topo_count; i++) {
486 		_fix_time_precision(&usage_levels[i]);
487 	}
488 }
489 
490 void
recount_current_thread_perf_level_usage(struct recount_usage * usage_levels)491 recount_current_thread_perf_level_usage(struct recount_usage *usage_levels)
492 {
493 	assert(ml_get_interrupts_enabled() == FALSE);
494 	processor_t processor = current_processor();
495 	thread_t thread = current_thread();
496 	struct recount_snap snap = { 0 };
497 	recount_snapshot(&snap);
498 	recount_rollup_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
499 	    RCT_TOPO_CPU_KIND, usage_levels);
500 	struct recount_snap *last = recount_get_snap(processor);
501 	struct recount_snap diff = { 0 };
502 	recount_snap_diff(&diff, &snap, last);
503 	size_t cur_i = recount_topo_index(RCT_TOPO_CPU_KIND, processor);
504 	struct recount_usage *cur_usage = &usage_levels[cur_i];
505 	recount_usage_add_snap(cur_usage, &cur_usage->ru_system_time_mach, &diff);
506 	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
507 	for (size_t i = 0; i < topo_count; i++) {
508 		_fix_time_precision(&usage_levels[i]);
509 	}
510 }
511 
// Return the energy recorded across all of the current thread's tracks, or 0
// when RECOUNT_ENERGY is not configured.  With RECOUNT_ENERGY, interrupts must
// be disabled (asserted).
uint64_t
recount_current_thread_energy_nj(void)
{
#if !RECOUNT_ENERGY
	return 0;
#else // !RECOUNT_ENERGY
	assert(ml_get_interrupts_enabled() == FALSE);
	thread_t cur_thread = current_thread();
	const size_t track_count = recount_topo_count(recount_thread_plan.rpl_topo);
	uint64_t total_nj = 0;
	for (size_t track_i = 0; track_i < track_count; track_i++) {
		const struct recount_track *track =
		    &cur_thread->th_recount.rth_lifetime[track_i];
		total_nj += track->rt_usage.ru_energy_nj;
	}
	return total_nj;
#endif // RECOUNT_ENERGY
}
528 
529 static void
_times_add_usage(struct recount_times_mach * times,struct recount_usage * usage)530 _times_add_usage(struct recount_times_mach *times, struct recount_usage *usage)
531 {
532 	times->rtm_user += usage->ru_user_time_mach;
533 #if PRECISE_USER_KERNEL_TIME
534 	times->rtm_system += usage->ru_system_time_mach;
535 #else // PRECISE_USER_KERNEL_TIME
536 	times->rtm_user += usage->ru_system_time_mach;
537 #endif // !PRECISE_USER_KERNEL_TIME
538 }
539 
540 struct recount_times_mach
recount_thread_times(struct thread * thread)541 recount_thread_times(struct thread *thread)
542 {
543 	size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
544 	struct recount_times_mach times = { 0 };
545 	for (size_t i = 0; i < topo_count; i++) {
546 		_times_add_usage(&times, &thread->th_recount.rth_lifetime[i].rt_usage);
547 	}
548 	return times;
549 }
550 
551 uint64_t
recount_thread_time_mach(struct thread * thread)552 recount_thread_time_mach(struct thread *thread)
553 {
554 	struct recount_times_mach times = recount_thread_times(thread);
555 	return times.rtm_user + times.rtm_system;
556 }
557 
558 static uint64_t
_time_since_last_snapshot(void)559 _time_since_last_snapshot(void)
560 {
561 	struct recount_snap *last = recount_get_snap(current_processor());
562 	uint64_t cur_time = mach_absolute_time();
563 	return cur_time - last->rsn_time_mach;
564 }
565 
566 uint64_t
recount_current_thread_time_mach(void)567 recount_current_thread_time_mach(void)
568 {
569 	assert(ml_get_interrupts_enabled() == FALSE);
570 	uint64_t previous_time = recount_thread_time_mach(current_thread());
571 	return previous_time + _time_since_last_snapshot();
572 }
573 
574 struct recount_times_mach
recount_current_thread_times(void)575 recount_current_thread_times(void)
576 {
577 	assert(ml_get_interrupts_enabled() == FALSE);
578 	struct recount_times_mach times = recount_thread_times(
579 		current_thread());
580 #if PRECISE_USER_KERNEL_TIME
581 	// This code is executing in the kernel, so the time since the last snapshot
582 	// (with precise user/kernel time) is since entering the kernel.
583 	times.rtm_system += _time_since_last_snapshot();
584 #else // PRECISE_USER_KERNEL_TIME
585 	times.rtm_user += _time_since_last_snapshot();
586 #endif // !PRECISE_USER_KERNEL_TIME
587 	return times;
588 }
589 
590 void
recount_thread_usage(thread_t thread,struct recount_usage * usage)591 recount_thread_usage(thread_t thread, struct recount_usage *usage)
592 {
593 	recount_sum(&recount_thread_plan, thread->th_recount.rth_lifetime, usage);
594 	_fix_time_precision(usage);
595 }
596 
597 void
recount_work_interval_usage(struct work_interval * work_interval,struct recount_usage * usage)598 recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage)
599 {
600 	recount_sum(&recount_work_interval_plan, work_interval_get_recount_tracks(work_interval), usage);
601 	_fix_time_precision(usage);
602 }
603 
604 struct recount_times_mach
recount_work_interval_times(struct work_interval * work_interval)605 recount_work_interval_times(struct work_interval *work_interval)
606 {
607 	size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
608 	struct recount_times_mach times = { 0 };
609 	for (size_t i = 0; i < topo_count; i++) {
610 		_times_add_usage(&times, &work_interval_get_recount_tracks(work_interval)[i].rt_usage);
611 	}
612 	return times;
613 }
614 
// Return the energy recorded across all of `work_interval`'s tracks, or 0 when
// RECOUNT_ENERGY is not configured.
uint64_t
recount_work_interval_energy_nj(struct work_interval *work_interval)
{
#if !RECOUNT_ENERGY
#pragma unused(work_interval)
	return 0;
#else // !RECOUNT_ENERGY
	const size_t track_count =
	    recount_topo_count(recount_work_interval_plan.rpl_topo);
	uint64_t total_nj = 0;
	for (size_t track_i = 0; track_i < track_count; track_i++) {
		struct recount_track *wi_tracks =
		    work_interval_get_recount_tracks(work_interval);
		total_nj += wi_tracks[track_i].rt_usage.ru_energy_nj;
	}
	return total_nj;
#endif // RECOUNT_ENERGY
}
630 
631 void
recount_current_task_usage(struct recount_usage * usage)632 recount_current_task_usage(struct recount_usage *usage)
633 {
634 	task_t task = current_task();
635 	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
636 	recount_sum(&recount_task_plan, tracks, usage);
637 	_fix_time_precision(usage);
638 }
639 
640 void
recount_current_task_usage_perf_only(struct recount_usage * usage,struct recount_usage * usage_perf_only)641 recount_current_task_usage_perf_only(struct recount_usage *usage,
642     struct recount_usage *usage_perf_only)
643 {
644 	task_t task = current_task();
645 	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
646 	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
647 	    tracks, RCT_CPU_PERFORMANCE, usage, usage_perf_only);
648 	_fix_time_precision(usage);
649 	_fix_time_precision(usage_perf_only);
650 }
651 
652 void
recount_task_times_perf_only(struct task * task,struct recount_times_mach * sum,struct recount_times_mach * sum_perf_only)653 recount_task_times_perf_only(struct task *task,
654     struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only)
655 {
656 	const recount_topo_t topo = recount_task_plan.rpl_topo;
657 	const size_t topo_count = recount_topo_count(topo);
658 	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
659 	for (size_t i = 0; i < topo_count; i++) {
660 		struct recount_usage *usage = &tracks[i].rt_usage;
661 		_times_add_usage(sum, usage);
662 		if (recount_topo_matches_cpu_kind(topo, RCT_CPU_PERFORMANCE, i)) {
663 			_times_add_usage(sum_perf_only, usage);
664 		}
665 	}
666 }
667 
668 void
recount_task_terminated_usage(task_t task,struct recount_usage * usage)669 recount_task_terminated_usage(task_t task, struct recount_usage *usage)
670 {
671 	recount_sum_usage(&recount_task_terminated_plan,
672 	    task->tk_recount.rtk_terminated, usage);
673 	_fix_time_precision(usage);
674 }
675 
676 struct recount_times_mach
recount_task_terminated_times(struct task * task)677 recount_task_terminated_times(struct task *task)
678 {
679 	size_t topo_count = recount_topo_count(recount_task_terminated_plan.rpl_topo);
680 	struct recount_times_mach times = { 0 };
681 	for (size_t i = 0; i < topo_count; i++) {
682 		_times_add_usage(&times, &task->tk_recount.rtk_terminated[i]);
683 	}
684 	return times;
685 }
686 
687 void
recount_task_terminated_usage_perf_only(task_t task,struct recount_usage * usage,struct recount_usage * perf_only)688 recount_task_terminated_usage_perf_only(task_t task,
689     struct recount_usage *usage, struct recount_usage *perf_only)
690 {
691 	recount_sum_usage_and_isolate_cpu_kind(&recount_task_terminated_plan,
692 	    task->tk_recount.rtk_terminated, RCT_CPU_PERFORMANCE, usage, perf_only);
693 	_fix_time_precision(usage);
694 	_fix_time_precision(perf_only);
695 }
696 
697 void
recount_task_usage_perf_only(task_t task,struct recount_usage * sum,struct recount_usage * sum_perf_only)698 recount_task_usage_perf_only(task_t task, struct recount_usage *sum,
699     struct recount_usage *sum_perf_only)
700 {
701 	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
702 	    task->tk_recount.rtk_lifetime, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
703 	_fix_time_precision(sum);
704 	_fix_time_precision(sum_perf_only);
705 }
706 
707 void
recount_task_usage(task_t task,struct recount_usage * usage)708 recount_task_usage(task_t task, struct recount_usage *usage)
709 {
710 	recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, usage);
711 	_fix_time_precision(usage);
712 }
713 
714 struct recount_times_mach
recount_task_times(struct task * task)715 recount_task_times(struct task *task)
716 {
717 	size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
718 	struct recount_times_mach times = { 0 };
719 	for (size_t i = 0; i < topo_count; i++) {
720 		_times_add_usage(&times, &task->tk_recount.rtk_lifetime[i].rt_usage);
721 	}
722 	return times;
723 }
724 
// Return the energy recorded across all of `task`'s lifetime tracks, or 0 when
// RECOUNT_ENERGY is not configured.
uint64_t
recount_task_energy_nj(struct task *task)
{
#if !RECOUNT_ENERGY
#pragma unused(task)
	return 0;
#else // !RECOUNT_ENERGY
	const size_t track_count = recount_topo_count(recount_task_plan.rpl_topo);
	uint64_t total_nj = 0;
	for (size_t track_i = 0; track_i < track_count; track_i++) {
		const struct recount_track *track =
		    &task->tk_recount.rtk_lifetime[track_i];
		total_nj += track->rt_usage.ru_energy_nj;
	}
	return total_nj;
#endif // RECOUNT_ENERGY
}
740 
741 void
recount_coalition_usage_perf_only(struct recount_coalition * coal,struct recount_usage * sum,struct recount_usage * sum_perf_only)742 recount_coalition_usage_perf_only(struct recount_coalition *coal,
743     struct recount_usage *sum, struct recount_usage *sum_perf_only)
744 {
745 	recount_sum_usage_and_isolate_cpu_kind(&recount_coalition_plan,
746 	    coal->rco_exited, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
747 	_fix_time_precision(sum);
748 	_fix_time_precision(sum_perf_only);
749 }
750 
751 OS_ALWAYS_INLINE
752 static void
recount_absorb_snap(struct recount_snap * to_add,thread_t thread,task_t task,processor_t processor,bool from_user)753 recount_absorb_snap(struct recount_snap *to_add, thread_t thread, task_t task,
754     processor_t processor, bool from_user)
755 {
756 	// Idle threads do not attribute their usage back to the task or processor,
757 	// as the time is not spent "running."
758 	//
759 	// The processor-level metrics include idle time, instead, as the idle time
760 	// needs to be read as up-to-date from `recount_processor_usage`.
761 
762 	bool was_idle = (thread->options & TH_OPT_IDLE_THREAD) != 0;
763 	struct recount_track *wi_tracks_array = work_interval_get_recount_tracks(thread->th_work_interval);
764 	bool collect_work_interval_telemetry = wi_tracks_array != NULL;
765 
766 	struct recount_track *th_track = recount_update_start(
767 		thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
768 		processor);
769 	struct recount_track *wi_track =
770 	    (was_idle || !collect_work_interval_telemetry) ? NULL : recount_update_start(
771 		wi_tracks_array,
772 		recount_work_interval_plan.rpl_topo,
773 		processor);
774 	struct recount_track *tk_track = was_idle ? NULL : recount_update_start(
775 		task->tk_recount.rtk_lifetime, recount_task_plan.rpl_topo,
776 		processor);
777 	struct recount_track *pr_track = was_idle ? NULL : recount_update_start(
778 		&processor->pr_recount.rpr_active, recount_processor_plan.rpl_topo,
779 		processor);
780 	recount_update_commit();
781 
782 	uint64_t *th_time = NULL, *wi_time = NULL, *tk_time = NULL, *pr_time = NULL;
783 	if (from_user) {
784 		th_time = &th_track->rt_usage.ru_user_time_mach;
785 		wi_time = &wi_track->rt_usage.ru_user_time_mach;
786 		tk_time = &tk_track->rt_usage.ru_user_time_mach;
787 		pr_time = &pr_track->rt_usage.ru_user_time_mach;
788 	} else {
789 		th_time = &th_track->rt_usage.ru_system_time_mach;
790 		wi_time = &wi_track->rt_usage.ru_system_time_mach;
791 		tk_time = &tk_track->rt_usage.ru_system_time_mach;
792 		pr_time = &pr_track->rt_usage.ru_system_time_mach;
793 	}
794 
795 	recount_usage_add_snap(&th_track->rt_usage, th_time, to_add);
796 	if (!was_idle) {
797 		if (collect_work_interval_telemetry) {
798 			recount_usage_add_snap(&wi_track->rt_usage, wi_time, to_add);
799 		}
800 		recount_usage_add_snap(&tk_track->rt_usage, tk_time, to_add);
801 		recount_usage_add_snap(&pr_track->rt_usage, pr_time, to_add);
802 	}
803 
804 	recount_update_commit();
805 	recount_update_end(th_track);
806 	if (!was_idle) {
807 		if (collect_work_interval_telemetry) {
808 			recount_update_end(wi_track);
809 		}
810 		recount_update_end(tk_track);
811 		recount_update_end(pr_track);
812 	}
813 }
814 
815 void
recount_switch_thread(struct recount_snap * cur,struct thread * off_thread,struct task * off_task)816 recount_switch_thread(struct recount_snap *cur, struct thread *off_thread,
817     struct task *off_task)
818 {
819 	assert(ml_get_interrupts_enabled() == FALSE);
820 
821 	if (__improbable(!recount_started)) {
822 		return;
823 	}
824 
825 	processor_t processor = current_processor();
826 
827 	struct recount_snap *last = recount_get_snap(processor);
828 	struct recount_snap diff = { 0 };
829 	recount_snap_diff(&diff, cur, last);
830 	recount_absorb_snap(&diff, off_thread, off_task, processor, false);
831 	recount_update_snap(cur);
832 }
833 
// Add `energy_nj` to the energy counter of `off_thread`'s lifetime track and,
// unless `off_thread` is an idle thread, to the task and processor tracks as
// well.  The thread's work interval track is also updated when
// `work_interval_get_recount_tracks` returns a non-NULL array.
//
// Compiles to a no-op unless RECOUNT_ENERGY is set.  Interrupts must be
// disabled (asserted), and nothing is recorded until `recount_started` is set.
void
recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj)
{
#if RECOUNT_ENERGY
	assert(ml_get_interrupts_enabled() == FALSE);
	if (__improbable(!recount_started)) {
		return;
	}

	const bool idle_thread = (off_thread->options & TH_OPT_IDLE_THREAD) != 0;
	struct recount_track *interval_tracks =
	    work_interval_get_recount_tracks(off_thread->th_work_interval);
	const bool have_interval = interval_tracks != NULL;
	processor_t cpu = current_processor();

	// Look up every track that will be charged before touching any of them.
	struct recount_track *thread_track = recount_update_single_start(
		off_thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
		cpu);
	struct recount_track *interval_track = NULL;
	struct recount_track *task_track = NULL;
	struct recount_track *cpu_track = NULL;
	if (!idle_thread) {
		if (have_interval) {
			interval_track = recount_update_single_start(interval_tracks,
			    recount_work_interval_plan.rpl_topo, cpu);
		}
		task_track = recount_update_single_start(
			off_task->tk_recount.rtk_lifetime, recount_task_plan.rpl_topo,
			cpu);
		cpu_track = recount_update_single_start(
			&cpu->pr_recount.rpr_active, recount_processor_plan.rpl_topo,
			cpu);
	}

	// The thread itself is always charged, even when it is an idle thread.
	thread_track->rt_usage.ru_energy_nj += energy_nj;
	if (idle_thread) {
		return;
	}

	if (have_interval) {
		interval_track->rt_usage.ru_energy_nj += energy_nj;
	}
	task_track->rt_usage.ru_energy_nj += energy_nj;
	cpu_track->rt_usage.ru_energy_nj += energy_nj;
#else // RECOUNT_ENERGY
#pragma unused(off_thread, off_task, energy_nj)
#endif // !RECOUNT_ENERGY
}
874 
// kdebug event IDs in the DBG_MONOTONIC class, emitted by
// `recount_log_switch_thread` and `recount_log_switch_thread_on` with a
// snapshot's instruction and cycle counts.
#define MT_KDBG_IC_CPU_CSWITCH \
	KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES, 1)

#define MT_KDBG_IC_CPU_CSWITCH_ON \
	KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES_ON_CPU, 1)
880 
// Emit the instruction and cycle counts in `snap` as an
// MT_KDBG_IC_CPU_CSWITCH kdebug event, but only when that event ID has been
// explicitly enabled.  Does nothing unless CONFIG_PERVASIVE_CPI is set.
void
recount_log_switch_thread(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (!kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH)) {
		return;
	}

	// The event lives in Monotonic's hierarchy for backwards-compatibility.
	KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH, snap->rsn_insns, snap->rsn_cycles);
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}
893 
// Emit instruction and cycle counts as an MT_KDBG_IC_CPU_CSWITCH_ON kdebug
// event, but only when that event ID has been explicitly enabled.
//
// `snap` may be NULL, in which case the snapshot returned by
// `recount_get_snap` for the current processor is logged instead.  Does
// nothing unless CONFIG_PERVASIVE_CPI is set.
void
recount_log_switch_thread_on(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (!kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH_ON)) {
		return;
	}

	const struct recount_snap *counts = snap;
	if (counts == NULL) {
		counts = recount_get_snap(current_processor());
	}

	// The event lives in Monotonic's hierarchy for backwards-compatibility.
	KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH_ON, counts->rsn_insns,
	    counts->rsn_cycles);
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}
909 
910 OS_ALWAYS_INLINE
911 PRECISE_TIME_ONLY_FUNC
912 static void
recount_precise_transition_diff(struct recount_snap * diff,struct recount_snap * last,struct recount_snap * cur)913 recount_precise_transition_diff(struct recount_snap *diff,
914     struct recount_snap *last, struct recount_snap *cur)
915 {
916 #if PRECISE_USER_KERNEL_PMCS
917 #if PRECISE_USER_KERNEL_PMC_TUNABLE
918 	// The full `recount_snapshot_speculative` shouldn't get PMCs with a tunable
919 	// in this configuration.
920 	if (__improbable(no_precise_pmcs)) {
921 		cur->rsn_time_mach = recount_timestamp_speculative();
922 		diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
923 	} else
924 #endif // PRECISE_USER_KERNEL_PMC_TUNABLE
925 	{
926 		recount_snapshot_speculative(cur);
927 		recount_snap_diff(diff, cur, last);
928 	}
929 #else // PRECISE_USER_KERNEL_PMCS
930 	cur->rsn_time_mach = recount_timestamp_speculative();
931 	diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
932 #endif // !PRECISE_USER_KERNEL_PMCS
933 }
934 
935 /// Called when entering or exiting the kernel to maintain system vs. user counts, extremely performance sensitive.
936 ///
937 /// Must be called with interrupts disabled.
938 ///
939 /// - Parameter from_user: Whether the kernel is being entered from user space.
940 ///
941 /// - Returns: The value of Mach time that was sampled inside this function.
942 PRECISE_TIME_FATAL_FUNC
943 static uint64_t
recount_kernel_transition(bool from_user)944 recount_kernel_transition(bool from_user)
945 {
946 #if PRECISE_USER_KERNEL_TIME
947 	// Omit interrupts-disabled assertion for performance reasons.
948 	processor_t processor = current_processor();
949 	thread_t thread = processor->active_thread;
950 	task_t task = get_thread_ro_unchecked(thread)->tro_task;
951 
952 	struct recount_snap *last = recount_get_snap(processor);
953 	struct recount_snap diff = { 0 };
954 	struct recount_snap cur = { 0 };
955 	recount_precise_transition_diff(&diff, last, &cur);
956 	recount_absorb_snap(&diff, thread, task, processor, from_user);
957 	recount_update_snap(&cur);
958 
959 	return cur.rsn_time_mach;
960 #else // PRECISE_USER_KERNEL_TIME
961 #pragma unused(from_user)
962 	panic("recount: kernel transition called with precise time off");
963 #endif // !PRECISE_USER_KERNEL_TIME
964 }
965 
966 PRECISE_TIME_FATAL_FUNC
967 void
recount_leave_user(void)968 recount_leave_user(void)
969 {
970 	recount_kernel_transition(true);
971 }
972 
973 PRECISE_TIME_FATAL_FUNC
974 void
recount_enter_user(void)975 recount_enter_user(void)
976 {
977 	recount_kernel_transition(false);
978 }
979 
980 #if __x86_64__
981 
982 void
recount_enter_intel_interrupt(x86_saved_state_t * state)983 recount_enter_intel_interrupt(x86_saved_state_t *state)
984 {
985 	// The low bits of `%cs` being set indicate interrupt was delivered while
986 	// executing in user space.
987 	bool from_user = (is_saved_state64(state) ? state->ss_64.isf.cs :
988 	    state->ss_32.cs) & 0x03;
989 	uint64_t timestamp = recount_kernel_transition(from_user);
990 	current_cpu_datap()->cpu_int_event_time = timestamp;
991 }
992 
993 void
recount_leave_intel_interrupt(void)994 recount_leave_intel_interrupt(void)
995 {
996 	// XXX This is not actually entering user space, but it does update the
997 	//     system timer, which is desirable.
998 	recount_enter_user();
999 	current_cpu_datap()->cpu_int_event_time = 0;
1000 }
1001 
1002 #endif // __x86_64__
1003 
// Flag kept in the top bit of `rpr_state_last_abs_time` while the processor
// is idle; the remaining bits hold a Mach timestamp.
#define RCT_PR_IDLING (1ULL << 63)
1006 
1007 void
recount_processor_idle(struct recount_processor * pr,struct recount_snap * snap)1008 recount_processor_idle(struct recount_processor *pr, struct recount_snap *snap)
1009 {
1010 	__assert_only uint64_t state_time = os_atomic_load_wide(
1011 		&pr->rpr_state_last_abs_time, relaxed);
1012 	assert((state_time & RCT_PR_IDLING) == 0);
1013 	assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
1014 	uint64_t new_state_stamp = RCT_PR_IDLING | snap->rsn_time_mach;
1015 	os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
1016 	    relaxed);
1017 }
1018 
1019 OS_PURE OS_ALWAYS_INLINE
1020 static inline uint64_t
_state_time(uint64_t state_stamp)1021 _state_time(uint64_t state_stamp)
1022 {
1023 	return state_stamp & ~(RCT_PR_IDLING);
1024 }
1025 
1026 void
recount_processor_init(processor_t processor)1027 recount_processor_init(processor_t processor)
1028 {
1029 #if __AMP__
1030 	processor->pr_recount.rpr_cpu_kind_index =
1031 	    processor->processor_set->pset_cluster_type == PSET_AMP_P ? 1 : 0;
1032 #else // __AMP__
1033 #pragma unused(processor)
1034 #endif // !__AMP__
1035 }
1036 
1037 void
recount_processor_run(struct recount_processor * pr,struct recount_snap * snap)1038 recount_processor_run(struct recount_processor *pr, struct recount_snap *snap)
1039 {
1040 	uint64_t state = os_atomic_load_wide(&pr->rpr_state_last_abs_time, relaxed);
1041 	assert(state == 0 || (state & RCT_PR_IDLING) == RCT_PR_IDLING);
1042 	assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
1043 	uint64_t new_state_stamp = snap->rsn_time_mach;
1044 	pr->rpr_idle_time_mach += snap->rsn_time_mach - _state_time(state);
1045 	os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
1046 	    relaxed);
1047 }
1048 
1049 void
recount_processor_usage(struct recount_processor * pr,struct recount_usage * usage,uint64_t * idle_time_out)1050 recount_processor_usage(struct recount_processor *pr,
1051     struct recount_usage *usage, uint64_t *idle_time_out)
1052 {
1053 	recount_sum(&recount_processor_plan, &pr->rpr_active, usage);
1054 	_fix_time_precision(usage);
1055 
1056 	uint64_t idle_time = pr->rpr_idle_time_mach;
1057 	uint64_t idle_stamp = os_atomic_load_wide(&pr->rpr_state_last_abs_time,
1058 	    relaxed);
1059 	bool idle = (idle_stamp & RCT_PR_IDLING) == RCT_PR_IDLING;
1060 	if (idle) {
1061 		// Since processors can idle for some time without an update, make sure
1062 		// the idle time is up-to-date with respect to the caller.
1063 		idle_time += mach_absolute_time() - _state_time(idle_stamp);
1064 	}
1065 	*idle_time_out = idle_time;
1066 }
1067 
1068 bool
recount_task_thread_perf_level_usage(struct task * task,uint64_t tid,struct recount_usage * usage_levels)1069 recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
1070     struct recount_usage *usage_levels)
1071 {
1072 	thread_t thread = task_findtid(task, tid);
1073 	if (thread != THREAD_NULL) {
1074 		if (thread == current_thread()) {
1075 			boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
1076 			recount_current_thread_perf_level_usage(usage_levels);
1077 			ml_set_interrupts_enabled(interrupt_state);
1078 		} else {
1079 			recount_thread_perf_level_usage(thread, usage_levels);
1080 		}
1081 	}
1082 	return thread != THREAD_NULL;
1083 }
1084 
1085 #pragma mark - utilities
1086 
1087 // For rolling up counts, convert an index from one topography to another.
1088 static size_t
recount_convert_topo_index(recount_topo_t from,recount_topo_t to,size_t i)1089 recount_convert_topo_index(recount_topo_t from, recount_topo_t to, size_t i)
1090 {
1091 	if (from == to) {
1092 		return i;
1093 	} else if (to == RCT_TOPO_SYSTEM) {
1094 		return 0;
1095 	} else if (from == RCT_TOPO_CPU) {
1096 		assertf(to == RCT_TOPO_CPU_KIND,
1097 		    "recount: cannot convert from CPU topography to %d", to);
1098 		return _topo_cpu_kinds[i];
1099 	} else {
1100 		panic("recount: unexpected rollup request from %d to %d", from, to);
1101 	}
1102 }
1103 
1104 // Get the track index of the provided processor and topography.
1105 OS_ALWAYS_INLINE
1106 static size_t
recount_topo_index(recount_topo_t topo,processor_t processor)1107 recount_topo_index(recount_topo_t topo, processor_t processor)
1108 {
1109 	switch (topo) {
1110 	case RCT_TOPO_SYSTEM:
1111 		return 0;
1112 	case RCT_TOPO_CPU:
1113 		return processor->cpu_id;
1114 	case RCT_TOPO_CPU_KIND:
1115 #if __AMP__
1116 		return processor->pr_recount.rpr_cpu_kind_index;
1117 #else // __AMP__
1118 		return 0;
1119 #endif // !__AMP__
1120 	default:
1121 		panic("recount: invalid topology %u to index", topo);
1122 	}
1123 }
1124 
1125 // Return the number of tracks needed for a given topography.
1126 size_t
recount_topo_count(recount_topo_t topo)1127 recount_topo_count(recount_topo_t topo)
1128 {
1129 	// Allow the compiler to reason about at least the system and CPU kind
1130 	// counts.
1131 	switch (topo) {
1132 	case RCT_TOPO_SYSTEM:
1133 		return 1;
1134 
1135 	case RCT_TOPO_CPU_KIND:
1136 #if __AMP__
1137 		return 2;
1138 #else // __AMP__
1139 		return 1;
1140 #endif // !__AMP__
1141 
1142 	case RCT_TOPO_CPU:
1143 #if __arm__ || __arm64__
1144 		return ml_get_cpu_count();
1145 #else // __arm__ || __arm64__
1146 		return ml_early_cpu_max_number() + 1;
1147 #endif // !__arm__ && !__arm64__
1148 
1149 	default:
1150 		panic("recount: invalid topography %d", topo);
1151 	}
1152 }
1153 
1154 static bool
recount_topo_matches_cpu_kind(recount_topo_t topo,recount_cpu_kind_t kind,size_t idx)1155 recount_topo_matches_cpu_kind(recount_topo_t topo, recount_cpu_kind_t kind,
1156     size_t idx)
1157 {
1158 #if !__AMP__
1159 #pragma unused(kind, idx)
1160 #endif // !__AMP__
1161 	switch (topo) {
1162 	case RCT_TOPO_SYSTEM:
1163 		return true;
1164 
1165 	case RCT_TOPO_CPU_KIND:
1166 #if __AMP__
1167 		return kind == idx;
1168 #else // __AMP__
1169 		return false;
1170 #endif // !__AMP__
1171 
1172 	case RCT_TOPO_CPU: {
1173 #if __AMP__
1174 		return _topo_cpu_kinds[idx] == kind;
1175 #else // __AMP__
1176 		return false;
1177 #endif // !__AMP__
1178 	}
1179 
1180 	default:
1181 		panic("recount: unexpected topography %d", topo);
1182 	}
1183 }
1184 
1185 struct recount_track *
recount_tracks_create(recount_plan_t plan)1186 recount_tracks_create(recount_plan_t plan)
1187 {
1188 	return kalloc_type_tag(struct recount_track,
1189 	           recount_topo_count(plan->rpl_topo), Z_WAITOK | Z_ZERO | Z_NOFAIL,
1190 	           VM_KERN_MEMORY_RECOUNT);
1191 }
1192 
1193 static void
recount_tracks_copy(recount_plan_t plan,struct recount_track * dst,struct recount_track * src)1194 recount_tracks_copy(recount_plan_t plan, struct recount_track *dst,
1195     struct recount_track *src)
1196 {
1197 	size_t topo_count = recount_topo_count(plan->rpl_topo);
1198 	for (size_t i = 0; i < topo_count; i++) {
1199 		recount_read_track(&dst[i].rt_usage, &src[i]);
1200 	}
1201 }
1202 
1203 void
recount_tracks_destroy(recount_plan_t plan,struct recount_track * tracks)1204 recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks)
1205 {
1206 	kfree_type(struct recount_track, recount_topo_count(plan->rpl_topo),
1207 	    tracks);
1208 }
1209 
1210 void
recount_thread_init(struct recount_thread * th)1211 recount_thread_init(struct recount_thread *th)
1212 {
1213 	th->rth_lifetime = recount_tracks_create(&recount_thread_plan);
1214 }
1215 
1216 void
recount_thread_copy(struct recount_thread * dst,struct recount_thread * src)1217 recount_thread_copy(struct recount_thread *dst, struct recount_thread *src)
1218 {
1219 	recount_tracks_copy(&recount_thread_plan, dst->rth_lifetime,
1220 	    src->rth_lifetime);
1221 }
1222 
1223 void
recount_task_copy(struct recount_task * dst,const struct recount_task * src)1224 recount_task_copy(struct recount_task *dst, const struct recount_task *src)
1225 {
1226 	recount_tracks_copy(&recount_task_plan, dst->rtk_lifetime,
1227 	    src->rtk_lifetime);
1228 }
1229 
1230 void
recount_thread_deinit(struct recount_thread * th)1231 recount_thread_deinit(struct recount_thread *th)
1232 {
1233 	recount_tracks_destroy(&recount_thread_plan, th->rth_lifetime);
1234 }
1235 
1236 void
recount_task_init(struct recount_task * tk)1237 recount_task_init(struct recount_task *tk)
1238 {
1239 	tk->rtk_lifetime = recount_tracks_create(&recount_task_plan);
1240 	tk->rtk_terminated = recount_usage_alloc(
1241 		recount_task_terminated_plan.rpl_topo);
1242 }
1243 
1244 void
recount_task_deinit(struct recount_task * tk)1245 recount_task_deinit(struct recount_task *tk)
1246 {
1247 	recount_tracks_destroy(&recount_task_plan, tk->rtk_lifetime);
1248 	recount_usage_free(recount_task_terminated_plan.rpl_topo,
1249 	    tk->rtk_terminated);
1250 }
1251 
1252 void
recount_coalition_init(struct recount_coalition * co)1253 recount_coalition_init(struct recount_coalition *co)
1254 {
1255 	co->rco_exited = recount_usage_alloc(recount_coalition_plan.rpl_topo);
1256 }
1257 
1258 void
recount_coalition_deinit(struct recount_coalition * co)1259 recount_coalition_deinit(struct recount_coalition *co)
1260 {
1261 	recount_usage_free(recount_coalition_plan.rpl_topo, co->rco_exited);
1262 }
1263 
1264 void
recount_work_interval_init(struct recount_work_interval * wi)1265 recount_work_interval_init(struct recount_work_interval *wi)
1266 {
1267 	wi->rwi_current_instance = recount_tracks_create(&recount_work_interval_plan);
1268 }
1269 
1270 void
recount_work_interval_deinit(struct recount_work_interval * wi)1271 recount_work_interval_deinit(struct recount_work_interval *wi)
1272 {
1273 	recount_tracks_destroy(&recount_work_interval_plan, wi->rwi_current_instance);
1274 }
1275 
1276 struct recount_usage *
recount_usage_alloc(recount_topo_t topo)1277 recount_usage_alloc(recount_topo_t topo)
1278 {
1279 	return kalloc_type_tag(struct recount_usage, recount_topo_count(topo),
1280 	           Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT);
1281 }
1282 
1283 void
recount_usage_free(recount_topo_t topo,struct recount_usage * usage)1284 recount_usage_free(recount_topo_t topo, struct recount_usage *usage)
1285 {
1286 	kfree_type(struct recount_usage, recount_topo_count(topo),
1287 	    usage);
1288 }
1289