xref: /xnu-10002.81.5/osfmk/kern/recount.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 // Copyright (c) 2021 Apple Inc.  All rights reserved.
2 //
3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 //
5 // This file contains Original Code and/or Modifications of Original Code
6 // as defined in and that are subject to the Apple Public Source License
7 // Version 2.0 (the 'License'). You may not use this file except in
8 // compliance with the License. The rights granted to you under the License
9 // may not be used to create, or enable the creation or redistribution of,
10 // unlawful or unlicensed copies of an Apple operating system, or to
11 // circumvent, violate, or enable the circumvention or violation of, any
12 // terms of an Apple operating system software license agreement.
13 //
14 // Please obtain a copy of the License at
15 // http://www.opensource.apple.com/apsl/ and read it before using this file.
16 //
17 // The Original Code and all software distributed under the License are
18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 // Please see the License for the specific language governing rights and
23 // limitations under the License.
24 //
25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26 
27 #include <kern/assert.h>
28 #include <kern/kalloc.h>
29 #include <pexpert/pexpert.h>
30 #include <sys/kdebug.h>
31 #include <sys/_types/_size_t.h>
32 #if MONOTONIC
33 #include <kern/monotonic.h>
34 #endif // MONOTONIC
35 #include <kern/percpu.h>
36 #include <kern/processor.h>
37 #include <kern/recount.h>
38 #include <kern/startup.h>
39 #include <kern/task.h>
40 #include <kern/thread.h>
41 #include <kern/work_interval.h>
42 #include <mach/mach_time.h>
43 #include <mach/mach_types.h>
44 #include <machine/config.h>
45 #include <machine/machine_routines.h>
46 #include <os/atomic_private.h>
47 #include <stdbool.h>
48 #include <stdint.h>
49 
50 // Recount's machine-independent implementation and interfaces for the kernel
51 // at-large.
52 
53 #define PRECISE_USER_KERNEL_PMCS PRECISE_USER_KERNEL_TIME
54 
55 // On non-release kernels, allow precise PMC (instructions, cycles) updates to
56 // be disabled for performance characterization.
57 #if PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)
58 #define PRECISE_USER_KERNEL_PMC_TUNABLE 1
59 
60 TUNABLE(bool, no_precise_pmcs, "-no-precise-pmcs", false);
61 #endif // PRECISE_USER_KERNEL_PMCS
62 
63 #if !PRECISE_USER_KERNEL_TIME
64 #define PRECISE_TIME_FATAL_FUNC OS_NORETURN
65 #define PRECISE_TIME_ONLY_FUNC OS_UNUSED
66 #else // !PRECISE_USER_KERNEL_TIME
67 #define PRECISE_TIME_FATAL_FUNC
68 #define PRECISE_TIME_ONLY_FUNC
69 #endif // PRECISE_USER_KERNEL_TIME
70 
71 #if !PRECISE_USER_KERNEL_PMCS
72 #define PRECISE_PMCS_ONLY_FUNC OS_UNUSED
#else // !PRECISE_USER_KERNEL_PMCS
74 #define PRECISE_PMCS_ONLY_FUNC
75 #endif // PRECISE_USER_KERNEL_PMCS
76 
77 #if HAS_CPU_DPE_COUNTER
78 // Only certain platforms have DPE counters.
79 #define RECOUNT_ENERGY CONFIG_PERVASIVE_ENERGY
80 #else // HAS_CPU_DPE_COUNTER
81 #define RECOUNT_ENERGY 0
82 #endif // !HAS_CPU_DPE_COUNTER
83 
// Topography helpers.
size_t recount_topo_count(recount_topo_t topo);
static bool recount_topo_matches_cpu_kind(recount_topo_t topo,
    recount_cpu_kind_t kind, size_t idx);
static size_t recount_topo_index(recount_topo_t topo, processor_t processor);
static size_t recount_convert_topo_index(recount_topo_t from, recount_topo_t to,
    size_t i);

// Prevent counter updates before the system is ready.
__security_const_late bool recount_started = false;

// Lookup table that matches CPU numbers (indices) to their track index.
// Populated during startup on AMP systems (see recount_startup).
__security_const_late uint8_t _topo_cpu_kinds[MAX_CPUS] = { 0 };
97 
// One-time initialization: record each CPU's kind for topography lookups and
// then allow counter updates.
__startup_func
static void
recount_startup(void)
{
#if __AMP__
	// On asymmetric systems, note whether each CPU is in a performance or
	// efficiency cluster so a CPU number can be mapped to its track index.
	unsigned int cpu_count = ml_get_cpu_count();
	const ml_topology_info_t *topo_info = ml_get_topology_info();
	for (unsigned int i = 0; i < cpu_count; i++) {
		cluster_type_t type = topo_info->cpus[i].cluster_type;
		uint8_t cluster_i = (type == CLUSTER_TYPE_P) ? RCT_CPU_PERFORMANCE :
		    RCT_CPU_EFFICIENCY;
		_topo_cpu_kinds[i] = cluster_i;
	}
#endif // __AMP__

	// From here on, scheduler paths may update counters (checked by
	// recount_switch_thread).
	recount_started = true;
}

STARTUP(PERCPU, STARTUP_RANK_LAST, recount_startup);
117 
#pragma mark - tracks

// Counting plans for each accounting domain, naming the topography their
// tracks are indexed by.
RECOUNT_PLAN_DEFINE(recount_thread_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_work_interval_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_terminated_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_coalition_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_processor_plan, RCT_TOPO_SYSTEM);
126 
// Read the current timebase without a serializing barrier; callers that need
// a precise boundary must issue one first (see recount_snapshot).
OS_ALWAYS_INLINE
static inline uint64_t
recount_timestamp_speculative(void)
{
#if __arm__ || __arm64__
	return ml_get_speculative_timebase();
#else // __arm__ || __arm64__
	return mach_absolute_time();
#endif // !__arm__ && !__arm64__
}
137 
// Fill `snap` with the current time and (when CPI counting is configured)
// cycle/instruction counts, without any serializing barrier.
OS_ALWAYS_INLINE
void
recount_snapshot_speculative(struct recount_snap *snap)
{
	snap->rsn_time_mach = recount_timestamp_speculative();
#if CONFIG_PERVASIVE_CPI
	mt_cur_cpu_cycles_instrs_speculative(&snap->rsn_cycles, &snap->rsn_insns);
#endif // CONFIG_PERVASIVE_CPI
}
147 
// Fill `snap` with current counter values, issuing an instruction barrier on
// ARM first so the counter reads cannot be hoisted by speculation.
void
recount_snapshot(struct recount_snap *snap)
{
#if __arm__ || __arm64__
	__builtin_arm_isb(ISB_SY);
#endif // __arm__ || __arm64__
	recount_snapshot_speculative(snap);
}
156 
// Return the processor's last scheduler snapshot.
static struct recount_snap *
recount_get_snap(processor_t processor)
{
	return &processor->pr_recount.rpr_snap;
}
162 
// Return the processor's snapshot taken at interrupt entry.
static struct recount_snap *
recount_get_interrupt_snap(processor_t processor)
{
	return &processor->pr_recount.rpr_interrupt_snap;
}
168 
169 // A simple sequence lock implementation.
170 
// Reader slow path: the generation was odd (a writer is mid-update), so spin
// until the generation becomes even again.
static void
_seqlock_shared_lock_slowpath(const uint32_t *lck, uint32_t gen)
{
	// Bound the spin: don't let a context switch stretch the wait.
	disable_preemption();
	do {
		gen = hw_wait_while_equals32((uint32_t *)(uintptr_t)lck, gen);
	} while (__improbable((gen & 1) != 0));
	// Order subsequent reads of the protected data after the generation load.
	os_atomic_thread_fence(acquire);
	enable_preemption();
}
181 
// Begin a read-side critical section, returning the observed generation so the
// caller can later detect whether a writer intervened.
static uintptr_t
_seqlock_shared_lock(const uint32_t *lck)
{
	uint32_t gen = os_atomic_load(lck, acquire);
	// An odd generation means a writer is active: wait it out.
	// NOTE(review): the stale odd `gen` is still returned after the slow path,
	// so the caller's retry loop re-enters once more — appears benign; confirm.
	if (__improbable((gen & 1) != 0)) {
		_seqlock_shared_lock_slowpath(lck, gen);
	}
	return gen;
}
191 
// End a read-side critical section: succeeds only if the generation is
// unchanged since `_seqlock_shared_lock`, i.e. no writer ran in between.
static bool
_seqlock_shared_try_unlock(const uint32_t *lck, uintptr_t on_enter)
{
	return os_atomic_load(lck, acquire) == on_enter;
}
197 
198 static void
_seqlock_excl_lock_relaxed(uint32_t * lck)199 _seqlock_excl_lock_relaxed(uint32_t *lck)
200 {
201 	__assert_only uintptr_t new = os_atomic_inc(lck, relaxed);
202 	assert3u((new & 1), ==, 1);
203 }
204 
// Publish the writer's updates: order all prior stores before the closing
// generation increment.
static void
_seqlock_excl_commit(void)
{
	os_atomic_thread_fence(release);
}
210 
// Leave the exclusive side: increment the generation back to even so readers
// can once again observe a consistent snapshot.
static void
_seqlock_excl_unlock_relaxed(uint32_t *lck)
{
	__assert_only uint32_t new = os_atomic_inc(lck, relaxed);
	assert3u((new & 1), ==, 0);
}
217 
// Begin a writer-side update on the track for `processor` under `topo`,
// leaving its sequence lock in the odd (update-in-progress) state.
static struct recount_track *
recount_update_start(struct recount_track *tracks, recount_topo_t topo,
    processor_t processor)
{
	struct recount_track *track = &tracks[recount_topo_index(topo, processor)];
	_seqlock_excl_lock_relaxed(&track->rt_sync);
	return track;
}
226 
227 #if RECOUNT_ENERGY
228 
// Locate the track for `processor` without taking its sequence lock — used
// for single-field (energy) updates.
static struct recount_track *
recount_update_single_start(struct recount_track *tracks, recount_topo_t topo,
    processor_t processor)
{
	return &tracks[recount_topo_index(topo, processor)];
}
235 
236 #endif // RECOUNT_ENERGY
237 
// Publish any in-progress track updates (release fence).
static void
recount_update_commit(void)
{
	_seqlock_excl_commit();
}
243 
// Finish a writer-side update begun by recount_update_start().
static void
recount_update_end(struct recount_track *track)
{
	_seqlock_excl_unlock_relaxed(&track->rt_sync);
}
249 
// Begin reading `track`'s usage, recording the generation in `on_enter` for
// the paired recount_try_read_end().
static const struct recount_usage *
recount_read_start(const struct recount_track *track, uintptr_t *on_enter)
{
	const struct recount_usage *stats = &track->rt_usage;
	*on_enter = _seqlock_shared_lock(&track->rt_sync);
	return stats;
}
257 
// Check that the read begun by recount_read_start() was not interleaved with
// a writer; returns false if the caller must retry.
static bool
recount_try_read_end(const struct recount_track *track, uintptr_t on_enter)
{
	return _seqlock_shared_try_unlock(&track->rt_sync, on_enter);
}
263 
// Copy a consistent snapshot of `track`'s usage into `stats`, retrying until
// the copy was not torn by a concurrent writer.
static void
recount_read_track(struct recount_usage *stats,
    const struct recount_track *track)
{
	uintptr_t on_enter = 0;
	do {
		// The struct copy may race with a writer; the generation check
		// below rejects any torn read and retries.
		const struct recount_usage *vol_stats =
		    recount_read_start(track, &on_enter);
		*stats = *vol_stats;
	} while (!recount_try_read_end(track, on_enter));
}
275 
// Element-wise accumulate `to_add` into `sum`.
static void
recount_usage_add(struct recount_usage *sum, const struct recount_usage *to_add)
{
	sum->ru_user_time_mach += to_add->ru_user_time_mach;
	sum->ru_system_time_mach += to_add->ru_system_time_mach;
#if CONFIG_PERVASIVE_CPI
	sum->ru_cycles += to_add->ru_cycles;
	sum->ru_instructions += to_add->ru_instructions;
#endif // CONFIG_PERVASIVE_CPI
#if CONFIG_PERVASIVE_ENERGY
	sum->ru_energy_nj += to_add->ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
}
289 
// Add a snapshot's deltas into `usage`.  `add_time` points at the time field
// inside `usage` (user or system) that should absorb the snapshot's time.
OS_ALWAYS_INLINE
static inline void
recount_usage_add_snap(struct recount_usage *usage, uint64_t *add_time,
    struct recount_snap *snap)
{
	*add_time += snap->rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	usage->ru_cycles += snap->rsn_cycles;
	usage->ru_instructions += snap->rsn_insns;
#else // CONFIG_PERVASIVE_CPI
#pragma unused(usage)
#endif // !CONFIG_PERVASIVE_CPI
}
303 
// Read every track of `tracks` (laid out by `plan`'s topography) under its
// sequence lock and accumulate into `stats`, which is indexed by `to_topo`.
static void
recount_rollup(recount_plan_t plan, const struct recount_track *tracks,
    recount_topo_t to_topo, struct recount_usage *stats)
{
	recount_topo_t from_topo = plan->rpl_topo;
	size_t topo_count = recount_topo_count(from_topo);
	struct recount_usage tmp = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		// recount_read_track overwrites `tmp` entirely each iteration.
		recount_read_track(&tmp, &tracks[i]);
		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
		recount_usage_add(&stats[to_i], &tmp);
	}
}
317 
// This function must be run when counters cannot increment for the track, like
// from the current thread (it reads rt_usage directly, without the per-track
// sequence locks).
static void
recount_rollup_unsafe(recount_plan_t plan, struct recount_track *tracks,
    recount_topo_t to_topo, struct recount_usage *stats)
{
	recount_topo_t from_topo = plan->rpl_topo;
	size_t topo_count = recount_topo_count(from_topo);
	for (size_t i = 0; i < topo_count; i++) {
		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
		recount_usage_add(&stats[to_i], &tracks[i].rt_usage);
	}
}
330 
// Sum all of `tracks` into the single system-wide `sum`, reading each track
// under its sequence lock.
void
recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum)
{
	recount_rollup(plan, tracks, RCT_TOPO_SYSTEM, sum);
}
337 
338 void
recount_sum_unsafe(recount_plan_t plan,const struct recount_track * tracks,struct recount_usage * sum)339 recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
340     struct recount_usage *sum)
341 {
342 	recount_topo_t topo = plan->rpl_topo;
343 	size_t topo_count = recount_topo_count(topo);
344 	for (size_t i = 0; i < topo_count; i++) {
345 		recount_usage_add(sum, &tracks[i].rt_usage);
346 	}
347 }
348 
349 void
recount_sum_and_isolate_cpu_kind(recount_plan_t plan,struct recount_track * tracks,recount_cpu_kind_t kind,struct recount_usage * sum,struct recount_usage * only_kind)350 recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
351     struct recount_track *tracks, recount_cpu_kind_t kind,
352     struct recount_usage *sum, struct recount_usage *only_kind)
353 {
354 	size_t topo_count = recount_topo_count(plan->rpl_topo);
355 	struct recount_usage tmp = { 0 };
356 	for (size_t i = 0; i < topo_count; i++) {
357 		recount_read_track(&tmp, &tracks[i]);
358 		recount_usage_add(sum, &tmp);
359 		if (recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
360 			recount_usage_add(only_kind, &tmp);
361 		}
362 	}
363 }
364 
365 static void
recount_sum_usage(recount_plan_t plan,const struct recount_usage * usages,struct recount_usage * sum)366 recount_sum_usage(recount_plan_t plan, const struct recount_usage *usages,
367     struct recount_usage *sum)
368 {
369 	const size_t topo_count = recount_topo_count(plan->rpl_topo);
370 	for (size_t i = 0; i < topo_count; i++) {
371 		recount_usage_add(sum, &usages[i]);
372 	}
373 }
374 
// Accumulate a usage array into `sum`, also adding the elements matching
// `kind` into `only_kind` (which may be NULL).
void
recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_usage *usage, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind)
{
	const size_t topo_count = recount_topo_count(plan->rpl_topo);
	for (size_t i = 0; i < topo_count; i++) {
		recount_usage_add(sum, &usage[i]);
		if (only_kind && recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
			recount_usage_add(only_kind, &usage[i]);
		}
	}
}
388 
// Roll `tracks` up to one usage structure per CPU kind (perf level).
void
recount_sum_perf_levels(recount_plan_t plan, struct recount_track *tracks,
    struct recount_usage *sums)
{
	recount_rollup(plan, tracks, RCT_TOPO_CPU_KIND, sums);
}
395 
396 // Plan-specific helpers.
397 
// Fold a task's lifetime counters into its coalition's exited totals.
void
recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk)
{
	recount_rollup(&recount_task_plan, tk->rtk_lifetime,
	    recount_coalition_plan.rpl_topo, co->rco_exited);
}
405 
// Fold a thread's lifetime counters into its task's terminated totals.
void
recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th)
{
	recount_rollup(&recount_thread_plan, th->rth_lifetime,
	    recount_task_terminated_plan.rpl_topo, tk->rtk_terminated);
}
413 
414 #pragma mark - scheduler
415 
// `result = lhs - rhs` for snapshots.
OS_ALWAYS_INLINE
static void
recount_snap_diff(struct recount_snap *result,
    const struct recount_snap *lhs, const struct recount_snap *rhs)
{
	// Counters are expected to be monotonic; a smaller `lhs` would mean
	// the snapshots were taken out of order.
	assert3u(lhs->rsn_time_mach, >=, rhs->rsn_time_mach);
	result->rsn_time_mach = lhs->rsn_time_mach - rhs->rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	assert3u(lhs->rsn_insns, >=, rhs->rsn_insns);
	assert3u(lhs->rsn_cycles, >=, rhs->rsn_cycles);
	result->rsn_cycles = lhs->rsn_cycles - rhs->rsn_cycles;
	result->rsn_insns = lhs->rsn_insns - rhs->rsn_insns;
#endif // CONFIG_PERVASIVE_CPI
}
431 
// Normalize a usage structure for the configured time-precision policy: when
// user/kernel time is not tracked precisely, fold system time into user time.
static void
_fix_time_precision(struct recount_usage *usage)
{
#if PRECISE_USER_KERNEL_TIME
#pragma unused(usage)
#else // PRECISE_USER_KERNEL_TIME
	// Attribute all time to user, as the system is only acting "on behalf
	// of" user processes -- a bit sketchy.
	usage->ru_user_time_mach += usage->ru_system_time_mach;
	usage->ru_system_time_mach = 0;
#endif // !PRECISE_USER_KERNEL_TIME
}
444 
// Return the current thread's lifetime usage, including the interval accrued
// since the processor's last snapshot.
void
recount_current_thread_usage(struct recount_usage *usage)
{
	// Interrupts must be off so the processor's snapshot cannot advance
	// underneath this function.
	assert(ml_get_interrupts_enabled() == FALSE);
	thread_t thread = current_thread();
	struct recount_snap snap = { 0 };
	recount_snapshot(&snap);
	recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    usage);
	struct recount_snap *last = recount_get_snap(current_processor());
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, &snap, last);
	// This code runs in the kernel, so the in-flight delta is system time.
	recount_usage_add_snap(usage, &usage->ru_system_time_mach, &diff);
	_fix_time_precision(usage);
}
460 
// Return the current thread's total usage in `usage` and a copy of just the
// P-core portion in `usage_perf_only`.
void
recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only)
{
	struct recount_usage usage_perf_levels[RCT_CPU_KIND_COUNT] = { 0 };
	recount_current_thread_perf_level_usage(usage_perf_levels);
	recount_sum_usage(&recount_thread_plan, usage_perf_levels, usage);
	*usage_perf_only = usage_perf_levels[RCT_CPU_PERFORMANCE];
	_fix_time_precision(usage);
	_fix_time_precision(usage_perf_only);
}
472 
// Roll an arbitrary thread's lifetime usage up to one entry per CPU kind.
// Does not include any interval the thread may currently be running.
void
recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels)
{
	recount_rollup(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    RCT_TOPO_CPU_KIND, usage_levels);
	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
	for (size_t i = 0; i < topo_count; i++) {
		_fix_time_precision(&usage_levels[i]);
	}
}
484 
// Roll the current thread's lifetime usage up to one entry per CPU kind,
// including the interval accrued since the processor's last snapshot.
void
recount_current_thread_perf_level_usage(struct recount_usage *usage_levels)
{
	// Interrupts must be off so the snapshot and the unsafe rollup cannot
	// race with a context switch.
	assert(ml_get_interrupts_enabled() == FALSE);
	processor_t processor = current_processor();
	thread_t thread = current_thread();
	struct recount_snap snap = { 0 };
	recount_snapshot(&snap);
	recount_rollup_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    RCT_TOPO_CPU_KIND, usage_levels);
	struct recount_snap *last = recount_get_snap(processor);
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, &snap, last);
	// Credit the in-flight interval to the current CPU's kind, as system
	// time (this code runs in the kernel).
	size_t cur_i = recount_topo_index(RCT_TOPO_CPU_KIND, processor);
	struct recount_usage *cur_usage = &usage_levels[cur_i];
	recount_usage_add_snap(cur_usage, &cur_usage->ru_system_time_mach, &diff);
	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
	for (size_t i = 0; i < topo_count; i++) {
		_fix_time_precision(&usage_levels[i]);
	}
}
506 
// Total energy in nanojoules attributed to the current thread, or 0 when
// energy counters are unavailable on this platform.
uint64_t
recount_current_thread_energy_nj(void)
{
#if RECOUNT_ENERGY
	assert(ml_get_interrupts_enabled() == FALSE);
	thread_t thread = current_thread();
	size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
	uint64_t energy_nj = 0;
	for (size_t i = 0; i < topo_count; i++) {
		energy_nj += thread->th_recount.rth_lifetime[i].rt_usage.ru_energy_nj;
	}
	return energy_nj;
#else // RECOUNT_ENERGY
	return 0;
#endif // !RECOUNT_ENERGY
}
523 
// Fold a usage structure's times into `times`.  Without precise user/kernel
// time, all time is reported as user time.
static void
_times_add_usage(struct recount_times_mach *times, struct recount_usage *usage)
{
	times->rtm_user += usage->ru_user_time_mach;
#if PRECISE_USER_KERNEL_TIME
	times->rtm_system += usage->ru_system_time_mach;
#else // PRECISE_USER_KERNEL_TIME
	times->rtm_user += usage->ru_system_time_mach;
#endif // !PRECISE_USER_KERNEL_TIME
}
534 
// Sum a thread's lifetime user/system times across all its tracks, reading
// rt_usage directly (no sequence locks).
struct recount_times_mach
recount_thread_times(struct thread *thread)
{
	size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		_times_add_usage(&times, &thread->th_recount.rth_lifetime[i].rt_usage);
	}
	return times;
}
545 
546 uint64_t
recount_thread_time_mach(struct thread * thread)547 recount_thread_time_mach(struct thread *thread)
548 {
549 	struct recount_times_mach times = recount_thread_times(thread);
550 	return times.rtm_user + times.rtm_system;
551 }
552 
// Mach time elapsed since this processor's last snapshot was taken.
static uint64_t
_time_since_last_snapshot(void)
{
	struct recount_snap *last = recount_get_snap(current_processor());
	uint64_t cur_time = mach_absolute_time();
	return cur_time - last->rsn_time_mach;
}
560 
// Total CPU time of the current thread, including the interval since the
// processor's last snapshot.
uint64_t
recount_current_thread_time_mach(void)
{
	// Interrupts must be off so the snapshot cannot advance mid-read.
	assert(ml_get_interrupts_enabled() == FALSE);
	uint64_t previous_time = recount_thread_time_mach(current_thread());
	return previous_time + _time_since_last_snapshot();
}
568 
// User/system times for the current thread, including the interval since the
// processor's last snapshot.
struct recount_times_mach
recount_current_thread_times(void)
{
	assert(ml_get_interrupts_enabled() == FALSE);
	struct recount_times_mach times = recount_thread_times(
		current_thread());
#if PRECISE_USER_KERNEL_TIME
	// This code is executing in the kernel, so the time since the last snapshot
	// (with precise user/kernel time) is since entering the kernel.
	times.rtm_system += _time_since_last_snapshot();
#else // PRECISE_USER_KERNEL_TIME
	times.rtm_user += _time_since_last_snapshot();
#endif // !PRECISE_USER_KERNEL_TIME
	return times;
}
584 
// System-wide summed usage for a thread (not including any in-flight
// interval).
void
recount_thread_usage(thread_t thread, struct recount_usage *usage)
{
	recount_sum(&recount_thread_plan, thread->th_recount.rth_lifetime, usage);
	_fix_time_precision(usage);
}
591 
592 uint64_t
recount_current_thread_interrupt_time_mach(void)593 recount_current_thread_interrupt_time_mach(void)
594 {
595 	thread_t thread = current_thread();
596 	return thread->th_recount.rth_interrupt_time_mach;
597 }
598 
// System-wide summed usage for a work interval.
void
recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage)
{
	recount_sum(&recount_work_interval_plan, work_interval_get_recount_tracks(work_interval), usage);
	_fix_time_precision(usage);
}
605 
606 struct recount_times_mach
recount_work_interval_times(struct work_interval * work_interval)607 recount_work_interval_times(struct work_interval *work_interval)
608 {
609 	size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
610 	struct recount_times_mach times = { 0 };
611 	for (size_t i = 0; i < topo_count; i++) {
612 		_times_add_usage(&times, &work_interval_get_recount_tracks(work_interval)[i].rt_usage);
613 	}
614 	return times;
615 }
616 
// Total energy in nanojoules attributed to a work interval, or 0 when energy
// counters are unavailable on this platform.
uint64_t
recount_work_interval_energy_nj(struct work_interval *work_interval)
{
#if RECOUNT_ENERGY
	// Hoist the loop-invariant track lookup out of the loop (it was
	// previously re-evaluated on every iteration).
	struct recount_track *tracks =
	    work_interval_get_recount_tracks(work_interval);
	size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
	uint64_t energy = 0;
	for (size_t i = 0; i < topo_count; i++) {
		energy += tracks[i].rt_usage.ru_energy_nj;
	}
	return energy;
#else // RECOUNT_ENERGY
#pragma unused(work_interval)
	return 0;
#endif // !RECOUNT_ENERGY
}
632 
// System-wide summed usage for the current task.
void
recount_current_task_usage(struct recount_usage *usage)
{
	task_t task = current_task();
	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
	recount_sum(&recount_task_plan, tracks, usage);
	_fix_time_precision(usage);
}
641 
// System-wide summed usage for the current task, also isolating the P-core
// portion into `usage_perf_only`.
void
recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only)
{
	task_t task = current_task();
	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
	    tracks, RCT_CPU_PERFORMANCE, usage, usage_perf_only);
	_fix_time_precision(usage);
	_fix_time_precision(usage_perf_only);
}
653 
// Sum a task's times across all its tracks into `sum`, and the P-core-only
// times into `sum_perf_only`.  Reads rt_usage directly (no sequence locks).
void
recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only)
{
	const recount_topo_t topo = recount_task_plan.rpl_topo;
	const size_t topo_count = recount_topo_count(topo);
	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
	for (size_t i = 0; i < topo_count; i++) {
		struct recount_usage *usage = &tracks[i].rt_usage;
		_times_add_usage(sum, usage);
		if (recount_topo_matches_cpu_kind(topo, RCT_CPU_PERFORMANCE, i)) {
			_times_add_usage(sum_perf_only, usage);
		}
	}
}
669 
// Summed usage contributed by the task's already-terminated threads.
void
recount_task_terminated_usage(task_t task, struct recount_usage *usage)
{
	recount_sum_usage(&recount_task_terminated_plan,
	    task->tk_recount.rtk_terminated, usage);
	_fix_time_precision(usage);
}
677 
// User/system times contributed by the task's already-terminated threads.
struct recount_times_mach
recount_task_terminated_times(struct task *task)
{
	size_t topo_count = recount_topo_count(recount_task_terminated_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		// rtk_terminated is an array of usage structures, not tracks.
		_times_add_usage(&times, &task->tk_recount.rtk_terminated[i]);
	}
	return times;
}
688 
// Terminated-thread usage for a task, also isolating the P-core portion.
void
recount_task_terminated_usage_perf_only(task_t task,
    struct recount_usage *usage, struct recount_usage *perf_only)
{
	recount_sum_usage_and_isolate_cpu_kind(&recount_task_terminated_plan,
	    task->tk_recount.rtk_terminated, RCT_CPU_PERFORMANCE, usage, perf_only);
	_fix_time_precision(usage);
	_fix_time_precision(perf_only);
}
698 
// Lifetime usage for a task, also isolating the P-core portion.
void
recount_task_usage_perf_only(task_t task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only)
{
	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
	    task->tk_recount.rtk_lifetime, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
	_fix_time_precision(sum);
	_fix_time_precision(sum_perf_only);
}
708 
// System-wide summed lifetime usage for a task.
void
recount_task_usage(task_t task, struct recount_usage *usage)
{
	recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, usage);
	_fix_time_precision(usage);
}
715 
// Sum a task's lifetime user/system times across its tracks, reading
// rt_usage directly (no sequence locks).
struct recount_times_mach
recount_task_times(struct task *task)
{
	size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		_times_add_usage(&times, &task->tk_recount.rtk_lifetime[i].rt_usage);
	}
	return times;
}
726 
// Total energy in nanojoules attributed to a task, or 0 when energy counters
// are unavailable on this platform.
uint64_t
recount_task_energy_nj(struct task *task)
{
#if RECOUNT_ENERGY
	size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
	uint64_t energy = 0;
	for (size_t i = 0; i < topo_count; i++) {
		energy += task->tk_recount.rtk_lifetime[i].rt_usage.ru_energy_nj;
	}
	return energy;
#else // RECOUNT_ENERGY
#pragma unused(task)
	return 0;
#endif // !RECOUNT_ENERGY
}
742 
// Summed exited-task usage for a coalition, also isolating the P-core
// portion.
void
recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only)
{
	recount_sum_usage_and_isolate_cpu_kind(&recount_coalition_plan,
	    coal->rco_exited, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
	_fix_time_precision(sum);
	_fix_time_precision(sum_perf_only);
}
752 
753 OS_ALWAYS_INLINE
754 static void
recount_absorb_snap(struct recount_snap * to_add,thread_t thread,task_t task,processor_t processor,bool from_user)755 recount_absorb_snap(struct recount_snap *to_add, thread_t thread, task_t task,
756     processor_t processor, bool from_user)
757 {
758 	// Idle threads do not attribute their usage back to the task or processor,
759 	// as the time is not spent "running."
760 	//
761 	// The processor-level metrics include idle time, instead, as the idle time
762 	// needs to be read as up-to-date from `recount_processor_usage`.
763 
764 	const bool was_idle = (thread->options & TH_OPT_IDLE_THREAD) != 0;
765 
766 	struct recount_track *wi_tracks_array = NULL;
767 	if (!was_idle) {
768 		wi_tracks_array = work_interval_get_recount_tracks(
769 			thread->th_work_interval);
770 	}
771 	bool absorb_work_interval = wi_tracks_array != NULL;
772 
773 	struct recount_track *th_track = recount_update_start(
774 		thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
775 		processor);
776 	struct recount_track *wi_track = NULL;
777 	if (absorb_work_interval) {
778 		wi_track = recount_update_start(wi_tracks_array,
779 		    recount_work_interval_plan.rpl_topo, processor);
780 	}
781 	struct recount_track *tk_track = was_idle ? NULL : recount_update_start(
782 		task->tk_recount.rtk_lifetime, recount_task_plan.rpl_topo, processor);
783 	struct recount_track *pr_track = was_idle ? NULL : recount_update_start(
784 		&processor->pr_recount.rpr_active, recount_processor_plan.rpl_topo,
785 		processor);
786 	recount_update_commit();
787 
788 	uint64_t *th_time = NULL, *wi_time = NULL, *tk_time = NULL, *pr_time = NULL;
789 	if (from_user) {
790 		th_time = &th_track->rt_usage.ru_user_time_mach;
791 		wi_time = &wi_track->rt_usage.ru_user_time_mach;
792 		tk_time = &tk_track->rt_usage.ru_user_time_mach;
793 		pr_time = &pr_track->rt_usage.ru_user_time_mach;
794 	} else {
795 		th_time = &th_track->rt_usage.ru_system_time_mach;
796 		wi_time = &wi_track->rt_usage.ru_system_time_mach;
797 		tk_time = &tk_track->rt_usage.ru_system_time_mach;
798 		pr_time = &pr_track->rt_usage.ru_system_time_mach;
799 	}
800 
801 	recount_usage_add_snap(&th_track->rt_usage, th_time, to_add);
802 	if (!was_idle) {
803 		if (absorb_work_interval) {
804 			recount_usage_add_snap(&wi_track->rt_usage, wi_time, to_add);
805 		}
806 		recount_usage_add_snap(&tk_track->rt_usage, tk_time, to_add);
807 		recount_usage_add_snap(&pr_track->rt_usage, pr_time, to_add);
808 	}
809 
810 	recount_update_commit();
811 	recount_update_end(th_track);
812 	if (!was_idle) {
813 		if (absorb_work_interval) {
814 			recount_update_end(wi_track);
815 		}
816 		recount_update_end(tk_track);
817 		recount_update_end(pr_track);
818 	}
819 }
820 
821 void
recount_switch_thread(struct recount_snap * cur,struct thread * off_thread,struct task * off_task)822 recount_switch_thread(struct recount_snap *cur, struct thread *off_thread,
823     struct task *off_task)
824 {
825 	assert(ml_get_interrupts_enabled() == FALSE);
826 
827 	if (__improbable(!recount_started)) {
828 		return;
829 	}
830 
831 	processor_t processor = current_processor();
832 
833 	struct recount_snap *last = recount_get_snap(processor);
834 	struct recount_snap diff = { 0 };
835 	recount_snap_diff(&diff, cur, last);
836 	recount_absorb_snap(&diff, off_thread, off_task, processor, false);
837 	memcpy(last, cur, sizeof(*last));
838 }
839 
// Attribute `energy_nj` nanojoules of energy to `off_thread` and, unless the
// thread is an idle thread, to its work interval (if any), `off_task`, and
// the current processor.
//
// Must be called with interrupts disabled.
void
recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj)
{
#if RECOUNT_ENERGY
	assert(ml_get_interrupts_enabled() == FALSE);
	if (__improbable(!recount_started)) {
		return;
	}

	// Idle threads don't attribute usage back to the task or processor; the
	// thread-level track is still charged, below.
	bool was_idle = (off_thread->options & TH_OPT_IDLE_THREAD) != 0;
	struct recount_track *wi_tracks_array = work_interval_get_recount_tracks(off_thread->th_work_interval);
	bool collect_work_interval_telemetry = wi_tracks_array != NULL;
	processor_t processor = current_processor();

	// Find the per-topography track to update at each level.  NULL tracks
	// are never dereferenced -- guarded by `was_idle` and
	// `collect_work_interval_telemetry` below.
	struct recount_track *th_track = recount_update_single_start(
		off_thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
		processor);
	struct recount_track *wi_track = (was_idle || !collect_work_interval_telemetry) ? NULL :
	    recount_update_single_start(wi_tracks_array,
	    recount_work_interval_plan.rpl_topo, processor);
	struct recount_track *tk_track = was_idle ? NULL :
	    recount_update_single_start(off_task->tk_recount.rtk_lifetime,
	    recount_task_plan.rpl_topo, processor);
	struct recount_track *pr_track = was_idle ? NULL :
	    recount_update_single_start(&processor->pr_recount.rpr_active,
	    recount_processor_plan.rpl_topo, processor);

	th_track->rt_usage.ru_energy_nj += energy_nj;
	if (!was_idle) {
		if (collect_work_interval_telemetry) {
			wi_track->rt_usage.ru_energy_nj += energy_nj;
		}
		tk_track->rt_usage.ru_energy_nj += energy_nj;
		pr_track->rt_usage.ru_energy_nj += energy_nj;
	}
#else // RECOUNT_ENERGY
#pragma unused(off_thread, off_task, energy_nj)
#endif // !RECOUNT_ENERGY
}
880 
// kdebug event IDs under Monotonic's hierarchy, kept for
// backwards-compatibility with existing tooling.
#define MT_KDBG_IC_CPU_CSWITCH \
	KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES, 1)

#define MT_KDBG_IC_CPU_CSWITCH_ON \
    KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES_ON_CPU, 1)

// Log the instruction and cycle counts from `snap` at context switch, but
// only if the event was explicitly enabled (it's too frequent otherwise).
void
recount_log_switch_thread(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH)) {
		// In Monotonic's event hierarchy for backwards-compatibility.
		KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH, snap->rsn_insns, snap->rsn_cycles);
	}
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}
899 
// Log the instruction and cycle counts when a thread goes on-CPU, if the
// event was explicitly enabled.  A NULL `snap` falls back to the current
// processor's snapshot.
void
recount_log_switch_thread_on(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH_ON)) {
		if (!snap) {
			snap = recount_get_snap(current_processor());
		}
		// In Monotonic's event hierarchy for backwards-compatibility.
		KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH_ON, snap->rsn_insns, snap->rsn_cycles);
	}
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}
915 
// Sample the current counter values into `cur` and compute `diff` as the
// change since `last`.  When precise PMCs are compiled out -- or disabled via
// the tunable -- only Mach time is sampled and diffed.
OS_ALWAYS_INLINE
PRECISE_TIME_ONLY_FUNC
static void
recount_precise_transition_diff(struct recount_snap *diff,
    struct recount_snap *last, struct recount_snap *cur)
{
#if PRECISE_USER_KERNEL_PMCS
#if PRECISE_USER_KERNEL_PMC_TUNABLE
	// The full `recount_snapshot_speculative` shouldn't get PMCs with a tunable
	// in this configuration.
	if (__improbable(no_precise_pmcs)) {
		cur->rsn_time_mach = recount_timestamp_speculative();
		diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
	} else
#endif // PRECISE_USER_KERNEL_PMC_TUNABLE
	{
		recount_snapshot_speculative(cur);
		recount_snap_diff(diff, cur, last);
	}
#else // PRECISE_USER_KERNEL_PMCS
	// Time-only configuration: no PMCs to snapshot.
	cur->rsn_time_mach = recount_timestamp_speculative();
	diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
#endif // !PRECISE_USER_KERNEL_PMCS
}
940 
/// Called when entering or exiting the kernel to maintain system vs. user counts, extremely performance sensitive.
///
/// Must be called with interrupts disabled.
///
/// - Parameter from_user: Whether the kernel is being entered from user space.
///
/// - Returns: The value of Mach time that was sampled inside this function.
PRECISE_TIME_FATAL_FUNC
static uint64_t
recount_kernel_transition(bool from_user)
{
#if PRECISE_USER_KERNEL_TIME
	// Omit interrupts-disabled assertion for performance reasons.
	processor_t processor = current_processor();
	thread_t thread = processor->active_thread;
	task_t task = get_thread_ro_unchecked(thread)->tro_task;

	// Diff the current counter values against the processor's last snapshot
	// and attribute them to the current thread and task, split between user
	// and system by `from_user`.
	struct recount_snap *last = recount_get_snap(processor);
	struct recount_snap diff = { 0 };
	struct recount_snap cur = { 0 };
	recount_precise_transition_diff(&diff, last, &cur);
	recount_absorb_snap(&diff, thread, task, processor, from_user);
	// The just-taken snapshot becomes the baseline for the next transition.
	memcpy(last, &cur, sizeof(*last));

	return cur.rsn_time_mach;
#else // PRECISE_USER_KERNEL_TIME
#pragma unused(from_user)
	// `panic` doesn't return, so no return value is needed on this path.
	panic("recount: kernel transition called with precise time off");
#endif // !PRECISE_USER_KERNEL_TIME
}
971 
// A thread is leaving user space for the kernel: bill the outstanding time as
// user time.
PRECISE_TIME_FATAL_FUNC
void
recount_leave_user(void)
{
	recount_kernel_transition(true);
}
978 
// A thread is returning from the kernel to user space: bill the outstanding
// time as system time.
PRECISE_TIME_FATAL_FUNC
void
recount_enter_user(void)
{
	recount_kernel_transition(false);
}
985 
// Take a counter snapshot at interrupt entry, so the interrupt's duration can
// be accounted for when it's left.
void
recount_enter_interrupt(void)
{
	struct recount_snap *snap =
	    recount_get_interrupt_snap(current_processor());
	recount_snapshot_speculative(snap);
}
993 
// Account the time spent handling the interrupt being left to both the
// current processor and the interrupted thread.
void
recount_leave_interrupt(void)
{
	processor_t processor = current_processor();
	thread_t thread = processor->active_thread;
	// Refresh the processor's snapshot in place, charging the Mach time
	// elapsed since its last update to interrupt handling.
	struct recount_snap *last = recount_get_snap(processor);
	uint64_t last_time = last->rsn_time_mach;
	recount_snapshot_speculative(last);
	processor->pr_recount.rpr_interrupt_time_mach +=
	    last->rsn_time_mach - last_time;
	thread->th_recount.rth_interrupt_time_mach +=
	    last->rsn_time_mach - last_time;
}
1007 
1008 #if __x86_64__
1009 
// Intel interrupt entry: bill the outstanding user or system time and record
// the interrupt delivery timestamp in the per-CPU data.
void
recount_enter_intel_interrupt(x86_saved_state_t *state)
{
	// The low bits of `%cs` being set indicate interrupt was delivered while
	// executing in user space.
	bool from_user = (is_saved_state64(state) ? state->ss_64.isf.cs :
	    state->ss_32.cs) & 0x03;
	uint64_t timestamp = recount_kernel_transition(from_user);
	current_cpu_datap()->cpu_int_event_time = timestamp;
}
1020 
// Intel interrupt exit: close out the interrupt's system time and clear the
// per-CPU interrupt timestamp.
void
recount_leave_intel_interrupt(void)
{
	// XXX This is not actually entering user space, but it does update the
	//     system timer, which is desirable.
	recount_enter_user();
	current_cpu_datap()->cpu_int_event_time = 0;
}
1029 
1030 #endif // __x86_64__
1031 
// Set on rpr_state_last_abs_time when the processor is idle.
#define RCT_PR_IDLING (0x1ULL << 63)

// Mark `pr` as idling as of `snap`, recording the timestamp so the idle
// duration can be computed when the processor runs again or is queried.
void
recount_processor_idle(struct recount_processor *pr, struct recount_snap *snap)
{
	// The processor must currently be marked as running, and the timestamp
	// must not collide with the idle flag bit.
	__assert_only uint64_t state_time = os_atomic_load_wide(
		&pr->rpr_state_last_abs_time, relaxed);
	assert((state_time & RCT_PR_IDLING) == 0);
	assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
	// Pack the idle flag and the timestamp into one word so readers observe
	// both atomically.
	uint64_t new_state_stamp = RCT_PR_IDLING | snap->rsn_time_mach;
	os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
	    relaxed);
}
1046 
// Strip the idle flag from a packed state stamp, leaving just the timestamp.
OS_PURE OS_ALWAYS_INLINE
static inline uint64_t
_state_time(uint64_t state_stamp)
{
	return state_stamp & ~(RCT_PR_IDLING);
}
1053 
// Cache the processor's CPU-kind track index so it doesn't need to be
// recomputed on each counter update.
void
recount_processor_init(processor_t processor)
{
#if __AMP__
	processor->pr_recount.rpr_cpu_kind_index =
	    processor->processor_set->pset_cluster_type == PSET_AMP_P ?
	    RCT_CPU_PERFORMANCE : RCT_CPU_EFFICIENCY;
#else // __AMP__
#pragma unused(processor)
#endif // !__AMP__
}
1065 
// Mark `pr` as running as of `snap`, accumulating the time spent idle since
// `recount_processor_idle` was last called.
void
recount_processor_run(struct recount_processor *pr, struct recount_snap *snap)
{
	// The state is either 0 (no transition recorded yet) or a timestamp with
	// the idle flag set; the new timestamp must not collide with the flag.
	uint64_t state = os_atomic_load_wide(&pr->rpr_state_last_abs_time, relaxed);
	assert(state == 0 || (state & RCT_PR_IDLING) == RCT_PR_IDLING);
	assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
	uint64_t new_state_stamp = snap->rsn_time_mach;
	// NOTE(review): when `state` is 0 this charges everything since Mach
	// time 0 as idle -- presumably only reachable at early boot; confirm.
	pr->rpr_idle_time_mach += snap->rsn_time_mach - _state_time(state);
	os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
	    relaxed);
}
1077 
1078 void
recount_processor_online(processor_t processor,struct recount_snap * cur)1079 recount_processor_online(processor_t processor, struct recount_snap *cur)
1080 {
1081 	recount_processor_run(&processor->pr_recount, cur);
1082 	struct recount_snap *pr_snap = recount_get_snap(processor);
1083 	memcpy(pr_snap, cur, sizeof(*pr_snap));
1084 }
1085 
// Sum `pr`'s usage across all tracks into `usage`, and report its idle time
// via `idle_time_out`.
void
recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_out)
{
	recount_sum(&recount_processor_plan, &pr->rpr_active, usage);
	_fix_time_precision(usage);

	uint64_t idle_time = pr->rpr_idle_time_mach;
	uint64_t idle_stamp = os_atomic_load_wide(&pr->rpr_state_last_abs_time,
	    relaxed);
	bool idle = (idle_stamp & RCT_PR_IDLING) == RCT_PR_IDLING;
	if (idle) {
		// Since processors can idle for some time without an update, make sure
		// the idle time is up-to-date with respect to the caller.
		idle_time += mach_absolute_time() - _state_time(idle_stamp);
	}
	*idle_time_out = idle_time;
}
1104 
// Return the current processor's accumulated interrupt-handling time, in Mach
// time units.  Preemption must be disabled so the processor cannot change.
uint64_t
recount_current_processor_interrupt_time_mach(void)
{
	assert(!preemption_enabled());
	return current_processor()->pr_recount.rpr_interrupt_time_mach;
}
1111 
// Fill `usage_levels` with the per-perf-level usage of the thread with ID
// `tid` in `task`.  Returns false if no such thread exists.
bool
recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
    struct recount_usage *usage_levels)
{
	// NOTE(review): assumes `task_findtid` handles any reference counting on
	// the returned thread -- confirm whether a release is required here.
	thread_t thread = task_findtid(task, tid);
	if (thread != THREAD_NULL) {
		if (thread == current_thread()) {
			// Reading the current thread's own counters requires interrupts
			// to be disabled.
			boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
			recount_current_thread_perf_level_usage(usage_levels);
			ml_set_interrupts_enabled(interrupt_state);
		} else {
			recount_thread_perf_level_usage(thread, usage_levels);
		}
	}
	return thread != THREAD_NULL;
}
1128 
1129 #pragma mark - utilities
1130 
1131 // For rolling up counts, convert an index from one topography to another.
1132 static size_t
recount_convert_topo_index(recount_topo_t from,recount_topo_t to,size_t i)1133 recount_convert_topo_index(recount_topo_t from, recount_topo_t to, size_t i)
1134 {
1135 	if (from == to) {
1136 		return i;
1137 	} else if (to == RCT_TOPO_SYSTEM) {
1138 		return 0;
1139 	} else if (from == RCT_TOPO_CPU) {
1140 		assertf(to == RCT_TOPO_CPU_KIND,
1141 		    "recount: cannot convert from CPU topography to %d", to);
1142 		return _topo_cpu_kinds[i];
1143 	} else {
1144 		panic("recount: unexpected rollup request from %d to %d", from, to);
1145 	}
1146 }
1147 
// Get the track index of the provided processor and topography.
OS_ALWAYS_INLINE
static size_t
recount_topo_index(recount_topo_t topo, processor_t processor)
{
	switch (topo) {
	case RCT_TOPO_SYSTEM:
		// A single track for the whole system.
		return 0;
	case RCT_TOPO_CPU:
		// One track per CPU, indexed by CPU ID.
		return processor->cpu_id;
	case RCT_TOPO_CPU_KIND:
#if __AMP__
		// Cached by `recount_processor_init`.
		return processor->pr_recount.rpr_cpu_kind_index;
#else // __AMP__
		// Only one kind of CPU on symmetric systems.
		return 0;
#endif // !__AMP__
	default:
		panic("recount: invalid topology %u to index", topo);
	}
}
1168 
// Return the number of tracks needed for a given topography.
size_t
recount_topo_count(recount_topo_t topo)
{
	// Allow the compiler to reason about at least the system and CPU kind
	// counts.
	switch (topo) {
	case RCT_TOPO_SYSTEM:
		return 1;

	case RCT_TOPO_CPU_KIND:
#if __AMP__
		// Performance and efficiency.
		return 2;
#else // __AMP__
		return 1;
#endif // !__AMP__

	case RCT_TOPO_CPU:
#if __arm__ || __arm64__
		return ml_get_cpu_count();
#else // __arm__ || __arm64__
		// Sized by the maximum CPU number, which is only assumed to be the
		// count minus one on ARM.
		return ml_early_cpu_max_number() + 1;
#endif // !__arm__ && !__arm64__

	default:
		panic("recount: invalid topography %d", topo);
	}
}
1197 
// Whether the track at index `idx` in a `topo`-sized array accumulates counts
// from CPUs of the given `kind`.
static bool
recount_topo_matches_cpu_kind(recount_topo_t topo, recount_cpu_kind_t kind,
    size_t idx)
{
#if !__AMP__
#pragma unused(kind, idx)
#endif // !__AMP__
	switch (topo) {
	case RCT_TOPO_SYSTEM:
		// The single system track covers every CPU kind.
		return true;

	case RCT_TOPO_CPU_KIND:
#if __AMP__
		// CPU-kind tracks are indexed directly by kind.
		return kind == idx;
#else // __AMP__
		return false;
#endif // !__AMP__

	case RCT_TOPO_CPU: {
#if __AMP__
		// Look up the kind of the CPU with this ID.
		return _topo_cpu_kinds[idx] == kind;
#else // __AMP__
		return false;
#endif // !__AMP__
	}

	default:
		panic("recount: unexpected topography %d", topo);
	}
}
1228 
// Allocate a zero-filled array of tracks sized for the plan's topography.
// Cannot fail (Z_NOFAIL); the caller owns the memory and frees it with
// `recount_tracks_destroy`.
struct recount_track *
recount_tracks_create(recount_plan_t plan)
{
	return kalloc_type_tag(struct recount_track,
	           recount_topo_count(plan->rpl_topo), Z_WAITOK | Z_ZERO | Z_NOFAIL,
	           VM_KERN_MEMORY_RECOUNT);
}
1236 
1237 static void
recount_tracks_copy(recount_plan_t plan,struct recount_track * dst,struct recount_track * src)1238 recount_tracks_copy(recount_plan_t plan, struct recount_track *dst,
1239     struct recount_track *src)
1240 {
1241 	size_t topo_count = recount_topo_count(plan->rpl_topo);
1242 	for (size_t i = 0; i < topo_count; i++) {
1243 		recount_read_track(&dst[i].rt_usage, &src[i]);
1244 	}
1245 }
1246 
// Free a track array previously allocated by `recount_tracks_create` with the
// same plan.
void
recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks)
{
	kfree_type(struct recount_track, recount_topo_count(plan->rpl_topo),
	    tracks);
}
1253 
// Allocate a thread's lifetime counter tracks.
void
recount_thread_init(struct recount_thread *th)
{
	th->rth_lifetime = recount_tracks_create(&recount_thread_plan);
}
1259 
// Copy a thread's lifetime usage from `src` into `dst`.
void
recount_thread_copy(struct recount_thread *dst, struct recount_thread *src)
{
	recount_tracks_copy(&recount_thread_plan, dst->rth_lifetime,
	    src->rth_lifetime);
}
1266 
// Copy a task's lifetime usage from `src` into `dst`.
void
recount_task_copy(struct recount_task *dst, const struct recount_task *src)
{
	recount_tracks_copy(&recount_task_plan, dst->rtk_lifetime,
	    src->rtk_lifetime);
}
1273 
// Free a thread's lifetime counter tracks.
void
recount_thread_deinit(struct recount_thread *th)
{
	recount_tracks_destroy(&recount_thread_plan, th->rth_lifetime);
}
1279 
// Allocate a task's lifetime tracks and its terminated-threads usage summary.
void
recount_task_init(struct recount_task *tk)
{
	tk->rtk_lifetime = recount_tracks_create(&recount_task_plan);
	tk->rtk_terminated = recount_usage_alloc(
		recount_task_terminated_plan.rpl_topo);
}
1287 
// Free everything allocated by `recount_task_init`.
void
recount_task_deinit(struct recount_task *tk)
{
	recount_tracks_destroy(&recount_task_plan, tk->rtk_lifetime);
	recount_usage_free(recount_task_terminated_plan.rpl_topo,
	    tk->rtk_terminated);
}
1295 
// Allocate a coalition's exited-tasks usage summary.
void
recount_coalition_init(struct recount_coalition *co)
{
	co->rco_exited = recount_usage_alloc(recount_coalition_plan.rpl_topo);
}
1301 
// Free a coalition's exited-tasks usage summary.
void
recount_coalition_deinit(struct recount_coalition *co)
{
	recount_usage_free(recount_coalition_plan.rpl_topo, co->rco_exited);
}
1307 
// Allocate the tracks for a work interval's current instance.
void
recount_work_interval_init(struct recount_work_interval *wi)
{
	wi->rwi_current_instance = recount_tracks_create(&recount_work_interval_plan);
}
1313 
// Free the tracks for a work interval's current instance.
void
recount_work_interval_deinit(struct recount_work_interval *wi)
{
	recount_tracks_destroy(&recount_work_interval_plan, wi->rwi_current_instance);
}
1319 
// Allocate a zero-filled array of usage structures sized for `topo`.  Cannot
// fail (Z_NOFAIL); free with `recount_usage_free` and the same topography.
struct recount_usage *
recount_usage_alloc(recount_topo_t topo)
{
	return kalloc_type_tag(struct recount_usage, recount_topo_count(topo),
	           Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT);
}
1326 
// Free a usage array previously allocated by `recount_usage_alloc` with the
// same topography.
void
recount_usage_free(recount_topo_t topo, struct recount_usage *usage)
{
	kfree_type(struct recount_usage, recount_topo_count(topo),
	    usage);
}
1333