xref: /xnu-12377.81.4/osfmk/kern/recount.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 // Copyright (c) 2021 Apple Inc.  All rights reserved.
2 //
3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 //
5 // This file contains Original Code and/or Modifications of Original Code
6 // as defined in and that are subject to the Apple Public Source License
7 // Version 2.0 (the 'License'). You may not use this file except in
8 // compliance with the License. The rights granted to you under the License
9 // may not be used to create, or enable the creation or redistribution of,
10 // unlawful or unlicensed copies of an Apple operating system, or to
11 // circumvent, violate, or enable the circumvention or violation of, any
12 // terms of an Apple operating system software license agreement.
13 //
14 // Please obtain a copy of the License at
15 // http://www.opensource.apple.com/apsl/ and read it before using this file.
16 //
17 // The Original Code and all software distributed under the License are
18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 // Please see the License for the specific language governing rights and
23 // limitations under the License.
24 //
25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26 
27 #include <kern/assert.h>
28 #include <kern/kalloc.h>
29 #include <pexpert/pexpert.h>
30 #include <sys/kdebug.h>
31 #include <sys/_types/_size_t.h>
32 #include <kern/monotonic.h>
33 #include <kern/percpu.h>
34 #include <kern/processor.h>
35 #include <kern/recount.h>
36 #include <kern/startup.h>
37 #include <kern/task.h>
38 #include <kern/thread.h>
39 #include <kern/work_interval.h>
40 #include <mach/mach_time.h>
41 #include <mach/mach_types.h>
42 #include <machine/config.h>
43 #include <machine/machine_routines.h>
44 #include <os/atomic_private.h>
45 #include <stdbool.h>
46 #include <stdint.h>
47 
48 // Recount's machine-independent implementation and interfaces for the kernel
49 // at-large.
50 
// Precise PMC (cycles, instructions) attribution follows the same policy as
// precise user/kernel time accounting.
#define PRECISE_USER_KERNEL_PMCS PRECISE_USER_KERNEL_TIME

// On non-release kernels, allow precise PMC (instructions, cycles) updates to
// be disabled for performance characterization.
#if PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)
#define PRECISE_USER_KERNEL_PMC_TUNABLE 1

// Boot-arg to opt out of precise PMC updates at run time.
TUNABLE(bool, no_precise_pmcs, "-no-precise-pmcs", false);
#endif // PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)

#if !PRECISE_USER_KERNEL_TIME
// Without precise user/kernel time, functions that require it are either
// fatal to call or unused.
#define PRECISE_TIME_FATAL_FUNC OS_NORETURN
#define PRECISE_TIME_ONLY_FUNC OS_UNUSED
#else // !PRECISE_USER_KERNEL_TIME
#define PRECISE_TIME_FATAL_FUNC
#define PRECISE_TIME_ONLY_FUNC
#endif // PRECISE_USER_KERNEL_TIME

#if !PRECISE_USER_KERNEL_PMCS
// Likewise for functions that only make sense with precise PMC attribution.
#define PRECISE_PMCS_ONLY_FUNC OS_UNUSED
#else // !PRECISE_USER_KERNEL_PMCS
#define PRECISE_PMCS_ONLY_FUNC
#endif // PRECISE_USER_KERNEL_PMCS

#if HAS_CPU_DPE_COUNTER
// Only certain platforms have DPE counters.
#define RECOUNT_ENERGY CONFIG_PERVASIVE_ENERGY
#else // HAS_CPU_DPE_COUNTER
#define RECOUNT_ENERGY 0
#endif // !HAS_CPU_DPE_COUNTER
81 
// Topography helpers.

// Number of per-topography elements (tracks or usage structures) for `topo`.
size_t recount_topo_count(recount_topo_t topo);
// Whether index `idx` within `topo` accounts CPUs of the given `kind`.
static bool recount_topo_matches_cpu_kind(recount_topo_t topo,
    recount_cpu_kind_t kind, size_t idx);
// Index into a `topo`-sized array for the CPU backing `processor`.
static size_t recount_topo_index(recount_topo_t topo, processor_t processor);
// Translate an index in the `from` topography into the `to` topography.
static size_t recount_convert_topo_index(recount_topo_t from, recount_topo_t to,
    size_t i);

// Prevent counter updates before the system is ready.
__security_const_late bool _recount_started = false;

// Lookup table that matches CPU numbers (indices) to their track index.
__security_const_late uint8_t _topo_cpu_kinds[MAX_CPUS] = { 0 };

// Allocation metadata and zones.

// Keep static strings for `zone_create`.
static const char *_usage_zone_names[RCT_TOPO_COUNT] = {
	[RCT_TOPO_CPU] = "recount_usage_cpu",
	[RCT_TOPO_CPU_KIND] = "recount_usage_cpu_kind",
};

static const char *_track_zone_names[RCT_TOPO_COUNT] = {
	[RCT_TOPO_CPU] = "recount_track_cpu",
	[RCT_TOPO_CPU_KIND] = "recount_track_cpu_kind",
};

// Which topographies need heap (zone) allocations; RCT_TOPO_SYSTEM state is
// stored inline by its users.
static const bool _topo_allocates[RCT_TOPO_COUNT] = {
	[RCT_TOPO_SYSTEM] = false,
	[RCT_TOPO_CPU] = true,
	[RCT_TOPO_CPU_KIND] = true,
};

// Fixed-size zones for allocations.
__security_const_late zone_t _recount_usage_zones[RCT_TOPO_COUNT] = { };
__security_const_late zone_t _recount_track_zones[RCT_TOPO_COUNT] = { };
118 
// One-time initialization: record each CPU's kind and create the zones used
// to allocate per-topography usage/track arrays.
__startup_func
static void
recount_startup(void)
{
#if __AMP__
	// On asymmetric platforms, cache each CPU's cluster kind (P or E) so
	// topography indices can be computed without re-reading topology info.
	unsigned int cpu_count = ml_get_cpu_count();
	const ml_topology_info_t *topo_info = ml_get_topology_info();
	for (unsigned int i = 0; i < cpu_count; i++) {
		cluster_type_t type = topo_info->cpus[i].cluster_type;
		uint8_t cluster_i = (type == CLUSTER_TYPE_P) ? RCT_CPU_PERFORMANCE :
		    RCT_CPU_EFFICIENCY;
		_topo_cpu_kinds[i] = cluster_i;
	}
#endif // __AMP__

	// Create fixed-size zones for each topography that heap-allocates.
	// Element size is the whole per-topography array, sized at boot.
	for (unsigned int i = 0; i < RCT_TOPO_COUNT; i++) {
		if (_topo_allocates[i]) {
			const char *usage_name = _usage_zone_names[i];
			assert(usage_name != NULL);
			_recount_usage_zones[i] = zone_create(usage_name,
			    sizeof(struct recount_usage) * recount_topo_count(i),
			    0);

			const char *track_name = _track_zone_names[i];
			assert(track_name != NULL);
			_recount_track_zones[i] = zone_create(track_name,
			    sizeof(struct recount_track) * recount_topo_count(i),
			    0);
		}
	}

	// Allow counter updates now that supporting state is in place.
	_recount_started = true;
}

// Runs once per-CPU data is available, as late as possible in that phase.
STARTUP(PERCPU, STARTUP_RANK_LAST, recount_startup);
154 
#pragma mark - tracks

// Accounting plans: the topography each kind of accounting object uses.
RECOUNT_PLAN_DEFINE(recount_thread_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_work_interval_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_terminated_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_coalition_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_processor_plan, RCT_TOPO_SYSTEM);

// Read the current timebase without serializing instruction execution; on
// ARM the value may be taken speculatively relative to nearby instructions.
OS_ALWAYS_INLINE
static inline uint64_t
recount_timestamp_speculative(void)
{
#if __arm__ || __arm64__
	return ml_get_speculative_timebase();
#else // __arm__ || __arm64__
	return mach_absolute_time();
#endif // !__arm__ && !__arm64__
}

// Fill `snap` with the current time and, where configured, cycle and
// instruction counts -- all read without a serializing barrier.
OS_ALWAYS_INLINE
void
recount_snapshot_speculative(struct recount_snap *snap)
{
	snap->rsn_time_mach = recount_timestamp_speculative();
#if CONFIG_PERVASIVE_CPI
	mt_cur_cpu_cycles_instrs_speculative(&snap->rsn_cycles, &snap->rsn_insns);
#endif // CONFIG_PERVASIVE_CPI
}

// Fill `snap` with a precise snapshot: the ISB on ARM keeps the counter
// reads from speculating ahead of preceding instructions.
void
recount_snapshot(struct recount_snap *snap)
{
#if __arm__ || __arm64__
	__builtin_arm_isb(ISB_SY);
#endif // __arm__ || __arm64__
	recount_snapshot_speculative(snap);
}
193 
194 static struct recount_snap *
recount_get_snap(processor_t processor)195 recount_get_snap(processor_t processor)
196 {
197 	return &processor->pr_recount.rpr_snap;
198 }
199 
// A simple sequence lock implementation.
//
// Writers bump the generation word to an odd value before mutating and back
// to even afterwards; readers retry whenever they observe an odd generation
// or the generation changed across their read.

// Reader slow path: spin (with preemption disabled so the writer can run)
// until the generation becomes even again.
// NOTE(review): the generation observed here is not propagated back to the
// caller, so `_seqlock_shared_lock` still returns the stale odd value and
// the first read attempt always retries -- confirm this is intended.
OS_ALWAYS_INLINE
static void
_seqlock_shared_lock_slowpath(const uint32_t *lck, uint32_t gen)
{
	disable_preemption();
	do {
		gen = hw_wait_while_equals32((uint32_t *)(uintptr_t)lck, gen);
	} while (__improbable((gen & 1) != 0));
	// Order subsequent data reads after observing a quiescent generation.
	os_atomic_thread_fence(acquire);
	enable_preemption();
}

// Enter a read-side critical section; returns the generation to pass to
// `_seqlock_shared_try_unlock`.
OS_ALWAYS_INLINE
static uintptr_t
_seqlock_shared_lock(const uint32_t *lck)
{
	uint32_t gen = os_atomic_load(lck, acquire);
	if (__improbable((gen & 1) != 0)) {
		// Odd generation: a writer is mid-update.
		_seqlock_shared_lock_slowpath(lck, gen);
	}
	return gen;
}

// Leave a read-side critical section; false means a writer intervened and
// the read must be retried.
OS_ALWAYS_INLINE
static bool
_seqlock_shared_try_unlock(const uint32_t *lck, uintptr_t on_enter)
{
	return os_atomic_load(lck, acquire) == on_enter;
}

// Writer entry: generation goes even -> odd.  Relaxed ordering is safe here;
// visibility to readers is established by `_seqlock_excl_commit`.
OS_ALWAYS_INLINE
static void
_seqlock_excl_lock_relaxed(uint32_t *lck)
{
	__assert_only uintptr_t new = os_atomic_inc(lck, relaxed);
	assert3u((new & 1), ==, 1);
}

// Publish the writer's stores before the unlock increment can be observed.
OS_ALWAYS_INLINE
static void
_seqlock_excl_commit(void)
{
	os_atomic_thread_fence(release);
}

// Writer exit: generation goes odd -> even.
OS_ALWAYS_INLINE
static void
_seqlock_excl_unlock_relaxed(uint32_t *lck)
{
	__assert_only uint32_t new = os_atomic_inc(lck, relaxed);
	assert3u((new & 1), ==, 0);
}
254 
// Begin mutating the track for `processor`, entering the writer side of its
// sequence lock.
OS_ALWAYS_INLINE
static struct recount_track *
recount_update_start(struct recount_track *tracks, recount_topo_t topo,
    processor_t processor)
{
	struct recount_track *track = &tracks[recount_topo_index(topo, processor)];
	_seqlock_excl_lock_relaxed(&track->rt_sync);
	return track;
}

#if RECOUNT_ENERGY

// Locate the track for `processor` without taking its sequence lock, for
// single-field updates that do not need the full consistency protocol.
static struct recount_track *
recount_update_single_start(struct recount_track *tracks, recount_topo_t topo,
    processor_t processor)
{
	return &tracks[recount_topo_index(topo, processor)];
}

#endif // RECOUNT_ENERGY

// Make the writer's buffered updates visible before the unlock.
static void
recount_update_commit(void)
{
	_seqlock_excl_commit();
}

// Finish mutating `track`, releasing the writer side of its sequence lock.
static void
recount_update_end(struct recount_track *track)
{
	_seqlock_excl_unlock_relaxed(&track->rt_sync);
}

// Begin reading `track`'s usage; the data is only consistent if a
// subsequent `recount_try_read_end` returns true.
static const struct recount_usage *
recount_read_start(const struct recount_track *track, uintptr_t *on_enter)
{
	const struct recount_usage *stats = &track->rt_usage;
	*on_enter = _seqlock_shared_lock(&track->rt_sync);
	return stats;
}

// End a read of `track`; false means a writer raced and the copy is torn.
static bool
recount_try_read_end(const struct recount_track *track, uintptr_t on_enter)
{
	return _seqlock_shared_try_unlock(&track->rt_sync, on_enter);
}

// Copy a consistent snapshot of `track`'s usage into `stats`, retrying
// around concurrent writers.
static void
recount_read_track(struct recount_usage *stats,
    const struct recount_track *track)
{
	uintptr_t on_enter = 0;
	do {
		const struct recount_usage *vol_stats =
		    recount_read_start(track, &on_enter);
		*stats = *vol_stats;
	} while (!recount_try_read_end(track, on_enter));
}
313 
// `sum += to_add`, element-wise over the metrics.
static void
recount_metrics_add(struct recount_metrics *sum, const struct recount_metrics *to_add)
{
	sum->rm_time_mach += to_add->rm_time_mach;
#if CONFIG_PERVASIVE_CPI
	sum->rm_instructions += to_add->rm_instructions;
	sum->rm_cycles += to_add->rm_cycles;
#endif // CONFIG_PERVASIVE_CPI
}

// `sum += to_add`, across all levels (and energy, where configured).
static void
recount_usage_add(struct recount_usage *sum, const struct recount_usage *to_add)
{
	for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
		recount_metrics_add(&sum->ru_metrics[i], &to_add->ru_metrics[i]);
	}
#if CONFIG_PERVASIVE_ENERGY
	sum->ru_energy_nj += to_add->ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
}

// Accumulate a snapshot delta into the metrics for `level` within `usage`.
OS_ALWAYS_INLINE
static inline void
recount_usage_add_snap(struct recount_usage *usage, recount_level_t level,
    struct recount_snap *snap)
{
	struct recount_metrics *metrics = &usage->ru_metrics[level];

	metrics->rm_time_mach += snap->rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	metrics->rm_cycles += snap->rsn_cycles;
	metrics->rm_instructions += snap->rsn_insns;
#else // CONFIG_PERVASIVE_CPI
#pragma unused(usage)
#endif // !CONFIG_PERVASIVE_CPI
}

// Accumulate every track of `tracks` (laid out per `plan`'s topography)
// into `stats`, which is laid out per `to_topo` (possibly coarser).
static void
recount_rollup(recount_plan_t plan, const struct recount_track *tracks,
    recount_topo_t to_topo, struct recount_usage *stats)
{
	recount_topo_t from_topo = plan->rpl_topo;
	size_t topo_count = recount_topo_count(from_topo);
	struct recount_usage tmp = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		// `recount_read_track` fully overwrites `tmp` each iteration.
		recount_read_track(&tmp, &tracks[i]);
		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
		recount_usage_add(&stats[to_i], &tmp);
	}
}
364 
// Roll `tracks` up into `stats` without sequence-lock protection.
// This function must be run when counters cannot increment for the track,
// like from the current thread.
static void
recount_rollup_unsafe(recount_plan_t plan, struct recount_track *tracks,
    recount_topo_t to_topo, struct recount_usage *stats)
{
	recount_topo_t from_topo = plan->rpl_topo;
	size_t topo_count = recount_topo_count(from_topo);
	for (size_t i = 0; i < topo_count; i++) {
		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
		recount_usage_add(&stats[to_i], &tracks[i].rt_usage);
	}
}

// Sum every track of `tracks` into the single system-wide `sum`.
void
recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum)
{
	recount_rollup(plan, tracks, RCT_TOPO_SYSTEM, sum);
}
384 
385 void
recount_sum_unsafe(recount_plan_t plan,const struct recount_track * tracks,struct recount_usage * sum)386 recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
387     struct recount_usage *sum)
388 {
389 	recount_topo_t topo = plan->rpl_topo;
390 	size_t topo_count = recount_topo_count(topo);
391 	for (size_t i = 0; i < topo_count; i++) {
392 		recount_usage_add(sum, &tracks[i].rt_usage);
393 	}
394 }
395 
// Sum all tracks into `sum`, also accumulating just the tracks that account
// CPUs of `kind` into `only_kind`.
void
recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_track *tracks, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind)
{
	size_t topo_count = recount_topo_count(plan->rpl_topo);
	struct recount_usage tmp = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		// Take one consistent copy per track and add it to both sums.
		recount_read_track(&tmp, &tracks[i]);
		recount_usage_add(sum, &tmp);
		if (recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
			recount_usage_add(only_kind, &tmp);
		}
	}
}
411 
412 static void
recount_sum_usage(recount_plan_t plan,const struct recount_usage * usages,struct recount_usage * sum)413 recount_sum_usage(recount_plan_t plan, const struct recount_usage *usages,
414     struct recount_usage *sum)
415 {
416 	const size_t topo_count = recount_topo_count(plan->rpl_topo);
417 	for (size_t i = 0; i < topo_count; i++) {
418 		recount_usage_add(sum, &usages[i]);
419 	}
420 }
421 
// Sum usage structures into `sum`, also accumulating those for CPUs of
// `kind` into `only_kind`; `only_kind` may be NULL to skip the isolation.
void
recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_usage *usage, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind)
{
	const size_t topo_count = recount_topo_count(plan->rpl_topo);
	for (size_t i = 0; i < topo_count; i++) {
		recount_usage_add(sum, &usage[i]);
		if (only_kind && recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
			recount_usage_add(only_kind, &usage[i]);
		}
	}
}

// Roll `tracks` up into one usage structure per CPU kind (perf level).
void
recount_sum_perf_levels(recount_plan_t plan, struct recount_track *tracks,
    struct recount_usage *sums)
{
	recount_rollup(plan, tracks, RCT_TOPO_CPU_KIND, sums);
}
442 
// Split `usage` into user and system Mach times.
struct recount_times_mach
recount_usage_times_mach(struct recount_usage *usage)
{
	return (struct recount_times_mach){
		       .rtm_user = usage->ru_metrics[RCT_LVL_USER].rm_time_mach,
		       .rtm_system = recount_usage_system_time_mach(usage),
	};
}

// System time is kernel time plus, where configured, secure-level time.
uint64_t
recount_usage_system_time_mach(struct recount_usage *usage)
{
	uint64_t system_time = usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach;
#if RECOUNT_SECURE_METRICS
	system_time += usage->ru_metrics[RCT_LVL_SECURE].rm_time_mach;
#endif // RECOUNT_SECURE_METRICS
	return system_time;
}

// Total Mach time across all levels of `usage`.
uint64_t
recount_usage_time_mach(struct recount_usage *usage)
{
	uint64_t time = 0;
	for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
		time += usage->ru_metrics[i].rm_time_mach;
	}
	return time;
}

// Total CPU cycles across all levels; 0 when CPU counters are unavailable.
uint64_t
recount_usage_cycles(struct recount_usage *usage)
{
	uint64_t cycles = 0;
#if CONFIG_CPU_COUNTERS
	for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
		cycles += usage->ru_metrics[i].rm_cycles;
	}
#else // CONFIG_CPU_COUNTERS
#pragma unused(usage)
#endif // !CONFIG_CPU_COUNTERS
	return cycles;
}

// Total retired instructions across all levels; 0 when CPU counters are
// unavailable.
uint64_t
recount_usage_instructions(struct recount_usage *usage)
{
	uint64_t instructions = 0;
#if CONFIG_CPU_COUNTERS
	for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
		instructions += usage->ru_metrics[i].rm_instructions;
	}
#else // CONFIG_CPU_COUNTERS
#pragma unused(usage)
#endif // !CONFIG_CPU_COUNTERS
	return instructions;
}
499 
// Plan-specific helpers.

// Fold a terminating task's lifetime usage into its coalition's
// exited-tasks accumulator.
void
recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk)
{
	recount_rollup(&recount_task_plan, tk->rtk_lifetime,
	    recount_coalition_plan.rpl_topo, co->rco_exited);
}

// Fold a terminating thread's lifetime usage into its task's
// terminated-threads accumulator.
void
recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th)
{
	recount_rollup(&recount_thread_plan, th->rth_lifetime,
	    recount_task_terminated_plan.rpl_topo, tk->rtk_terminated);
}

#pragma mark - scheduler

// `result = lhs - rhs` for snapshots; `lhs` must not be older than `rhs`.
OS_ALWAYS_INLINE
static void
recount_snap_diff(struct recount_snap *result,
    const struct recount_snap *lhs, const struct recount_snap *rhs)
{
	assert3u(lhs->rsn_time_mach, >=, rhs->rsn_time_mach);
	result->rsn_time_mach = lhs->rsn_time_mach - rhs->rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	assert3u(lhs->rsn_insns, >=, rhs->rsn_insns);
	assert3u(lhs->rsn_cycles, >=, rhs->rsn_cycles);
	result->rsn_cycles = lhs->rsn_cycles - rhs->rsn_cycles;
	result->rsn_insns = lhs->rsn_insns - rhs->rsn_insns;
#endif // CONFIG_PERVASIVE_CPI
}
535 
// Collapse system time into user time on configurations without precise
// user/kernel time accounting.
static void
_fix_time_precision(struct recount_usage *usage)
{
#if PRECISE_USER_KERNEL_TIME
#pragma unused(usage)
#else // PRECISE_USER_KERNEL_TIME
	// Attribute all time to user, as the system is only acting "on behalf
	// of" user processes -- a bit sketchy.
	usage->ru_metrics[RCT_LVL_USER].rm_time_mach +=
	    recount_usage_system_time_mach(usage);
	usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach = 0;
#endif // !PRECISE_USER_KERNEL_TIME
}

// Report the current thread's usage into `usage`, including the interval
// since the processor's last snapshot (attributed to kernel time).
// Interrupts must be disabled.
void
recount_current_thread_usage(struct recount_usage *usage)
{
	assert(ml_get_interrupts_enabled() == FALSE);
	thread_t thread = current_thread();
	struct recount_snap snap = { 0 };
	recount_snapshot(&snap);
	// The unsafe sum is OK: the current thread's counters cannot be
	// updated while interrupts are disabled on this CPU.
	recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    usage);
	struct recount_snap *last = recount_get_snap(current_processor());
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, &snap, last);
	recount_usage_add_snap(usage, RCT_LVL_KERNEL, &diff);
	_fix_time_precision(usage);
}
565 
// Report the current thread's total usage and, separately, its usage on
// performance cores only.
void
recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only)
{
	struct recount_usage usage_perf_levels[RCT_CPU_KIND_COUNT] = { 0 };
	recount_current_thread_perf_level_usage(usage_perf_levels);
	recount_sum_usage(&recount_thread_plan, usage_perf_levels, usage);
	*usage_perf_only = usage_perf_levels[RCT_CPU_PERFORMANCE];
	_fix_time_precision(usage);
	_fix_time_precision(usage_perf_only);
}

// Report `thread`'s usage broken down by CPU kind (perf level).
void
recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels)
{
	recount_rollup(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    RCT_TOPO_CPU_KIND, usage_levels);
	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
	for (size_t i = 0; i < topo_count; i++) {
		_fix_time_precision(&usage_levels[i]);
	}
}

// Report the current thread's usage by CPU kind, adding the time since the
// last snapshot as kernel time on the current CPU's kind.
// Interrupts must be disabled.
void
recount_current_thread_perf_level_usage(struct recount_usage *usage_levels)
{
	assert(ml_get_interrupts_enabled() == FALSE);
	processor_t processor = current_processor();
	thread_t thread = current_thread();
	struct recount_snap snap = { 0 };
	recount_snapshot(&snap);
	// Unsafe rollup is OK: the current thread's counters cannot update
	// while interrupts are disabled on this CPU.
	recount_rollup_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    RCT_TOPO_CPU_KIND, usage_levels);
	struct recount_snap *last = recount_get_snap(processor);
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, &snap, last);
	size_t cur_i = recount_topo_index(RCT_TOPO_CPU_KIND, processor);
	struct recount_usage *cur_usage = &usage_levels[cur_i];
	recount_usage_add_snap(cur_usage, RCT_LVL_KERNEL, &diff);
	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
	for (size_t i = 0; i < topo_count; i++) {
		_fix_time_precision(&usage_levels[i]);
	}
}
611 
// Total energy in nanojoules charged to the current thread, or 0 when
// energy accounting is not configured.  Interrupts must be disabled.
uint64_t
recount_current_thread_energy_nj(void)
{
#if RECOUNT_ENERGY
	assert(ml_get_interrupts_enabled() == FALSE);
	thread_t thread = current_thread();
	size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
	uint64_t energy_nj = 0;
	for (size_t i = 0; i < topo_count; i++) {
		energy_nj += thread->th_recount.rth_lifetime[i].rt_usage.ru_energy_nj;
	}
	return energy_nj;
#else // RECOUNT_ENERGY
	return 0;
#endif // !RECOUNT_ENERGY
}

// Accumulate a usage structure's times into `times`, folding system time
// into user time on imprecise configurations.
static void
_times_add_usage(struct recount_times_mach *times, struct recount_usage *usage)
{
	times->rtm_user += usage->ru_metrics[RCT_LVL_USER].rm_time_mach;
#if PRECISE_USER_KERNEL_TIME
	times->rtm_system += recount_usage_system_time_mach(usage);
#else // PRECISE_USER_KERNEL_TIME
	times->rtm_user += recount_usage_system_time_mach(usage);
#endif // !PRECISE_USER_KERNEL_TIME
}

// User and system times accumulated by `thread` over its lifetime.
// NOTE(review): reads the tracks without their sequence locks -- confirm
// callers tolerate a torn read if the thread is concurrently running.
struct recount_times_mach
recount_thread_times(struct thread *thread)
{
	size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		_times_add_usage(&times, &thread->th_recount.rth_lifetime[i].rt_usage);
	}
	return times;
}
650 
651 uint64_t
recount_thread_time_mach(struct thread * thread)652 recount_thread_time_mach(struct thread *thread)
653 {
654 	struct recount_times_mach times = recount_thread_times(thread);
655 	return times.rtm_user + times.rtm_system;
656 }
657 
// Mach time elapsed since the current processor last updated its snapshot.
static uint64_t
_time_since_last_snapshot(void)
{
	struct recount_snap *last = recount_get_snap(current_processor());
	uint64_t cur_time = mach_absolute_time();
	return cur_time - last->rsn_time_mach;
}

// Up-to-date total Mach time for the current thread.
// Interrupts must be disabled.
uint64_t
recount_current_thread_time_mach(void)
{
	assert(ml_get_interrupts_enabled() == FALSE);
	uint64_t previous_time = recount_thread_time_mach(current_thread());
	return previous_time + _time_since_last_snapshot();
}

// Up-to-date user and system times for the current thread.
// Interrupts must be disabled.
struct recount_times_mach
recount_current_thread_times(void)
{
	assert(ml_get_interrupts_enabled() == FALSE);
	struct recount_times_mach times = recount_thread_times(
		current_thread());
#if PRECISE_USER_KERNEL_TIME
	// This code is executing in the kernel, so the time since the last snapshot
	// (with precise user/kernel time) is since entering the kernel.
	times.rtm_system += _time_since_last_snapshot();
#else // PRECISE_USER_KERNEL_TIME
	times.rtm_user += _time_since_last_snapshot();
#endif // !PRECISE_USER_KERNEL_TIME
	return times;
}

// Sum `thread`'s lifetime tracks into a single usage structure.
void
recount_thread_usage(thread_t thread, struct recount_usage *usage)
{
	recount_sum(&recount_thread_plan, thread->th_recount.rth_lifetime, usage);
	_fix_time_precision(usage);
}
696 
697 uint64_t
recount_current_thread_interrupt_time_mach(void)698 recount_current_thread_interrupt_time_mach(void)
699 {
700 	thread_t thread = current_thread();
701 	return thread->th_recount.rth_interrupt_duration_mach;
702 }
703 
// Sum a work interval's tracks into a single usage structure.
void
recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage)
{
	recount_sum(&recount_work_interval_plan, work_interval_get_recount_tracks(work_interval), usage);
	_fix_time_precision(usage);
}
710 
711 struct recount_times_mach
recount_work_interval_times(struct work_interval * work_interval)712 recount_work_interval_times(struct work_interval *work_interval)
713 {
714 	size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
715 	struct recount_times_mach times = { 0 };
716 	for (size_t i = 0; i < topo_count; i++) {
717 		_times_add_usage(&times, &work_interval_get_recount_tracks(work_interval)[i].rt_usage);
718 	}
719 	return times;
720 }
721 
722 uint64_t
recount_work_interval_energy_nj(struct work_interval * work_interval)723 recount_work_interval_energy_nj(struct work_interval *work_interval)
724 {
725 #if RECOUNT_ENERGY
726 	size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
727 	uint64_t energy = 0;
728 	for (size_t i = 0; i < topo_count; i++) {
729 		energy += work_interval_get_recount_tracks(work_interval)[i].rt_usage.ru_energy_nj;
730 	}
731 	return energy;
732 #else // RECOUNT_ENERGY
733 #pragma unused(work_interval)
734 	return 0;
735 #endif // !RECOUNT_ENERGY
736 }
737 
// Sum the current task's lifetime tracks into `usage`.
void
recount_current_task_usage(struct recount_usage *usage)
{
	task_t task = current_task();
	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
	recount_sum(&recount_task_plan, tracks, usage);
	_fix_time_precision(usage);
}

// Sum the current task's usage and, separately, its performance-core-only
// usage.
void
recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only)
{
	task_t task = current_task();
	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
	    tracks, RCT_CPU_PERFORMANCE, usage, usage_perf_only);
	_fix_time_precision(usage);
	_fix_time_precision(usage_perf_only);
}

// Accumulate `task`'s times into `sum` and its performance-core times into
// `sum_perf_only`; both accumulators must be initialized by the caller.
void
recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only)
{
	const recount_topo_t topo = recount_task_plan.rpl_topo;
	const size_t topo_count = recount_topo_count(topo);
	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
	for (size_t i = 0; i < topo_count; i++) {
		struct recount_usage *usage = &tracks[i].rt_usage;
		_times_add_usage(sum, usage);
		if (recount_topo_matches_cpu_kind(topo, RCT_CPU_PERFORMANCE, i)) {
			_times_add_usage(sum_perf_only, usage);
		}
	}
}

// Sum usage from `task`'s already-terminated threads into `usage`.
void
recount_task_terminated_usage(task_t task, struct recount_usage *usage)
{
	recount_sum_usage(&recount_task_terminated_plan,
	    task->tk_recount.rtk_terminated, usage);
	_fix_time_precision(usage);
}

// User and system times from `task`'s already-terminated threads.
struct recount_times_mach
recount_task_terminated_times(struct task *task)
{
	size_t topo_count = recount_topo_count(recount_task_terminated_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		_times_add_usage(&times, &task->tk_recount.rtk_terminated[i]);
	}
	return times;
}

// Terminated-thread usage for `task`, with a performance-core-only copy.
void
recount_task_terminated_usage_perf_only(task_t task,
    struct recount_usage *usage, struct recount_usage *perf_only)
{
	recount_sum_usage_and_isolate_cpu_kind(&recount_task_terminated_plan,
	    task->tk_recount.rtk_terminated, RCT_CPU_PERFORMANCE, usage, perf_only);
	_fix_time_precision(usage);
	_fix_time_precision(perf_only);
}

// Lifetime usage for `task`, with a performance-core-only copy.
void
recount_task_usage_perf_only(task_t task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only)
{
	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
	    task->tk_recount.rtk_lifetime, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
	_fix_time_precision(sum);
	_fix_time_precision(sum_perf_only);
}

// Sum `task`'s lifetime tracks into `usage`.
void
recount_task_usage(task_t task, struct recount_usage *usage)
{
	recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, usage);
	_fix_time_precision(usage);
}

// User and system times accumulated by `task` over its lifetime.
struct recount_times_mach
recount_task_times(struct task *task)
{
	size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
	struct recount_times_mach times = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		_times_add_usage(&times, &task->tk_recount.rtk_lifetime[i].rt_usage);
	}
	return times;
}
831 
832 uint64_t
recount_task_energy_nj(struct task * task)833 recount_task_energy_nj(struct task *task)
834 {
835 #if RECOUNT_ENERGY
836 	size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
837 	uint64_t energy = 0;
838 	for (size_t i = 0; i < topo_count; i++) {
839 		energy += task->tk_recount.rtk_lifetime[i].rt_usage.ru_energy_nj;
840 	}
841 	return energy;
842 #else // RECOUNT_ENERGY
843 #pragma unused(task)
844 	return 0;
845 #endif // !RECOUNT_ENERGY
846 }
847 
// Sum a coalition's exited-task usage into `sum`, isolating the
// performance-core portion into `sum_perf_only`.
void
recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only)
{
	recount_sum_usage_and_isolate_cpu_kind(&recount_coalition_plan,
	    coal->rco_exited, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
	_fix_time_precision(sum);
	_fix_time_precision(sum_perf_only);
}
857 
// Add the counter deltas in `to_add` to every structure tracking this
// thread's execution: the thread itself and -- unless it is the idle
// thread -- its work interval (if any), its task, and the processor.
OS_ALWAYS_INLINE
static void
recount_absorb_snap(struct recount_snap *to_add, thread_t thread, task_t task,
    processor_t processor, recount_level_t level)
{
	// Idle threads do not attribute their usage back to the task or processor,
	// as the time is not spent "running."
	//
	// The processor-level metrics include idle time, instead, as the idle time
	// needs to be read as up-to-date from `recount_processor_usage`.

	const bool was_idle = (thread->options & TH_OPT_IDLE_THREAD) != 0;

	// Only non-idle threads can charge usage to a work interval.
	struct recount_track *wi_tracks_array = NULL;
	if (__probable(!was_idle)) {
		wi_tracks_array = work_interval_get_recount_tracks(
			thread->th_work_interval);
	}
	const bool absorb_work_interval = wi_tracks_array != NULL;

	// Open an update on each destination track...
	struct recount_track *th_track = recount_update_start(
		thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
		processor);
	struct recount_track *wi_track = NULL;
	if (__improbable(absorb_work_interval)) {
		wi_track = recount_update_start(wi_tracks_array,
		    recount_work_interval_plan.rpl_topo, processor);
	}
	struct recount_track *tk_track = was_idle ? NULL : recount_update_start(
		task->tk_recount.rtk_lifetime, recount_task_plan.rpl_topo, processor);
	struct recount_track *pr_track = was_idle ? NULL : recount_update_start(
		&processor->pr_recount.rpr_active, recount_processor_plan.rpl_topo,
		processor);
	// ...and publish the in-progress markers before writing any data.
	// NOTE(review): presumably an ordering barrier -- confirm against
	// recount_update_commit's definition.
	recount_update_commit();

	recount_usage_add_snap(&th_track->rt_usage, level, to_add);
	if (__probable(!was_idle)) {
		if (__improbable(absorb_work_interval)) {
			recount_usage_add_snap(&wi_track->rt_usage, level, to_add);
		}
		recount_usage_add_snap(&tk_track->rt_usage, level, to_add);
		recount_usage_add_snap(&pr_track->rt_usage, level, to_add);
	}

	// Publish the data writes before marking each update as complete.
	recount_update_commit();
	recount_update_end(th_track);
	if (__probable(!was_idle)) {
		if (absorb_work_interval) {
			recount_update_end(wi_track);
		}
		recount_update_end(tk_track);
		recount_update_end(pr_track);
	}
}
912 
// Attribute everything accumulated since the processor's last snapshot to the
// thread switching off-CPU (and its task), then advance the snapshot to `cur`.
void
recount_switch_thread(struct recount_snap *cur, struct thread *off_thread,
    struct task *off_task)
{
	if (__improbable(!_recount_started)) {
		// Nothing to attribute before recount has been started.
		return;
	}

	processor_t processor = current_processor();

	struct recount_snap *last = recount_get_snap(processor);
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, cur, last);
	recount_absorb_snap(&diff, off_thread, off_task, processor,
#if RECOUNT_THREAD_BASED_LEVEL
	    // Charge the level the off-going thread is currently tracked at.
	    off_thread->th_recount.rth_current_level
#else // RECOUNT_THREAD_BASED_LEVEL
	    RCT_LVL_KERNEL
#endif // !RECOUNT_THREAD_BASED_LEVEL
	    );
	// The new snapshot becomes the baseline for the next diff.
	memcpy(last, cur, sizeof(*last));
}
935 
// Attribute `energy_nj` nanojoules to the off-going thread and, when it is
// not the idle thread, to its work interval (if any), task, and processor.
void
recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj)
{
#if RECOUNT_ENERGY
	if (__improbable(!_recount_started)) {
		// Nothing to attribute before recount has been started.
		return;
	}

	// Mirrors the attribution policy of recount_absorb_snap: idle threads
	// only charge themselves, not the task or processor.
	bool was_idle = (off_thread->options & TH_OPT_IDLE_THREAD) != 0;
	struct recount_track *wi_tracks_array = work_interval_get_recount_tracks(off_thread->th_work_interval);
	bool collect_work_interval_telemetry = wi_tracks_array != NULL;
	processor_t processor = current_processor();

	// NOTE(review): there is no matching *_end call in this function --
	// presumably recount_update_single_start does not require one; confirm
	// against its definition.
	struct recount_track *th_track = recount_update_single_start(
		off_thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
		processor);
	struct recount_track *wi_track = (was_idle || !collect_work_interval_telemetry) ? NULL :
	    recount_update_single_start(wi_tracks_array,
	    recount_work_interval_plan.rpl_topo, processor);
	struct recount_track *tk_track = was_idle ? NULL :
	    recount_update_single_start(off_task->tk_recount.rtk_lifetime,
	    recount_task_plan.rpl_topo, processor);
	struct recount_track *pr_track = was_idle ? NULL :
	    recount_update_single_start(&processor->pr_recount.rpr_active,
	    recount_processor_plan.rpl_topo, processor);

	th_track->rt_usage.ru_energy_nj += energy_nj;
	if (!was_idle) {
		if (collect_work_interval_telemetry) {
			wi_track->rt_usage.ru_energy_nj += energy_nj;
		}
		tk_track->rt_usage.ru_energy_nj += energy_nj;
		pr_track->rt_usage.ru_energy_nj += energy_nj;
	}
#else // RECOUNT_ENERGY
#pragma unused(off_thread, off_task, energy_nj)
#endif // !RECOUNT_ENERGY
}
975 
// Kdebug event IDs kept in Monotonic's DBG_MONOTONIC class so existing
// tracing tools continue to see instruction/cycle context-switch events.
#define MT_KDBG_IC_CPU_CSWITCH \
	KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES, 1)

#define MT_KDBG_IC_CPU_CSWITCH_ON \
    KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES_ON_CPU, 1)

// Emit the instructions/cycles in `snap` as a kdebug event at context switch,
// only when the event has been explicitly enabled.
void
recount_log_switch_thread(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH)) {
		// In Monotonic's event hierarchy for backwards-compatibility.
		KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH, snap->rsn_insns, snap->rsn_cycles);
	}
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}

// Same emission for the on-CPU event; a NULL `snap` falls back to the current
// processor's latest snapshot.
void
recount_log_switch_thread_on(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH_ON)) {
		if (!snap) {
			snap = recount_get_snap(current_processor());
		}
		// In Monotonic's event hierarchy for backwards-compatibility.
		KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH_ON, snap->rsn_insns, snap->rsn_cycles);
	}
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}
1010 
// Take a fresh snapshot into `cur` and compute the delta against `last` into
// `diff`, honoring the precise-PMC build/boot configuration.
OS_ALWAYS_INLINE
PRECISE_TIME_ONLY_FUNC
static void
recount_precise_transition_diff(struct recount_snap *diff,
    struct recount_snap *last, struct recount_snap *cur)
{
#if PRECISE_USER_KERNEL_PMCS
#if PRECISE_USER_KERNEL_PMC_TUNABLE
	// The full `recount_snapshot_speculative` shouldn't get PMCs with a tunable
	// in this configuration.
	if (__improbable(no_precise_pmcs)) {
		// Time-only path: just diff the Mach timestamps.
		cur->rsn_time_mach = recount_timestamp_speculative();
		diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
	} else
#endif // PRECISE_USER_KERNEL_PMC_TUNABLE
	{
		// Full snapshot (including PMCs), then diff every field.
		recount_snapshot_speculative(cur);
		recount_snap_diff(diff, cur, last);
	}
#else // PRECISE_USER_KERNEL_PMCS
	// PMCs are not sampled on user/kernel transitions in this build; only
	// the Mach timestamp is diffed.
	cur->rsn_time_mach = recount_timestamp_speculative();
	diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
#endif // !PRECISE_USER_KERNEL_PMCS
}
1035 
#if MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL

// Debug-only check that the thread's tracked level matches the level a
// transition claims to be leaving.
PRECISE_TIME_ONLY_FUNC
static void
recount_assert_level(thread_t thread, recount_level_t old)
{
	assert3u(thread->th_recount.rth_current_level, ==, old);
}

#else // MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL

// No-op stand-in when level assertions are compiled out.
PRECISE_TIME_ONLY_FUNC
static void
recount_assert_level(thread_t __unused thread,
    recount_level_t __unused old)
{
}

#endif // !(MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL)
1055 
/// Called when entering or exiting the kernel to maintain system vs. user counts, extremely performance sensitive.
///
/// Must be called with interrupts disabled.
///
/// - Parameter from: What level is being switched from.
/// - Parameter to: What level is being switched to.
///
/// - Returns: The value of Mach time that was sampled inside this function.
PRECISE_TIME_FATAL_FUNC
OS_ALWAYS_INLINE
static uint64_t
recount_transition(recount_level_t from, recount_level_t to)
{
#if PRECISE_USER_KERNEL_TIME
	// Omit interrupts-disabled assertion for performance reasons.
	processor_t processor = current_processor();
	thread_t thread = processor->active_thread;
	if (thread) {
		task_t task = get_thread_ro_unchecked(thread)->tro_task;

		// The thread must currently be at the level it claims to leave.
		recount_assert_level(thread, from);
#if RECOUNT_THREAD_BASED_LEVEL
		thread->th_recount.rth_current_level = to;
#else // RECOUNT_THREAD_BASED_LEVEL
#pragma unused(to)
#endif // !RECOUNT_THREAD_BASED_LEVEL
		// Diff against the processor's last snapshot and charge the delta
		// to the level being exited.
		struct recount_snap *last = recount_get_snap(processor);
		struct recount_snap diff = { 0 };
		struct recount_snap cur = { 0 };
		recount_precise_transition_diff(&diff, last, &cur);
		recount_absorb_snap(&diff, thread, task, processor, from);
		memcpy(last, &cur, sizeof(*last));

		return cur.rsn_time_mach;
	} else {
		// No active thread on this processor; nothing to charge.
		return 0;
	}
#else // PRECISE_USER_KERNEL_TIME
#pragma unused(from, to)
	// Builds without precise user/kernel time must never reach here.
	panic("recount: kernel transition called with precise time off");
#endif // !PRECISE_USER_KERNEL_TIME
}
1098 
// Charge accumulated user-level time when trapping into the kernel.
PRECISE_TIME_FATAL_FUNC
void
recount_leave_user(void)
{
	recount_transition(RCT_LVL_USER, RCT_LVL_KERNEL);
}

// Charge accumulated kernel-level time when returning to user space.
PRECISE_TIME_FATAL_FUNC
void
recount_enter_user(void)
{
	recount_transition(RCT_LVL_KERNEL, RCT_LVL_USER);
}
1112 
// Record the start time of an interrupt on the current processor.
void
recount_enter_interrupt(void)
{
	processor_t processor = current_processor();
#if MACH_ASSERT
	// A non-zero enter time means a previous interrupt was never left.
	if (processor->pr_recount.rpr_last_interrupt_enter_time_mach != 0) {
		panic("recount: unbalanced interrupt enter/leave, started at %llu",
		    processor->pr_recount.rpr_last_interrupt_enter_time_mach);
	}
#endif // MACH_ASSERT
	processor->pr_recount.rpr_last_interrupt_enter_time_mach = recount_timestamp_speculative();
}
1125 
// Accumulate the duration of the interrupt that's ending into both the
// processor's and the interrupted thread's interrupt time totals.
void
recount_leave_interrupt(void)
{
	processor_t processor = current_processor();
	thread_t thread = processor->active_thread;
	uint64_t now = recount_timestamp_speculative();
	uint64_t since = now - processor->pr_recount.rpr_last_interrupt_enter_time_mach;
	processor->pr_recount.rpr_interrupt_duration_mach += since;
	thread->th_recount.rth_interrupt_duration_mach += since;
	processor->pr_recount.rpr_last_interrupt_leave_time_mach = now;
#if MACH_ASSERT
	// Clear the enter time so recount_enter_interrupt can detect imbalance.
	processor->pr_recount.rpr_last_interrupt_enter_time_mach = 0;
#endif // MACH_ASSERT
}
1140 
#if __x86_64__

// Intel interrupt entry: decide whether the interrupt arrived from user or
// kernel mode and transition to kernel level, recording the sampled time.
void
recount_enter_intel_interrupt(x86_saved_state_t *state)
{
	// The low bits of `%cs` being set indicate interrupt was delivered while
	// executing in user space.
	bool from_user = (is_saved_state64(state) ? state->ss_64.isf.cs :
	    state->ss_32.cs) & 0x03;
	uint64_t timestamp = recount_transition(
		from_user ? RCT_LVL_USER : RCT_LVL_KERNEL, RCT_LVL_KERNEL);
	current_cpu_datap()->cpu_int_event_time = timestamp;
}

// Intel interrupt exit: take a kernel-to-kernel transition to flush the time
// spent in the handler, then clear the recorded event time.
void
recount_leave_intel_interrupt(void)
{
	recount_transition(RCT_LVL_KERNEL, RCT_LVL_KERNEL);
	current_cpu_datap()->cpu_int_event_time = 0;
}

#endif // __x86_64__
1163 
#if RECOUNT_SECURE_METRICS

// Charge accumulated secure-level time when returning to the kernel.
// Interrupts are disabled around the transition, as recount_transition
// requires.
PRECISE_TIME_FATAL_FUNC
void
recount_leave_secure(void)
{
	boolean_t intrs_en = ml_set_interrupts_enabled(FALSE);
	recount_transition(RCT_LVL_SECURE, RCT_LVL_KERNEL);
	ml_set_interrupts_enabled(intrs_en);
}

// Charge accumulated kernel-level time when entering the secure level.
PRECISE_TIME_FATAL_FUNC
void
recount_enter_secure(void)
{
	boolean_t intrs_en = ml_set_interrupts_enabled(FALSE);
	recount_transition(RCT_LVL_KERNEL, RCT_LVL_SECURE);
	ml_set_interrupts_enabled(intrs_en);
}

#endif // RECOUNT_SECURE_METRICS
1185 
// Set on rpr_state_last_abs_time when the processor is idle.
#define RCT_PR_IDLING (0x1ULL << 63)

// Mark the processor as entering idle as of `snap`, stamping the state word
// with the idle flag plus the snapshot's Mach timestamp.
void
recount_processor_idle(struct recount_processor *pr, struct recount_snap *snap)
{
	// Entering idle while already idle would be a bookkeeping error.
	__assert_only uint64_t state_time = os_atomic_load_wide(
		&pr->rpr_state_last_abs_time, relaxed);
	assert((state_time & RCT_PR_IDLING) == 0);
	// The top bit doubles as the idle flag, so the timestamp must not use it.
	assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
	uint64_t new_state_stamp = RCT_PR_IDLING | snap->rsn_time_mach;
	os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
	    relaxed);
}

// Strip the idle flag from a state stamp, leaving just the Mach timestamp.
OS_PURE OS_ALWAYS_INLINE
static inline uint64_t
_state_time(uint64_t state_stamp)
{
	return state_stamp & ~(RCT_PR_IDLING);
}
1207 
// One-time setup of a processor's recount state: on AMP systems, cache which
// CPU-kind track index this processor's counts belong to.
void
recount_processor_init(processor_t processor)
{
#if __AMP__
	// P-cluster processors index the performance slot, all others efficiency.
	processor->pr_recount.rpr_cpu_kind_index =
	    processor->processor_set->pset_cluster_type == PSET_AMP_P ?
	    RCT_CPU_PERFORMANCE : RCT_CPU_EFFICIENCY;
#else // __AMP__
#pragma unused(processor)
#endif // !__AMP__
}
1219 
// Mark the processor as running as of `snap`, folding the idle span that just
// ended into rpr_idle_time_mach.
void
recount_processor_run(struct recount_processor *pr, struct recount_snap *snap)
{
	uint64_t state = os_atomic_load_wide(&pr->rpr_state_last_abs_time, relaxed);
	// State must be either never-set (0) or currently idling.
	assert(state == 0 || (state & RCT_PR_IDLING) == RCT_PR_IDLING);
	assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
	uint64_t new_state_stamp = snap->rsn_time_mach;
	// NOTE(review): when state is 0 (first run), the entire timestamp is
	// credited as idle time -- presumably intentional for boot; confirm.
	pr->rpr_idle_time_mach += snap->rsn_time_mach - _state_time(state);
	os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
	    relaxed);
}
1231 
// Bring a processor online: account it as running as of `cur` and seed its
// snapshot so subsequent diffs start from this point.
void
recount_processor_online(processor_t processor, struct recount_snap *cur)
{
	recount_processor_run(&processor->pr_recount, cur);
	struct recount_snap *pr_snap = recount_get_snap(processor);
	memcpy(pr_snap, cur, sizeof(*pr_snap));
}
1239 
// Report a processor's active usage into `usage` and its idle time into
// `idle_time_out`, extending idle time to the present if currently idling.
void
recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_out)
{
	recount_sum(&recount_processor_plan, &pr->rpr_active, usage);
	_fix_time_precision(usage);

	uint64_t idle_time = pr->rpr_idle_time_mach;
	uint64_t idle_stamp = os_atomic_load_wide(&pr->rpr_state_last_abs_time,
	    relaxed);
	bool idle = (idle_stamp & RCT_PR_IDLING) == RCT_PR_IDLING;
	if (idle) {
		// Since processors can idle for some time without an update, make sure
		// the idle time is up-to-date with respect to the caller.
		idle_time += mach_absolute_time() - _state_time(idle_stamp);
	}
	*idle_time_out = idle_time;
}
1258 
// Return the interrupt time accumulated on the current processor; preemption
// must be disabled so the processor cannot change underneath the caller.
uint64_t
recount_current_processor_interrupt_duration_mach(void)
{
	assert(!preemption_enabled());
	return current_processor()->pr_recount.rpr_interrupt_duration_mach;
}
1265 
1266 bool
recount_task_thread_perf_level_usage(struct task * task,uint64_t tid,struct recount_usage * usage_levels)1267 recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
1268     struct recount_usage *usage_levels)
1269 {
1270 	thread_t thread = task_findtid(task, tid);
1271 	if (thread != THREAD_NULL) {
1272 		if (thread == current_thread()) {
1273 			boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
1274 			recount_current_thread_perf_level_usage(usage_levels);
1275 			ml_set_interrupts_enabled(interrupt_state);
1276 		} else {
1277 			recount_thread_perf_level_usage(thread, usage_levels);
1278 		}
1279 		thread_deallocate(thread);
1280 	}
1281 	return thread != THREAD_NULL;
1282 }
1283 
1284 #pragma mark - utilities
1285 
1286 // For rolling up counts, convert an index from one topography to another.
1287 static size_t
recount_convert_topo_index(recount_topo_t from,recount_topo_t to,size_t i)1288 recount_convert_topo_index(recount_topo_t from, recount_topo_t to, size_t i)
1289 {
1290 	if (from == to) {
1291 		return i;
1292 	} else if (to == RCT_TOPO_SYSTEM) {
1293 		return 0;
1294 	} else if (from == RCT_TOPO_CPU) {
1295 		assertf(to == RCT_TOPO_CPU_KIND,
1296 		    "recount: cannot convert from CPU topography to %d", to);
1297 		return _topo_cpu_kinds[i];
1298 	} else {
1299 		panic("recount: unexpected rollup request from %d to %d", from, to);
1300 	}
1301 }
1302 
// Get the track index of the provided processor and topography.
OS_ALWAYS_INLINE
static size_t
recount_topo_index(recount_topo_t topo, processor_t processor)
{
	switch (topo) {
	case RCT_TOPO_SYSTEM:
		// Single system-wide slot.
		return 0;
	case RCT_TOPO_CPU:
		// One slot per CPU ID.
		return processor->cpu_id;
	case RCT_TOPO_CPU_KIND:
#if __AMP__
		// Cached at processor init time (P- vs. E-core slot).
		return processor->pr_recount.rpr_cpu_kind_index;
#else // __AMP__
		// Homogeneous systems have a single CPU kind.
		return 0;
#endif // !__AMP__
	default:
		panic("recount: invalid topology %u to index", topo);
	}
}
1323 
// Return the number of tracks needed for a given topography.
size_t
recount_topo_count(recount_topo_t topo)
{
	// Allow the compiler to reason about at least the system and CPU kind
	// counts.
	switch (topo) {
	case RCT_TOPO_SYSTEM:
		return 1;

	case RCT_TOPO_CPU_KIND:
#if __AMP__
		// Performance and efficiency kinds.
		return 2;
#else // __AMP__
		return 1;
#endif // !__AMP__

	case RCT_TOPO_CPU:
#if __arm__ || __arm64__
		return ml_get_cpu_count();
#else // __arm__ || __arm64__
		// The max CPU number is an index, so the count is one more.
		return ml_early_cpu_max_number() + 1;
#endif // !__arm__ && !__arm64__

	default:
		panic("recount: invalid topography %d", topo);
	}
}
1352 
// Whether the track at `idx` in topography `topo` holds counts for CPUs of
// the given kind.
static bool
recount_topo_matches_cpu_kind(recount_topo_t topo, recount_cpu_kind_t kind,
    size_t idx)
{
#if !__AMP__
#pragma unused(kind, idx)
#endif // !__AMP__
	switch (topo) {
	case RCT_TOPO_SYSTEM:
		// The single system slot covers every CPU kind.
		return true;

	case RCT_TOPO_CPU_KIND:
#if __AMP__
		// Track indices are the CPU-kind values themselves.
		return kind == idx;
#else // __AMP__
		return false;
#endif // !__AMP__

	case RCT_TOPO_CPU: {
#if __AMP__
		// Map the CPU index to its kind and compare.
		return _topo_cpu_kinds[idx] == kind;
#else // __AMP__
		return false;
#endif // !__AMP__
	}

	default:
		panic("recount: unexpected topography %d", topo);
	}
}
1383 
// Allocate a zeroed array of tracks sized for the plan's topography.
struct recount_track *
recount_tracks_create(recount_plan_t plan)
{
	assert(_topo_allocates[plan->rpl_topo]);
	// Z_NOFAIL: the allocation cannot fail, so callers need no NULL check.
	return zalloc_flags(_recount_track_zones[plan->rpl_topo],
	           Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT));
}
1391 
1392 static void
recount_tracks_copy(recount_plan_t plan,struct recount_track * dst,struct recount_track * src)1393 recount_tracks_copy(recount_plan_t plan, struct recount_track *dst,
1394     struct recount_track *src)
1395 {
1396 	size_t topo_count = recount_topo_count(plan->rpl_topo);
1397 	for (size_t i = 0; i < topo_count; i++) {
1398 		recount_read_track(&dst[i].rt_usage, &src[i]);
1399 	}
1400 }
1401 
// Free a track array previously allocated by recount_tracks_create.
void
recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks)
{
	assert(_topo_allocates[plan->rpl_topo]);
	zfree(_recount_track_zones[plan->rpl_topo], tracks);
}
1408 
// Allocate a thread's lifetime tracks.
void
recount_thread_init(struct recount_thread *th)
{
	th->rth_lifetime = recount_tracks_create(&recount_thread_plan);
}

// Copy a thread's lifetime counts from `src` to `dst`.
void
recount_thread_copy(struct recount_thread *dst, struct recount_thread *src)
{
	recount_tracks_copy(&recount_thread_plan, dst->rth_lifetime,
	    src->rth_lifetime);
}

// Copy a task's lifetime counts from `src` to `dst`.
void
recount_task_copy(struct recount_task *dst, const struct recount_task *src)
{
	recount_tracks_copy(&recount_task_plan, dst->rtk_lifetime,
	    src->rtk_lifetime);
}

// Free a thread's lifetime tracks.
void
recount_thread_deinit(struct recount_thread *th)
{
	recount_tracks_destroy(&recount_thread_plan, th->rth_lifetime);
}
1434 
// Allocate a task's lifetime tracks and its terminated-thread usage sums.
void
recount_task_init(struct recount_task *tk)
{
	tk->rtk_lifetime = recount_tracks_create(&recount_task_plan);
	tk->rtk_terminated = recount_usage_alloc(
		recount_task_terminated_plan.rpl_topo);
}

// Free both allocations made by recount_task_init.
void
recount_task_deinit(struct recount_task *tk)
{
	recount_tracks_destroy(&recount_task_plan, tk->rtk_lifetime);
	recount_usage_free(recount_task_terminated_plan.rpl_topo,
	    tk->rtk_terminated);
}

// Allocate a coalition's exited-task usage sums.
void
recount_coalition_init(struct recount_coalition *co)
{
	co->rco_exited = recount_usage_alloc(recount_coalition_plan.rpl_topo);
}

// Free a coalition's exited-task usage sums.
void
recount_coalition_deinit(struct recount_coalition *co)
{
	recount_usage_free(recount_coalition_plan.rpl_topo, co->rco_exited);
}

// Allocate the tracks for a work interval's current instance.
void
recount_work_interval_init(struct recount_work_interval *wi)
{
	wi->rwi_current_instance = recount_tracks_create(&recount_work_interval_plan);
}

// Free the tracks for a work interval's current instance.
void
recount_work_interval_deinit(struct recount_work_interval *wi)
{
	recount_tracks_destroy(&recount_work_interval_plan, wi->rwi_current_instance);
}
1474 
// Allocate a zeroed usage array sized for `topo`; cannot fail (Z_NOFAIL).
struct recount_usage *
recount_usage_alloc(recount_topo_t topo)
{
	assert(_topo_allocates[topo]);
	return zalloc_flags(_recount_usage_zones[topo],
	           Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT));
}

// Free a usage array previously allocated by recount_usage_alloc.
void
recount_usage_free(recount_topo_t topo, struct recount_usage *usage)
{
	assert(_topo_allocates[topo]);
	zfree(_recount_usage_zones[topo], usage);
}
1489