xref: /xnu-10063.141.1/osfmk/kern/recount.c (revision d8b80295118ef25ac3a784134bcf95cd8e88109f)
1 // Copyright (c) 2021 Apple Inc.  All rights reserved.
2 //
3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 //
5 // This file contains Original Code and/or Modifications of Original Code
6 // as defined in and that are subject to the Apple Public Source License
7 // Version 2.0 (the 'License'). You may not use this file except in
8 // compliance with the License. The rights granted to you under the License
9 // may not be used to create, or enable the creation or redistribution of,
10 // unlawful or unlicensed copies of an Apple operating system, or to
11 // circumvent, violate, or enable the circumvention or violation of, any
12 // terms of an Apple operating system software license agreement.
13 //
14 // Please obtain a copy of the License at
15 // http://www.opensource.apple.com/apsl/ and read it before using this file.
16 //
17 // The Original Code and all software distributed under the License are
18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 // Please see the License for the specific language governing rights and
23 // limitations under the License.
24 //
25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26 
27 #include <kern/assert.h>
28 #include <kern/kalloc.h>
29 #include <pexpert/pexpert.h>
30 #include <sys/kdebug.h>
31 #include <sys/_types/_size_t.h>
32 #include <kern/monotonic.h>
33 #include <kern/percpu.h>
34 #include <kern/processor.h>
35 #include <kern/recount.h>
36 #include <kern/startup.h>
37 #include <kern/task.h>
38 #include <kern/thread.h>
39 #include <kern/work_interval.h>
40 #include <mach/mach_time.h>
41 #include <mach/mach_types.h>
42 #include <machine/config.h>
43 #include <machine/machine_routines.h>
44 #include <os/atomic_private.h>
45 #include <stdbool.h>
46 #include <stdint.h>
47 
48 // Recount's machine-independent implementation and interfaces for the kernel
49 // at-large.
50 
51 #define PRECISE_USER_KERNEL_PMCS PRECISE_USER_KERNEL_TIME
52 
53 // On non-release kernels, allow precise PMC (instructions, cycles) updates to
54 // be disabled for performance characterization.
55 #if PRECISE_USER_KERNEL_PMCS && (DEVELOPMENT || DEBUG)
56 #define PRECISE_USER_KERNEL_PMC_TUNABLE 1
57 
58 TUNABLE(bool, no_precise_pmcs, "-no-precise-pmcs", false);
59 #endif // PRECISE_USER_KERNEL_PMCS
60 
61 #if !PRECISE_USER_KERNEL_TIME
62 #define PRECISE_TIME_FATAL_FUNC OS_NORETURN
63 #define PRECISE_TIME_ONLY_FUNC OS_UNUSED
64 #else // !PRECISE_USER_KERNEL_TIME
65 #define PRECISE_TIME_FATAL_FUNC
66 #define PRECISE_TIME_ONLY_FUNC
67 #endif // PRECISE_USER_KERNEL_TIME
68 
69 #if !PRECISE_USER_KERNEL_PMCS
70 #define PRECISE_PMCS_ONLY_FUNC OS_UNUSED
#else // !PRECISE_USER_KERNEL_PMCS
72 #define PRECISE_PMCS_ONLY_FUNC
73 #endif // PRECISE_USER_KERNEL_PMCS
74 
75 #if HAS_CPU_DPE_COUNTER
76 // Only certain platforms have DPE counters.
77 #define RECOUNT_ENERGY CONFIG_PERVASIVE_ENERGY
78 #else // HAS_CPU_DPE_COUNTER
79 #define RECOUNT_ENERGY 0
80 #endif // !HAS_CPU_DPE_COUNTER
81 
// Topography helpers.
size_t recount_topo_count(recount_topo_t topo);
static bool recount_topo_matches_cpu_kind(recount_topo_t topo,
    recount_cpu_kind_t kind, size_t idx);
static size_t recount_topo_index(recount_topo_t topo, processor_t processor);
static size_t recount_convert_topo_index(recount_topo_t from, recount_topo_t to,
    size_t i);

// Prevent counter updates before the system is ready.
__security_const_late bool _recount_started = false;

// Lookup table that matches CPU numbers (indices) to their track index.
// Filled in once by `recount_startup` (on AMP systems) and read-only after.
__security_const_late uint8_t _topo_cpu_kinds[MAX_CPUS] = { 0 };

// Allocation metadata and zones.

// Keep static strings for `zone_create`.
// Indexed by topography; RCT_TOPO_SYSTEM has no entry because it does not
// allocate (see `_topo_allocates` below).
static const char *_usage_zone_names[RCT_TOPO_COUNT] = {
	[RCT_TOPO_CPU] = "recount_usage_cpu",
	[RCT_TOPO_CPU_KIND] = "recount_usage_cpu_kind",
};

static const char *_track_zone_names[RCT_TOPO_COUNT] = {
	[RCT_TOPO_CPU] = "recount_track_cpu",
	[RCT_TOPO_CPU_KIND] = "recount_track_cpu_kind",
};

// Whether tracks/usages for each topography require a zone allocation.
static const bool _topo_allocates[RCT_TOPO_COUNT] = {
	[RCT_TOPO_SYSTEM] = false,
	[RCT_TOPO_CPU] = true,
	[RCT_TOPO_CPU_KIND] = true,
};

// Fixed-size zones for allocations, created during startup for each
// topography that allocates.
__security_const_late zone_t _recount_usage_zones[RCT_TOPO_COUNT] = { };
__security_const_late zone_t _recount_track_zones[RCT_TOPO_COUNT] = { };
118 
__startup_func
static void
recount_startup(void)
{
#if __AMP__
	// Record each CPU's kind (performance vs. efficiency cluster) so
	// topography indices can be computed from a CPU number at runtime.
	unsigned int cpu_count = ml_get_cpu_count();
	const ml_topology_info_t *topo_info = ml_get_topology_info();
	for (unsigned int i = 0; i < cpu_count; i++) {
		cluster_type_t type = topo_info->cpus[i].cluster_type;
		uint8_t cluster_i = (type == CLUSTER_TYPE_P) ? RCT_CPU_PERFORMANCE :
		    RCT_CPU_EFFICIENCY;
		_topo_cpu_kinds[i] = cluster_i;
	}
#endif // __AMP__

	// Create the fixed-size zones for topographies that allocate; each
	// zone element is an array with one usage/track per topography index.
	for (unsigned int i = 0; i < RCT_TOPO_COUNT; i++) {
		if (_topo_allocates[i]) {
			const char *usage_name = _usage_zone_names[i];
			assert(usage_name != NULL);
			_recount_usage_zones[i] = zone_create(usage_name,
			    sizeof(struct recount_usage) * recount_topo_count(i),
			    0);

			const char *track_name = _track_zone_names[i];
			assert(track_name != NULL);
			_recount_track_zones[i] = zone_create(track_name,
			    sizeof(struct recount_track) * recount_topo_count(i),
			    0);
		}
	}

	// Allow counter updates from here on.
	_recount_started = true;
}

// Runs late in the PERCPU startup phase, after per-CPU state exists.
STARTUP(PERCPU, STARTUP_RANK_LAST, recount_startup);
154 
#pragma mark - tracks

// Define the accumulation plans: the topography each kind of object tracks
// its counters at (per-CPU, per-CPU-kind, or a single system-wide track).
RECOUNT_PLAN_DEFINE(recount_thread_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_work_interval_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_plan, RCT_TOPO_CPU);
RECOUNT_PLAN_DEFINE(recount_task_terminated_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_coalition_plan, RCT_TOPO_CPU_KIND);
RECOUNT_PLAN_DEFINE(recount_processor_plan, RCT_TOPO_SYSTEM);
163 
// Read the timebase without a serializing barrier, so the read may be
// speculated relative to surrounding instructions on ARM.
OS_ALWAYS_INLINE
static inline uint64_t
recount_timestamp_speculative(void)
{
#if __arm__ || __arm64__
	return ml_get_speculative_timebase();
#else // __arm__ || __arm64__
	return mach_absolute_time();
#endif // !__arm__ && !__arm64__
}
174 
// Capture time (and cycles/instructions, when configured) without any
// serializing barrier -- callers must tolerate a speculative reading.
OS_ALWAYS_INLINE
void
recount_snapshot_speculative(struct recount_snap *snap)
{
	snap->rsn_time_mach = recount_timestamp_speculative();
#if CONFIG_PERVASIVE_CPI
	mt_cur_cpu_cycles_instrs_speculative(&snap->rsn_cycles, &snap->rsn_insns);
#endif // CONFIG_PERVASIVE_CPI
}
184 
// Capture a snapshot of the current counters; the ISB on ARM prevents the
// counter reads from being speculated ahead of prior instructions.
void
recount_snapshot(struct recount_snap *snap)
{
#if __arm__ || __arm64__
	__builtin_arm_isb(ISB_SY);
#endif // __arm__ || __arm64__
	recount_snapshot_speculative(snap);
}
193 
// Return the processor's last-recorded snapshot.
static struct recount_snap *
recount_get_snap(processor_t processor)
{
	return &processor->pr_recount.rpr_snap;
}
199 
// A simple sequence lock implementation.

// Wait until the generation count becomes even (no writer in progress).
// `gen` is the odd generation the caller observed.
static void
_seqlock_shared_lock_slowpath(const uint32_t *lck, uint32_t gen)
{
	disable_preemption();
	do {
		// Wait for the generation to change, then re-check that the
		// new value is even.
		gen = hw_wait_while_equals32((uint32_t *)(uintptr_t)lck, gen);
	} while (__improbable((gen & 1) != 0));
	// Order subsequent data reads after observing a quiescent generation.
	os_atomic_thread_fence(acquire);
	enable_preemption();
}
212 
213 static uintptr_t
_seqlock_shared_lock(const uint32_t * lck)214 _seqlock_shared_lock(const uint32_t *lck)
215 {
216 	uint32_t gen = os_atomic_load(lck, acquire);
217 	if (__improbable((gen & 1) != 0)) {
218 		_seqlock_shared_lock_slowpath(lck, gen);
219 	}
220 	return gen;
221 }
222 
// Return whether the read-side critical section saw a consistent snapshot:
// the generation must be unchanged since entry.
static bool
_seqlock_shared_try_unlock(const uint32_t *lck, uintptr_t on_enter)
{
	return os_atomic_load(lck, acquire) == on_enter;
}
228 
229 static void
_seqlock_excl_lock_relaxed(uint32_t * lck)230 _seqlock_excl_lock_relaxed(uint32_t *lck)
231 {
232 	__assert_only uintptr_t new = os_atomic_inc(lck, relaxed);
233 	assert3u((new & 1), ==, 1);
234 }
235 
// Publish the writes made in the write-side critical section, ahead of the
// unlock that follows.
static void
_seqlock_excl_commit(void)
{
	os_atomic_thread_fence(release);
}
241 
// Leave the write side: bump the generation back to an even value.
static void
_seqlock_excl_unlock_relaxed(uint32_t *lck)
{
	__assert_only uint32_t new = os_atomic_inc(lck, relaxed);
	assert3u((new & 1), ==, 0);
}
248 
// Begin updating the track for `processor`, entering the write side of its
// sequence lock.  Pair with `recount_update_end`.
static struct recount_track *
recount_update_start(struct recount_track *tracks, recount_topo_t topo,
    processor_t processor)
{
	struct recount_track *track = &tracks[recount_topo_index(topo, processor)];
	_seqlock_excl_lock_relaxed(&track->rt_sync);
	return track;
}
257 
#if RECOUNT_ENERGY

// Locate the track for `processor` without entering the write-side sequence
// lock -- used for single-field (energy) updates.
static struct recount_track *
recount_update_single_start(struct recount_track *tracks, recount_topo_t topo,
    processor_t processor)
{
	return &tracks[recount_topo_index(topo, processor)];
}

#endif // RECOUNT_ENERGY
268 
// Make pending counter updates visible to readers.
static void
recount_update_commit(void)
{
	_seqlock_excl_commit();
}
274 
// Finish updating a track, leaving the write side of its sequence lock.
static void
recount_update_end(struct recount_track *track)
{
	_seqlock_excl_unlock_relaxed(&track->rt_sync);
}
280 
// Begin reading a track's usage.  The returned pointer is only known to be
// consistent once `recount_try_read_end` subsequently returns true.
static const struct recount_usage *
recount_read_start(const struct recount_track *track, uintptr_t *on_enter)
{
	const struct recount_usage *stats = &track->rt_usage;
	*on_enter = _seqlock_shared_lock(&track->rt_sync);
	return stats;
}
288 
// Return whether the read started by `recount_read_start` was consistent.
static bool
recount_try_read_end(const struct recount_track *track, uintptr_t on_enter)
{
	return _seqlock_shared_try_unlock(&track->rt_sync, on_enter);
}
294 
// Copy a consistent snapshot of a track's usage into `stats`, retrying if a
// concurrent writer invalidated the read (standard seqlock read loop).
static void
recount_read_track(struct recount_usage *stats,
    const struct recount_track *track)
{
	uintptr_t on_enter = 0;
	do {
		const struct recount_usage *vol_stats =
		    recount_read_start(track, &on_enter);
		*stats = *vol_stats;
	} while (!recount_try_read_end(track, on_enter));
}
306 
// Accumulate `to_add` into `sum`, field by field.
static void
recount_metrics_add(struct recount_metrics *sum, const struct recount_metrics *to_add)
{
	sum->rm_time_mach += to_add->rm_time_mach;
#if CONFIG_PERVASIVE_CPI
	sum->rm_instructions += to_add->rm_instructions;
	sum->rm_cycles += to_add->rm_cycles;
#endif // CONFIG_PERVASIVE_CPI
}
316 
// Accumulate every level of metrics (and energy, when configured) from
// `to_add` into `sum`.
static void
recount_usage_add(struct recount_usage *sum, const struct recount_usage *to_add)
{
	for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
		recount_metrics_add(&sum->ru_metrics[i], &to_add->ru_metrics[i]);
	}
#if CONFIG_PERVASIVE_ENERGY
	sum->ru_energy_nj += to_add->ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
}
327 
// Fold a snapshot delta into the usage bucket for the given level.
OS_ALWAYS_INLINE
static inline void
recount_usage_add_snap(struct recount_usage *usage, recount_level_t level,
    struct recount_snap *snap)
{
	struct recount_metrics *metrics = &usage->ru_metrics[level];

	metrics->rm_time_mach += snap->rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	metrics->rm_cycles += snap->rsn_cycles;
	metrics->rm_instructions += snap->rsn_insns;
#else // CONFIG_PERVASIVE_CPI
#pragma unused(usage)
#endif // !CONFIG_PERVASIVE_CPI
}
343 
// Accumulate the per-track usage from `tracks` (laid out per the plan's
// topography) into `stats`, converting each index into `to_topo`.
static void
recount_rollup(recount_plan_t plan, const struct recount_track *tracks,
    recount_topo_t to_topo, struct recount_usage *stats)
{
	recount_topo_t from_topo = plan->rpl_topo;
	size_t topo_count = recount_topo_count(from_topo);
	struct recount_usage tmp = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		// Take a consistent (seqlock-validated) copy before summing.
		recount_read_track(&tmp, &tracks[i]);
		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
		recount_usage_add(&stats[to_i], &tmp);
	}
}
357 
// This function must be run when counters cannot increment for the track, like from the current thread.
static void
recount_rollup_unsafe(recount_plan_t plan, struct recount_track *tracks,
    recount_topo_t to_topo, struct recount_usage *stats)
{
	recount_topo_t from_topo = plan->rpl_topo;
	size_t topo_count = recount_topo_count(from_topo);
	for (size_t i = 0; i < topo_count; i++) {
		// Reading without the seqlock is safe per the precondition above.
		size_t to_i = recount_convert_topo_index(from_topo, to_topo, i);
		recount_usage_add(&stats[to_i], &tracks[i].rt_usage);
	}
}
370 
// Sum all of a plan's tracks into a single system-wide usage.
void
recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum)
{
	recount_rollup(plan, tracks, RCT_TOPO_SYSTEM, sum);
}
377 
378 void
recount_sum_unsafe(recount_plan_t plan,const struct recount_track * tracks,struct recount_usage * sum)379 recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
380     struct recount_usage *sum)
381 {
382 	recount_topo_t topo = plan->rpl_topo;
383 	size_t topo_count = recount_topo_count(topo);
384 	for (size_t i = 0; i < topo_count; i++) {
385 		recount_usage_add(sum, &tracks[i].rt_usage);
386 	}
387 }
388 
// Sum all tracks into `sum` and additionally accumulate the tracks matching
// CPUs of `kind` into `only_kind`.
void
recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_track *tracks, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind)
{
	size_t topo_count = recount_topo_count(plan->rpl_topo);
	struct recount_usage tmp = { 0 };
	for (size_t i = 0; i < topo_count; i++) {
		// Consistent copy via the seqlock, then add it to both views.
		recount_read_track(&tmp, &tracks[i]);
		recount_usage_add(sum, &tmp);
		if (recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
			recount_usage_add(only_kind, &tmp);
		}
	}
}
404 
405 static void
recount_sum_usage(recount_plan_t plan,const struct recount_usage * usages,struct recount_usage * sum)406 recount_sum_usage(recount_plan_t plan, const struct recount_usage *usages,
407     struct recount_usage *sum)
408 {
409 	const size_t topo_count = recount_topo_count(plan->rpl_topo);
410 	for (size_t i = 0; i < topo_count; i++) {
411 		recount_usage_add(sum, &usages[i]);
412 	}
413 }
414 
415 void
recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,struct recount_usage * usage,recount_cpu_kind_t kind,struct recount_usage * sum,struct recount_usage * only_kind)416 recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
417     struct recount_usage *usage, recount_cpu_kind_t kind,
418     struct recount_usage *sum, struct recount_usage *only_kind)
419 {
420 	const size_t topo_count = recount_topo_count(plan->rpl_topo);
421 	for (size_t i = 0; i < topo_count; i++) {
422 		recount_usage_add(sum, &usage[i]);
423 		if (only_kind && recount_topo_matches_cpu_kind(plan->rpl_topo, kind, i)) {
424 			recount_usage_add(only_kind, &usage[i]);
425 		}
426 	}
427 }
428 
// Sum a plan's tracks into per-CPU-kind (performance level) usages.
void
recount_sum_perf_levels(recount_plan_t plan, struct recount_track *tracks,
    struct recount_usage *sums)
{
	recount_rollup(plan, tracks, RCT_TOPO_CPU_KIND, sums);
}
435 
436 struct recount_times_mach
recount_usage_times_mach(struct recount_usage * usage)437 recount_usage_times_mach(struct recount_usage *usage)
438 {
439 	return (struct recount_times_mach){
440 		       .rtm_user = usage->ru_metrics[RCT_LVL_USER].rm_time_mach,
441 		       .rtm_system = recount_usage_system_time_mach(usage),
442 	};
443 }
444 
445 uint64_t
recount_usage_system_time_mach(struct recount_usage * usage)446 recount_usage_system_time_mach(struct recount_usage *usage)
447 {
448 	uint64_t system_time = usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach;
449 #if RECOUNT_SECURE_METRICS
450 	system_time += usage->ru_metrics[RCT_LVL_SECURE].rm_time_mach;
451 #endif // RECOUNT_SECURE_METRICS
452 	return system_time;
453 }
454 
455 uint64_t
recount_usage_time_mach(struct recount_usage * usage)456 recount_usage_time_mach(struct recount_usage *usage)
457 {
458 	uint64_t time = 0;
459 	for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
460 		time += usage->ru_metrics[i].rm_time_mach;
461 	}
462 	return time;
463 }
464 
465 uint64_t
recount_usage_cycles(struct recount_usage * usage)466 recount_usage_cycles(struct recount_usage *usage)
467 {
468 	uint64_t cycles = 0;
469 #if CONFIG_CPU_COUNTERS
470 	for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
471 		cycles += usage->ru_metrics[i].rm_cycles;
472 	}
473 #else // CONFIG_CPU_COUNTERS
474 #pragma unused(usage)
475 #endif // !CONFIG_CPU_COUNTERS
476 	return cycles;
477 }
478 
479 uint64_t
recount_usage_instructions(struct recount_usage * usage)480 recount_usage_instructions(struct recount_usage *usage)
481 {
482 	uint64_t instructions = 0;
483 #if CONFIG_CPU_COUNTERS
484 	for (unsigned int i = 0; i < RCT_LVL_COUNT; i++) {
485 		instructions += usage->ru_metrics[i].rm_instructions;
486 	}
487 #else // CONFIG_CPU_COUNTERS
488 #pragma unused(usage)
489 #endif // !CONFIG_CPU_COUNTERS
490 	return instructions;
491 }
492 
// Plan-specific helpers.

// Roll a terminating task's lifetime usage into its coalition's exited
// counters.
void
recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk)
{
	recount_rollup(&recount_task_plan, tk->rtk_lifetime,
	    recount_coalition_plan.rpl_topo, co->rco_exited);
}
502 
// Roll a terminating thread's lifetime usage into its task's terminated
// counters.
void
recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th)
{
	recount_rollup(&recount_thread_plan, th->rth_lifetime,
	    recount_task_terminated_plan.rpl_topo, tk->rtk_terminated);
}
510 
#pragma mark - scheduler

// `result = lhs - rhs` for snapshots.
OS_ALWAYS_INLINE
static void
recount_snap_diff(struct recount_snap *result,
    const struct recount_snap *lhs, const struct recount_snap *rhs)
{
	// Counters are expected to be monotonic: the newer snapshot (`lhs`)
	// must not be smaller than the older one (`rhs`).
	assert3u(lhs->rsn_time_mach, >=, rhs->rsn_time_mach);
	result->rsn_time_mach = lhs->rsn_time_mach - rhs->rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	assert3u(lhs->rsn_insns, >=, rhs->rsn_insns);
	assert3u(lhs->rsn_cycles, >=, rhs->rsn_cycles);
	result->rsn_cycles = lhs->rsn_cycles - rhs->rsn_cycles;
	result->rsn_insns = lhs->rsn_insns - rhs->rsn_insns;
#endif // CONFIG_PERVASIVE_CPI
}
528 
// When precise user/kernel time is disabled, fold all system time into the
// user bucket and clear the kernel bucket; otherwise, a no-op.
static void
_fix_time_precision(struct recount_usage *usage)
{
#if PRECISE_USER_KERNEL_TIME
#pragma unused(usage)
#else // PRECISE_USER_KERNEL_TIME
	// Attribute all time to user, as the system is only acting "on behalf
	// of" user processes -- a bit sketchy.
	usage->ru_metrics[RCT_LVL_USER].rm_time_mach +=
	    recount_usage_system_time_mach(usage);
	usage->ru_metrics[RCT_LVL_KERNEL].rm_time_mach = 0;
#endif // !PRECISE_USER_KERNEL_TIME
}
542 
// Report the current thread's usage, including counts accumulated since the
// processor's last snapshot.
void
recount_current_thread_usage(struct recount_usage *usage)
{
	// Interrupts must be disabled so the thread cannot migrate or be
	// context-switched while sampling.
	assert(ml_get_interrupts_enabled() == FALSE);
	thread_t thread = current_thread();
	struct recount_snap snap = { 0 };
	recount_snapshot(&snap);
	recount_sum_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    usage);
	// Add the delta since the processor's last snapshot, attributed to the
	// kernel level since this code runs in the kernel.
	struct recount_snap *last = recount_get_snap(current_processor());
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, &snap, last);
	recount_usage_add_snap(usage, RCT_LVL_KERNEL, &diff);
	_fix_time_precision(usage);
}
558 
// Report the current thread's total usage and, separately, the portion spent
// on performance cores.
void
recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only)
{
	struct recount_usage usage_perf_levels[RCT_CPU_KIND_COUNT] = { 0 };
	recount_current_thread_perf_level_usage(usage_perf_levels);
	recount_sum_usage(&recount_thread_plan, usage_perf_levels, usage);
	*usage_perf_only = usage_perf_levels[RCT_CPU_PERFORMANCE];
	_fix_time_precision(usage);
	_fix_time_precision(usage_perf_only);
}
570 
// Report a thread's usage broken out by CPU kind (performance level).
void
recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels)
{
	recount_rollup(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    RCT_TOPO_CPU_KIND, usage_levels);
	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
	for (size_t i = 0; i < topo_count; i++) {
		_fix_time_precision(&usage_levels[i]);
	}
}
582 
// Report the current thread's usage broken out by CPU kind, including the
// counts accumulated since the processor's last snapshot.
void
recount_current_thread_perf_level_usage(struct recount_usage *usage_levels)
{
	// Interrupts must be disabled so the thread cannot migrate mid-sample.
	assert(ml_get_interrupts_enabled() == FALSE);
	processor_t processor = current_processor();
	thread_t thread = current_thread();
	struct recount_snap snap = { 0 };
	recount_snapshot(&snap);
	recount_rollup_unsafe(&recount_thread_plan, thread->th_recount.rth_lifetime,
	    RCT_TOPO_CPU_KIND, usage_levels);
	// Attribute the delta since the last snapshot to the current CPU's
	// kind, at the kernel level (this code runs in the kernel).
	struct recount_snap *last = recount_get_snap(processor);
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, &snap, last);
	size_t cur_i = recount_topo_index(RCT_TOPO_CPU_KIND, processor);
	struct recount_usage *cur_usage = &usage_levels[cur_i];
	recount_usage_add_snap(cur_usage, RCT_LVL_KERNEL, &diff);
	size_t topo_count = recount_topo_count(RCT_TOPO_CPU_KIND);
	for (size_t i = 0; i < topo_count; i++) {
		_fix_time_precision(&usage_levels[i]);
	}
}
604 
605 uint64_t
recount_current_thread_energy_nj(void)606 recount_current_thread_energy_nj(void)
607 {
608 #if RECOUNT_ENERGY
609 	assert(ml_get_interrupts_enabled() == FALSE);
610 	thread_t thread = current_thread();
611 	size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
612 	uint64_t energy_nj = 0;
613 	for (size_t i = 0; i < topo_count; i++) {
614 		energy_nj += thread->th_recount.rth_lifetime[i].rt_usage.ru_energy_nj;
615 	}
616 	return energy_nj;
617 #else // RECOUNT_ENERGY
618 	return 0;
619 #endif // !RECOUNT_ENERGY
620 }
621 
// Accumulate a usage into user/system times, folding system time into the
// user bucket when precise user/kernel time is disabled.
static void
_times_add_usage(struct recount_times_mach *times, struct recount_usage *usage)
{
	times->rtm_user += usage->ru_metrics[RCT_LVL_USER].rm_time_mach;
#if PRECISE_USER_KERNEL_TIME
	times->rtm_system += recount_usage_system_time_mach(usage);
#else // PRECISE_USER_KERNEL_TIME
	times->rtm_user += recount_usage_system_time_mach(usage);
#endif // !PRECISE_USER_KERNEL_TIME
}
632 
633 struct recount_times_mach
recount_thread_times(struct thread * thread)634 recount_thread_times(struct thread *thread)
635 {
636 	size_t topo_count = recount_topo_count(recount_thread_plan.rpl_topo);
637 	struct recount_times_mach times = { 0 };
638 	for (size_t i = 0; i < topo_count; i++) {
639 		_times_add_usage(&times, &thread->th_recount.rth_lifetime[i].rt_usage);
640 	}
641 	return times;
642 }
643 
644 uint64_t
recount_thread_time_mach(struct thread * thread)645 recount_thread_time_mach(struct thread *thread)
646 {
647 	struct recount_times_mach times = recount_thread_times(thread);
648 	return times.rtm_user + times.rtm_system;
649 }
650 
651 static uint64_t
_time_since_last_snapshot(void)652 _time_since_last_snapshot(void)
653 {
654 	struct recount_snap *last = recount_get_snap(current_processor());
655 	uint64_t cur_time = mach_absolute_time();
656 	return cur_time - last->rsn_time_mach;
657 }
658 
659 uint64_t
recount_current_thread_time_mach(void)660 recount_current_thread_time_mach(void)
661 {
662 	assert(ml_get_interrupts_enabled() == FALSE);
663 	uint64_t previous_time = recount_thread_time_mach(current_thread());
664 	return previous_time + _time_since_last_snapshot();
665 }
666 
// Return the current thread's user/system times, including the interval
// since the processor's last snapshot.
struct recount_times_mach
recount_current_thread_times(void)
{
	assert(ml_get_interrupts_enabled() == FALSE);
	struct recount_times_mach times = recount_thread_times(
		current_thread());
#if PRECISE_USER_KERNEL_TIME
	// This code is executing in the kernel, so the time since the last snapshot
	// (with precise user/kernel time) is since entering the kernel.
	times.rtm_system += _time_since_last_snapshot();
#else // PRECISE_USER_KERNEL_TIME
	times.rtm_user += _time_since_last_snapshot();
#endif // !PRECISE_USER_KERNEL_TIME
	return times;
}
682 
// Report a thread's lifetime usage as a single system-wide sum.
void
recount_thread_usage(thread_t thread, struct recount_usage *usage)
{
	recount_sum(&recount_thread_plan, thread->th_recount.rth_lifetime, usage);
	_fix_time_precision(usage);
}
689 
690 uint64_t
recount_current_thread_interrupt_time_mach(void)691 recount_current_thread_interrupt_time_mach(void)
692 {
693 	thread_t thread = current_thread();
694 	return thread->th_recount.rth_interrupt_duration_mach;
695 }
696 
// Report a work interval's usage as a single system-wide sum.
void
recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage)
{
	recount_sum(&recount_work_interval_plan, work_interval_get_recount_tracks(work_interval), usage);
	_fix_time_precision(usage);
}
703 
704 struct recount_times_mach
recount_work_interval_times(struct work_interval * work_interval)705 recount_work_interval_times(struct work_interval *work_interval)
706 {
707 	size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
708 	struct recount_times_mach times = { 0 };
709 	for (size_t i = 0; i < topo_count; i++) {
710 		_times_add_usage(&times, &work_interval_get_recount_tracks(work_interval)[i].rt_usage);
711 	}
712 	return times;
713 }
714 
715 uint64_t
recount_work_interval_energy_nj(struct work_interval * work_interval)716 recount_work_interval_energy_nj(struct work_interval *work_interval)
717 {
718 #if RECOUNT_ENERGY
719 	size_t topo_count = recount_topo_count(recount_work_interval_plan.rpl_topo);
720 	uint64_t energy = 0;
721 	for (size_t i = 0; i < topo_count; i++) {
722 		energy += work_interval_get_recount_tracks(work_interval)[i].rt_usage.ru_energy_nj;
723 	}
724 	return energy;
725 #else // RECOUNT_ENERGY
726 #pragma unused(work_interval)
727 	return 0;
728 #endif // !RECOUNT_ENERGY
729 }
730 
731 void
recount_current_task_usage(struct recount_usage * usage)732 recount_current_task_usage(struct recount_usage *usage)
733 {
734 	task_t task = current_task();
735 	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
736 	recount_sum(&recount_task_plan, tracks, usage);
737 	_fix_time_precision(usage);
738 }
739 
740 void
recount_current_task_usage_perf_only(struct recount_usage * usage,struct recount_usage * usage_perf_only)741 recount_current_task_usage_perf_only(struct recount_usage *usage,
742     struct recount_usage *usage_perf_only)
743 {
744 	task_t task = current_task();
745 	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
746 	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
747 	    tracks, RCT_CPU_PERFORMANCE, usage, usage_perf_only);
748 	_fix_time_precision(usage);
749 	_fix_time_precision(usage_perf_only);
750 }
751 
752 void
recount_task_times_perf_only(struct task * task,struct recount_times_mach * sum,struct recount_times_mach * sum_perf_only)753 recount_task_times_perf_only(struct task *task,
754     struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only)
755 {
756 	const recount_topo_t topo = recount_task_plan.rpl_topo;
757 	const size_t topo_count = recount_topo_count(topo);
758 	struct recount_track *tracks = task->tk_recount.rtk_lifetime;
759 	for (size_t i = 0; i < topo_count; i++) {
760 		struct recount_usage *usage = &tracks[i].rt_usage;
761 		_times_add_usage(sum, usage);
762 		if (recount_topo_matches_cpu_kind(topo, RCT_CPU_PERFORMANCE, i)) {
763 			_times_add_usage(sum_perf_only, usage);
764 		}
765 	}
766 }
767 
// Report the usage accumulated from a task's already-terminated threads.
void
recount_task_terminated_usage(task_t task, struct recount_usage *usage)
{
	recount_sum_usage(&recount_task_terminated_plan,
	    task->tk_recount.rtk_terminated, usage);
	_fix_time_precision(usage);
}
775 
776 struct recount_times_mach
recount_task_terminated_times(struct task * task)777 recount_task_terminated_times(struct task *task)
778 {
779 	size_t topo_count = recount_topo_count(recount_task_terminated_plan.rpl_topo);
780 	struct recount_times_mach times = { 0 };
781 	for (size_t i = 0; i < topo_count; i++) {
782 		_times_add_usage(&times, &task->tk_recount.rtk_terminated[i]);
783 	}
784 	return times;
785 }
786 
// Report terminated-thread usage for a task, with a performance-cores-only
// view in `perf_only`.
void
recount_task_terminated_usage_perf_only(task_t task,
    struct recount_usage *usage, struct recount_usage *perf_only)
{
	recount_sum_usage_and_isolate_cpu_kind(&recount_task_terminated_plan,
	    task->tk_recount.rtk_terminated, RCT_CPU_PERFORMANCE, usage, perf_only);
	_fix_time_precision(usage);
	_fix_time_precision(perf_only);
}
796 
// Report a task's lifetime usage, with a performance-cores-only view in
// `sum_perf_only`.
void
recount_task_usage_perf_only(task_t task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only)
{
	recount_sum_and_isolate_cpu_kind(&recount_task_plan,
	    task->tk_recount.rtk_lifetime, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
	_fix_time_precision(sum);
	_fix_time_precision(sum_perf_only);
}
806 
// Report a task's lifetime usage as a single system-wide sum.
void
recount_task_usage(task_t task, struct recount_usage *usage)
{
	recount_sum(&recount_task_plan, task->tk_recount.rtk_lifetime, usage);
	_fix_time_precision(usage);
}
813 
814 struct recount_times_mach
recount_task_times(struct task * task)815 recount_task_times(struct task *task)
816 {
817 	size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
818 	struct recount_times_mach times = { 0 };
819 	for (size_t i = 0; i < topo_count; i++) {
820 		_times_add_usage(&times, &task->tk_recount.rtk_lifetime[i].rt_usage);
821 	}
822 	return times;
823 }
824 
// Total energy, in nanojoules, attributed to `task` over its lifetime; 0 when
// energy telemetry is compiled out.
uint64_t
recount_task_energy_nj(struct task *task)
{
#if RECOUNT_ENERGY
	size_t topo_count = recount_topo_count(recount_task_plan.rpl_topo);
	uint64_t energy = 0;
	// NOTE(review): reads rt_usage fields directly, without the
	// recount_read_track() snapshot used by recount_tracks_copy -- presumably
	// a torn read of a single 64-bit counter is acceptable here; confirm.
	for (size_t i = 0; i < topo_count; i++) {
		energy += task->tk_recount.rtk_lifetime[i].rt_usage.ru_energy_nj;
	}
	return energy;
#else // RECOUNT_ENERGY
#pragma unused(task)
	return 0;
#endif // !RECOUNT_ENERGY
}
840 
// Sum the usage of tasks that have exited the coalition into `sum`, also
// isolating the P-core contribution into `sum_perf_only`.
void
recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only)
{
	recount_sum_usage_and_isolate_cpu_kind(&recount_coalition_plan,
	    coal->rco_exited, RCT_CPU_PERFORMANCE, sum, sum_perf_only);
	// Presumably normalizes user/kernel time when precise attribution is
	// unavailable -- see _fix_time_precision.
	_fix_time_precision(sum);
	_fix_time_precision(sum_perf_only);
}
850 
// Attribute a delta of counter values (`to_add`) at the given level to every
// interested party: the thread, its work interval (if it has one), the task,
// and the processor it ran on.  Hot path -- called on context switch and on
// user/kernel transitions.
OS_ALWAYS_INLINE
static void
recount_absorb_snap(struct recount_snap *to_add, thread_t thread, task_t task,
    processor_t processor, recount_level_t level)
{
	// Idle threads do not attribute their usage back to the task or processor,
	// as the time is not spent "running."
	//
	// The processor-level metrics include idle time, instead, as the idle time
	// needs to be read as up-to-date from `recount_processor_usage`.

	const bool was_idle = (thread->options & TH_OPT_IDLE_THREAD) != 0;

	// Only a non-idle thread can charge time to a work interval's tracks.
	struct recount_track *wi_tracks_array = NULL;
	if (!was_idle) {
		wi_tracks_array = work_interval_get_recount_tracks(
			thread->th_work_interval);
	}
	bool absorb_work_interval = wi_tracks_array != NULL;

	// Open an update on every destination track before touching the counters.
	// recount_update_start/commit/end appear to implement a seqlock-style
	// write protocol (start marks the write, end publishes it) -- based on
	// usage here; see their definitions for the exact guarantees.
	struct recount_track *th_track = recount_update_start(
		thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
		processor);
	struct recount_track *wi_track = NULL;
	if (absorb_work_interval) {
		wi_track = recount_update_start(wi_tracks_array,
		    recount_work_interval_plan.rpl_topo, processor);
	}
	struct recount_track *tk_track = was_idle ? NULL : recount_update_start(
		task->tk_recount.rtk_lifetime, recount_task_plan.rpl_topo, processor);
	struct recount_track *pr_track = was_idle ? NULL : recount_update_start(
		&processor->pr_recount.rpr_active, recount_processor_plan.rpl_topo,
		processor);
	// NOTE(review): this first commit presumably orders the "write in
	// progress" markers ahead of the counter updates below; the second one
	// (after the updates) orders the updates ahead of recount_update_end.
	// Confirm against the recount_update_commit definition.
	recount_update_commit();

	recount_usage_add_snap(&th_track->rt_usage, level, to_add);
	if (!was_idle) {
		if (absorb_work_interval) {
			recount_usage_add_snap(&wi_track->rt_usage, level, to_add);
		}
		recount_usage_add_snap(&tk_track->rt_usage, level, to_add);
		recount_usage_add_snap(&pr_track->rt_usage, level, to_add);
	}

	recount_update_commit();
	// Close the updates in the same set as they were opened.
	recount_update_end(th_track);
	if (!was_idle) {
		if (absorb_work_interval) {
			recount_update_end(wi_track);
		}
		recount_update_end(tk_track);
		recount_update_end(pr_track);
	}
}
905 
// Charge the counter deltas accumulated since the processor's last snapshot
// to the thread being switched off (`off_thread`/`off_task`), then store
// `cur` as the processor's new baseline snapshot.
//
// Must be called with interrupts disabled (asserted below).
void
recount_switch_thread(struct recount_snap *cur, struct thread *off_thread,
    struct task *off_task)
{
	assert(ml_get_interrupts_enabled() == FALSE);

	// Nothing to account until the subsystem has been started.
	if (__improbable(!_recount_started)) {
		return;
	}

	processor_t processor = current_processor();

	struct recount_snap *last = recount_get_snap(processor);
	struct recount_snap diff = { 0 };
	recount_snap_diff(&diff, cur, last);
	// With thread-based levels the usage belongs to whatever level the
	// outgoing thread is currently at; otherwise the context-switch path
	// always charges kernel time.
	recount_absorb_snap(&diff, off_thread, off_task, processor,
#if RECOUNT_THREAD_BASED_LEVEL
	    off_thread->th_recount.rth_current_level
#else // RECOUNT_THREAD_BASED_LEVEL
	    RCT_LVL_KERNEL
#endif // !RECOUNT_THREAD_BASED_LEVEL
	    );
	// Make `cur` the new baseline for the next accounting interval.
	memcpy(last, cur, sizeof(*last));
}
930 
// Attribute `energy_nj` nanojoules to the thread being switched off and, for
// non-idle threads, to its work interval, task, and the current processor.
// No-op when energy telemetry is compiled out.
//
// Must be called with interrupts disabled (asserted below).
void
recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj)
{
#if RECOUNT_ENERGY
	assert(ml_get_interrupts_enabled() == FALSE);
	if (__improbable(!_recount_started)) {
		return;
	}

	// Idle threads do not charge energy back to the task or processor,
	// mirroring the time attribution policy in recount_absorb_snap.
	bool was_idle = (off_thread->options & TH_OPT_IDLE_THREAD) != 0;
	struct recount_track *wi_tracks_array = work_interval_get_recount_tracks(off_thread->th_work_interval);
	bool collect_work_interval_telemetry = wi_tracks_array != NULL;
	processor_t processor = current_processor();

	// The "single" update variant is used here with no matching
	// commit/end calls -- presumably it covers a single-field update without
	// the full seqlock protocol; confirm against its definition.
	struct recount_track *th_track = recount_update_single_start(
		off_thread->th_recount.rth_lifetime, recount_thread_plan.rpl_topo,
		processor);
	struct recount_track *wi_track = (was_idle || !collect_work_interval_telemetry) ? NULL :
	    recount_update_single_start(wi_tracks_array,
	    recount_work_interval_plan.rpl_topo, processor);
	struct recount_track *tk_track = was_idle ? NULL :
	    recount_update_single_start(off_task->tk_recount.rtk_lifetime,
	    recount_task_plan.rpl_topo, processor);
	struct recount_track *pr_track = was_idle ? NULL :
	    recount_update_single_start(&processor->pr_recount.rpr_active,
	    recount_processor_plan.rpl_topo, processor);

	th_track->rt_usage.ru_energy_nj += energy_nj;
	if (!was_idle) {
		if (collect_work_interval_telemetry) {
			wi_track->rt_usage.ru_energy_nj += energy_nj;
		}
		tk_track->rt_usage.ru_energy_nj += energy_nj;
		pr_track->rt_usage.ru_energy_nj += energy_nj;
	}
#else // RECOUNT_ENERGY
#pragma unused(off_thread, off_task, energy_nj)
#endif // !RECOUNT_ENERGY
}
971 
972 #define MT_KDBG_IC_CPU_CSWITCH \
973 	KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES, 1)
974 
975 #define MT_KDBG_IC_CPU_CSWITCH_ON \
976     KDBG_EVENTID(DBG_MONOTONIC, DBG_MT_INSTRS_CYCLES_ON_CPU, 1)
977 
// Emit the instructions/cycles context-switch kdebug event for the outgoing
// thread's final snapshot.  No-op unless pervasive CPI is configured and the
// event is explicitly enabled.
void
recount_log_switch_thread(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH)) {
		// In Monotonic's event hierarchy for backwards-compatibility.
		KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH, snap->rsn_insns, snap->rsn_cycles);
	}
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}
990 
// Emit the instructions/cycles on-CPU kdebug event for the incoming thread.
// A NULL `snap` falls back to the current processor's latest snapshot.
// No-op unless pervasive CPI is configured and the event is explicitly
// enabled.
void
recount_log_switch_thread_on(const struct recount_snap *snap)
{
#if CONFIG_PERVASIVE_CPI
	if (kdebug_debugid_explicitly_enabled(MT_KDBG_IC_CPU_CSWITCH_ON)) {
		if (!snap) {
			snap = recount_get_snap(current_processor());
		}
		// In Monotonic's event hierarchy for backwards-compatibility.
		KDBG_RELEASE(MT_KDBG_IC_CPU_CSWITCH_ON, snap->rsn_insns, snap->rsn_cycles);
	}
#else // CONFIG_PERVASIVE_CPI
#pragma unused(snap)
#endif // CONFIG_PERVASIVE_CPI
}
1006 
// Fill `cur` with a fresh snapshot and `diff` with its delta from `last`,
// for use on the user/kernel transition path.  When PMCs are not sampled
// precisely (compile-time or via tunable), only Mach time is snapshotted,
// which is cheaper than a full counter read.
OS_ALWAYS_INLINE
PRECISE_TIME_ONLY_FUNC
static void
recount_precise_transition_diff(struct recount_snap *diff,
    struct recount_snap *last, struct recount_snap *cur)
{
#if PRECISE_USER_KERNEL_PMCS
#if PRECISE_USER_KERNEL_PMC_TUNABLE
	// The full `recount_snapshot_speculative` shouldn't get PMCs with a tunable
	// in this configuration.
	if (__improbable(no_precise_pmcs)) {
		cur->rsn_time_mach = recount_timestamp_speculative();
		diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
	} else
#endif // PRECISE_USER_KERNEL_PMC_TUNABLE
	{
		recount_snapshot_speculative(cur);
		recount_snap_diff(diff, cur, last);
	}
#else // PRECISE_USER_KERNEL_PMCS
	// Without precise PMCs, only time is tracked across transitions.
	cur->rsn_time_mach = recount_timestamp_speculative();
	diff->rsn_time_mach = cur->rsn_time_mach - last->rsn_time_mach;
#endif // !PRECISE_USER_KERNEL_PMCS
}
1031 
#if MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL

// Debug check that `thread` is currently accounted at level `old`; compiled
// to a no-op when assertions or thread-based levels are disabled.
PRECISE_TIME_ONLY_FUNC
static void
recount_assert_level(thread_t thread, recount_level_t old)
{
	assert3u(thread->th_recount.rth_current_level, ==, old);
}

#else // MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL

// No-op variant when assertions or thread-based levels are disabled.
PRECISE_TIME_ONLY_FUNC
static void
recount_assert_level(thread_t __unused thread,
    recount_level_t __unused old)
{
}

#endif // !(MACH_ASSERT && RECOUNT_THREAD_BASED_LEVEL)
1051 
/// Called when entering or exiting the kernel to maintain system vs. user counts, extremely performance sensitive.
///
/// Must be called with interrupts disabled.
///
/// - Parameter from: What level is being switched from.
/// - Parameter to: What level is being switched to.
///
/// - Returns: The value of Mach time that was sampled inside this function.
PRECISE_TIME_FATAL_FUNC
static uint64_t
recount_transition(recount_level_t from, recount_level_t to)
{
#if PRECISE_USER_KERNEL_TIME
	// Omit interrupts-disabled assertion for performance reasons.
	processor_t processor = current_processor();
	thread_t thread = processor->active_thread;
	if (thread) {
		task_t task = get_thread_ro_unchecked(thread)->tro_task;

		// Time since the last snapshot is charged to the level being left.
		recount_assert_level(thread, from);
#if RECOUNT_THREAD_BASED_LEVEL
		thread->th_recount.rth_current_level = to;
#else // RECOUNT_THREAD_BASED_LEVEL
#pragma unused(to)
#endif // !RECOUNT_THREAD_BASED_LEVEL
		struct recount_snap *last = recount_get_snap(processor);
		struct recount_snap diff = { 0 };
		struct recount_snap cur = { 0 };
		recount_precise_transition_diff(&diff, last, &cur);
		recount_absorb_snap(&diff, thread, task, processor, from);
		// The fresh snapshot becomes the baseline for the new level.
		memcpy(last, &cur, sizeof(*last));

		return cur.rsn_time_mach;
	} else {
		// No active thread (early boot, presumably) -- nothing to account.
		return 0;
	}
#else // PRECISE_USER_KERNEL_TIME
#pragma unused(from, to)
	panic("recount: kernel transition called with precise time off");
#endif // !PRECISE_USER_KERNEL_TIME
}
1093 
// Account the transition from user space into the kernel.
PRECISE_TIME_FATAL_FUNC
void
recount_leave_user(void)
{
	recount_transition(RCT_LVL_USER, RCT_LVL_KERNEL);
}
1100 
// Account the transition from the kernel back out to user space.
PRECISE_TIME_FATAL_FUNC
void
recount_enter_user(void)
{
	recount_transition(RCT_LVL_KERNEL, RCT_LVL_USER);
}
1107 
// Record the start time of interrupt handling on the current processor;
// recount_leave_interrupt uses it to compute the interrupt's duration.
void
recount_enter_interrupt(void)
{
	processor_t processor = current_processor();
#if MACH_ASSERT
	// A non-zero enter time means a prior enter was never matched by a leave.
	if (processor->pr_recount.rpr_last_interrupt_enter_time_mach != 0) {
		panic("recount: unbalanced interrupt enter/leave, started at %llu",
		    processor->pr_recount.rpr_last_interrupt_enter_time_mach);
	}
#endif // MACH_ASSERT
	processor->pr_recount.rpr_last_interrupt_enter_time_mach = recount_timestamp_speculative();
}
1120 
// Account the time spent handling the interrupt that is now finishing,
// charging it to both the current processor and the interrupted thread.
void
recount_leave_interrupt(void)
{
	processor_t processor = current_processor();
	thread_t thread = processor->active_thread;
	uint64_t now = recount_timestamp_speculative();
	// Duration since the timestamp recorded by recount_enter_interrupt.
	uint64_t since = now - processor->pr_recount.rpr_last_interrupt_enter_time_mach;
	processor->pr_recount.rpr_interrupt_duration_mach += since;
	thread->th_recount.rth_interrupt_duration_mach += since;
	processor->pr_recount.rpr_last_interrupt_leave_time_mach = now;
#if MACH_ASSERT
	// Reset so recount_enter_interrupt can detect unbalanced enter/leave.
	processor->pr_recount.rpr_last_interrupt_enter_time_mach = 0;
#endif // MACH_ASSERT
}
1135 
1136 #if __x86_64__
1137 
// Intel interrupt entry: account the transition into the kernel, charging the
// time up to now to user or kernel depending on where the interrupt landed.
void
recount_enter_intel_interrupt(x86_saved_state_t *state)
{
	// The low bits of `%cs` being set indicate interrupt was delivered while
	// executing in user space.
	bool from_user = (is_saved_state64(state) ? state->ss_64.isf.cs :
	    state->ss_32.cs) & 0x03;
	uint64_t timestamp = recount_transition(
		from_user ? RCT_LVL_USER : RCT_LVL_KERNEL, RCT_LVL_KERNEL);
	// Stash the sampled time as the interrupt-entry event time for this CPU.
	current_cpu_datap()->cpu_int_event_time = timestamp;
}
1149 
// Intel interrupt exit: take a fresh snapshot (kernel-to-kernel transition,
// so no level change) and clear the per-CPU interrupt event time.
void
recount_leave_intel_interrupt(void)
{
	recount_transition(RCT_LVL_KERNEL, RCT_LVL_KERNEL);
	current_cpu_datap()->cpu_int_event_time = 0;
}
1156 
1157 #endif // __x86_64__
1158 
1159 #if RECOUNT_SECURE_METRICS
1160 
// Account the transition from the secure world back into the kernel.
// Unlike the user/kernel paths, interrupts are disabled explicitly here.
PRECISE_TIME_FATAL_FUNC
void
recount_leave_secure(void)
{
	boolean_t intrs_en = ml_set_interrupts_enabled(FALSE);
	recount_transition(RCT_LVL_SECURE, RCT_LVL_KERNEL);
	ml_set_interrupts_enabled(intrs_en);
}
1169 
// Account the transition from the kernel into the secure world.
// Unlike the user/kernel paths, interrupts are disabled explicitly here.
PRECISE_TIME_FATAL_FUNC
void
recount_enter_secure(void)
{
	boolean_t intrs_en = ml_set_interrupts_enabled(FALSE);
	recount_transition(RCT_LVL_KERNEL, RCT_LVL_SECURE);
	ml_set_interrupts_enabled(intrs_en);
}
1178 
1179 #endif // RECOUNT_SECURE_METRICS
1180 
1181 // Set on rpr_state_last_abs_time when the processor is idle.
1182 #define RCT_PR_IDLING (0x1ULL << 63)
1183 
// Mark the processor as entering idle at the time recorded in `snap`, by
// storing the timestamp with the RCT_PR_IDLING bit set.
void
recount_processor_idle(struct recount_processor *pr, struct recount_snap *snap)
{
	// The processor must not already be marked idling, and the timestamp must
	// not collide with the idle tag bit.
	__assert_only uint64_t state_time = os_atomic_load_wide(
		&pr->rpr_state_last_abs_time, relaxed);
	assert((state_time & RCT_PR_IDLING) == 0);
	assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
	uint64_t new_state_stamp = RCT_PR_IDLING | snap->rsn_time_mach;
	os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
	    relaxed);
}
1195 
// Strip the RCT_PR_IDLING tag bit from a state stamp, leaving the raw
// Mach timestamp.
OS_PURE OS_ALWAYS_INLINE
static inline uint64_t
_state_time(uint64_t state_stamp)
{
	return state_stamp & ~(RCT_PR_IDLING);
}
1202 
// Cache the processor's CPU-kind index (P vs. E core) for fast track lookup
// on asymmetric (AMP) systems; nothing to do on symmetric systems.
void
recount_processor_init(processor_t processor)
{
#if __AMP__
	processor->pr_recount.rpr_cpu_kind_index =
	    processor->processor_set->pset_cluster_type == PSET_AMP_P ?
	    RCT_CPU_PERFORMANCE : RCT_CPU_EFFICIENCY;
#else // __AMP__
#pragma unused(processor)
#endif // !__AMP__
}
1214 
// Mark the processor as leaving idle (running) at the time in `snap`,
// accumulating the just-ended idle period into rpr_idle_time_mach.
void
recount_processor_run(struct recount_processor *pr, struct recount_snap *snap)
{
	// The previous state must be either uninitialized (0) or idling, and the
	// new timestamp must not collide with the idle tag bit.
	uint64_t state = os_atomic_load_wide(&pr->rpr_state_last_abs_time, relaxed);
	assert(state == 0 || (state & RCT_PR_IDLING) == RCT_PR_IDLING);
	assert((snap->rsn_time_mach & RCT_PR_IDLING) == 0);
	uint64_t new_state_stamp = snap->rsn_time_mach;
	pr->rpr_idle_time_mach += snap->rsn_time_mach - _state_time(state);
	os_atomic_store_wide(&pr->rpr_state_last_abs_time, new_state_stamp,
	    relaxed);
}
1226 
// Bring a processor online: transition it to running and reset its baseline
// snapshot to `cur` so offline time is not charged to the next thread.
void
recount_processor_online(processor_t processor, struct recount_snap *cur)
{
	recount_processor_run(&processor->pr_recount, cur);
	struct recount_snap *pr_snap = recount_get_snap(processor);
	memcpy(pr_snap, cur, sizeof(*pr_snap));
}
1234 
// Report a processor's accumulated usage into `usage` and its total idle time
// (in Mach time units) into `idle_time_out`.
void
recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_out)
{
	recount_sum(&recount_processor_plan, &pr->rpr_active, usage);
	// Presumably normalizes user/kernel time when precise attribution is
	// unavailable -- see _fix_time_precision.
	_fix_time_precision(usage);

	uint64_t idle_time = pr->rpr_idle_time_mach;
	uint64_t idle_stamp = os_atomic_load_wide(&pr->rpr_state_last_abs_time,
	    relaxed);
	bool idle = (idle_stamp & RCT_PR_IDLING) == RCT_PR_IDLING;
	if (idle) {
		// Since processors can idle for some time without an update, make sure
		// the idle time is up-to-date with respect to the caller.
		idle_time += mach_absolute_time() - _state_time(idle_stamp);
	}
	*idle_time_out = idle_time;
}
1253 
// Total Mach time the current processor has spent handling interrupts.
// Requires preemption to be disabled so the processor cannot change.
uint64_t
recount_current_processor_interrupt_duration_mach(void)
{
	assert(!preemption_enabled());
	return current_processor()->pr_recount.rpr_interrupt_duration_mach;
}
1260 
1261 bool
recount_task_thread_perf_level_usage(struct task * task,uint64_t tid,struct recount_usage * usage_levels)1262 recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
1263     struct recount_usage *usage_levels)
1264 {
1265 	thread_t thread = task_findtid(task, tid);
1266 	if (thread != THREAD_NULL) {
1267 		if (thread == current_thread()) {
1268 			boolean_t interrupt_state = ml_set_interrupts_enabled(FALSE);
1269 			recount_current_thread_perf_level_usage(usage_levels);
1270 			ml_set_interrupts_enabled(interrupt_state);
1271 		} else {
1272 			recount_thread_perf_level_usage(thread, usage_levels);
1273 		}
1274 	}
1275 	return thread != THREAD_NULL;
1276 }
1277 
1278 #pragma mark - utilities
1279 
1280 // For rolling up counts, convert an index from one topography to another.
1281 static size_t
recount_convert_topo_index(recount_topo_t from,recount_topo_t to,size_t i)1282 recount_convert_topo_index(recount_topo_t from, recount_topo_t to, size_t i)
1283 {
1284 	if (from == to) {
1285 		return i;
1286 	} else if (to == RCT_TOPO_SYSTEM) {
1287 		return 0;
1288 	} else if (from == RCT_TOPO_CPU) {
1289 		assertf(to == RCT_TOPO_CPU_KIND,
1290 		    "recount: cannot convert from CPU topography to %d", to);
1291 		return _topo_cpu_kinds[i];
1292 	} else {
1293 		panic("recount: unexpected rollup request from %d to %d", from, to);
1294 	}
1295 }
1296 
// Get the track index of the provided processor and topography.
OS_ALWAYS_INLINE
static size_t
recount_topo_index(recount_topo_t topo, processor_t processor)
{
	switch (topo) {
	case RCT_TOPO_SYSTEM:
		// A single system-wide track.
		return 0;
	case RCT_TOPO_CPU:
		// One track per logical CPU.
		return processor->cpu_id;
	case RCT_TOPO_CPU_KIND:
#if __AMP__
		// Index cached at processor init (P vs. E core).
		return processor->pr_recount.rpr_cpu_kind_index;
#else // __AMP__
		// Symmetric systems have only one CPU kind.
		return 0;
#endif // !__AMP__
	default:
		panic("recount: invalid topology %u to index", topo);
	}
}
1317 
// Return the number of tracks needed for a given topography.
size_t
recount_topo_count(recount_topo_t topo)
{
	// Allow the compiler to reason about at least the system and CPU kind
	// counts.
	switch (topo) {
	case RCT_TOPO_SYSTEM:
		return 1;

	case RCT_TOPO_CPU_KIND:
#if __AMP__
		// P and E clusters.
		return 2;
#else // __AMP__
		return 1;
#endif // !__AMP__

	case RCT_TOPO_CPU:
#if __arm__ || __arm64__
		return ml_get_cpu_count();
#else // __arm__ || __arm64__
		// On Intel, CPU IDs may be sparse; size for the maximum number.
		return ml_early_cpu_max_number() + 1;
#endif // !__arm__ && !__arm64__

	default:
		panic("recount: invalid topography %d", topo);
	}
}
1346 
// Whether entry `idx` of a track array with topography `topo` accumulates
// usage for CPUs of `kind`.  On symmetric (non-AMP) systems only the
// system-wide topography can match.
static bool
recount_topo_matches_cpu_kind(recount_topo_t topo, recount_cpu_kind_t kind,
    size_t idx)
{
#if !__AMP__
#pragma unused(kind, idx)
#endif // !__AMP__
	switch (topo) {
	case RCT_TOPO_SYSTEM:
		// The single system entry covers every CPU kind.
		return true;

	case RCT_TOPO_CPU_KIND:
#if __AMP__
		// Track index doubles as the CPU kind on AMP systems.
		return kind == idx;
#else // __AMP__
		return false;
#endif // !__AMP__

	case RCT_TOPO_CPU: {
#if __AMP__
		// Map the CPU index to its kind via the per-CPU kind table.
		return _topo_cpu_kinds[idx] == kind;
#else // __AMP__
		return false;
#endif // !__AMP__
	}

	default:
		panic("recount: unexpected topography %d", topo);
	}
}
1377 
// Allocate a zeroed track array sized for `plan`'s topography from the
// matching zone.  Cannot fail (Z_NOFAIL); freed with recount_tracks_destroy.
struct recount_track *
recount_tracks_create(recount_plan_t plan)
{
	assert(_topo_allocates[plan->rpl_topo]);
	return zalloc_flags(_recount_track_zones[plan->rpl_topo],
	           Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT));
}
1385 
1386 static void
recount_tracks_copy(recount_plan_t plan,struct recount_track * dst,struct recount_track * src)1387 recount_tracks_copy(recount_plan_t plan, struct recount_track *dst,
1388     struct recount_track *src)
1389 {
1390 	size_t topo_count = recount_topo_count(plan->rpl_topo);
1391 	for (size_t i = 0; i < topo_count; i++) {
1392 		recount_read_track(&dst[i].rt_usage, &src[i]);
1393 	}
1394 }
1395 
// Free a track array previously allocated by recount_tracks_create for the
// same plan.
void
recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks)
{
	assert(_topo_allocates[plan->rpl_topo]);
	zfree(_recount_track_zones[plan->rpl_topo], tracks);
}
1402 
// Allocate a new thread's lifetime tracks; paired with recount_thread_deinit.
void
recount_thread_init(struct recount_thread *th)
{
	th->rth_lifetime = recount_tracks_create(&recount_thread_plan);
}
1408 
// Snapshot `src`'s lifetime usage into `dst` (both must already be
// initialized).
void
recount_thread_copy(struct recount_thread *dst, struct recount_thread *src)
{
	recount_tracks_copy(&recount_thread_plan, dst->rth_lifetime,
	    src->rth_lifetime);
}
1415 
// Snapshot `src`'s lifetime usage into `dst` (both must already be
// initialized).
void
recount_task_copy(struct recount_task *dst, const struct recount_task *src)
{
	recount_tracks_copy(&recount_task_plan, dst->rtk_lifetime,
	    src->rtk_lifetime);
}
1422 
// Release a thread's lifetime tracks; paired with recount_thread_init.
void
recount_thread_deinit(struct recount_thread *th)
{
	recount_tracks_destroy(&recount_thread_plan, th->rth_lifetime);
}
1428 
// Allocate a new task's accounting state: lifetime tracks plus a usage array
// for threads that terminate during the task's lifetime.  Paired with
// recount_task_deinit.
void
recount_task_init(struct recount_task *tk)
{
	tk->rtk_lifetime = recount_tracks_create(&recount_task_plan);
	tk->rtk_terminated = recount_usage_alloc(
		recount_task_terminated_plan.rpl_topo);
}
1436 
// Release a task's accounting state; paired with recount_task_init.
void
recount_task_deinit(struct recount_task *tk)
{
	recount_tracks_destroy(&recount_task_plan, tk->rtk_lifetime);
	recount_usage_free(recount_task_terminated_plan.rpl_topo,
	    tk->rtk_terminated);
}
1444 
// Allocate a coalition's usage array for tasks that exit the coalition;
// paired with recount_coalition_deinit.
void
recount_coalition_init(struct recount_coalition *co)
{
	co->rco_exited = recount_usage_alloc(recount_coalition_plan.rpl_topo);
}
1450 
// Release a coalition's exited-tasks usage; paired with
// recount_coalition_init.
void
recount_coalition_deinit(struct recount_coalition *co)
{
	recount_usage_free(recount_coalition_plan.rpl_topo, co->rco_exited);
}
1456 
// Allocate tracks for the work interval's current instance; paired with
// recount_work_interval_deinit.
void
recount_work_interval_init(struct recount_work_interval *wi)
{
	wi->rwi_current_instance = recount_tracks_create(&recount_work_interval_plan);
}
1462 
// Release the work interval's current-instance tracks; paired with
// recount_work_interval_init.
void
recount_work_interval_deinit(struct recount_work_interval *wi)
{
	recount_tracks_destroy(&recount_work_interval_plan, wi->rwi_current_instance);
}
1468 
// Allocate a zeroed usage array sized for `topo` from the matching zone.
// Cannot fail (Z_NOFAIL); freed with recount_usage_free.
struct recount_usage *
recount_usage_alloc(recount_topo_t topo)
{
	assert(_topo_allocates[topo]);
	return zalloc_flags(_recount_usage_zones[topo],
	           Z_VM_TAG(Z_WAITOK | Z_ZERO | Z_NOFAIL, VM_KERN_MEMORY_RECOUNT));
}
1476 
// Free a usage array previously allocated by recount_usage_alloc for the
// same topography.
void
recount_usage_free(recount_topo_t topo, struct recount_usage *usage)
{
	assert(_topo_allocates[topo]);
	zfree(_recount_usage_zones[topo], usage);
}
1483