xref: /xnu-12377.81.4/osfmk/kern/recount.h (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 // Copyright (c) 2021-2023 Apple Inc.  All rights reserved.
2 //
3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@
4 //
5 // This file contains Original Code and/or Modifications of Original Code
6 // as defined in and that are subject to the Apple Public Source License
7 // Version 2.0 (the 'License'). You may not use this file except in
8 // compliance with the License. The rights granted to you under the License
9 // may not be used to create, or enable the creation or redistribution of,
10 // unlawful or unlicensed copies of an Apple operating system, or to
11 // circumvent, violate, or enable the circumvention or violation of, any
12 // terms of an Apple operating system software license agreement.
13 //
14 // Please obtain a copy of the License at
15 // http://www.opensource.apple.com/apsl/ and read it before using this file.
16 //
17 // The Original Code and all software distributed under the License are
18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
22 // Please see the License for the specific language governing rights and
23 // limitations under the License.
24 //
25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@
26 
27 #ifndef KERN_RECOUNT_H
28 #define KERN_RECOUNT_H
29 
30 #include <os/base.h>
31 #include <stdbool.h>
32 #include <stdint.h>
33 #include <sys/cdefs.h>
34 #include <sys/_types/_size_t.h>
35 
36 #if CONFIG_SPTM
37 // Track counters in secure execution contexts when the SPTM is available.
38 #define RECOUNT_SECURE_METRICS 1
39 #else // CONFIG_SPTM
40 #define RECOUNT_SECURE_METRICS 0
41 #endif // !CONFIG_SPTM
42 
43 #if __arm64__
44 // Only ARM64 keeps precise track of user/system based on thread state.
45 #define RECOUNT_THREAD_BASED_LEVEL 1
46 #else // __arm64__
47 #define RECOUNT_THREAD_BASED_LEVEL 0
48 #endif // !__arm64__
49 
50 __BEGIN_DECLS;
51 
52 // Recount maintains counters for resources used by software, like CPU time and cycles.
53 // These counters are tracked at different levels of granularity depending on what execution bucket they're tracked in.
54 // For instance, while threads only differentiate on the broad CPU kinds due to memory constraints,
55 // the fewer number of tasks are free to use more memory and accumulate counters per-CPU.
56 //
57 // At context-switch, the scheduler calls `recount_switch_thread` to update the counters.
58 // The difference between the current counter values and per-CPU snapshots are added to each thread.
59 // On modern systems with fast timebase reads, the counters are also updated on entering and exiting the kernel.
60 
61 #pragma mark - config
62 
// A domain of the system's CPU topology, used as granularity when tracking counter values.
// Finer-grained topographies use more memory per tracked object but relax the
// synchronization required of writers.
__enum_decl(recount_topo_t, unsigned int, {
	// Attribute counts to the entire system, i.e. only a single counter.
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_SYSTEM,
	// Attribute counts to the CPU they accumulated on.
	// Mutual exclusion is not required to update this counter, but preemption must be disabled.
	RCT_TOPO_CPU,
	// Attribute counts to the CPU kind (e.g. P or E).
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_CPU_KIND,
	// The number of different topographies.
	RCT_TOPO_COUNT,
});

// Get the number of elements in an array for per-topography data,
// i.e. how many counters a plan with this topography must allocate.
size_t recount_topo_count(recount_topo_t topo);
80 
// Recount's definitions of CPU kinds, in lieu of one from the platform layers.
__enum_decl(recount_cpu_kind_t, unsigned int, {
	// Efficiency (E) cores.
	RCT_CPU_EFFICIENCY,
	// Performance (P) cores.
	RCT_CPU_PERFORMANCE,
	// The number of CPU kinds.
	RCT_CPU_KIND_COUNT,
});

// A `recount_plan` structure controls the granularity of counting for a set of tracks and must be consulted when updating their counters.
typedef const struct recount_plan {
	// Human-readable name of the plan; set by RECOUNT_PLAN_DEFINE to the
	// stringified identifier.
	const char *rpl_name;
	// The topography this plan attributes counts at (see recount_topo_t).
	recount_topo_t rpl_topo;
} *recount_plan_t;

// Declare a plan defined elsewhere, for use from headers.
#define RECOUNT_PLAN_DECLARE(_name) \
    extern const struct recount_plan _name;

// Define the storage for a plan with the given topography, using the
// identifier itself as the plan's name.
#define RECOUNT_PLAN_DEFINE(_name, _topo) \
	const struct recount_plan _name = { \
	        .rpl_name = #_name, \
	        .rpl_topo = _topo, \
	}
102 
// Represents exception levels that Recount can track metrics during.
__enum_closed_decl(recount_level_t, unsigned int, {
	// Exception level is transitioning from the kernel.
	// Must be first, as this is the initial state.
	RCT_LVL_KERNEL,
	// Exception level is transitioning from user space.
	RCT_LVL_USER,
#if RECOUNT_SECURE_METRICS
	// Exception level is transitioning from secure execution.
	// Only tracked when the SPTM is available (see RECOUNT_SECURE_METRICS).
	RCT_LVL_SECURE,
#endif // RECOUNT_SECURE_METRICS
	// The number of levels; sizes the per-level metrics arrays.
	RCT_LVL_COUNT,
});

// The current objects with resource accounting policies.
RECOUNT_PLAN_DECLARE(recount_thread_plan);
RECOUNT_PLAN_DECLARE(recount_task_plan);
RECOUNT_PLAN_DECLARE(recount_task_terminated_plan);
RECOUNT_PLAN_DECLARE(recount_coalition_plan);
RECOUNT_PLAN_DECLARE(recount_processor_plan);
123 
124 #pragma mark - generic accounting
125 
// A track is where counter values can be updated atomically for readers by a
// single writer.
struct recount_track {
	// Used to synchronize updates so multiple values appear to be updated atomically.
	uint32_t rt_pad;
	uint32_t rt_sync;

	// The CPU usage metrics currently supported by Recount.
	struct recount_usage {
		struct recount_metrics {
			// Time tracking, in Mach timebase units.
			uint64_t rm_time_mach;
#if CONFIG_PERVASIVE_CPI
			// CPU performance counter metrics, when available.
			uint64_t rm_instructions;
			uint64_t rm_cycles;
#endif // CONFIG_PERVASIVE_CPI
		// Per-exception-level metrics, indexed by recount_level_t.
		} ru_metrics[RCT_LVL_COUNT];

#if CONFIG_PERVASIVE_ENERGY
		// CPU energy in nanojoules, when available.
		// This is not a "metric" because it is sampled out-of-band by ApplePMGR through CLPC,
		// so it cannot be attributed to a particular exception level.
		uint64_t ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
	} rt_usage;
};
152 
// Memory management routines for tracks and usage structures.
// Array lengths are determined by the plan's (or topography's) element count;
// creators pair with the corresponding destroy/free routine.
struct recount_track *recount_tracks_create(recount_plan_t plan);
void recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks);
struct recount_usage *recount_usage_alloc(recount_topo_t topo);
void recount_usage_free(recount_topo_t topo, struct recount_usage *usage);

// Attribute tracks to usage structures, to read their values for typical high-level interfaces.

// Sum any tracks to a single sum.
void recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// Summarize tracks into a total sum and another for a particular CPU kind.
void recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_track *tracks, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);
// The same as above, but for usage-only objects, like coalitions.
void recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_usage *usage_list, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);

// Sum the counters for each perf-level, in the order returned by the sysctls.
void recount_sum_perf_levels(recount_plan_t plan,
    struct recount_track *tracks, struct recount_usage *sums);
177 
178 #pragma mark - xnu internals
179 
180 #if XNU_KERNEL_PRIVATE
181 
// Forward declarations to avoid pulling in the full kernel object headers.
struct thread;
struct work_interval;
struct task;
struct proc;

// A smaller usage structure if only times are needed by a client.
struct recount_times_mach {
	// Time spent at user level, in Mach timebase units.
	uint64_t rtm_user;
	// Time spent in the kernel, in Mach timebase units.
	uint64_t rtm_system;
};

// Extract values from a usage structure.
struct recount_times_mach recount_usage_times_mach(struct recount_usage *usage);
uint64_t recount_usage_system_time_mach(struct recount_usage *usage);
uint64_t recount_usage_time_mach(struct recount_usage *usage);
uint64_t recount_usage_cycles(struct recount_usage *usage);
uint64_t recount_usage_instructions(struct recount_usage *usage);

// Access another thread's usage data.
void recount_thread_usage(struct thread *thread, struct recount_usage *usage);
void recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels);
uint64_t recount_thread_time_mach(struct thread *thread);
struct recount_times_mach recount_thread_times(struct thread *thread);

// Read the current thread's usage data, accumulating counts until now.
//
// Interrupts must be disabled.
void recount_current_thread_usage(struct recount_usage *usage);
struct recount_times_mach recount_current_thread_times(void);
void recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
void recount_current_thread_perf_level_usage(struct recount_usage
    *usage_levels);
uint64_t recount_current_thread_time_mach(void);
uint64_t recount_current_thread_user_time_mach(void);
uint64_t recount_current_thread_interrupt_time_mach(void);
uint64_t recount_current_thread_energy_nj(void);
void recount_current_task_usage(struct recount_usage *usage);
void recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);

// Access a work interval's usage data.
void recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage);
struct recount_times_mach recount_work_interval_times(struct work_interval *work_interval);
uint64_t recount_work_interval_energy_nj(struct work_interval *work_interval);

// Access another task's usage data.
void recount_task_usage(struct task *task, struct recount_usage *usage);
struct recount_times_mach recount_task_times(struct task *task);
void recount_task_usage_perf_only(struct task *task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only);
void recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only);
uint64_t recount_task_energy_nj(struct task *task);
// Look up a thread in the task by its thread ID and report its per-perf-level
// usage; returns false if the thread was not found.
bool recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
    struct recount_usage *usage_levels);

// Get the sum of all terminated threads in the task (not including active threads).
void recount_task_terminated_usage(struct task *task,
    struct recount_usage *sum);
struct recount_times_mach recount_task_terminated_times(struct task *task);
void recount_task_terminated_usage_perf_only(struct task *task,
    struct recount_usage *sum, struct recount_usage *perf_only);

// Copy out per-thread counters for the thread with the given unique ID to a
// user-space buffer of `usize` bytes (used by the proc_info syscall layer).
int proc_pidthreadcounts(struct proc *p, uint64_t thuniqueid, user_addr_t uaddr,
    size_t usize, int *ret);
248 
249 #endif // XNU_KERNEL_PRIVATE
250 
251 #if MACH_KERNEL_PRIVATE
252 
253 #include <kern/smp.h>
254 #include <mach/machine/thread_status.h>
255 #include <machine/machine_routines.h>
256 
#if __arm64__
// Recount's E/P ordering must agree with the platform's cluster-type ordering,
// since code may translate between the two enumerations by comparison.
static_assert((RCT_CPU_EFFICIENCY > RCT_CPU_PERFORMANCE) ==
    (CLUSTER_TYPE_E > CLUSTER_TYPE_P));
#endif // __arm64__
261 
262 #pragma mark threads
263 
// The per-thread resource accounting structure.
struct recount_thread {
	// Resources consumed across the lifetime of the thread, according to
	// `recount_thread_plan`.
	struct recount_track *rth_lifetime;
	// Time spent by this thread running interrupt handlers, in Mach timebase
	// units.
	uint64_t rth_interrupt_duration_mach;
#if RECOUNT_THREAD_BASED_LEVEL
	// The current level this thread is executing in.
	// Only tracked per-thread on ARM64 (see RECOUNT_THREAD_BASED_LEVEL).
	recount_level_t rth_current_level;
#endif // RECOUNT_THREAD_BASED_LEVEL
};
// Lifecycle routines for the per-thread accounting state.
void recount_thread_init(struct recount_thread *th);
void recount_thread_copy(struct recount_thread *dst,
    struct recount_thread *src);
void recount_thread_deinit(struct recount_thread *th);
280 
281 #pragma mark work_intervals
282 
// The per-work-interval resource accounting structure.
struct recount_work_interval {
	// Resources consumed during the currently active work interval instance by
	// threads participating in the work interval, according to `recount_work_interval_plan`.
	struct recount_track *rwi_current_instance;
};
// Lifecycle routines for the per-work-interval accounting state.
void recount_work_interval_init(struct recount_work_interval *wi);
void recount_work_interval_deinit(struct recount_work_interval *wi);
291 
292 #pragma mark tasks
293 
// The per-task resource accounting structure.
struct recount_task {
	// Resources consumed across the lifetime of the task, including active
	// threads, according to `recount_task_plan`.
	//
	// The `recount_task_plan` must be per-CPU to provide mutual exclusion for
	// writers.
	struct recount_track *rtk_lifetime;
	// Usage from threads that have terminated or child tasks that have exited,
	// according to `recount_task_terminated_plan`.
	//
	// Protected by the task lock when threads terminate.
	struct recount_usage *rtk_terminated;
};
void recount_task_init(struct recount_task *tk);
// Called on tasks that are moving their accounting information to a
// synthetic or re-exec-ed task.
void recount_task_copy(struct recount_task *dst,
    const struct recount_task *src);
void recount_task_deinit(struct recount_task *tk);
314 
315 #pragma mark coalitions
316 
// The per-coalition resource accounting structure.
struct recount_coalition {
	// Resources consumed by exited tasks only, according to
	// `recount_coalition_plan`.
	//
	// Protected by the coalition lock when tasks exit and roll-up their
	// statistics.
	struct recount_usage *rco_exited;
};
// Lifecycle routines for the per-coalition accounting state.
void recount_coalition_init(struct recount_coalition *co);
void recount_coalition_deinit(struct recount_coalition *co);

// Get the sum of all currently-exited tasks in the coalition, and a separate P-only structure.
void recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only);
332 
333 #pragma mark processors
334 
struct processor;

// A snap records counter values at a specific point in time.
struct recount_snap {
	// Timestamp in Mach timebase units.
	uint64_t rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	// Instruction and cycle counts, when performance counters are available.
	uint64_t rsn_insns;
	uint64_t rsn_cycles;
#endif // CONFIG_PERVASIVE_CPI
};

// The per-processor resource accounting structure.
struct recount_processor {
	// The last snapshot taken on this processor, used to compute deltas.
	struct recount_snap rpr_snap;
	// Resources consumed by the currently running context, according to
	// `recount_processor_plan`.
	struct recount_track rpr_active;
#if MACH_ASSERT
	// The level this processor is executing in, for debug cross-checking.
	recount_level_t rpr_current_level;
#endif // MACH_ASSERT
	// Total time this processor has spent handling interrupts, in Mach
	// timebase units, with the enter/leave timestamps of the most recent
	// interrupt.
	uint64_t rpr_interrupt_duration_mach;
	uint64_t rpr_last_interrupt_enter_time_mach;
	uint64_t rpr_last_interrupt_leave_time_mach;
	// Accumulated idle time, in Mach timebase units.
	uint64_t rpr_idle_time_mach;
	// Timestamp of the last run-state change, read atomically by remote CPUs.
	_Atomic uint64_t rpr_state_last_abs_time;
#if __AMP__
	// Cache the RCT_TOPO_CPU_KIND offset, which cannot change.
	uint8_t rpr_cpu_kind_index;
#endif // __AMP__
};
void recount_processor_init(struct processor *processor);

// Get a snapshot of the processor's usage, along with an up-to-date snapshot
// of its idle time (to now if the processor is currently idle).
void recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_mach);

// Get the current amount of time spent handling interrupts by the current
// processor.
uint64_t recount_current_processor_interrupt_duration_mach(void);
373 
374 #pragma mark updates
375 
// The following interfaces are meant for specific adopters, like the
// scheduler or platform code responsible for entering and exiting the kernel.

// Fill in a snap with the current values from time- and count-keeping hardware.
void recount_snapshot(struct recount_snap *snap);

// During user/kernel transitions, other serializing events provide enough
// serialization around reading the counter values.
void recount_snapshot_speculative(struct recount_snap *snap);

// Called by the scheduler when a context switch occurs; attributes the delta
// since the last snap to the off-going thread and task.
void recount_switch_thread(struct recount_snap *snap, struct thread *off_thread,
    struct task *off_task);
// Called by the machine-dependent code to accumulate energy.
void recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj);
// Log a kdebug event when a thread switches off-CPU.
void recount_log_switch_thread(const struct recount_snap *snap);
// Log a kdebug event when a thread switches on-CPU.
void recount_log_switch_thread_on(const struct recount_snap *snap);

// This function requires that no writers race with it -- this is only safe in
// debugger context or while running in the context of the track being
// inspected.
void recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// For handling precise user/kernel time updates.
void recount_leave_user(void);
void recount_enter_user(void);
// For handling interrupt time updates.
void recount_enter_interrupt(void);
void recount_leave_interrupt(void);
#if __x86_64__
// Handle interrupt time-keeping on Intel, which aren't unified with the trap
// handlers, so whether the user or system timers are updated depends on the
// save-state.
void recount_enter_intel_interrupt(x86_saved_state_t *state);
void recount_leave_intel_interrupt(void);
#endif // __x86_64__
416 
417 #endif // MACH_KERNEL_PRIVATE
418 
419 #if XNU_KERNEL_PRIVATE
420 
#if RECOUNT_SECURE_METRICS
// Handle guarded mode updates: transition level accounting into and out of
// secure execution (RCT_LVL_SECURE).
void recount_enter_secure(void);
void recount_leave_secure(void);
#endif // RECOUNT_SECURE_METRICS
426 
427 #endif // XNU_KERNEL_PRIVATE
428 
429 #if MACH_KERNEL_PRIVATE
430 
// Hooks for each processor idling, running, and onlining.
void recount_processor_idle(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_run(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_online(processor_t processor, struct recount_snap *snap);

#pragma mark rollups

// Called by the thread termination queue with the task lock held.
// Folds a terminated thread's counters into the task's terminated usage.
void recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th);

// Called by the coalition roll-up statistics functions with coalition lock
// held.
void recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk);
448 
449 #endif // MACH_KERNEL_PRIVATE
450 
451 __END_DECLS
452 
453 #endif // KERN_RECOUNT_H
454