1 // Copyright (c) 2021 Apple Inc. All rights reserved. 2 // 3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 4 // 5 // This file contains Original Code and/or Modifications of Original Code 6 // as defined in and that are subject to the Apple Public Source License 7 // Version 2.0 (the 'License'). You may not use this file except in 8 // compliance with the License. The rights granted to you under the License 9 // may not be used to create, or enable the creation or redistribution of, 10 // unlawful or unlicensed copies of an Apple operating system, or to 11 // circumvent, violate, or enable the circumvention or violation of, any 12 // terms of an Apple operating system software license agreement. 13 // 14 // Please obtain a copy of the License at 15 // http://www.opensource.apple.com/apsl/ and read it before using this file. 16 // 17 // The Original Code and all software distributed under the License are 18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 22 // Please see the License for the specific language governing rights and 23 // limitations under the License. 24 // 25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 26 27 #ifndef KERN_RECOUNT_H 28 #define KERN_RECOUNT_H 29 30 #include <os/base.h> 31 #include <stdbool.h> 32 #include <stdint.h> 33 #include <sys/cdefs.h> 34 #include <sys/_types/_size_t.h> 35 36 __BEGIN_DECLS; 37 38 // Recount maintains counters for resources used by software, like CPU time and cycles. 39 // These counters are tracked at different levels of granularity depending on what execution bucket they're tracked in. 
// For instance, while threads only differentiate on the broad CPU kinds due to memory constraints,
// the smaller number of tasks are free to use more memory and accumulate counters per-CPU.
//
// At context-switch, the scheduler calls `recount_switch_thread` to update the counters.
// The difference between the current counter values and per-CPU snapshots are added to each thread.
// On modern systems with fast timebase reads, the counters are also updated on entering and exiting the kernel.

#pragma mark - config

// A domain of the system's CPU topology, used as granularity when tracking counter values.
__enum_decl(recount_topo_t, unsigned int, {
	// Attribute counts to the entire system, i.e. only a single counter.
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_SYSTEM,
	// Attribute counts to the CPU they accumulated on.
	// Mutual exclusion is not required to update this counter, but preemption must be disabled.
	RCT_TOPO_CPU,
	// Attribute counts to the CPU kind (e.g. P or E).
	// Note that mutual exclusion must be provided to update this kind of counter.
	RCT_TOPO_CPU_KIND,
	// The number of different topographies.
	RCT_TOPO_COUNT,
});

// Get the number of elements in an array for per-topography data.
size_t recount_topo_count(recount_topo_t topo);

// Recount's definitions of CPU kinds, in lieu of one from the platform layers.
__enum_decl(recount_cpu_kind_t, unsigned int, {
	RCT_CPU_EFFICIENCY,
	RCT_CPU_PERFORMANCE,
	// The number of CPU kinds, for sizing per-kind arrays.
	RCT_CPU_KIND_COUNT,
});

// A `recount_plan` structure controls the granularity of counting for a set of tracks and must be consulted when updating their counters.
typedef const struct recount_plan {
	// Human-readable name, set from the identifier passed to RECOUNT_PLAN_DEFINE.
	const char *rpl_name;
	// The topography (system/CPU/CPU-kind) this plan attributes counts to.
	recount_topo_t rpl_topo;
} *recount_plan_t;

// Declare a plan defined in some other translation unit.
#define RECOUNT_PLAN_DECLARE(_name) \
	extern const struct recount_plan _name;

// Define a plan with the given topography; the plan's name is the stringified identifier.
#define RECOUNT_PLAN_DEFINE(_name, _topo) \
	const struct recount_plan _name = { \
		.rpl_name = #_name, \
		.rpl_topo = _topo, \
	}

// The current objects with resource accounting policies.
RECOUNT_PLAN_DECLARE(recount_thread_plan);
RECOUNT_PLAN_DECLARE(recount_task_plan);
RECOUNT_PLAN_DECLARE(recount_task_terminated_plan);
RECOUNT_PLAN_DECLARE(recount_coalition_plan);
RECOUNT_PLAN_DECLARE(recount_processor_plan);

#pragma mark - generic accounting

// A track is where counter values can be updated atomically for readers by a
// single writer.
struct recount_track {
	// Used to synchronize updates so multiple values appear to be updated
	// atomically.
	uint32_t rt_sync;
	uint32_t rt_pad;

	// The CPU usage metrics currently supported by Recount.
	struct recount_usage {
		// Basic time tracking, in units of Mach time.
		uint64_t ru_user_time_mach;
		uint64_t ru_system_time_mach;
#if CONFIG_PERVASIVE_CPI
		// CPU performance counter metrics, when available.
		uint64_t ru_cycles;
		uint64_t ru_instructions;
#endif // CONFIG_PERVASIVE_CPI
#if CONFIG_PERVASIVE_ENERGY
		// CPU energy in nanojoules, when available.
		// This metric is updated out-of-band from the others because it can
		// only be sampled by ApplePMGR through CLPC.
		uint64_t ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
	} rt_usage;
};

// Memory management routines for tracks and usage structures.
// Allocate an array of tracks sized for the plan's topography; pair with recount_tracks_destroy.
struct recount_track *recount_tracks_create(recount_plan_t plan);
void recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks);
// Allocate an array of usage structures sized for the topography; pair with recount_usage_free.
struct recount_usage *recount_usage_alloc(recount_topo_t topo);
void recount_usage_free(recount_topo_t topo, struct recount_usage *usage);

// Attribute tracks to usage structures, to read their values for typical
// high-level interfaces.

// Sum any tracks to a single sum.
void recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// Summarize tracks into a total sum and another for a particular CPU kind.
void recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_track *tracks, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);
// The same as above, but for usage-only objects, like coalitions.
void recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_usage *usage_list, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);

// Sum the counters for each perf-level, in the order returned by the sysctls.
void recount_sum_perf_levels(recount_plan_t plan,
    struct recount_track *tracks, struct recount_usage *sums);

#pragma mark - xnu internals

#if XNU_KERNEL_PRIVATE

struct thread;
struct work_interval;
struct task;
struct proc;

// A smaller usage structure if only times are needed by a client.
struct recount_times_mach {
	// User-mode time, in units of Mach time.
	uint64_t rtm_user;
	// Kernel (system) time, in units of Mach time.
	uint64_t rtm_system;
};

// Access another thread's usage data.
void recount_thread_usage(struct thread *thread, struct recount_usage *usage);
// Fill `usage_levels` with one usage entry per perf-level.
void recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels);
// Total (user + system) time, in units of Mach time.
uint64_t recount_thread_time_mach(struct thread *thread);
struct recount_times_mach recount_thread_times(struct thread *thread);

// Read the current thread's usage data, accumulating counts until now.
//
// Interrupts must be disabled.
void recount_current_thread_usage(struct recount_usage *usage);
struct recount_times_mach recount_current_thread_times(void);
void recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
void recount_current_thread_perf_level_usage(struct recount_usage
    *usage_levels);
uint64_t recount_current_thread_time_mach(void);
uint64_t recount_current_thread_user_time_mach(void);
uint64_t recount_current_thread_interrupt_time_mach(void);
uint64_t recount_current_thread_energy_nj(void);
void recount_current_task_usage(struct recount_usage *usage);
void recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);

// Access a work interval's usage data.
void recount_work_interval_usage(struct work_interval *work_interval,
    struct recount_usage *usage);
struct recount_times_mach recount_work_interval_times(
    struct work_interval *work_interval);
uint64_t recount_work_interval_energy_nj(struct work_interval *work_interval);

// Access another task's usage data.
void recount_task_usage(struct task *task, struct recount_usage *usage);
struct recount_times_mach recount_task_times(struct task *task);
void recount_task_usage_perf_only(struct task *task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only);
void recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only);
uint64_t recount_task_energy_nj(struct task *task);
// Returns false if the thread with the given thread ID could not be found.
bool recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
    struct recount_usage *usage_levels);

// Get the sum of all terminated threads in the task (not including active
// threads).
void recount_task_terminated_usage(struct task *task,
    struct recount_usage *sum);
struct recount_times_mach recount_task_terminated_times(struct task *task);
void recount_task_terminated_usage_perf_only(struct task *task,
    struct recount_usage *sum, struct recount_usage *perf_only);

int proc_pidthreadcounts(struct proc *p, uint64_t thuniqueid, user_addr_t uaddr,
    size_t usize, int *ret);

#endif // XNU_KERNEL_PRIVATE

#if MACH_KERNEL_PRIVATE

#include <kern/smp.h>
#include <mach/machine/thread_status.h>
#include <machine/machine_routines.h>

#if __arm64__
// Recount's CPU kinds must order the same way as the platform cluster types,
// since code converts between the two.
static_assert((RCT_CPU_EFFICIENCY > RCT_CPU_PERFORMANCE) ==
    (CLUSTER_TYPE_E > CLUSTER_TYPE_P));
#endif // __arm64__

#pragma mark threads

// The per-thread resource accounting structure.
struct recount_thread {
	// Resources consumed across the lifetime of the thread, according to
	// `recount_thread_plan`.
	struct recount_track *rth_lifetime;
	// Time spent by this thread running interrupt handlers.
	uint64_t rth_interrupt_time_mach;
};
void recount_thread_init(struct recount_thread *th);
void recount_thread_copy(struct recount_thread *dst,
    struct recount_thread *src);
void recount_thread_deinit(struct recount_thread *th);

#pragma mark work_intervals

// The per-work-interval resource accounting structure.
struct recount_work_interval {
	// Resources consumed during the currently active work interval instance
	// by threads participating in the work interval, according to
	// `recount_work_interval_plan`.
	struct recount_track *rwi_current_instance;
};
void recount_work_interval_init(struct recount_work_interval *wi);
void recount_work_interval_deinit(struct recount_work_interval *wi);

#pragma mark tasks

// The per-task resource accounting structure.
struct recount_task {
	// Resources consumed across the lifetime of the task, including active
	// threads, according to `recount_task_plan`.
	//
	// The `recount_task_plan` must be per-CPU to provide mutual exclusion for
	// writers.
	struct recount_track *rtk_lifetime;
	// Usage from threads that have terminated or child tasks that have exited,
	// according to `recount_task_terminated_plan`.
	//
	// Protected by the task lock when threads terminate.
	struct recount_usage *rtk_terminated;
};
void recount_task_init(struct recount_task *tk);
// Called on tasks that are moving their accounting information to a
// synthetic or re-exec-ed task.
void recount_task_copy(struct recount_task *dst,
    const struct recount_task *src);
void recount_task_deinit(struct recount_task *tk);

#pragma mark coalitions

// The per-coalition resource accounting structure.
struct recount_coalition {
	// Resources consumed by exited tasks only, according to
	// `recount_coalition_plan`.
	//
	// Protected by the coalition lock when tasks exit and roll-up their
	// statistics.
	struct recount_usage *rco_exited;
};
void recount_coalition_init(struct recount_coalition *co);
void recount_coalition_deinit(struct recount_coalition *co);

// Get the sum of all currently-exited tasks in the coalition, and a separate
// P-only structure.
void recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only);

#pragma mark processors

struct processor;

// A snap records counter values at a specific point in time.
struct recount_snap {
	uint64_t rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	uint64_t rsn_insns;
	uint64_t rsn_cycles;
#endif // CONFIG_PERVASIVE_CPI
};

// The per-processor resource accounting structure.
struct recount_processor {
	// Snapshot of the counters when this processor last updated accounting.
	struct recount_snap rpr_snap;
	// Resources consumed on this processor, according to `recount_processor_plan`.
	struct recount_track rpr_active;
	// Snapshot taken when the current interrupt handler was entered.
	struct recount_snap rpr_interrupt_snap;
	uint64_t rpr_interrupt_time_mach;
	uint64_t rpr_idle_time_mach;
	_Atomic uint64_t rpr_state_last_abs_time;
#if __AMP__
	// Cache the RCT_TOPO_CPU_KIND offset, which cannot change.
	uint8_t rpr_cpu_kind_index;
#endif // __AMP__
};
void recount_processor_init(struct processor *processor);

// Get a snapshot of the processor's usage, along with an up-to-date snapshot
// of its idle time (to now if the processor is currently idle).
void recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_mach);

// Get the current amount of time spent handling interrupts by the current
// processor.
uint64_t recount_current_processor_interrupt_time_mach(void);

#pragma mark updates

// The following interfaces are meant for specific adopters, like the
// scheduler or platform code responsible for entering and exiting the kernel.

// Fill in a snap with the current values from time- and count-keeping
// hardware.
void recount_snapshot(struct recount_snap *snap);

// During user/kernel transitions, other serializing events provide enough
// serialization around reading the counter values.
void recount_snapshot_speculative(struct recount_snap *snap);

// Called by the scheduler when a context switch occurs.
void recount_switch_thread(struct recount_snap *snap, struct thread *off_thread,
    struct task *off_task);
// Called by the machine-dependent code to accumulate energy.
void recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj);
// Log a kdebug event when a thread switches off-CPU.
void recount_log_switch_thread(const struct recount_snap *snap);
// Log a kdebug event when a thread switches on-CPU.
void recount_log_switch_thread_on(const struct recount_snap *snap);

// This function requires that no writers race with it -- this is only safe in
// debugger context or while running in the context of the track being
// inspected.
void recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// For handling precise user/kernel time updates.
void recount_leave_user(void);
void recount_enter_user(void);
// For handling interrupt time updates.
void recount_enter_interrupt(void);
void recount_leave_interrupt(void);
#if __x86_64__
// Handle interrupt time-keeping on Intel, which isn't unified with the trap
// handlers, so whether the user or system timers are updated depends on the
// save-state.
void recount_enter_intel_interrupt(x86_saved_state_t *state);
void recount_leave_intel_interrupt(void);
#endif // __x86_64__

// Hooks for each processor idling, running, and onlining.
void recount_processor_idle(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_run(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_online(processor_t processor, struct recount_snap *snap);

#pragma mark rollups

// Called by the thread termination queue with the task lock held.
void recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th);

// Called by the coalition roll-up statistics functions with coalition lock
// held.
void recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk);

#endif // MACH_KERNEL_PRIVATE

__END_DECLS

#endif // KERN_RECOUNT_H