1 // Copyright (c) 2021-2023 Apple Inc. All rights reserved. 2 // 3 // @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 4 // 5 // This file contains Original Code and/or Modifications of Original Code 6 // as defined in and that are subject to the Apple Public Source License 7 // Version 2.0 (the 'License'). You may not use this file except in 8 // compliance with the License. The rights granted to you under the License 9 // may not be used to create, or enable the creation or redistribution of, 10 // unlawful or unlicensed copies of an Apple operating system, or to 11 // circumvent, violate, or enable the circumvention or violation of, any 12 // terms of an Apple operating system software license agreement. 13 // 14 // Please obtain a copy of the License at 15 // http://www.opensource.apple.com/apsl/ and read it before using this file. 16 // 17 // The Original Code and all software distributed under the License are 18 // distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 19 // EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 20 // INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 21 // FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 22 // Please see the License for the specific language governing rights and 23 // limitations under the License. 24 // 25 // @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 26 27 #ifndef KERN_RECOUNT_H 28 #define KERN_RECOUNT_H 29 30 #include <os/base.h> 31 #include <stdbool.h> 32 #include <stdint.h> 33 #include <sys/cdefs.h> 34 #include <sys/_types/_size_t.h> 35 36 #if CONFIG_SPTM 37 // Track counters in secure execution contexts when the SPTM is available. 38 #define RECOUNT_SECURE_METRICS 1 39 #else // CONFIG_SPTM 40 #define RECOUNT_SECURE_METRICS 0 41 #endif // !CONFIG_SPTM 42 43 #if __arm64__ 44 // Only ARM64 keeps precise track of user/system based on thread state. 
45 #define RECOUNT_THREAD_BASED_LEVEL 1 46 #else // __arm64__ 47 #define RECOUNT_THREAD_BASED_LEVEL 0 48 #endif // !__arm64__ 49 50 __BEGIN_DECLS; 51 52 // Recount maintains counters for resources used by software, like CPU time and cycles. 53 // These counters are tracked at different levels of granularity depending on what execution bucket they're tracked in. 54 // For instance, while threads only differentiate on the broad CPU kinds due to memory constraints, 55 // the fewer number of tasks are free to use more memory and accumulate counters per-CPU. 56 // 57 // At context-switch, the scheduler calls `recount_switch_thread` to update the counters. 58 // The difference between the current counter values and per-CPU snapshots are added to each thread. 59 // On modern systems with fast timebase reads, the counters are also updated on entering and exiting the kernel. 60 61 #pragma mark - config 62 63 // A domain of the system's CPU topology, used as granularity when tracking counter values. 64 __enum_decl(recount_topo_t, unsigned int, { 65 // Attribute counts to the entire system, i.e. only a single counter. 66 // Note that mutual exclusion must be provided to update this kind of counter. 67 RCT_TOPO_SYSTEM, 68 // Attribute counts to the CPU they accumulated on. 69 // Mutual exclusion is not required to update this counter, but preemption must be disabled. 70 RCT_TOPO_CPU, 71 // Attribute counts to the CPU kind (e.g. P or E). 72 // Note that mutual exclusion must be provided to update this kind of counter. 73 RCT_TOPO_CPU_KIND, 74 // The number of different topographies. 75 RCT_TOPO_COUNT, 76 }); 77 78 // Get the number of elements in an array for per-topography data. 79 size_t recount_topo_count(recount_topo_t topo); 80 81 // Recount's definitions of CPU kinds, in lieu of one from the platform layers. 
__enum_decl(recount_cpu_kind_t, unsigned int, {
	RCT_CPU_EFFICIENCY,
	RCT_CPU_PERFORMANCE,
	// The number of CPU kinds, for sizing per-kind arrays.
	RCT_CPU_KIND_COUNT,
});

// A `recount_plan` structure controls the granularity of counting for a set of tracks and must be consulted when updating their counters.
typedef const struct recount_plan {
	// Name of the plan; RECOUNT_PLAN_DEFINE sets this to the stringified symbol name.
	const char *rpl_name;
	// The topography used to attribute counts for this plan's tracks.
	recount_topo_t rpl_topo;
} *recount_plan_t;

// Declare a plan that is defined in another translation unit
// (pairs with RECOUNT_PLAN_DEFINE).
#define RECOUNT_PLAN_DECLARE(_name) \
	extern const struct recount_plan _name;

// Define a plan with the given topography; the plan's name is the
// stringified symbol passed as `_name`.
#define RECOUNT_PLAN_DEFINE(_name, _topo) \
	const struct recount_plan _name = { \
		.rpl_name = #_name, \
		.rpl_topo = _topo, \
	}

// Represents exception levels that Recount can track metrics during.
__enum_closed_decl(recount_level_t, unsigned int, {
	// Exception level is transitioning from the kernel.
	// Must be first, as this is the initial state.
	RCT_LVL_KERNEL,
	// Exception level is transitioning from user space.
	RCT_LVL_USER,
#if RECOUNT_SECURE_METRICS
	// Exception level is transitioning from secure execution.
	RCT_LVL_SECURE,
#endif // RECOUNT_SECURE_METRICS
	RCT_LVL_COUNT,
});

// The current objects with resource accounting policies.
RECOUNT_PLAN_DECLARE(recount_thread_plan);
RECOUNT_PLAN_DECLARE(recount_task_plan);
RECOUNT_PLAN_DECLARE(recount_task_terminated_plan);
RECOUNT_PLAN_DECLARE(recount_coalition_plan);
RECOUNT_PLAN_DECLARE(recount_processor_plan);

#pragma mark - generic accounting

// A track is where counter values can be updated atomically for readers by a
// single writer.
struct recount_track {
	// Used to synchronize updates so multiple values appear to be updated atomically.
	// NOTE(review): rt_pad appears to be alignment padding and rt_sync the
	// sequence/synchronization word -- confirm against the implementation.
	uint32_t rt_pad;
	uint32_t rt_sync;

	// The CPU usage metrics currently supported by Recount.
	struct recount_usage {
		struct recount_metrics {
			// Time tracking, in Mach timebase units.
			uint64_t rm_time_mach;
#if CONFIG_PERVASIVE_CPI
			// CPU performance counter metrics, when available.
			uint64_t rm_instructions;
			uint64_t rm_cycles;
#endif // CONFIG_PERVASIVE_CPI
		// One set of metrics per exception level (see recount_level_t).
		} ru_metrics[RCT_LVL_COUNT];

#if CONFIG_PERVASIVE_ENERGY
		// CPU energy in nanojoules, when available.
		// This is not a "metric" because it is sampled out-of-band by ApplePMGR through CLPC.
		uint64_t ru_energy_nj;
#endif // CONFIG_PERVASIVE_ENERGY
	} rt_usage;
};

// Memory management routines for tracks and usage structures.
struct recount_track *recount_tracks_create(recount_plan_t plan);
void recount_tracks_destroy(recount_plan_t plan, struct recount_track *tracks);
struct recount_usage *recount_usage_alloc(recount_topo_t topo);
void recount_usage_free(recount_topo_t topo, struct recount_usage *usage);

// Attribute tracks to usage structures, to read their values for typical high-level interfaces.

// Sum any tracks to a single sum.
void recount_sum(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// Summarize tracks into a total sum and another for a particular CPU kind.
void recount_sum_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_track *tracks, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);
// The same as above, but for usage-only objects, like coalitions.
void recount_sum_usage_and_isolate_cpu_kind(recount_plan_t plan,
    struct recount_usage *usage_list, recount_cpu_kind_t kind,
    struct recount_usage *sum, struct recount_usage *only_kind);

// Sum the counters for each perf-level, in the order returned by the sysctls.
void recount_sum_perf_levels(recount_plan_t plan,
    struct recount_track *tracks, struct recount_usage *sums);

#pragma mark - xnu internals

#if XNU_KERNEL_PRIVATE

struct thread;
struct work_interval;
struct task;
struct proc;

// A smaller usage structure if only times are needed by a client.
struct recount_times_mach {
	// Time spent in user space, in Mach timebase units.
	uint64_t rtm_user;
	// Time spent in the kernel, in Mach timebase units.
	uint64_t rtm_system;
};

// Read individual values back out of a usage structure.
struct recount_times_mach recount_usage_times_mach(struct recount_usage *usage);
uint64_t recount_usage_system_time_mach(struct recount_usage *usage);
uint64_t recount_usage_time_mach(struct recount_usage *usage);
uint64_t recount_usage_cycles(struct recount_usage *usage);
uint64_t recount_usage_instructions(struct recount_usage *usage);

// Access another thread's usage data.
void recount_thread_usage(struct thread *thread, struct recount_usage *usage);
void recount_thread_perf_level_usage(struct thread *thread,
    struct recount_usage *usage_levels);
uint64_t recount_thread_time_mach(struct thread *thread);
struct recount_times_mach recount_thread_times(struct thread *thread);

// Read the current thread's usage data, accumulating counts until now.
//
// Interrupts must be disabled.
void recount_current_thread_usage(struct recount_usage *usage);
struct recount_times_mach recount_current_thread_times(void);
void recount_current_thread_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);
void recount_current_thread_perf_level_usage(struct recount_usage
    *usage_levels);
uint64_t recount_current_thread_time_mach(void);
uint64_t recount_current_thread_user_time_mach(void);
uint64_t recount_current_thread_interrupt_time_mach(void);
uint64_t recount_current_thread_energy_nj(void);
void recount_current_task_usage(struct recount_usage *usage);
void recount_current_task_usage_perf_only(struct recount_usage *usage,
    struct recount_usage *usage_perf_only);

// Access a work interval's usage data.
void recount_work_interval_usage(struct work_interval *work_interval, struct recount_usage *usage);
struct recount_times_mach recount_work_interval_times(struct work_interval *work_interval);
uint64_t recount_work_interval_energy_nj(struct work_interval *work_interval);

// Access another task's usage data.
void recount_task_usage(struct task *task, struct recount_usage *usage);
struct recount_times_mach recount_task_times(struct task *task);
void recount_task_usage_perf_only(struct task *task, struct recount_usage *sum,
    struct recount_usage *sum_perf_only);
void recount_task_times_perf_only(struct task *task,
    struct recount_times_mach *sum, struct recount_times_mach *sum_perf_only);
uint64_t recount_task_energy_nj(struct task *task);
// Returns whether the thread with unique ID `tid` was found in `task` and its
// per-perf-level usage written to `usage_levels`.
bool recount_task_thread_perf_level_usage(struct task *task, uint64_t tid,
    struct recount_usage *usage_levels);

// Get the sum of all terminated threads in the task (not including active threads).
void recount_task_terminated_usage(struct task *task,
    struct recount_usage *sum);
struct recount_times_mach recount_task_terminated_times(struct task *task);
void recount_task_terminated_usage_perf_only(struct task *task,
    struct recount_usage *sum, struct recount_usage *perf_only);

// Copy per-thread counters for the thread with unique ID `thuniqueid` out to
// the user address `uaddr`; result/status is returned through `ret`.
int proc_pidthreadcounts(struct proc *p, uint64_t thuniqueid, user_addr_t uaddr,
    size_t usize, int *ret);

#endif // XNU_KERNEL_PRIVATE

#if MACH_KERNEL_PRIVATE

#include <kern/smp.h>
#include <mach/machine/thread_status.h>
#include <machine/machine_routines.h>

#if __arm64__
// Recount's CPU kinds must order the same way as the platform's cluster
// types, so values can be translated between the two enumerations.
static_assert((RCT_CPU_EFFICIENCY > RCT_CPU_PERFORMANCE) ==
    (CLUSTER_TYPE_E > CLUSTER_TYPE_P));
#endif // __arm64__

#pragma mark threads

// The per-thread resource accounting structure.
struct recount_thread {
	// Resources consumed across the lifetime of the thread, according to
	// `recount_thread_plan`.
	struct recount_track *rth_lifetime;
	// Time spent by this thread running interrupt handlers.
	uint64_t rth_interrupt_duration_mach;
#if RECOUNT_THREAD_BASED_LEVEL
	// The current level this thread is executing in.
	recount_level_t rth_current_level;
#endif // RECOUNT_THREAD_BASED_LEVEL
};
void recount_thread_init(struct recount_thread *th);
void recount_thread_copy(struct recount_thread *dst,
    struct recount_thread *src);
void recount_thread_deinit(struct recount_thread *th);

#pragma mark work_intervals

// The per-work-interval resource accounting structure.
struct recount_work_interval {
	// Resources consumed during the currently active work interval instance by
	// threads participating in the work interval, according to `recount_work_interval_plan`.
	// NOTE(review): `recount_work_interval_plan` is not among the
	// RECOUNT_PLAN_DECLARE list earlier in this header -- presumably declared
	// elsewhere; confirm.
	struct recount_track *rwi_current_instance;
};
void recount_work_interval_init(struct recount_work_interval *wi);
void recount_work_interval_deinit(struct recount_work_interval *wi);

#pragma mark tasks

// The per-task resource accounting structure.
struct recount_task {
	// Resources consumed across the lifetime of the task, including active
	// threads, according to `recount_task_plan`.
	//
	// The `recount_task_plan` must be per-CPU to provide mutual exclusion for
	// writers.
	struct recount_track *rtk_lifetime;
	// Usage from threads that have terminated or child tasks that have exited,
	// according to `recount_task_terminated_plan`.
	//
	// Protected by the task lock when threads terminate.
	struct recount_usage *rtk_terminated;
};
void recount_task_init(struct recount_task *tk);
// Called on tasks that are moving their accounting information to a
// synthetic or re-exec-ed task.
void recount_task_copy(struct recount_task *dst,
    const struct recount_task *src);
void recount_task_deinit(struct recount_task *tk);

#pragma mark coalitions

// The per-coalition resource accounting structure.
struct recount_coalition {
	// Resources consumed by exited tasks only, according to
	// `recount_coalition_plan`.
	//
	// Protected by the coalition lock when tasks exit and roll-up their
	// statistics.
	struct recount_usage *rco_exited;
};
void recount_coalition_init(struct recount_coalition *co);
void recount_coalition_deinit(struct recount_coalition *co);

// Get the sum of all currently-exited tasks in the coalition, and a separate P-only structure.
void recount_coalition_usage_perf_only(struct recount_coalition *coal,
    struct recount_usage *sum, struct recount_usage *sum_perf_only);

#pragma mark processors

struct processor;

// A snap records counter values at a specific point in time.
struct recount_snap {
	// Timestamp of the snapshot, in Mach timebase units.
	uint64_t rsn_time_mach;
#if CONFIG_PERVASIVE_CPI
	// Instruction and cycle counts at the time of the snapshot.
	uint64_t rsn_insns;
	uint64_t rsn_cycles;
#endif // CONFIG_PERVASIVE_CPI
};

// The per-processor resource accounting structure.
struct recount_processor {
	// The per-CPU snapshot counters are diffed against when attributing usage
	// (see the overview at the top of this header).
	struct recount_snap rpr_snap;
	// Counts accumulated on this processor.
	struct recount_track rpr_active;
#if MACH_ASSERT
	// Debug-only record of the level this processor is executing in.
	recount_level_t rpr_current_level;
#endif // MACH_ASSERT
	// Total time spent handling interrupts on this processor.
	uint64_t rpr_interrupt_duration_mach;
	// Timestamps of the most recent interrupt entry and exit.
	uint64_t rpr_last_interrupt_enter_time_mach;
	uint64_t rpr_last_interrupt_leave_time_mach;
	// Accumulated idle time for this processor.
	uint64_t rpr_idle_time_mach;
	// Timestamp of the last state change, accessed atomically.
	_Atomic uint64_t rpr_state_last_abs_time;
#if __AMP__
	// Cache the RCT_TOPO_CPU_KIND offset, which cannot change.
	uint8_t rpr_cpu_kind_index;
#endif // __AMP__
};
void recount_processor_init(struct processor *processor);

// Get a snapshot of the processor's usage, along with an up-to-date snapshot
// of its idle time (to now if the processor is currently idle).
void recount_processor_usage(struct recount_processor *pr,
    struct recount_usage *usage, uint64_t *idle_time_mach);

// Get the current amount of time spent handling interrupts by the current
// processor.
uint64_t recount_current_processor_interrupt_duration_mach(void);

#pragma mark updates

// The following interfaces are meant for specific adopters, like the
// scheduler or platform code responsible for entering and exiting the kernel.

// Fill in a snap with the current values from time- and count-keeping hardware.
void recount_snapshot(struct recount_snap *snap);

// During user/kernel transitions, other serializing events provide enough
// serialization around reading the counter values.
void recount_snapshot_speculative(struct recount_snap *snap);

// Called by the scheduler when a context switch occurs.
void recount_switch_thread(struct recount_snap *snap, struct thread *off_thread,
    struct task *off_task);
// Called by the machine-dependent code to accumulate energy.
void recount_add_energy(struct thread *off_thread, struct task *off_task,
    uint64_t energy_nj);
// Log a kdebug event when a thread switches off-CPU.
void recount_log_switch_thread(const struct recount_snap *snap);
// Log a kdebug event when a thread switches on-CPU.
void recount_log_switch_thread_on(const struct recount_snap *snap);

// This function requires that no writers race with it -- this is only safe in
// debugger context or while running in the context of the track being
// inspected.
void recount_sum_unsafe(recount_plan_t plan, const struct recount_track *tracks,
    struct recount_usage *sum);

// For handling precise user/kernel time updates.
void recount_leave_user(void);
void recount_enter_user(void);
// For handling interrupt time updates.
void recount_enter_interrupt(void);
void recount_leave_interrupt(void);
#if __x86_64__
// Handle interrupt time-keeping on Intel, which aren't unified with the trap
// handlers, so whether the user or system timers are updated depends on the
// save-state.
void recount_enter_intel_interrupt(x86_saved_state_t *state);
void recount_leave_intel_interrupt(void);
#endif // __x86_64__

#endif // MACH_KERNEL_PRIVATE

#if XNU_KERNEL_PRIVATE

#if RECOUNT_SECURE_METRICS
// Handle guarded mode updates.
void recount_enter_secure(void);
void recount_leave_secure(void);
#endif // RECOUNT_SECURE_METRICS

#endif // XNU_KERNEL_PRIVATE

#if MACH_KERNEL_PRIVATE

// Hooks for each processor idling, running, and onlining.
void recount_processor_idle(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_run(struct recount_processor *pr,
    struct recount_snap *snap);
void recount_processor_online(processor_t processor, struct recount_snap *snap);

#pragma mark rollups

// Called by the thread termination queue with the task lock held.
void recount_task_rollup_thread(struct recount_task *tk,
    const struct recount_thread *th);

// Called by the coalition roll-up statistics functions with coalition lock
// held.
void recount_coalition_rollup_task(struct recount_coalition *co,
    struct recount_task *tk);

#endif // MACH_KERNEL_PRIVATE

__END_DECLS

#endif // KERN_RECOUNT_H