/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _KERN_SCHED_CLUTCH_H_
#define _KERN_SCHED_CLUTCH_H_

#include <kern/sched.h>
#include <kern/priority_queue.h>
#include <kern/bits.h>
#include <kern/kern_types.h>
#include <kern/sched_common.h>

#if !SCHED_TEST_HARNESS

#include <machine/atomic.h>
#include <kern/thread_group.h>

#endif /* !SCHED_TEST_HARNESS */

#if CONFIG_SCHED_CLUTCH

/*
 * Threads hard-bound to specific processors are not managed in
 * the Clutch hierarchy. This helper macro is used to indicate
 * whether a thread should be enqueued in the hierarchy.
 */
#define SCHED_CLUTCH_THREAD_ELIGIBLE(thread) ((thread->bound_processor) == PROCESSOR_NULL)

#if CONFIG_SCHED_EDGE
#define SCHED_CLUTCH_THREAD_CLUSTER_BOUND(thread) (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE)
#else /* CONFIG_SCHED_EDGE */
#define SCHED_CLUTCH_THREAD_CLUSTER_BOUND(thread) (0)
#endif /* CONFIG_SCHED_EDGE */

/*
 * Clutch Bucket Runqueue Structure.
 */
struct sched_clutch_bucket_runq {
	int scbrq_highq;
	int scbrq_count;
	bitmap_t scbrq_bitmap[BITMAP_LEN(NRQS_MAX)];
	circle_queue_head_t scbrq_queues[NRQS_MAX];
};
typedef struct sched_clutch_bucket_runq *sched_clutch_bucket_runq_t;

/*
 * Clutch hierarchy locking protocol
 *
 * The scheduler clutch hierarchy is protected by a combination of
 * atomics and the pset lock.
 * See the legend of field annotations below:
 *
 * (P): Reads/writes protected by the pset lock.
 * (A): Reads/writes done atomically.
 * (I): Safe to read unprotected because values are not updated
 *      after initialization.
 * (W): Reads/writes done atomically, but writes are only
 *      published with the pset lock held.
 */
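
/*
 * Illustrative sketch of the (W) protocol above. The field and value names
 * here are hypothetical; only the locking pattern is the point, and the
 * real code lives in the scheduler implementation sources:
 *
 *   // Writer: publish a new value only while holding the pset lock.
 *   pset_lock(pset);
 *   os_atomic_store(&example_w_field, new_value, relaxed);
 *   pset_unlock(pset);
 *
 *   // Reader: may load the value without the pset lock, but must take
 *   // the pset lock to get a snapshot consistent with other (P) state.
 *   uint64_t observed = os_atomic_load(&example_w_field, relaxed);
 */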

/*
 * struct sched_clutch_root_bucket
 *
 * A clutch_root_bucket represents all threads across all thread groups
 * that are in the same scheduler bucket (FG/IN/...). The clutch_root_bucket
 * is selected for execution by the root level bucket selection algorithm,
 * which bases the decision on the clutch_root_bucket's deadline (EDF). The
 * deadline for a root bucket is calculated based on its runnable timestamp
 * and the worst-case execution latency values specified in sched_clutch_root_bucket_wcel[].
 */
struct sched_clutch_root_bucket {
	/* (I) sched bucket represented by this root bucket */
	uint8_t scrb_bucket;
	/* (I) Indicates the root bucket represents cluster bound threads */
	bool scrb_bound;
	/* (P) Indicates if the root bucket is in starvation avoidance mode */
	bool scrb_starvation_avoidance;

	union {
		/* (P) priority queue for all unbound clutch buckets in this sched bucket */
		struct sched_clutch_bucket_runq scrb_clutch_buckets;
		/* (P) Runqueue for all bound threads that are part of this root bucket */
		struct run_queue scrb_bound_thread_runq;
	};
	/* (P) priority queue entry to use for enqueueing root bucket into root prioq */
	struct priority_queue_entry_deadline scrb_pqlink;
	/* (P) warped deadline for root bucket */
	uint64_t scrb_warped_deadline;
	/* (P) warp remaining for root bucket */
	uint64_t scrb_warp_remaining;
	/* (P) timestamp for the start of the starvation avoidance window */
	uint64_t scrb_starvation_ts;
};
typedef struct sched_clutch_root_bucket *sched_clutch_root_bucket_t;
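
/*
 * Illustrative sketch of the deadline calculation described above. The
 * real computation lives in sched_clutch.c; `runnable_timestamp` below is
 * a hypothetical name for the time at which the root bucket became
 * runnable:
 *
 *   uint64_t deadline = runnable_timestamp +
 *       sched_clutch_root_bucket_wcel[root_bucket->scrb_bucket];
 *
 * The deadline is published via scrb_pqlink so that root level EDF
 * selection can pick the runnable root bucket with the earliest deadline;
 * scrb_warp_remaining / scrb_warped_deadline allow higher buckets to
 * temporarily "warp" ahead of that order.
 */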

#if CONFIG_SCHED_EDGE

struct sched_edge_steal_silo {
	/*
	 * (P) priority queue per QoS bucket, containing runnable
	 * clutch buckets enqueued in the owning clutch hierarchy
	 * and recommended to the same preferred pset (which may be
	 * different from the pset of the clutch hierarchy where the
	 * clutch buckets are enqueued).
	 */
	struct priority_queue_sched_max sess_steal_queues[TH_BUCKET_SCHED_MAX];
	/* (W) bitmap of which steal queues contain threads */
	bitmap_t _Atomic sess_populated_steal_queues[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
};
typedef struct sched_edge_steal_silo *sched_edge_steal_silo_t;

#endif /* CONFIG_SCHED_EDGE */

/*
 * struct sched_clutch_root
 *
 * A clutch_root represents the root of the hierarchy. It maintains a
 * priority queue of all runnable root buckets. The clutch_root also
 * maintains information about the last clutch_root_bucket scheduled
 * in order to implement bucket level quantums. The bucket level quantums
 * allow low priority buckets to get a "fair" chance of using the CPU even
 * if they contain a number of short-running threads. The bucket quantums
 * are configured using sched_clutch_root_bucket_quantum[].
 */
struct sched_clutch_root {
	/* (P) root level priority; represents the highest runnable thread in the hierarchy */
	int16_t scr_priority;
	/* (P) total number of runnable threads in the hierarchy */
	uint16_t scr_thr_count;
	/* (P) root level urgency; represents the urgency of the whole hierarchy for preemption purposes */
	int16_t scr_urgency;
#if CONFIG_SCHED_EDGE
	/* (P) runnable shared resource load enqueued in this cluster/root hierarchy */
	uint16_t scr_shared_rsrc_load_runnable[CLUSTER_SHARED_RSRC_TYPE_COUNT];
#endif /* CONFIG_SCHED_EDGE */

	uint32_t scr_cluster_id;
	/* (I) processor set this hierarchy belongs to */
	processor_set_t scr_pset;
	/*
	 * (P) list of all runnable clutch buckets across the system;
	 * allows easy iteration in the sched tick based timesharing code
	 */
	queue_head_t scr_clutch_buckets;

#if CONFIG_SCHED_EDGE
	/*
	 * (P) silo per pset recommendation, consisting of steal (priority)
	 * queues per QoS bucket that track runnable clutch buckets enqueued
	 * in this hierarchy. This allows other psets to steal threads of
	 * specific recommendations/QoSes from this pset in a fine-grained
	 * manner, respecting the Edge matrix.
	 */
	struct sched_edge_steal_silo scr_steal_silos[MAX_PSETS];
	/* (W) bitmap of which steal silos contain threads */
	bitmap_t _Atomic scr_populated_steal_silos[BITMAP_LEN(MAX_PSETS)];
	/* (W) bitmap of which pset recommendations can migrate here */
	bitmap_t _Atomic scr_incoming_migration_allowed[TH_BUCKET_SCHED_MAX][BITMAP_LEN(MAX_PSETS)];
#endif /* CONFIG_SCHED_EDGE */

	/* Root level bucket management */

	/* (P) bitmap of all runnable unbounded root buckets */
	bitmap_t scr_unbound_runnable_bitmap[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
	/* (P) bitmap of all runnable unbounded root buckets which have warps remaining */
	bitmap_t scr_unbound_warp_available[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
	/* (P) bitmap of all runnable bounded root buckets */
	bitmap_t scr_bound_runnable_bitmap[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
	/* (P) bitmap of all runnable bounded root buckets which have warps remaining */
	bitmap_t scr_bound_warp_available[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];

	/* (P) priority queue of all runnable unbounded root buckets in deadline order */
	struct priority_queue_deadline_min scr_unbound_root_buckets;
	/* (P) priority queue of all bounded root buckets in deadline order */
	struct priority_queue_deadline_min scr_bound_root_buckets;

	/* (P) cumulative run counts at each bucket for load average calculation */
	uint16_t _Atomic scr_cumulative_run_count[TH_BUCKET_SCHED_MAX];

	/* (P) storage for all unbound clutch_root_buckets */
	struct sched_clutch_root_bucket scr_unbound_buckets[TH_BUCKET_SCHED_MAX];
	/* (P) storage for all bound clutch_root_buckets */
	struct sched_clutch_root_bucket scr_bound_buckets[TH_BUCKET_SCHED_MAX];
};
typedef struct sched_clutch_root *sched_clutch_root_t;

/* forward declaration for sched_clutch */
struct sched_clutch;
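
/*
 * Illustrative sketch (hypothetical helper, not part of the interface) of
 * how the bound and unbound deadline queues above combine during root
 * level selection; the real logic in sched_clutch.c additionally handles
 * warps, starvation avoidance, and root bucket quantum expiry:
 *
 *   static inline bool
 *   example_prefer_bound_bucket(sched_clutch_root_bucket_t bound,
 *       sched_clutch_root_bucket_t unbound)
 *   {
 *           if (bound == NULL) {
 *                   return false;
 *           }
 *           if (unbound == NULL) {
 *                   return true;
 *           }
 *           // Earliest deadline first between the two candidates.
 *           return bound->scrb_pqlink.deadline <= unbound->scrb_pqlink.deadline;
 *   }
 */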

/*
 * sched_clutch_bucket_cpu_data_t
 *
 * Used for maintaining clutch bucket used and blocked time. The
 * values are used for calculating the interactivity score for the
 * clutch bucket.
 */
#define CLUTCH_CPU_DATA_MAX (UINT64_MAX)
typedef uint64_t clutch_cpu_data_t;
typedef unsigned __int128 clutch_cpu_data_wide_t;

typedef union sched_clutch_bucket_cpu_data {
	struct {
		/* Clutch bucket CPU used across all threads */
		clutch_cpu_data_t scbcd_cpu_used;
		/* Clutch bucket voluntary blocked time */
		clutch_cpu_data_t scbcd_cpu_blocked;
	} cpu_data;
	clutch_cpu_data_wide_t scbcd_cpu_data_packed;
} sched_clutch_bucket_cpu_data_t;

/*
 * struct sched_clutch_bucket
 *
 * A sched_clutch_bucket represents the set of threads for a thread
 * group at a particular scheduling bucket in a specific cluster.
 * It maintains information about the CPU usage & blocking behavior
 * of all threads that are part of the clutch_bucket. It inherits the
 * timeshare values from the clutch_bucket_group for decay and
 * timesharing among threads in the clutch.
 *
 * Since the clutch bucket is a per-thread-group, per-QoS entity it is
 * important to keep its size small and the structure well aligned.
 */
struct sched_clutch_bucket {
#if CONFIG_SCHED_EDGE
	/* (P) preferred pset id when the clutch_bucket was enqueued */
	pset_id_t scb_preferred_pset_when_enqueued;
#endif /* CONFIG_SCHED_EDGE */
	/* (I) bucket for the clutch_bucket */
	uint8_t scb_bucket;
	/* (P) priority of the clutch bucket */
	uint8_t scb_priority;
	/* (P) number of threads in this clutch_bucket; should match runq.count */
	uint16_t scb_thr_count;

	/* Pointer to the clutch bucket group this clutch bucket belongs to */
	struct sched_clutch_bucket_group *scb_group;
	/* (P) pointer to the root of the hierarchy this bucket is in */
	struct sched_clutch_root *scb_root;
	/* (P) priority queue of threads based on their promoted/base priority */
	struct priority_queue_sched_max scb_clutchpri_prioq;
	/* (P) runq of threads in clutch_bucket */
	struct priority_queue_sched_stable_max scb_thread_runq;

	/* (P) linkage for all clutch_buckets in a root bucket; used for tick operations */
	queue_chain_t scb_listlink;
	/* (P) linkage for clutch_bucket in root_bucket runqueue */
	queue_chain_t scb_runqlink;
	/* (P) queue of threads for timesharing purposes */
	queue_head_t scb_thread_timeshare_queue;
#if CONFIG_SCHED_EDGE
	/* (P) linkage for clutch_bucket in its steal_queue */
	struct priority_queue_entry_sched scb_stealqlink;
#endif /* CONFIG_SCHED_EDGE */
};
typedef struct sched_clutch_bucket *sched_clutch_bucket_t;

/*
 * sched_clutch_counter_time_t
 *
 * Holds thread counts and a timestamp (typically for a clutch bucket group).
 * Used to allow atomic updates to these fields.
 */
typedef union sched_clutch_counter_time {
	struct {
		uint64_t scct_count;
		uint64_t scct_timestamp;
	};
	unsigned __int128 scct_packed;
} __attribute__((aligned(16))) sched_clutch_counter_time_t;
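
/*
 * Illustrative sketch of why scct_packed exists: the count and timestamp
 * can be read and updated as one 16-byte atomic unit. A simplified update
 * (hypothetical, roughly following the os_atomic_rmw_loop pattern) could
 * look like:
 *
 *   sched_clutch_counter_time_t old, new;
 *   sched_clutch_counter_time_t *ctime = ...;  // e.g. a blocked-data counter
 *   os_atomic_rmw_loop(&ctime->scct_packed, old.scct_packed, new.scct_packed,
 *       relaxed, {
 *           new.scct_count = old.scct_count + 1;
 *           new.scct_timestamp = mach_absolute_time();
 *   });
 */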

/*
 * struct sched_clutch_bucket_group
 *
 * A sched_clutch_bucket_group represents all the threads for a thread
 * group at a particular QoS/scheduling bucket. This structure also
 * maintains the timesharing properties that are used for decay
 * calculation for all threads in the thread group at the specific
 * scheduling bucket.
 */
struct sched_clutch_bucket_group {
	/* (I) bucket for the clutch_bucket_group */
	uint8_t scbg_bucket;
	/* (A) sched tick when the clutch bucket group load/shifts were updated */
	uint32_t _Atomic scbg_timeshare_tick;
	/* (A) priority shifts for threads in the clutch_bucket_group */
	uint32_t _Atomic scbg_pri_shift;
	/* (A) preferred cluster ID for clutch bucket */
	uint32_t _Atomic scbg_preferred_cluster;
	/* (I) clutch to which this clutch_bucket_group belongs */
	struct sched_clutch *scbg_clutch;
	/* (A) holds blocked timestamp and runnable/running count */
	sched_clutch_counter_time_t scbg_blocked_data;
	/* (P/A depending on scheduler) holds pending timestamp and thread count */
	sched_clutch_counter_time_t scbg_pending_data;
	/* (P/A depending on scheduler) holds interactivity timestamp and score */
	sched_clutch_counter_time_t scbg_interactivity_data;
	/* (A) CPU usage information for the clutch bucket group */
	sched_clutch_bucket_cpu_data_t scbg_cpu_data;
	/* Storage for all clutch buckets for a thread group at scbg_bucket */
	struct sched_clutch_bucket *scbg_clutch_buckets;
};
typedef struct sched_clutch_bucket_group *sched_clutch_bucket_group_t;


/*
 * struct sched_clutch
 *
 * A sched_clutch is a 1:1 mapping to a thread group. It maintains the
 * storage for all clutch buckets for this thread group and some properties
 * of the thread group (such as flags).
 */
struct sched_clutch {
	/*
	 * (A) number of runnable threads in sched_clutch; needs to be atomic
	 * to support cross cluster sched_clutch migrations.
	 */
	uint16_t _Atomic sc_thr_count;
	/*
	 * Grouping-specific parameters. Currently the implementation only
	 * supports thread_group based grouping.
	 */
	union {
		/* (I) Pointer to thread group */
		struct thread_group *sc_tg;
	};
	/* (I) storage for all clutch_buckets for this clutch */
	struct sched_clutch_bucket_group sc_clutch_groups[TH_BUCKET_SCHED_MAX];
};
typedef struct sched_clutch *sched_clutch_t;


/* Clutch lifecycle management */
void sched_clutch_init_with_thread_group(sched_clutch_t, struct thread_group *);
void sched_clutch_destroy(sched_clutch_t);

/* Clutch thread membership management */
void sched_clutch_thread_clutch_update(thread_t, sched_clutch_t, sched_clutch_t);
uint32_t sched_edge_thread_preferred_cluster(thread_t);

/* Clutch timesharing stats management */
uint32_t sched_clutch_thread_run_bucket_incr(thread_t, sched_bucket_t);
uint32_t sched_clutch_thread_run_bucket_decr(thread_t, sched_bucket_t);
void sched_clutch_cpu_usage_update(thread_t, uint64_t);
uint32_t sched_clutch_thread_pri_shift(thread_t, sched_bucket_t);

/* Clutch properties accessors */
uint32_t sched_clutch_root_count(sched_clutch_root_t);

/* Grouping-specific external routines */
extern sched_clutch_t sched_clutch_for_thread(thread_t);
extern sched_clutch_t sched_clutch_for_thread_group(struct thread_group *);

#if DEVELOPMENT || DEBUG

extern kern_return_t sched_clutch_thread_group_cpu_time_for_thread(thread_t thread, int sched_bucket, uint64_t *cpu_stats);

#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_SCHED_EDGE

/*
 * Getters and setters for the Edge configuration. Used by CLPC to affect thread migration behavior.
 */
void sched_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t num_psets);
void sched_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t num_psets);
void sched_edge_tg_preferred_cluster_change(struct thread_group *tg, uint32_t *tg_bucket_preferred_cluster, sched_perfcontrol_preferred_cluster_options_t options);

/*
 * Iterate through the entire edge matrix by src pset, dst pset, and scheduling
 * bucket (dimensions: num_psets x num_psets x TH_BUCKET_SCHED_MAX).
 */
#define sched_edge_matrix_iterate(src_id, dst_id, bucket, ...)                                          \
	for (pset_id_t src_id = 0; src_id < sched_num_psets; src_id++) {                                \
	        for (pset_id_t dst_id = 0; dst_id < sched_num_psets; dst_id++) {                        \
	                for (sched_bucket_t bucket = 0; bucket < TH_BUCKET_SCHED_MAX; bucket++) {       \
	                        __VA_ARGS__;                                                             \
	                }                                                                                \
	        }                                                                                        \
	}
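
/*
 * Example (illustrative only) of using sched_edge_matrix_iterate; the body
 * statement runs once per (src pset, dst pset, scheduling bucket) tuple.
 * example_visit_edge() is a hypothetical callback used purely for
 * illustration:
 *
 *   sched_edge_matrix_iterate(src, dst, bucket, {
 *           example_visit_edge(src, dst, bucket);
 *   });
 */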

uint16_t sched_edge_cluster_cumulative_count(sched_clutch_root_t root_clutch, sched_bucket_t bucket);
uint16_t sched_edge_shared_rsrc_runnable_load(sched_clutch_root_t root_clutch, cluster_shared_rsrc_type_t load_type);

/*
 * sched_edge_search_order_weight_then_locality_cmp()
 *
 * Search order that prioritizes outgoing edges with a lower
 * migration weight, then breaks ties by die locality followed
 * by lowest pset id.
 */
extern int (*sched_edge_search_order_weight_then_locality_cmp)(const void *a, const void *b);

/*
 * Used to keep stir-the-pot state up to date for the current
 * processor as new threads come on-core.
 */
extern void sched_edge_stir_the_pot_update_registry_state(thread_t thread);
extern void sched_edge_stir_the_pot_clear_registry_entry(void);

#endif /* CONFIG_SCHED_EDGE */

#endif /* CONFIG_SCHED_CLUTCH */

#endif /* _KERN_SCHED_CLUTCH_H_ */