/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _KERN_SCHED_CLUTCH_H_
#define _KERN_SCHED_CLUTCH_H_

#include <kern/sched.h>
#include <kern/priority_queue.h>
#include <kern/bits.h>
#include <kern/kern_types.h>
#include <kern/sched_common.h>

#if !SCHED_TEST_HARNESS

#include <machine/atomic.h>
#include <kern/thread_group.h>

#endif /* !SCHED_TEST_HARNESS */

#if CONFIG_SCHED_CLUTCH

/*
 * Threads hard-bound to specific processors are not managed in
 * the Clutch hierarchy. This helper macro is used to indicate
 * whether a thread should be enqueued in the hierarchy.
 */
#define SCHED_CLUTCH_THREAD_ELIGIBLE(thread)    ((thread->bound_processor) == PROCESSOR_NULL)

#if CONFIG_SCHED_EDGE
#define SCHED_CLUTCH_THREAD_CLUSTER_BOUND(thread)       (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE)
#else /* CONFIG_SCHED_EDGE */
#define SCHED_CLUTCH_THREAD_CLUSTER_BOUND(thread)       (0)
#endif /* CONFIG_SCHED_EDGE */

/*
 * Clutch Bucket Runqueue Structure.
 */
struct sched_clutch_bucket_runq {
	int                     scbrq_highq;
	int                     scbrq_count;
	bitmap_t                scbrq_bitmap[BITMAP_LEN(NRQS_MAX)];
	circle_queue_head_t     scbrq_queues[NRQS_MAX];
};
typedef struct sched_clutch_bucket_runq *sched_clutch_bucket_runq_t;
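
/*
 * Illustrative sketch (not compiled): by analogy with the processor
 * run_queue, scbrq_highq caches the highest priority level that currently
 * has a clutch bucket queued, so the next clutch bucket can be located
 * with a constant-time lookup. The helper name below is hypothetical.
 */
#if 0
static circle_queue_head_t *
sched_clutch_example_highest_queue(sched_clutch_bucket_runq_t clutch_buckets_rq)
{
	if (clutch_buckets_rq->scbrq_count == 0) {
		/* No clutch buckets are queued at any priority level. */
		return NULL;
	}
	return &clutch_buckets_rq->scbrq_queues[clutch_buckets_rq->scbrq_highq];
}
#endif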

/*
 * Clutch hierarchy locking protocol
 *
 * The scheduler clutch hierarchy is protected by a combination of
 * atomics and the pset lock. See the legend of field annotations below:
 *
 * (P): Reads/writes protected by the pset lock.
 * (A): Reads/writes done atomically.
 * (I): Safe to read unprotected because values are not updated
 *      after initialization.
 * (W): Reads/writes done atomically, but writes are only
 *      published with the pset lock held.
 */

/*
 * struct sched_clutch_root_bucket
 *
 * A clutch_root_bucket represents all threads across all thread groups
 * that are in the same scheduler bucket (FG/IN/...). The clutch_root_bucket
 * is selected for execution by the root-level bucket selection algorithm,
 * which bases the decision on the clutch_root_bucket's deadline (EDF). The
 * deadline for a root bucket is calculated from its runnable timestamp and
 * the worst-case-execution-latency values specified in sched_clutch_root_bucket_wcel[].
 */
struct sched_clutch_root_bucket {
	/* (I) sched bucket represented by this root bucket */
	uint8_t                         scrb_bucket;
	/* (I) Indicates the root bucket represents cluster bound threads */
	bool                            scrb_bound;
	/* (P) Indicates if the root bucket is in starvation avoidance mode */
	bool                            scrb_starvation_avoidance;

	union {
		/* (P) priority queue for all unbound clutch buckets in this sched bucket */
		struct sched_clutch_bucket_runq scrb_clutch_buckets;
		/* (P) Runqueue for all bound threads part of this root bucket */
		struct run_queue                scrb_bound_thread_runq;
	};
	/* (P) priority queue entry to use for enqueueing root bucket into root prioq */
	struct priority_queue_entry_deadline scrb_pqlink;
	/* (P) warped deadline for root bucket */
	uint64_t                        scrb_warped_deadline;
	/* (P) warp remaining for root bucket */
	uint64_t                        scrb_warp_remaining;
	/* (P) timestamp for the start of the starvation avoidance window */
	uint64_t                        scrb_starvation_ts;
};
typedef struct sched_clutch_root_bucket *sched_clutch_root_bucket_t;
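
/*
 * Illustrative sketch (not compiled): per the comment above, a root
 * bucket's EDF deadline is conceptually its runnable timestamp plus the
 * per-bucket worst-case execution latency. sched_clutch_root_bucket_wcel[]
 * lives in the implementation; the helper below is hypothetical and only
 * restates the deadline relation.
 */
#if 0
static uint64_t
sched_clutch_example_root_bucket_deadline(sched_clutch_root_bucket_t root_bucket,
    uint64_t runnable_timestamp, const uint64_t *wcel)
{
	/* An earlier deadline makes this root bucket preferred by EDF selection. */
	return runnable_timestamp + wcel[root_bucket->scrb_bucket];
}
#endif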

#if CONFIG_SCHED_EDGE

struct sched_edge_steal_silo {
	/*
	 * (P) priority queue per QoS bucket, containing runnable
	 * clutch buckets enqueued in the owning clutch hierarchy
	 * and recommended to the same preferred pset (may be
	 * different from the clutch hierarchy where the clutch
	 * buckets are enqueued).
	 */
	struct priority_queue_sched_max sess_steal_queues[TH_BUCKET_SCHED_MAX];
	/* (W) bitmap of which steal queues contain threads */
	bitmap_t _Atomic                sess_populated_steal_queues[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
};
typedef struct sched_edge_steal_silo *sched_edge_steal_silo_t;

#endif /* CONFIG_SCHED_EDGE */

/*
 * struct sched_clutch_root
 *
 * A clutch_root represents the root of the hierarchy. It maintains a
 * priority queue of all runnable root buckets. The clutch_root also
 * maintains information about the last clutch_root_bucket scheduled,
 * in order to implement bucket-level quanta. The bucket-level quanta
 * give low-priority buckets a "fair" chance of using the CPU even if
 * they contain many short-running threads. The bucket quanta are
 * configured using sched_clutch_root_bucket_quantum[].
 */
struct sched_clutch_root {
	/* (P) root level priority; represents the highest runnable thread in the hierarchy */
	int16_t                         scr_priority;
	/* (P) total number of runnable threads in the hierarchy */
	uint16_t                        scr_thr_count;
	/* (P) root level urgency; represents the urgency of the whole hierarchy for pre-emption purposes */
	int16_t                         scr_urgency;
#if CONFIG_SCHED_EDGE
	/* (P) runnable shared resource load enqueued in this cluster/root hierarchy */
	uint16_t                        scr_shared_rsrc_load_runnable[CLUSTER_SHARED_RSRC_TYPE_COUNT];
#endif /* CONFIG_SCHED_EDGE */

	uint32_t                        scr_cluster_id;
	/* (I) processor set this hierarchy belongs to */
	processor_set_t                 scr_pset;
	/*
	 * (P) list of all runnable clutch buckets across the system;
	 * allows easy iteration in the sched tick based timesharing code
	 */
	queue_head_t                    scr_clutch_buckets;

#if CONFIG_SCHED_EDGE
	/*
	 * (P) silo per pset recommendation, consisting of steal (priority)
	 * queues per QoS bucket that track runnable clutch buckets enqueued
	 * in this hierarchy. This allows other psets to steal threads of
	 * specific recommendations/QoSes from this pset in a fine-grained
	 * manner, respecting the Edge matrix.
	 */
	struct sched_edge_steal_silo    scr_steal_silos[MAX_PSETS];
	/* (W) bitmap of which steal silos contain threads */
	bitmap_t _Atomic                scr_populated_steal_silos[BITMAP_LEN(MAX_PSETS)];
	/* (W) bitmap of which pset recommendations can migrate here */
	bitmap_t _Atomic                scr_incoming_migration_allowed[TH_BUCKET_SCHED_MAX][BITMAP_LEN(MAX_PSETS)];
#endif /* CONFIG_SCHED_EDGE */

	/* Root level bucket management */

	/* (P) bitmap of all runnable unbounded root buckets */
	bitmap_t                        scr_unbound_runnable_bitmap[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
	/* (P) bitmap of all runnable unbounded root buckets which have warps remaining */
	bitmap_t                        scr_unbound_warp_available[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
	/* (P) bitmap of all runnable bounded root buckets */
	bitmap_t                        scr_bound_runnable_bitmap[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
	/* (P) bitmap of all runnable bounded root buckets which have warps remaining */
	bitmap_t                        scr_bound_warp_available[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];

	/* (P) priority queue of all runnable unbounded root buckets in deadline order */
	struct priority_queue_deadline_min scr_unbound_root_buckets;
	/* (P) priority queue of all bounded root buckets in deadline order */
	struct priority_queue_deadline_min scr_bound_root_buckets;

	/* (P) cumulative run counts at each bucket for load average calculation */
	uint16_t _Atomic                scr_cumulative_run_count[TH_BUCKET_SCHED_MAX];

	/* (P) storage for all unbound clutch_root_buckets */
	struct sched_clutch_root_bucket scr_unbound_buckets[TH_BUCKET_SCHED_MAX];
	/* (P) storage for all bound clutch_root_buckets */
	struct sched_clutch_root_bucket scr_bound_buckets[TH_BUCKET_SCHED_MAX];
};
typedef struct sched_clutch_root *sched_clutch_root_t;
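
/*
 * Illustrative sketch (not compiled): sched_clutch_root_count(), declared
 * later in this header, conceptually reports scr_thr_count, the number of
 * runnable threads under this root. The body below is a hypothetical
 * reading of that relationship, not the actual implementation.
 */
#if 0
static uint32_t
sched_clutch_example_root_count(sched_clutch_root_t root_clutch)
{
	/* (P) annotated field; a real caller would hold the pset lock. */
	return root_clutch->scr_thr_count;
}
#endif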

/* forward declaration for sched_clutch */
struct sched_clutch;

/*
 * sched_clutch_bucket_cpu_data_t
 *
 * Used for maintaining clutch bucket used and blocked time. The
 * values are used for calculating the interactivity score for the
 * clutch bucket.
 */
#define CLUTCH_CPU_DATA_MAX             (UINT64_MAX)
typedef uint64_t                        clutch_cpu_data_t;
typedef unsigned __int128               clutch_cpu_data_wide_t;

typedef union sched_clutch_bucket_cpu_data {
	struct {
		/* Clutch bucket CPU used across all threads */
		clutch_cpu_data_t       scbcd_cpu_used;
		/* Clutch bucket voluntary blocked time */
		clutch_cpu_data_t       scbcd_cpu_blocked;
	} cpu_data;
	clutch_cpu_data_wide_t          scbcd_cpu_data_packed;
} sched_clutch_bucket_cpu_data_t;
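
/*
 * Illustrative sketch (not compiled): packing the used and blocked times
 * into one 128-bit value lets both counters be read (or updated) together
 * in a single atomic operation. C11 atomics are used here purely for
 * illustration; the helper name is hypothetical.
 */
#if 0
static sched_clutch_bucket_cpu_data_t
sched_clutch_example_cpu_data_snapshot(const _Atomic clutch_cpu_data_wide_t *packed)
{
	sched_clutch_bucket_cpu_data_t snap;
	/* One 128-bit load observes a consistent (used, blocked) pair. */
	snap.scbcd_cpu_data_packed = atomic_load_explicit(packed, memory_order_relaxed);
	return snap;
}
#endif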

/*
 * struct sched_clutch_bucket
 *
 * A sched_clutch_bucket represents the set of threads for a thread
 * group at a particular scheduling bucket in a specific cluster.
 * It maintains information about the CPU usage & blocking behavior
 * of all threads that are part of the clutch_bucket. It inherits the
 * timeshare values from the clutch_bucket_group for decay and
 * timesharing among threads in the clutch.
 *
 * Since the clutch bucket is a per-thread-group, per-QoS entity, it is
 * important to keep its size small and the structure well aligned.
 */
struct sched_clutch_bucket {
#if CONFIG_SCHED_EDGE
	/* (P) preferred pset id when the clutch_bucket was enqueued */
	pset_id_t                       scb_preferred_pset_when_enqueued;
#endif /* CONFIG_SCHED_EDGE */
	/* (I) bucket for the clutch_bucket */
	uint8_t                         scb_bucket;
	/* (P) priority of the clutch bucket */
	uint8_t                         scb_priority;
	/* (P) number of threads in this clutch_bucket; should match runq.count */
	uint16_t                        scb_thr_count;

	/* Pointer to the clutch bucket group this clutch bucket belongs to */
	struct sched_clutch_bucket_group *scb_group;
	/* (P) pointer to the root of the hierarchy this bucket is in */
	struct sched_clutch_root        *scb_root;
	/* (P) priority queue of threads based on their promoted/base priority */
	struct priority_queue_sched_max scb_clutchpri_prioq;
	/* (P) runq of threads in clutch_bucket */
	struct priority_queue_sched_stable_max scb_thread_runq;

	/* (P) linkage for all clutch_buckets in a root bucket; used for tick operations */
	queue_chain_t                   scb_listlink;
	/* (P) linkage for clutch_bucket in root_bucket runqueue */
	queue_chain_t                   scb_runqlink;
	/* (P) queue of threads for timesharing purposes */
	queue_head_t                    scb_thread_timeshare_queue;
#if CONFIG_SCHED_EDGE
	/* (P) linkage for clutch_bucket in its steal_queue */
	struct priority_queue_entry_sched     scb_stealqlink;
#endif /* CONFIG_SCHED_EDGE */
};
typedef struct sched_clutch_bucket *sched_clutch_bucket_t;

/*
 * sched_clutch_counter_time_t
 *
 * Holds thread counts and a timestamp (typically for a clutch bucket group).
 * Used to allow atomic updates to these fields.
 */
typedef union sched_clutch_counter_time {
	struct {
		uint64_t                scct_count;
		uint64_t                scct_timestamp;
	};
	unsigned __int128               scct_packed;
} __attribute__((aligned(16))) sched_clutch_counter_time_t;
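
/*
 * Illustrative sketch (not compiled): the 16-byte alignment and the packed
 * 128-bit view allow the count and timestamp to be advanced together with
 * a single compare-and-swap. C11 atomics are used purely for illustration;
 * the helper name is hypothetical.
 */
#if 0
static void
sched_clutch_example_counter_time_incr(_Atomic(unsigned __int128) *counter, uint64_t now)
{
	sched_clutch_counter_time_t old_val, new_val;
	old_val.scct_packed = atomic_load_explicit(counter, memory_order_relaxed);
	do {
		new_val = old_val;
		new_val.scct_count += 1;       /* one more thread accounted for */
		new_val.scct_timestamp = now;  /* remember when the count changed */
	} while (!atomic_compare_exchange_weak_explicit(counter, &old_val.scct_packed,
	    new_val.scct_packed, memory_order_relaxed, memory_order_relaxed));
}
#endif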

/*
 * struct sched_clutch_bucket_group
 *
 * A sched_clutch_bucket_group represents all the threads for a thread
 * group at a particular QoS/scheduling bucket. This structure also
 * maintains the timesharing properties used for decay calculation for
 * all threads in the thread group at that scheduling bucket.
 */
struct sched_clutch_bucket_group {
	/* (I) bucket for the clutch_bucket_group */
	uint8_t                         scbg_bucket;
	/* (A) sched tick when the clutch bucket group load/shifts were updated */
	uint32_t _Atomic                scbg_timeshare_tick;
	/* (A) priority shifts for threads in the clutch_bucket_group */
	uint32_t _Atomic                scbg_pri_shift;
	/* (A) preferred cluster ID for clutch bucket */
	uint32_t _Atomic                scbg_preferred_cluster;
	/* (I) clutch to which this clutch bucket_group belongs */
	struct sched_clutch             *scbg_clutch;
	/* (A) holds blocked timestamp and runnable/running count */
	sched_clutch_counter_time_t     scbg_blocked_data;
	/* (P/A depending on scheduler) holds pending timestamp and thread count */
	sched_clutch_counter_time_t     scbg_pending_data;
	/* (P/A depending on scheduler) holds interactivity timestamp and score */
	sched_clutch_counter_time_t     scbg_interactivity_data;
	/* (A) CPU usage information for the clutch bucket group */
	sched_clutch_bucket_cpu_data_t  scbg_cpu_data;
	/* Storage for all clutch buckets for a thread group at scbg_bucket */
	struct sched_clutch_bucket      *scbg_clutch_buckets;
};
typedef struct sched_clutch_bucket_group *sched_clutch_bucket_group_t;
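
/*
 * Illustrative sketch (not compiled): scbg_pri_shift feeds the classic
 * timeshare decay relation, in which a thread's scheduled priority drops
 * as its accumulated CPU usage grows (roughly base priority minus usage
 * shifted right by the priority shift). This is a simplified restatement
 * of that idea, not the scheduler's actual computation; the helper name
 * is hypothetical.
 */
#if 0
static int
sched_clutch_example_timeshare_pri(int base_pri, uint32_t cpu_usage, uint32_t pri_shift)
{
	/* Heavier CPU usage and a smaller shift both depress the priority more. */
	return base_pri - (int)(cpu_usage >> pri_shift);
}
#endif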

/*
 * struct sched_clutch
 *
 * A sched_clutch is a 1:1 mapping to a thread group. It maintains the
 * storage for all clutch buckets for this thread group and some properties
 * of the thread group (such as flags).
 */
struct sched_clutch {
	/*
	 * (A) number of runnable threads in sched_clutch; needs to be atomic
	 * to support cross-cluster sched_clutch migrations.
	 */
	uint16_t _Atomic                sc_thr_count;
	/*
	 * Grouping specific parameters. Currently the implementation only
	 * supports thread_group based grouping.
	 */
	union {
		/* (I) Pointer to thread group */
		struct thread_group     *sc_tg;
	};
	/* (I) storage for all clutch_buckets for this clutch */
	struct sched_clutch_bucket_group sc_clutch_groups[TH_BUCKET_SCHED_MAX];
};
typedef struct sched_clutch *sched_clutch_t;
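
/*
 * Illustrative sketch (not compiled): since sc_clutch_groups is indexed by
 * scheduling bucket, locating a thread's clutch bucket group is a clutch
 * lookup followed by an array index. The thread field name used below is
 * assumed for illustration.
 */
#if 0
static sched_clutch_bucket_group_t
sched_clutch_example_bucket_group_for_thread(thread_t thread)
{
	sched_clutch_t clutch = sched_clutch_for_thread(thread);
	/* th_sched_bucket (assumed) selects the per-QoS group within the clutch. */
	return &clutch->sc_clutch_groups[thread->th_sched_bucket];
}
#endif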

/* Clutch lifecycle management */
void sched_clutch_init_with_thread_group(sched_clutch_t, struct thread_group *);
void sched_clutch_destroy(sched_clutch_t);

/* Clutch thread membership management */
void sched_clutch_thread_clutch_update(thread_t, sched_clutch_t, sched_clutch_t);
uint32_t sched_edge_thread_preferred_cluster(thread_t);

/* Clutch timesharing stats management */
uint32_t sched_clutch_thread_run_bucket_incr(thread_t, sched_bucket_t);
uint32_t sched_clutch_thread_run_bucket_decr(thread_t, sched_bucket_t);
void sched_clutch_cpu_usage_update(thread_t, uint64_t);
uint32_t sched_clutch_thread_pri_shift(thread_t, sched_bucket_t);

/* Clutch properties accessors */
uint32_t sched_clutch_root_count(sched_clutch_root_t);

/* Grouping specific external routines */
extern sched_clutch_t sched_clutch_for_thread(thread_t);
extern sched_clutch_t sched_clutch_for_thread_group(struct thread_group *);

#if DEVELOPMENT || DEBUG

extern kern_return_t sched_clutch_thread_group_cpu_time_for_thread(thread_t thread, int sched_bucket, uint64_t *cpu_stats);

#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_SCHED_EDGE

/*
 * Getter and Setter for Edge configuration. Used by CLPC to affect thread migration behavior.
 */
void sched_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t num_psets);
void sched_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t num_psets);
void sched_edge_tg_preferred_cluster_change(struct thread_group *tg, uint32_t *tg_bucket_preferred_cluster, sched_perfcontrol_preferred_cluster_options_t options);

/*
 * Iterate through the entire edge matrix by src pset, dst pset, and scheduling
 * bucket (dimension: num_psets X num_psets X TH_BUCKET_SCHED_MAX).
 */
#define sched_edge_matrix_iterate(src_id, dst_id, bucket, ...) \
	for (pset_id_t src_id = 0; src_id < sched_num_psets; src_id++) { \
	    for (pset_id_t dst_id = 0; dst_id < sched_num_psets; dst_id++) { \
	        for (sched_bucket_t bucket = 0; bucket < TH_BUCKET_SCHED_MAX; bucket++) { \
	            __VA_ARGS__; \
	        } \
	    } \
	}
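
/*
 * Illustrative usage sketch (not compiled): the iterator above declares the
 * three loop variables with the names passed in and expands the trailing
 * arguments as the innermost loop body. A real caller would typically index
 * into an edge matrix here; this example just counts the tuples visited.
 */
#if 0
static uint64_t
sched_edge_example_count_edge_tuples(void)
{
	uint64_t tuples = 0;
	sched_edge_matrix_iterate(src_id, dst_id, bucket, {
		(void)src_id;
		(void)dst_id;
		(void)bucket;
		tuples++;
	});
	return tuples;
}
#endif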

uint16_t sched_edge_cluster_cumulative_count(sched_clutch_root_t root_clutch, sched_bucket_t bucket);
uint16_t sched_edge_shared_rsrc_runnable_load(sched_clutch_root_t root_clutch, cluster_shared_rsrc_type_t load_type);

/*
 * sched_edge_search_order_weight_then_locality_cmp()
 *
 * Search order that prioritizes outgoing edges with a lower
 * migration weight, then breaks ties with die-locality followed
 * by least pset id.
 */
extern int (*sched_edge_search_order_weight_then_locality_cmp)(const void *a, const void *b);

/*
 * Used to keep stir-the-pot state up-to-date for the current
 * processor, as new threads come on-core.
 */
extern void sched_edge_stir_the_pot_update_registry_state(thread_t thread);
extern void sched_edge_stir_the_pot_clear_registry_entry(void);

#endif /* CONFIG_SCHED_EDGE */

#endif /* CONFIG_SCHED_CLUTCH */

#endif /* _KERN_SCHED_CLUTCH_H_ */