/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#ifndef _KERN_SCHED_CLUTCH_H_
#define _KERN_SCHED_CLUTCH_H_

#include <kern/sched.h>
#include <kern/priority_queue.h>
#include <kern/bits.h>
#include <kern/kern_types.h>

#if !SCHED_TEST_HARNESS

#include <machine/atomic.h>
#include <kern/thread_group.h>

#endif /* !SCHED_TEST_HARNESS */

#if CONFIG_SCHED_CLUTCH

/*
 * Threads hard-bound to specific processors are not managed in
 * the Clutch hierarchy. This helper macro indicates whether a
 * thread should be enqueued in the hierarchy.
 */
#define SCHED_CLUTCH_THREAD_ELIGIBLE(thread)    ((thread->bound_processor) == PROCESSOR_NULL)

#if CONFIG_SCHED_EDGE
#define SCHED_CLUTCH_THREAD_CLUSTER_BOUND(thread)       (thread->th_bound_cluster_id != THREAD_BOUND_CLUSTER_NONE)
#else /* CONFIG_SCHED_EDGE */
#define SCHED_CLUTCH_THREAD_CLUSTER_BOUND(thread)       (0)
#endif /* CONFIG_SCHED_EDGE */
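
/*
 * Illustrative sketch (not part of this interface): an enqueue path would
 * typically use these predicates to route a thread. Hard-bound threads
 * bypass the hierarchy entirely; cluster-bound threads (Edge only) land in
 * the bound root buckets of their cluster's hierarchy. The helper names
 * below are hypothetical.
 *
 *	if (!SCHED_CLUTCH_THREAD_ELIGIBLE(thread)) {
 *		enqueue_on_bound_processor_runq(thread);
 *	} else if (SCHED_CLUTCH_THREAD_CLUSTER_BOUND(thread)) {
 *		enqueue_in_bound_root_bucket(root_clutch, thread);
 *	} else {
 *		enqueue_in_unbound_clutch_bucket(root_clutch, thread);
 *	}
 */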

/*
 * Clutch Bucket Runqueue Structure.
 */
struct sched_clutch_bucket_runq {
	int                     scbrq_highq;
	int                     scbrq_count;
	bitmap_t                scbrq_bitmap[BITMAP_LEN(NRQS_MAX)];
	circle_queue_head_t     scbrq_queues[NRQS_MAX];
};
typedef struct sched_clutch_bucket_runq *sched_clutch_bucket_runq_t;

/*
 * Clutch hierarchy locking protocol
 *
 * The scheduler clutch hierarchy is protected by a combination of
 * atomics and the pset lock.
 * - All fields protected by the pset lock are annotated with (P)
 * - All fields updated using atomics are annotated with (A)
 * - All fields that are unprotected and are not updated after
 *   initialization are annotated with (I)
 */

/*
 * struct sched_clutch_root_bucket
 *
 * A clutch_root_bucket represents all threads across all thread groups
 * that are in the same scheduler bucket (FG/IN/...). The clutch_root_bucket
 * is selected for execution by the root level bucket selection algorithm,
 * which bases the decision on the clutch_root_bucket's deadline (EDF). The
 * deadline for a root bucket is calculated from its runnable timestamp
 * and the worst-case execution latency (WCEL) values specified in
 * sched_clutch_root_bucket_wcel[].
 */
struct sched_clutch_root_bucket {
	/* (I) sched bucket represented by this root bucket */
	uint8_t                         scrb_bucket;
	/* (I) Indicates the root bucket represents cluster bound threads */
	bool                            scrb_bound;
	/* (P) Indicates if the root bucket is in starvation avoidance mode */
	bool                            scrb_starvation_avoidance;

	union {
		/* (P) runqueue for all unbound clutch buckets in this sched bucket */
		struct sched_clutch_bucket_runq scrb_clutch_buckets;
		/* (P) runqueue for all bound threads that are part of this root bucket */
		struct run_queue                scrb_bound_thread_runq;
	};
	/* (P) priority queue entry used for enqueueing the root bucket into the root prioq */
	struct priority_queue_entry_deadline scrb_pqlink;
	/* (P) warped deadline for root bucket */
	uint64_t                        scrb_warped_deadline;
	/* (P) warp remaining for root bucket */
	uint64_t                        scrb_warp_remaining;
	/* (P) timestamp for the start of the starvation avoidance window */
	uint64_t                        scrb_starvation_ts;
};
typedef struct sched_clutch_root_bucket *sched_clutch_root_bucket_t;
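
/*
 * Deadline sketch (a simplified assumption, not the exact kernel code):
 * when a root bucket becomes runnable, its EDF deadline is its runnable
 * timestamp plus the WCEL value for its bucket, roughly:
 *
 *	root_bucket->scrb_pqlink.deadline = runnable_timestamp +
 *	    sched_clutch_root_bucket_wcel[root_bucket->scrb_bucket];
 *
 * Lower buckets have larger WCEL values, so a runnable low-priority
 * bucket is guaranteed selection once its deadline becomes the earliest.
 */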

/*
 * struct sched_clutch_root
 *
 * A clutch_root represents the root of the hierarchy. It maintains a
 * priority queue of all runnable root buckets. The clutch_root also
 * maintains information about the last clutch_root_bucket scheduled
 * in order to implement bucket level quantums. The bucket level quantums
 * allow low priority buckets to get a "fair" chance of using the CPU even
 * when higher priority buckets contain a large number of short-running
 * threads. The bucket quantums are configured using
 * sched_clutch_root_bucket_quantum[].
 */
struct sched_clutch_root {
	/* (P) root level priority; represents the highest runnable thread in the hierarchy */
	int16_t                         scr_priority;
	/* (P) total number of runnable threads in the hierarchy */
	uint16_t                        scr_thr_count;
	/* (P) root level urgency; represents the urgency of the whole hierarchy for preemption purposes */
	int16_t                         scr_urgency;
#if CONFIG_SCHED_EDGE
	/* (P) runnable shared resource load enqueued in this cluster/root hierarchy */
	uint16_t                        scr_shared_rsrc_load_runnable[CLUSTER_SHARED_RSRC_TYPE_COUNT];
#endif /* CONFIG_SCHED_EDGE */

	/* (I) cluster ID for this root hierarchy */
	uint32_t                        scr_cluster_id;
	/* (I) processor set this hierarchy belongs to */
	processor_set_t                 scr_pset;
	/*
	 * (P) list of all runnable clutch buckets across the system;
	 * allows easy iteration in the sched tick based timesharing code
	 */
	queue_head_t                    scr_clutch_buckets;

	/*
	 * (P) priority queue of all runnable foreign buckets in this hierarchy;
	 * used for tracking thread groups which need to be migrated when
	 * psets become available or when rebalancing threads on CPU idle.
	 */
	struct priority_queue_sched_max scr_foreign_buckets;

	/* Root level bucket management */

	/* (P) bitmap of all runnable unbounded root buckets */
	bitmap_t                        scr_unbound_runnable_bitmap[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
	/* (P) bitmap of all runnable unbounded root buckets which have warps remaining */
	bitmap_t                        scr_unbound_warp_available[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
	/* (P) bitmap of all runnable bounded root buckets */
	bitmap_t                        scr_bound_runnable_bitmap[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];
	/* (P) bitmap of all runnable bounded root buckets which have warps remaining */
	bitmap_t                        scr_bound_warp_available[BITMAP_LEN(TH_BUCKET_SCHED_MAX)];

	/* (P) priority queue of all runnable unbounded root buckets in deadline order */
	struct priority_queue_deadline_min scr_unbound_root_buckets;
	/* (P) priority queue of all runnable bounded root buckets in deadline order */
	struct priority_queue_deadline_min scr_bound_root_buckets;

	/* (A) cumulative run counts at each bucket for load average calculation */
	uint16_t _Atomic                scr_cumulative_run_count[TH_BUCKET_SCHED_MAX];

	/* (P) storage for all unbound clutch_root_buckets */
	struct sched_clutch_root_bucket scr_unbound_buckets[TH_BUCKET_SCHED_MAX];
	/* (P) storage for all bound clutch_root_buckets */
	struct sched_clutch_root_bucket scr_bound_buckets[TH_BUCKET_SCHED_MAX];
};
typedef struct sched_clutch_root *sched_clutch_root_t;
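
/*
 * Selection sketch (illustrative and simplified; priority_queue_min() is
 * the generic accessor from <kern/priority_queue.h>, and "root" names a
 * sched_clutch_root): root level selection peeks at the earliest-deadline
 * bound and unbound root buckets and picks between them, with warps and
 * starvation avoidance layered on top.
 *
 *	sched_clutch_root_bucket_t unbound = priority_queue_min(
 *	    &root->scr_unbound_root_buckets,
 *	    struct sched_clutch_root_bucket, scrb_pqlink);
 *	sched_clutch_root_bucket_t bound = priority_queue_min(
 *	    &root->scr_bound_root_buckets,
 *	    struct sched_clutch_root_bucket, scrb_pqlink);
 *	... choose whichever has the earlier deadline ...
 */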

/* forward declaration for sched_clutch */
struct sched_clutch;

/*
 * sched_clutch_bucket_cpu_data_t
 *
 * Maintains the CPU used and blocked time for a clutch bucket. These
 * values are used for calculating the interactivity score of the
 * clutch bucket.
 */
#define CLUTCH_CPU_DATA_MAX             (UINT64_MAX)
typedef uint64_t                        clutch_cpu_data_t;
typedef unsigned __int128               clutch_cpu_data_wide_t;

typedef union sched_clutch_bucket_cpu_data {
	struct {
		/* Clutch bucket CPU used across all threads */
		clutch_cpu_data_t       scbcd_cpu_used;
		/* Clutch bucket voluntary blocked time */
		clutch_cpu_data_t       scbcd_cpu_blocked;
	} cpu_data;
	clutch_cpu_data_wide_t          scbcd_cpu_data_packed;
} sched_clutch_bucket_cpu_data_t;
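
/*
 * Interactivity sketch (an assumption about the shape of the calculation,
 * not the kernel's exact formula): clutch buckets whose threads block
 * voluntarily earn a higher interactivity score than ones that stay
 * CPU-bound, e.g. something of the form
 *
 *	score = (max_score * cpu_blocked) / (cpu_used + cpu_blocked);
 *
 * Reading and writing both counters through scbcd_cpu_data_packed allows
 * the pair to be sampled and updated as a single 128-bit atomic unit.
 */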

/*
 * struct sched_clutch_bucket
 *
 * A sched_clutch_bucket represents the set of threads for a thread
 * group at a particular scheduling bucket in a specific cluster.
 * It maintains information about the CPU usage & blocking behavior
 * of all threads that are part of the clutch_bucket. It inherits the
 * timeshare values from the clutch_bucket_group for decay and
 * timesharing among threads in the clutch.
 *
 * Since the clutch bucket is a per-thread-group, per-QoS entity, it is
 * important to keep its size small and the structure well aligned.
 */
struct sched_clutch_bucket {
#if CONFIG_SCHED_EDGE
	/* (P) flag to indicate if the bucket is a foreign bucket */
	bool                            scb_foreign;
#endif /* CONFIG_SCHED_EDGE */
	/* (I) bucket for the clutch_bucket */
	uint8_t                         scb_bucket;
	/* (P) priority of the clutch bucket */
	uint8_t                         scb_priority;
	/* (P) number of threads in this clutch_bucket; should match runq.count */
	uint16_t                        scb_thr_count;

	/* (I) pointer to the clutch bucket group this clutch bucket belongs to */
	struct sched_clutch_bucket_group *scb_group;
	/* (A) pointer to the root of the hierarchy this bucket is in */
	struct sched_clutch_root        *scb_root;
	/* (P) priority queue of threads based on their promoted/base priority */
	struct priority_queue_sched_max scb_clutchpri_prioq;
	/* (P) runq of threads in clutch_bucket */
	struct priority_queue_sched_stable_max scb_thread_runq;

	/* (P) linkage for all clutch_buckets in a root bucket; used for tick operations */
	queue_chain_t                   scb_listlink;
	/* (P) linkage for clutch_bucket in root_bucket runqueue */
	queue_chain_t                   scb_runqlink;
	/* (P) queue of threads for timesharing purposes */
	queue_head_t                    scb_thread_timeshare_queue;
#if CONFIG_SCHED_EDGE
	/* (P) linkage for all "foreign" clutch buckets in the root clutch */
	struct priority_queue_entry_sched     scb_foreignlink;
#endif /* CONFIG_SCHED_EDGE */
};
typedef struct sched_clutch_bucket *sched_clutch_bucket_t;

/*
 * sched_clutch_counter_time_t
 *
 * Holds thread counts and a timestamp (typically for a clutch bucket group).
 * Allows atomic updates to these fields as a single unit.
 */
typedef union sched_clutch_counter_time {
	struct {
		uint64_t                scct_count;
		uint64_t                scct_timestamp;
	};
	unsigned __int128               scct_packed;
} __attribute__((aligned(16))) sched_clutch_counter_time_t;
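
/*
 * Update sketch (illustrative, using the os_atomic_rmw_loop() primitive
 * from <os/atomic_private.h>): publishing the count and timestamp through
 * the packed view keeps the pair consistent for lock-free readers.
 *
 *	sched_clutch_counter_time_t old, new;
 *	os_atomic_rmw_loop(&data->scct_packed, old.scct_packed,
 *	    new.scct_packed, relaxed, {
 *		new.scct_count = old.scct_count + 1;
 *		new.scct_timestamp = now;
 *	});
 */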

/*
 * struct sched_clutch_bucket_group
 *
 * Represents all the threads for a thread group at a particular
 * QoS/scheduling bucket. This structure also maintains the timesharing
 * properties that are used for decay calculation for all threads in the
 * thread group at the specific scheduling bucket.
 */
struct sched_clutch_bucket_group {
	/* (I) bucket for the clutch_bucket_group */
	uint8_t                         scbg_bucket;
	/* (A) sched tick when the clutch bucket group load/shifts were updated */
	uint32_t _Atomic                scbg_timeshare_tick;
	/* (A) priority shift for threads in the clutch_bucket_group */
	uint32_t _Atomic                scbg_pri_shift;
	/* (A) preferred cluster ID for the clutch bucket group */
	uint32_t _Atomic                scbg_preferred_cluster;
	/* (I) clutch to which this clutch_bucket_group belongs */
	struct sched_clutch             *scbg_clutch;
	/* (A) holds blocked timestamp and runnable/running count */
	sched_clutch_counter_time_t     scbg_blocked_data;
	/* (P/A depending on scheduler) holds pending timestamp and thread count */
	sched_clutch_counter_time_t     scbg_pending_data;
	/* (P/A depending on scheduler) holds interactivity timestamp and score */
	sched_clutch_counter_time_t     scbg_interactivity_data;
	/* (A) CPU usage information for the clutch bucket group */
	sched_clutch_bucket_cpu_data_t  scbg_cpu_data;
	/* (I) storage for all clutch buckets for a thread group at scbg_bucket */
	struct sched_clutch_bucket      *scbg_clutch_buckets;
};
typedef struct sched_clutch_bucket_group *sched_clutch_bucket_group_t;


/*
 * struct sched_clutch
 *
 * A sched_clutch is a 1:1 mapping to a thread group. It maintains the
 * storage for all clutch buckets for this thread group and some properties
 * of the thread group (such as flags).
 */
struct sched_clutch {
	/*
	 * (A) number of runnable threads in the sched_clutch; needs to be atomic
	 * to support cross-cluster sched_clutch migrations.
	 */
	uint16_t _Atomic                sc_thr_count;
	/*
	 * Grouping specific parameters. Currently the implementation only
	 * supports thread_group based grouping.
	 */
	union {
		/* (I) pointer to the thread group */
		struct thread_group     *sc_tg;
	};
	/* (I) storage for all clutch bucket groups for this clutch */
	struct sched_clutch_bucket_group sc_clutch_groups[TH_BUCKET_SCHED_MAX];
};
typedef struct sched_clutch *sched_clutch_t;


/* Clutch lifecycle management */
void sched_clutch_init_with_thread_group(sched_clutch_t, struct thread_group *);
void sched_clutch_destroy(sched_clutch_t);
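
/*
 * Lifecycle sketch (illustrative; the tg_sched_clutch field name is an
 * assumption): each thread group owns one sched_clutch, initialized at
 * group creation and torn down with it.
 *
 *	sched_clutch_init_with_thread_group(&tg->tg_sched_clutch, tg);
 *	...
 *	sched_clutch_destroy(&tg->tg_sched_clutch);
 */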

/* Clutch thread membership management */
void sched_clutch_thread_clutch_update(thread_t, sched_clutch_t, sched_clutch_t);
uint32_t sched_edge_thread_preferred_cluster(thread_t);

/* Clutch timesharing stats management */
uint32_t sched_clutch_thread_run_bucket_incr(thread_t, sched_bucket_t);
uint32_t sched_clutch_thread_run_bucket_decr(thread_t, sched_bucket_t);
void sched_clutch_cpu_usage_update(thread_t, uint64_t);
uint32_t sched_clutch_thread_pri_shift(thread_t, sched_bucket_t);
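
/*
 * Usage sketch (illustrative): the run bucket counts feed the per-bucket
 * load average, so on-core/off-core transitions bracket execution with the
 * incr/decr pair and report consumed CPU time in between; cpu_delta below
 * is a placeholder for the measured CPU time.
 *
 *	(void)sched_clutch_thread_run_bucket_incr(thread, thread->th_sched_bucket);
 *	... thread executes ...
 *	sched_clutch_cpu_usage_update(thread, cpu_delta);
 *	(void)sched_clutch_thread_run_bucket_decr(thread, thread->th_sched_bucket);
 */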

/* Clutch properties accessors */
uint32_t sched_clutch_root_count(sched_clutch_root_t);

/* Grouping specific external routines */
extern sched_clutch_t sched_clutch_for_thread(thread_t);
extern sched_clutch_t sched_clutch_for_thread_group(struct thread_group *);

#if DEVELOPMENT || DEBUG

extern kern_return_t sched_clutch_thread_group_cpu_time_for_thread(thread_t thread, int sched_bucket, uint64_t *cpu_stats);

#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_SCHED_EDGE

/*
 * Getter and setter for the Edge configuration. Used by CLPC to affect thread migration behavior.
 */
void sched_edge_matrix_get(sched_clutch_edge *edge_matrix, bool *edge_request_bitmap, uint64_t flags, uint64_t num_psets);
void sched_edge_matrix_set(sched_clutch_edge *edge_matrix, bool *edge_changes_bitmap, uint64_t flags, uint64_t num_psets);
void sched_edge_tg_preferred_cluster_change(struct thread_group *tg, uint32_t *tg_bucket_preferred_cluster, sched_perfcontrol_preferred_cluster_options_t options);
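
/*
 * Usage sketch (illustrative; the MAX_PSETS sizing and flags value are
 * assumptions): CLPC can snapshot the edge matrix, adjust selected edges
 * and publish the result. The bitmaps select which edges are read or
 * written.
 *
 *	sched_clutch_edge matrix[MAX_PSETS * MAX_PSETS];
 *	bool request[MAX_PSETS * MAX_PSETS];
 *	memset(request, true, sizeof(request));
 *	sched_edge_matrix_get(matrix, request, 0, num_psets);
 *	... tweak matrix entries, marking changed ones in request ...
 *	sched_edge_matrix_set(matrix, request, 0, num_psets);
 */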

uint16_t sched_edge_cluster_cumulative_count(sched_clutch_root_t root_clutch, sched_bucket_t bucket);
uint16_t sched_edge_shared_rsrc_runnable_load(sched_clutch_root_t root_clutch, cluster_shared_rsrc_type_t load_type);

/*
 * sched_edge_search_order_weight_then_locality_cmp()
 *
 * Search order that prioritizes outgoing edges with a lower
 * migration weight, then breaks ties with die-locality followed
 * by the lowest pset id.
 */
extern int (*sched_edge_search_order_weight_then_locality_cmp)(const void *a, const void *b);

/*
 * Used to keep stir-the-pot state up-to-date for the current
 * processor as new threads come on-core.
 */
extern void sched_edge_stir_the_pot_update_registry_state(thread_t thread);
extern void sched_edge_stir_the_pot_clear_registry_entry(void);

#endif /* CONFIG_SCHED_EDGE */

#endif /* CONFIG_SCHED_CLUTCH */

#endif /* _KERN_SCHED_CLUTCH_H_ */