xref: /xnu-10063.121.3/osfmk/kern/sched.h (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	sched.h
60  *	Author:	Avadis Tevanian, Jr.
61  *	Date:	1985
62  *
63  *	Header file for scheduler.
64  *
65  */
66 
67 #ifndef _KERN_SCHED_H_
68 #define _KERN_SCHED_H_
69 
70 #include <mach/policy.h>
71 #include <kern/kern_types.h>
72 #include <kern/smp.h>
73 #include <kern/circle_queue.h>
74 #include <kern/macro_help.h>
75 #include <kern/timer_call.h>
76 #include <kern/ast.h>
77 #include <kern/bits.h>
78 
79 #define NRQS_MAX        (128)                           /* maximum number of priority levels */
80 
81 #define MAXPRI          (NRQS_MAX-1)                    /* 127: highest legal priority */
82 #define MINPRI          0                               /* lowest legal priority schedulable */
83 #define IDLEPRI         MINPRI                          /* idle thread priority */
84 #define NOPRI           -1                              /* below MINPRI, so never a legal priority */
85 
86 /*
87  *	High-level priority assignments
88  *
89  *************************************************************************
90  * 127		Reserved (real-time)
91  *				A
92  *				+
93  *			(32 levels)
94  *				+
95  *				V
96  * 96		Reserved (real-time)
97  * 95		Kernel mode only
98  *				A
99  *				+
100  *			(16 levels)
101  *				+
102  *				V
103  * 80		Kernel mode only
104  * 79		System high priority
105  *				A
106  *				+
107  *			(16 levels)
108  *				+
109  *				V
110  * 64		System high priority
111  * 63		Elevated priorities
112  *				A
113  *				+
114  *			(12 levels)
115  *				+
116  *				V
117  * 52		Elevated priorities
118  * 51		Elevated priorities (incl. BSD +nice)
119  *				A
120  *				+
121  *			(20 levels)
122  *				+
123  *				V
124  * 32		Elevated priorities (incl. BSD +nice)
125  * 31		Default (default base for threads)
126  * 30		Lowered priorities (incl. BSD -nice)
127  *				A
128  *				+
129  *			(20 levels)
130  *				+
131  *				V
132  * 11		Lowered priorities (incl. BSD -nice)
133  * 10		Lowered priorities (aged pri's)
134  *				A
135  *				+
136  *			(11 levels)
137  *				+
138  *				V
139  * 0		Lowered priorities (aged pri's / idle)
140  *************************************************************************
141  */
142 
143 #define BASEPRI_RTQUEUES        (BASEPRI_REALTIME + 1)                          /* 97 */
144 #define BASEPRI_REALTIME        (MAXPRI - (NRQS_MAX / 4) + 1)                   /* 96 */
145 
146 #define MAXPRI_KERNEL           (BASEPRI_REALTIME - 1)                          /* 95 */
147 #define BASEPRI_PREEMPT_HIGH    (BASEPRI_PREEMPT + 1)                           /* 93 */
148 #define BASEPRI_PREEMPT         (MAXPRI_KERNEL - 3)                             /* 92 */
149 #define BASEPRI_VM              (BASEPRI_PREEMPT - 1)                           /* 91 */
150 
151 #define BASEPRI_KERNEL          (MINPRI_KERNEL + 1)                             /* 81 */
152 #define MINPRI_KERNEL           (MAXPRI_KERNEL - (NRQS_MAX / 8) + 1)            /* 80 */
153 
154 #define MAXPRI_RESERVED         (MINPRI_KERNEL - 1)                             /* 79 */
155 #define BASEPRI_GRAPHICS        (MAXPRI_RESERVED - 3)                           /* 76 */
156 #define MINPRI_RESERVED         (MAXPRI_RESERVED - (NRQS_MAX / 8) + 1)          /* 64 */
157 
158 #define MAXPRI_USER             (MINPRI_RESERVED - 1)                           /* 63 */
159 #define BASEPRI_CONTROL         (BASEPRI_DEFAULT + 17)                          /* 48 */
160 #define BASEPRI_FOREGROUND      (BASEPRI_DEFAULT + 16)                          /* 47 */
161 #define BASEPRI_BACKGROUND      (BASEPRI_DEFAULT + 15)                          /* 46 */
162 #define BASEPRI_USER_INITIATED  (BASEPRI_DEFAULT +  6)                          /* 37 */
163 #define BASEPRI_DEFAULT         (MAXPRI_USER - (NRQS_MAX / 4))                  /* 31 */
164 #define MAXPRI_SUPPRESSED       (BASEPRI_DEFAULT - 3)                           /* 28 */
165 #define BASEPRI_UTILITY         (BASEPRI_DEFAULT - 11)                          /* 20 */
166 #define MAXPRI_THROTTLE         (MINPRI + 4)                                    /*  4 */
167 #define MINPRI_USER             MINPRI                                          /*  0 */
168 
169 #define DEPRESSPRI              (MINPRI)                /* depress priority (0) */
170 
171 #define MAXPRI_PROMOTE          (MAXPRI_KERNEL)         /* ceiling for mutex promotion (95) */
172 #define MINPRI_RWLOCK           (BASEPRI_BACKGROUND)    /* floor when holding rwlock count (46) */
173 #define MINPRI_EXEC             (BASEPRI_DEFAULT)       /* floor when in exec state (31) */
174 #define MINPRI_WAITQ            (BASEPRI_DEFAULT)       /* floor when in waitq handover state (31) */
175 #define MINPRI_FLOOR            (BASEPRI_BACKGROUND)    /* floor when boost requested (46) */
176 
177 #define NRQS                    (BASEPRI_REALTIME)      /* Non-realtime levels for runqs (96, i.e. priorities 0..95) */
178 #define NRTQS                   (MAXPRI - BASEPRI_REALTIME) /* Realtime levels for runqs (127 - 96 = 31) */
179 
180 /* Ensure that NRQS is large enough to represent all non-realtime threads; even promoted ones: MAXPRI_PROMOTE must be exactly the last index, NRQS - 1 */
181 _Static_assert((NRQS == (MAXPRI_PROMOTE + 1)), "Runqueues are too small to hold all non-realtime threads");
182 
183 /* Type used for thread->sched_mode and saved_mode */
184 typedef enum {
185 	TH_MODE_NONE = 0,                                       /* unassigned, usually for saved_mode only */
186 	TH_MODE_REALTIME,                                       /* 1: time constraints supplied */
187 	TH_MODE_FIXED,                                          /* 2: use fixed priorities, no decay */
188 	TH_MODE_TIMESHARE,                                      /* 3: use timesharing algorithm */
189 } sched_mode_t;
190 
191 /*
192  * Determine whether the target platform should run the Clutch/Edge Scheduler.
193  * All arm64 platforms are eligible to do so.
194  */
195 #if defined(__arm64__) && CONFIG_CLUTCH && !CONFIG_SCHED_EDGE_OPT_OUT
196 
197 /*
198  * Single-cluster, symmetric (SMP) systems can run with just the Clutch policy, but
199  * multi-cluster, asymmetric (AMP) systems must further enable the Edge policy
200  * extension to Clutch in order to manage scheduling across the multiple CPU clusters.
201  */
202 #define CONFIG_SCHED_CLUTCH 1 /* defined for every build that passes the check above */
203 #if __AMP__
204 #define CONFIG_SCHED_EDGE   1 /* AMP builds only; never defined without CONFIG_SCHED_CLUTCH */
205 #endif /* __AMP__ */
206 
207 #endif /* defined(__arm64__) && CONFIG_CLUTCH && !CONFIG_SCHED_EDGE_OPT_OUT */
208 
209 /*
210  * Since the clutch scheduler organizes threads based on the thread group
211  * and the scheduling bucket, it's important to not mix threads from multiple
212  * priority bands into the same bucket. To achieve that, in the clutch bucket
213  * world, there is a scheduling bucket per QoS effectively.
214  */
215 
216 /* Buckets used for load calculation */
217 typedef enum {
218 	TH_BUCKET_FIXPRI = 0,                   /* Fixed-priority */
219 	TH_BUCKET_SHARE_FG,                     /* Timeshare thread above BASEPRI_DEFAULT */
220 #if CONFIG_SCHED_CLUTCH /* extra bucket: every later enumerator is one higher when this is enabled */
221 	TH_BUCKET_SHARE_IN,                     /* Timeshare thread between BASEPRI_USER_INITIATED and BASEPRI_DEFAULT */
222 #endif /* CONFIG_SCHED_CLUTCH */
223 	TH_BUCKET_SHARE_DF,                     /* Timeshare thread between BASEPRI_DEFAULT and BASEPRI_UTILITY */
224 	TH_BUCKET_SHARE_UT,                     /* Timeshare thread between BASEPRI_UTILITY and MAXPRI_THROTTLE */
225 	TH_BUCKET_SHARE_BG,                     /* Timeshare thread between MAXPRI_THROTTLE and MINPRI */
226 	TH_BUCKET_RUN,                          /* All runnable threads */
227 	TH_BUCKET_SCHED_MAX = TH_BUCKET_RUN,    /* Maximum schedulable buckets */
228 	TH_BUCKET_MAX,                          /* number of enumerators above; sizes sched_pri_shifts[] and sched_run_buckets[] */
229 } sched_bucket_t;
230 
231 /*
232  *	Macro to check for invalid priorities: nonzero when pri lies outside [MINPRI, MAXPRI].
233  *	pri may be evaluated twice, so pass an expression without side effects. */
234 #define invalid_pri(pri) ((pri) < MINPRI || (pri) > MAXPRI)
235 
236 struct runq_stats {                                     /* embedded in run_queue, rt_queue and grrr_run_queue */
237 	uint64_t                count_sum;                      /* NOTE(review): presumably an accumulated queue-depth sum -- confirm against the code that updates it */
238 	uint64_t                last_change_timestamp;          /* NOTE(review): presumably the time of the last queue-count change -- confirm */
239 };
240 
241 #if defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO)
242 
243 struct run_queue {
244 	int                     highq;                          /* highest runnable queue */
245 	bitmap_t                bitmap[BITMAP_LEN(NRQS)];       /* run queue bitmap array, sized via BITMAP_LEN(NRQS) */
246 	int                     count;                          /* # of threads total */
247 	int                     urgency;                        /* level of preemption urgency */
248 	circle_queue_head_t     queues[NRQS];           /* one for each priority (NRQS non-realtime levels) */
249 
250 	struct runq_stats       runq_stats;                     /* count_sum / last_change_timestamp for this queue */
251 };
252 
253 inline static void                                      /* set bit n of map */
254 rq_bitmap_set(bitmap_t *__header_indexable map, u_int n)
255 {
256 	assert(n < NRQS);                               /* n must be a valid run queue index (0..NRQS-1) */
257 	bitmap_set(map, n);
258 }
259 
260 inline static void                                      /* clear bit n of map */
261 rq_bitmap_clear(bitmap_t *__header_indexable map, u_int n)
262 {
263 	assert(n < NRQS);                               /* n must be a valid run queue index (0..NRQS-1) */
264 	bitmap_clear(map, n);
265 }
266 
267 #endif /* defined(CONFIG_SCHED_TIMESHARE_CORE) || defined(CONFIG_SCHED_PROTO) */
268 
269 typedef struct {                                        /* per-priority realtime state; struct rt_queue holds NRTQS of these */
270 	queue_head_t            pri_queue;                      /* runnable RT threads for this priority */
271 	uint64_t                pri_earliest_deadline;          /* earliest deadline for this priority */
272 	int                     pri_count;                      /* # of threads for this priority */
273 	uint32_t                pri_constraint;                 /* constraint of earliest deadline thread for this priority */
274 } rt_queue_pri_t;
275 
276 struct rt_queue {
277 	_Atomic uint64_t        earliest_deadline;              /* earliest deadline */
278 	_Atomic int             count;                          /* # of threads total */
279 	_Atomic uint32_t        constraint;                     /* constraint of earliest deadline thread */
280 	_Atomic int             ed_index;                       /* index of earliest deadline thread */
281 
282 	bitmap_t                bitmap[BITMAP_LEN(NRTQS)];      /* sized via BITMAP_LEN(NRTQS) */
283 
284 	rt_queue_pri_t          rt_queue_pri[NRTQS];            /* one rt_queue_pri_t per realtime level (NRTQS entries) */
285 
286 	struct runq_stats       runq_stats;                     /* count_sum / last_change_timestamp for this queue */
287 };
288 typedef struct rt_queue *rt_queue_t;                    /* pointer typedef: rt_queue_t is a struct rt_queue * */
289 
290 #define RT_CONSTRAINT_NONE              UINT32_MAX          /* NOTE(review): presumably the "none" value for the uint32_t constraint fields -- confirm */
291 #define RT_DEADLINE_NONE                UINT64_MAX          /* NOTE(review): presumably the "none" value for the uint64_t deadline fields -- confirm */
292 #define RT_DEADLINE_QUANTUM_EXPIRED     (UINT64_MAX - 1)    /* one below RT_DEADLINE_NONE */
293 
294 #if defined(CONFIG_SCHED_GRRR_CORE)
295 
296 /*
297  * We map standard Mach priorities to an abstract scale that more properly
298  * indicates how we want processor time allocated under contention.
299  */
300 typedef uint8_t grrr_proportional_priority_t;
301 typedef uint8_t grrr_group_index_t;
302 
303 #define NUM_GRRR_PROPORTIONAL_PRIORITIES        256     /* full range of the uint8_t priority type */
304 #define MAX_GRRR_PROPORTIONAL_PRIORITY ((grrr_proportional_priority_t)255) /* NUM_GRRR_PROPORTIONAL_PRIORITIES - 1 */
305 
306 #if 0
307 #define NUM_GRRR_GROUPS 8                                       /* log2(256); compiled-out alternative */
308 #endif
309 
310 #define NUM_GRRR_GROUPS 64                                      /* 256/4 */
311 
312 struct grrr_group {                                     /* struct grrr_run_queue holds NUM_GRRR_GROUPS of these in groups[] */
313 	queue_chain_t                   priority_order;                         /* next greatest weight group */
314 	grrr_proportional_priority_t            minpriority;
315 	grrr_group_index_t              index;
316 
317 	queue_head_t                    clients;
318 	int                                             count;
319 	uint32_t                                weight;
320 #if 0 /* field below is compiled out */
321 	uint32_t                                deferred_removal_weight;
322 #endif
323 	uint32_t                                work;
324 	thread_t                                current_client;
325 };
326 
327 struct grrr_run_queue {
328 	int                                     count;
329 	uint32_t                        last_rescale_tick;
330 	struct grrr_group       groups[NUM_GRRR_GROUPS];        /* NUM_GRRR_GROUPS (64) groups stored inline */
331 	queue_head_t            sorted_group_list;
332 	uint32_t                        weight;
333 	grrr_group_t            current_group;
334 
335 	struct runq_stats   runq_stats;                         /* count_sum / last_change_timestamp for this queue */
336 };
337 
338 #endif /* defined(CONFIG_SCHED_GRRR_CORE) */
339 
340 extern int rt_runq_count(processor_set_t);
341 extern uint64_t rt_runq_earliest_deadline(processor_set_t);
342 
343 #if defined(CONFIG_SCHED_MULTIQ)
344 sched_group_t   sched_group_create(void);
345 void            sched_group_destroy(sched_group_t sched_group);
346 #endif /* defined(CONFIG_SCHED_MULTIQ) */
347 
348 
349 
350 /*
351  *	Scheduler routines.
352  */
353 
354 /* Handle quantum expiration for an executing thread */
355 extern void             thread_quantum_expire(
356 	timer_call_param_t      processor,
357 	timer_call_param_t      thread);
358 
359 /* Handle preemption timer expiration for an executing thread */
360 extern void             thread_preempt_expire(
361 	timer_call_param_t      processor,
362 	timer_call_param_t      thread);
363 
364 /* Context switch check for current processor */
365 extern ast_t    csw_check(
366 	thread_t      thread,
367 	processor_t   processor,
368 	ast_t         check_reason);
369 
370 /* Check for pending ASTs */
371 extern void ast_check(processor_t processor);
372 
373 extern ast_t update_pending_nonurgent_preemption(processor_t processor, ast_t reason);
374 extern void clear_pending_nonurgent_preemption(processor_t processor);
375 
376 extern void sched_update_generation_count(void);
377 
378 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
379 extern uint32_t std_quantum, min_std_quantum;
380 extern uint32_t std_quantum_us;
381 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
382 
383 extern uint32_t thread_depress_time;
384 extern uint32_t default_timeshare_computation;
385 extern uint32_t default_timeshare_constraint;
386 
387 extern uint32_t max_rt_quantum, min_rt_quantum;
388 
389 extern int default_preemption_rate;
390 
391 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
392 
393 /*
394  *	Age usage at approximately (1 << SCHED_TICK_SHIFT) times per second
395  *	Aging may be deferred during periods where all processors are idle
396  *	and cumulatively applied during periods of activity.
397  */
398 #define SCHED_TICK_SHIFT        3       /* 1 << 3 == 8 aging ticks per second, approximately */
399 #define SCHED_TICK_MAX_DELTA    (8)
400 
401 extern unsigned         sched_tick;
402 extern uint32_t         sched_tick_interval;
403 
404 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
405 
406 extern uint64_t         sched_one_second_interval;
407 
408 /* Periodic computation of various averages */
409 extern void            compute_sched_load(void);
410 
411 extern void             compute_averages(uint64_t);
412 
413 extern void             compute_averunnable(
414 	void                    *nrun);
415 
416 extern void             compute_stack_target(
417 	void                    *arg);
418 
419 extern void             compute_pageout_gc_throttle(
420 	void                    *arg);
421 
422 extern void             compute_pmap_gc_throttle(
423 	void                    *arg);
424 
425 /*
426  *	Conversion factor from usage
427  *	to priority.
428  */
429 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
430 
431 #define MAX_LOAD (NRQS - 1) /* 95 */
432 #define SCHED_PRI_SHIFT_MAX ((8 * sizeof(uint32_t)) - 1) /* 31 (type size_t): largest shift count valid for a 32-bit value */
433 extern uint32_t         sched_pri_shifts[TH_BUCKET_MAX]; /* one entry per sched_bucket_t value */
434 extern uint32_t         sched_fixed_shift;
435 extern int8_t           sched_load_shifts[NRQS]; /* NRQS entries, i.e. indices 0..MAX_LOAD */
436 extern uint32_t         sched_decay_usage_age_factor;
437 void sched_timeshare_consider_maintenance(uint64_t ctime, bool safe_point);
438 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
439 
440 void sched_consider_recommended_cores(uint64_t ctime, thread_t thread);
441 
442 extern int32_t          sched_poll_yield_shift;
443 extern uint64_t         sched_safe_rt_duration;
444 extern uint64_t         sched_safe_fixed_duration;
445 
446 extern uint32_t         sched_load_average, sched_mach_factor;
447 
448 extern uint32_t         avenrun[3], mach_factor[3];
449 
450 extern uint64_t         max_unsafe_rt_computation;
451 extern uint64_t         max_unsafe_fixed_computation;
452 extern uint64_t         max_poll_computation;
453 
454 extern uint32_t         sched_run_buckets[TH_BUCKET_MAX];
455 
456 extern uint32_t sched_run_incr(thread_t thread);
457 extern uint32_t sched_run_decr(thread_t thread);
458 extern void sched_update_thread_bucket(thread_t thread);
459 
460 extern uint32_t sched_smt_run_incr(thread_t thread);
461 extern uint32_t sched_smt_run_decr(thread_t thread);
462 extern void sched_smt_update_thread_bucket(thread_t thread);
463 
464 #define SCHED_DECAY_TICKS       32
465 struct shift_data {             /* NOTE(review): presumably one entry of a decay shift table related to SCHED_DECAY_TICKS -- confirm against its user */
466 	int     shift1;
467 	int     shift2;
468 };
469 
470 /*
471  * Save the current thread time and compute a delta since the last call for the
472  * scheduler tick.  `delta` is assigned the difference, cast to its own type;
473  * `thread` is expanded several times, so pass a side-effect-free expression. */
474 #define sched_tick_delta(thread, delta) \
475 MACRO_BEGIN \
476     uint64_t _total = recount_thread_time_mach(thread); \
477     (delta) = (typeof(delta))(_total - (thread)->sched_time_save); \
478     (thread)->sched_time_save = _total; \
479 MACRO_END
480 
481 #define SCHED_MAX_BACKUP_PROCESSORS             7
482 #if defined(__x86_64__) /* x86_64 defaults: 1 backup processor, 2 for the _SMT variant */
483 #define SCHED_DEFAULT_BACKUP_PROCESSORS         1
484 #define SCHED_DEFAULT_BACKUP_PROCESSORS_SMT     2
485 #else /* every other architecture defaults to 0 for both */
486 #define SCHED_DEFAULT_BACKUP_PROCESSORS         0
487 #define SCHED_DEFAULT_BACKUP_PROCESSORS_SMT     0
488 #endif
489 extern int sched_rt_n_backup_processors;
490 
491 extern bool system_is_SMT;
492 
493 #endif  /* _KERN_SCHED_H_ */
494