xref: /xnu-11417.101.15/osfmk/kern/sched_prim.h (revision e3723e1f17661b24996789d8afc084c0c3303b26)
1 /*
2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	sched_prim.h
60  *	Author:	David Golub
61  *
62  *	Scheduling primitive definitions file
63  *
64  */
65 
66 #ifndef _KERN_SCHED_PRIM_H_
67 #define _KERN_SCHED_PRIM_H_
68 
69 #include <sys/cdefs.h>
70 #include <mach/boolean.h>
71 #include <mach/machine/vm_types.h>
72 #include <mach/kern_return.h>
73 #include <kern/clock.h>
74 #include <kern/kern_types.h>
75 #include <kern/percpu.h>
76 #include <kern/thread.h>
77 #include <kern/block_hint.h>
78 
79 extern int              thread_get_current_cpuid(void);
80 
81 #if XNU_KERNEL_PRIVATE
82 
83 /*
84  * The quantum length used for Fixed and RT sched modes. In general the quantum
85  * can vary - for example for background or QOS.
86  */
87 uint64_t sched_get_quantum_us(void);
88 
89 #endif /* XNU_KERNEL_PRIVATE */
90 
91 #if defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS
92 
93 #include <kern/sched_urgency.h>
94 #include <kern/thread_group.h>
95 #include <kern/waitq.h>
96 
97 /* Initialization */
98 extern void             sched_init(void);
99 
100 extern void             sched_startup(void);
101 
102 extern void             sched_timebase_init(void);
103 
104 extern void             pset_rt_init(processor_set_t pset);
105 
106 extern void             sched_rtlocal_init(processor_set_t pset);
107 
108 extern rt_queue_t       sched_rtlocal_runq(processor_set_t pset);
109 
110 extern void             sched_rtlocal_queue_shutdown(processor_t processor);
111 
112 extern int64_t          sched_rtlocal_runq_count_sum(void);
113 
114 extern thread_t         sched_rtlocal_steal_thread(processor_set_t stealing_pset, uint64_t earliest_deadline);
115 
116 extern thread_t         sched_rt_choose_thread(processor_set_t pset);
117 
118 extern void             sched_check_spill(processor_set_t pset, thread_t thread);
119 
120 extern bool             sched_thread_should_yield(processor_t processor, thread_t thread);
121 
122 extern bool             sched_steal_thread_DISABLED(processor_set_t pset);
123 extern bool             sched_steal_thread_enabled(processor_set_t pset);
124 
125 /* Force a preemption point for a thread and wait for it to stop running */
126 extern boolean_t        thread_stop(
127 	thread_t        thread,
128 	boolean_t       until_not_runnable);
129 
130 /* Release a previous stop request */
131 extern void                     thread_unstop(
132 	thread_t        thread);
133 
134 /* Wait for a thread to stop running */
135 extern void                     thread_wait(
136 	thread_t        thread,
137 	boolean_t       until_not_runnable);
138 
139 /* Unblock thread on wake up */
140 extern boolean_t        thread_unblock(
141 	thread_t                thread,
142 	wait_result_t   wresult);
143 
144 /* Unblock and dispatch thread */
145 extern void thread_go(
146 	thread_t                thread,
147 	wait_result_t           wresult,
148 	bool                    try_handoff);
149 
150 /* Check if direct handoff is allowed */
151 extern boolean_t
152 thread_allowed_for_handoff(
153 	thread_t         thread);
154 
155 /* Handle threads at context switch */
156 extern void                     thread_dispatch(
157 	thread_t                old_thread,
158 	thread_t                new_thread);
159 
160 /* Switch directly to a particular thread */
161 extern int                      thread_run(
162 	thread_t                        self,
163 	thread_continue_t       continuation,
164 	void                            *parameter,
165 	thread_t                        new_thread);
166 
167 /* Resume thread with new stack */
168 extern __dead2 void     thread_continue(thread_t old_thread);
169 
170 /* Invoke continuation */
171 extern __dead2 void     call_continuation(
172 	thread_continue_t       continuation,
173 	void                    *parameter,
174 	wait_result_t           wresult,
175 	boolean_t               enable_interrupts);
176 
177 /*
178  * Flags that can be passed to set_sched_pri
179  * to skip side effects
180  */
181 __options_decl(set_sched_pri_options_t, uint32_t, {
182 	SETPRI_DEFAULT  = 0x0,
183 	SETPRI_LAZY     = 0x1,  /* Avoid setting AST flags or sending IPIs */
184 });
185 
186 /* Set the current scheduled priority */
187 extern void set_sched_pri(
188 	thread_t      thread,
189 	int16_t       priority,
190 	set_sched_pri_options_t options);
191 
192 /* Set base priority of the specified thread */
193 extern void             sched_set_thread_base_priority(
194 	thread_t                thread,
195 	int                             priority);
196 
197 /* Set absolute base priority of the specified thread */
198 extern void             sched_set_kernel_thread_priority(
199 	thread_t                thread,
200 	int                             priority);
201 
202 
203 /* Set the thread's true scheduling mode */
204 extern void             sched_set_thread_mode(thread_t thread,
205     sched_mode_t mode);
206 
207 /*
208  * Set the thread's scheduling mode taking into account that the thread may have
209  * been demoted.
210  * */
211 extern void             sched_set_thread_mode_user(thread_t thread,
212     sched_mode_t mode);
213 
214 /*
215  * Get the thread's scheduling mode taking into account that the thread may have
216  * been demoted.
217  * */
218 extern sched_mode_t     sched_get_thread_mode_user(thread_t thread);
219 
220 
221 /* Demote the true scheduler mode */
222 extern void             sched_thread_mode_demote(thread_t thread,
223     uint32_t reason);
224 /* Un-demote the true scheduler mode */
225 extern void             sched_thread_mode_undemote(thread_t thread,
226     uint32_t reason);
227 /* Check for a specific demotion */
228 extern bool             sched_thread_mode_has_demotion(thread_t thread,
229     uint32_t reason);
230 
231 extern void sched_thread_promote_reason(thread_t thread, uint32_t reason, uintptr_t trace_obj);
232 extern void sched_thread_unpromote_reason(thread_t thread, uint32_t reason, uintptr_t trace_obj);
233 
234 /* Re-evaluate base priority of thread (thread locked) */
235 void thread_recompute_priority(thread_t thread);
236 
237 /* Re-evaluate scheduled priority of thread (thread locked) */
238 extern void thread_recompute_sched_pri(
239 	thread_t thread,
240 	set_sched_pri_options_t options);
241 
242 /* Periodic scheduler activity */
243 extern void             sched_init_thread(void);
244 
245 /* Perform sched_tick housekeeping activities */
246 extern boolean_t                can_update_priority(
247 	thread_t                thread);
248 
249 extern void             update_priority(
250 	thread_t                thread);
251 
252 extern void             lightweight_update_priority(
253 	thread_t                thread);
254 
255 extern void             sched_default_quantum_expire(thread_t thread);
256 
257 /* Idle processor thread continuation */
258 extern void             idle_thread(
259 	void*           parameter,
260 	wait_result_t   result);
261 
262 extern void idle_thread_create(
263 	processor_t             processor,
264 	thread_continue_t       continuation);
265 
266 /* Continuation return from syscall */
267 extern void     thread_syscall_return(
268 	kern_return_t   ret);
269 
270 /* Context switch */
271 extern wait_result_t    thread_block_reason(
272 	thread_continue_t       continuation,
273 	void                            *parameter,
274 	ast_t                           reason);
275 
276 __options_decl(sched_options_t, uint32_t, {
277 	SCHED_NONE      = 0x0,
278 	SCHED_TAILQ     = 0x1,
279 	SCHED_HEADQ     = 0x2,
280 	SCHED_PREEMPT   = 0x4,
281 	SCHED_REBALANCE = 0x8,
282 });
283 
284 /* Reschedule thread for execution */
285 extern void             thread_setrun(
286 	thread_t        thread,
287 	sched_options_t options);
288 
289 extern processor_set_t  task_choose_pset(
290 	task_t                  task);
291 
292 /* Bind the current thread to a particular processor */
293 extern processor_t      thread_bind(
294 	processor_t             processor);
295 
296 extern void             thread_bind_during_wakeup(
297 	thread_t                thread,
298 	processor_t             processor);
299 
300 extern void             thread_unbind_after_queue_shutdown(
301 	thread_t                thread,
302 	processor_t             processor);
303 
304 extern bool pset_has_stealable_threads(
305 	processor_set_t         pset);
306 
307 extern bool pset_has_stealable_rt_threads(
308 	processor_set_t         pset);
309 
310 extern processor_set_t choose_starting_pset(
311 	pset_node_t  node,
312 	thread_t     thread,
313 	processor_t *processor_hint);
314 
315 extern int pset_available_cpu_count(
316 	processor_set_t pset);
317 
318 extern bool pset_is_recommended(
319 	processor_set_t pset);
320 
321 extern bool pset_type_is_recommended(
322 	processor_set_t pset);
323 
324 extern pset_node_t sched_choose_node(
325 	thread_t     thread);
326 
327 #if CONFIG_SCHED_SMT
328 extern processor_t      choose_processor_smt(
329 	processor_set_t                pset,
330 	processor_t                    processor,
331 	thread_t                       thread);
332 #else /* CONFIG_SCHED_SMT */
333 /* Choose the best processor to run a thread */
334 extern processor_t      choose_processor(
335 	processor_set_t                pset,
336 	processor_t                    processor,
337 	thread_t                       thread);
338 #endif /* CONFIG_SCHED_SMT */
339 
340 extern bool sched_SMT_balance(
341 	processor_t processor,
342 	processor_set_t pset);
343 
344 extern void thread_quantum_init(
345 	thread_t thread,
346 	uint64_t now);
347 
348 
349 extern void             run_queue_init(
350 	run_queue_t             runq);
351 
352 extern thread_t run_queue_dequeue(
353 	run_queue_t           runq,
354 	sched_options_t       options);
355 
356 extern boolean_t        run_queue_enqueue(
357 	run_queue_t           runq,
358 	thread_t              thread,
359 	sched_options_t       options);
360 
361 extern void     run_queue_remove(
362 	run_queue_t            runq,
363 	thread_t                       thread);
364 
365 extern thread_t run_queue_peek(
366 	run_queue_t            runq);
367 
/*
 * sched_update_scan_context:
 *
 * State handed by pointer to the run queue scan routines declared in this
 * header (sched_rtlocal_runq_scan(), runq_scan(),
 * sched_clutch_timeshare_scan()).
 *
 * NOTE(review): field meanings are only implied by their names here;
 * confirm against the scan implementations.
 */
typedef struct sched_update_scan_context *sched_update_scan_context_t;

struct sched_update_scan_context {
	uint64_t        earliest_bg_make_runnable_time;
	uint64_t        earliest_normal_make_runnable_time;
	uint64_t        earliest_rt_make_runnable_time;
	uint64_t        sched_tick_last_abstime;
};
375 
376 extern void             sched_rtlocal_runq_scan(sched_update_scan_context_t scan_context);
377 
378 extern void sched_pset_made_schedulable(
379 	processor_t processor,
380 	processor_set_t pset,
381 	boolean_t drop_lock);
382 
383 extern void sched_cpu_init_completed(void);
384 
/*
 * sched_ipi_event_t:
 *
 * Scheduling events that may need an IPI. The IPI policy engine chooses
 * the kind of IPI to use from the destination processor's state, the
 * thread involved and one of the events listed here.
 *
 * The values are consecutive identifiers, not bit flags.
 */
typedef enum {
	SCHED_IPI_EVENT_BOUND_THR   = 1,
	SCHED_IPI_EVENT_PREEMPT     = 2,
	SCHED_IPI_EVENT_SMT_REBAL   = 3,
	SCHED_IPI_EVENT_SPILL       = 4,
	SCHED_IPI_EVENT_REBALANCE   = 5,
	SCHED_IPI_EVENT_RT_PREEMPT  = 6,
} sched_ipi_event_t;
398 
399 
/*
 * sched_ipi_type_t:
 *
 * The IPI types used by the scheduler. This is the value returned by
 * sched_ipi_action() and the policy routines, and later passed to
 * sched_ipi_perform().
 */
typedef enum {
	SCHED_IPI_NONE              = 0,
	SCHED_IPI_IMMEDIATE         = 1,
	SCHED_IPI_IDLE              = 2,
	SCHED_IPI_DEFERRED          = 3,
} sched_ipi_type_t;
407 
408 /* The IPI policy engine behaves in the following manner:
409  * - All scheduler events which need an IPI invoke sched_ipi_action() with
410  *   the appropriate destination processor, thread and event.
411  * - sched_ipi_action() performs basic checks, invokes the scheduler specific
412  *   ipi_policy routine and sets pending_AST bits based on the result.
413  * - Once the pset lock is dropped, the scheduler invokes sched_ipi_perform()
414  *   routine which actually sends the appropriate IPI to the destination core.
415  */
416 extern sched_ipi_type_t sched_ipi_action(processor_t dst, thread_t thread, sched_ipi_event_t event);
417 extern void sched_ipi_perform(processor_t dst, sched_ipi_type_t ipi);
418 
419 /* sched_ipi_policy() is the global default IPI policy for all schedulers */
420 extern sched_ipi_type_t sched_ipi_policy(processor_t dst, thread_t thread,
421     boolean_t dst_idle, sched_ipi_event_t event);
422 
423 /* sched_ipi_deferred_policy() is the global default deferred IPI policy for all schedulers */
424 extern sched_ipi_type_t sched_ipi_deferred_policy(processor_set_t pset,
425     processor_t dst, thread_t thread, sched_ipi_event_t event);
426 
427 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
428 
429 extern boolean_t        thread_update_add_thread(thread_t thread);
430 extern void             thread_update_process_threads(void);
431 extern boolean_t        runq_scan(run_queue_t runq, sched_update_scan_context_t scan_context);
432 
433 #if CONFIG_SCHED_CLUTCH
434 extern boolean_t        sched_clutch_timeshare_scan(queue_t thread_queue, uint16_t count, sched_update_scan_context_t scan_context);
435 #endif /* CONFIG_SCHED_CLUTCH */
436 
437 extern void sched_timeshare_init(void);
438 extern void sched_timeshare_timebase_init(void);
439 extern void sched_timeshare_maintenance_continue(void);
440 
441 
442 
443 extern boolean_t priority_is_urgent(int priority);
444 extern uint32_t sched_timeshare_initial_quantum_size(thread_t thread);
445 
446 extern int sched_compute_timeshare_priority(thread_t thread);
447 
448 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
449 
450 /* Remove thread from its run queue */
451 extern boolean_t        thread_run_queue_remove(thread_t thread);
452 thread_t thread_run_queue_remove_for_handoff(thread_t thread);
453 
454 /* Put a thread back in the run queue after being yanked */
455 extern void thread_run_queue_reinsert(thread_t thread, sched_options_t options);
456 
457 extern void             thread_timer_expire(
458 	void                    *thread,
459 	void                    *p1);
460 
461 extern bool thread_is_eager_preempt(thread_t thread);
462 
463 extern boolean_t sched_generic_direct_dispatch_to_idle_processors;
464 
465 /* Set the maximum interrupt level for the thread */
466 __private_extern__ wait_interrupt_t thread_interrupt_level(
467 	wait_interrupt_t interruptible);
468 
469 __private_extern__ wait_result_t thread_mark_wait_locked(
470 	thread_t                 thread,
471 	wait_interrupt_t interruptible);
472 
473 /* Wake up locked thread directly, passing result */
474 __private_extern__ kern_return_t clear_wait_internal(
475 	thread_t                thread,
476 	wait_result_t   result);
477 
/*
 * sched_statistics:
 *
 * Scheduler event counters. One instance per CPU is declared right after
 * this definition (PERCPU_DECL of sched_stats); SCHED_STATS_INC() bumps a
 * member by name.
 *
 * All counters are 32 bits wide. Keep member order and width stable:
 * NOTE(review): external consumers of this layout are not visible here.
 */
struct sched_statistics {
	/* context switch / preemption counters */
	uint32_t        csw_count;
	uint32_t        preempt_count;
	uint32_t        preempted_rt_count;
	uint32_t        preempted_by_rt_count;
	uint32_t        rt_sched_count;
	/* interrupt, IPI and timer counters */
	uint32_t        interrupt_count;
	uint32_t        ipi_count;
	uint32_t        timer_pop_count;
	/* idle and quantum counters */
	uint32_t        idle_transitions;
	uint32_t        quantum_timer_expirations;
};
490 PERCPU_DECL(struct sched_statistics, sched_stats);
491 extern bool             sched_stats_active;
492 
493 extern void sched_stats_handle_csw(
494 	processor_t processor,
495 	int reasons,
496 	int selfpri,
497 	int otherpri);
498 
499 extern void sched_stats_handle_runq_change(
500 	struct runq_stats *stats,
501 	int old_count);
502 
/*
 * SCHED_STATS_INC:
 *
 * Add one to the named member of the sched_stats record obtained through
 * PERCPU_GET(), but only while sched_stats_active is set.
 *
 * parameters:
 *      field    member name of struct sched_statistics (pasted verbatim)
 */
#define SCHED_STATS_INC(field)                                  \
MACRO_BEGIN                                                     \
	if (__improbable(sched_stats_active)) {                 \
		PERCPU_GET(sched_stats)->field += 1;            \
	}                                                       \
MACRO_END
509 
/*
 * SCHED_STATS_CSW / SCHED_STATS_RUNQ_CHANGE:
 *
 * On DEBUG builds these forward to sched_stats_handle_csw() and
 * sched_stats_handle_runq_change() while sched_stats_active is set.
 * On all other builds they expand to an empty statement, so their
 * arguments are NOT evaluated; do not pass expressions with side effects.
 */
#if DEBUG

#define SCHED_STATS_CSW(processor, reasons, selfpri, otherpri)          \
MACRO_BEGIN                                                             \
	if (__improbable(sched_stats_active)) {                         \
		sched_stats_handle_csw((processor), (reasons),          \
		    (selfpri), (otherpri));                             \
	}                                                               \
MACRO_END

#define SCHED_STATS_RUNQ_CHANGE(stats, old_count)                       \
MACRO_BEGIN                                                             \
	if (__improbable(sched_stats_active)) {                         \
		sched_stats_handle_runq_change((stats), (old_count));   \
	}                                                               \
MACRO_END

#else /* !DEBUG */

#define SCHED_STATS_CSW(processor, reasons, selfpri, otherpri)  do { } while (0)
#define SCHED_STATS_RUNQ_CHANGE(stats, old_count)               do { } while (0)

#endif /* DEBUG */
534 
/*
 * sched_debug_flags:
 *
 * Bit mask that gates the SCHED_DEBUG_* tracepoint macros defined in this
 * header. Each flag below enables one family of tracepoints.
 */
extern uint32_t sched_debug_flags;

#define SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS           0x00000001
#define SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS   0x00000002
#define SCHED_DEBUG_FLAG_AST_CHECK_TRACEPOINTS          0x00000004
539 
/*
 * SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT:
 *
 * Forward the arguments to KERNEL_DEBUG_CONSTANT() only while
 * SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS is set in sched_debug_flags.
 */
#define SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(...)                                 \
MACRO_BEGIN                                                                             \
	if (__improbable(sched_debug_flags & SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS)) {  \
		KERNEL_DEBUG_CONSTANT(__VA_ARGS__);                                     \
	}                                                                               \
MACRO_END
547 
/*
 * SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT_IST:
 *
 * Forward the arguments, prefixed with KDEBUG_TRACE, to
 * KERNEL_DEBUG_CONSTANT_IST() only while
 * SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS is set in sched_debug_flags.
 */
#define SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT_IST(...)             \
MACRO_BEGIN                                                                     \
	if (__improbable(sched_debug_flags &                                    \
	    SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS)) {                   \
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, __VA_ARGS__);           \
	}                                                                       \
MACRO_END
555 
/*
 * SCHED_DEBUG_AST_CHECK_KDBG_RELEASE:
 *
 * Forward the arguments to KDBG_RELEASE() only while
 * SCHED_DEBUG_FLAG_AST_CHECK_TRACEPOINTS is set in sched_debug_flags.
 */
#define SCHED_DEBUG_AST_CHECK_KDBG_RELEASE(...)                                         \
MACRO_BEGIN                                                                             \
	if (__improbable(sched_debug_flags & SCHED_DEBUG_FLAG_AST_CHECK_TRACEPOINTS)) { \
		KDBG_RELEASE(__VA_ARGS__);                                              \
	}                                                                               \
MACRO_END
563 
564 
565 /* Tells if there are "active" RT threads in the system (provided by CPU PM) */
566 extern void     active_rt_threads(
567 	boolean_t       active);
568 
569 /* Returns the perfcontrol attribute for the thread */
570 extern perfcontrol_class_t thread_get_perfcontrol_class(
571 	thread_t        thread);
572 
573 /* Generic routine for Non-AMP schedulers to calculate parallelism */
574 extern uint32_t sched_qos_max_parallelism(int qos, uint64_t options);
575 
576 extern void check_monotonic_time(uint64_t ctime);
577 
578 #endif /* defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS */
579 
580 __BEGIN_DECLS
581 
582 #ifdef  XNU_KERNEL_PRIVATE
583 
584 extern void thread_soft_bind_cluster_type(thread_t, char cluster_type);
585 
586 __options_decl(thread_bind_option_t, uint64_t, {
587 	/* Unbind a previously cluster bound thread */
588 	THREAD_UNBIND                   = 0x1,
589 	/*
590 	 * Bind thread to the cluster only if it is eligible to run on that cluster. If
591 	 * the thread is not eligible to run on the cluster, thread_soft_bind_cluster_id()
592 	 * returns KERN_INVALID_POLICY.
593 	 */
594 	THREAD_BIND_ELIGIBLE_ONLY       = 0x2,
595 });
596 extern kern_return_t thread_soft_bind_cluster_id(thread_t thread, uint32_t cluster_id, thread_bind_option_t options);
597 
598 extern int sched_get_rt_n_backup_processors(void);
599 extern void sched_set_rt_n_backup_processors(int n);
600 
601 extern int sched_get_rt_deadline_epsilon(void);
602 extern void sched_set_rt_deadline_epsilon(int new_epsilon_us);
603 
604 /* Toggles a global override to turn off CPU Throttling */
605 extern void     sys_override_cpu_throttle(boolean_t enable_override);
606 
607 extern int sched_get_powered_cores(void);
608 extern void sched_set_powered_cores(int n);
609 
610 uint64_t sched_sysctl_get_recommended_cores(void);
611 
612 /*
613  ****************** Only exported until BSD stops using ********************
614  */
615 
616 extern void                     thread_vm_bind_group_add(void);
617 
618 /* Wake up thread directly, passing result */
619 extern kern_return_t clear_wait(
620 	thread_t                thread,
621 	wait_result_t   result);
622 
623 /* Start thread running */
624 extern void             thread_bootstrap_return(void) __attribute__((noreturn));
625 
626 /* Return from exception (BSD-visible interface) */
627 extern void             thread_exception_return(void) __dead2;
628 
/*
 * sched_string:
 *
 * String declaring the name of the current scheduler, stored in a fixed
 * buffer of SCHED_STRING_MAX_LENGTH bytes.
 */
#define SCHED_STRING_MAX_LENGTH (48)
extern char sched_string[SCHED_STRING_MAX_LENGTH];
632 
633 __options_decl(thread_handoff_option_t, uint32_t, {
634 	THREAD_HANDOFF_NONE          = 0,
635 	THREAD_HANDOFF_SETRUN_NEEDED = 0x1,
636 });
637 
638 /* Remove thread from its run queue */
639 thread_t thread_prepare_for_handoff(thread_t thread, thread_handoff_option_t option);
640 
641 /* Attempt to context switch to a specific runnable thread */
642 extern wait_result_t thread_handoff_deallocate(thread_t thread, thread_handoff_option_t option);
643 
644 __attribute__((nonnull(2)))
645 extern void thread_handoff_parameter(thread_t thread,
646     thread_continue_t continuation, void *parameter, thread_handoff_option_t) __dead2;
647 
648 extern struct waitq     *assert_wait_queue(event_t event);
649 
650 extern kern_return_t thread_wakeup_one_with_pri(event_t event, int priority);
651 
652 extern thread_t thread_wakeup_identify(event_t event, int priority);
653 
654 /*
655  * sched_cond_t:
656  *
657  * An atomic condition variable used to synchronize wake/block operations on threads.
658  * Bits defined below are reserved for use by sched_prim. Remaining
659  * bits may be used by caller for additional synchronization semantics.
660  */
661 __options_decl(sched_cond_t, uint32_t, {
662 	SCHED_COND_INIT = 0x0000,    /* initialize all bits to zero (inactive and not awoken) */
663 	SCHED_COND_ACTIVE = 0x0001,  /* target thread is active */
664 	SCHED_COND_WAKEUP = 0x0002   /* wakeup has been issued for target thread */
665 });
666 typedef _Atomic sched_cond_t sched_cond_atomic_t;
667 
668 /*
669  * sched_cond_init:
670  *
671  * Initialize an atomic condition variable. Note that this does not occur atomically and should be
672  * performed during thread initialization, before the condition is observable by other threads.
673  */
674 extern void sched_cond_init(
675 	sched_cond_atomic_t *cond);
676 
677 /*
678  * sched_cond_signal:
679  *
680  * Wakeup the specified thread if it is waiting on this event and it has not already been issued a wakeup.
681  *
682  * parameters:
683  *      thread    thread to awaken
684  *      cond      atomic condition variable
685  */
686 extern kern_return_t sched_cond_signal(
687 	sched_cond_atomic_t *cond,
688 	thread_t thread);
689 
690 /*
691  * sched_cond_wait_parameter:
692  *
693  * Assert wait and block on cond if no wakeup has been issued.
694  * If a wakeup has been issued on cond since the last `sched_cond_ack`, clear_wait and
695  * return `THREAD_AWAKENED`.
696  *
697  * `sched_cond_wait_parameter` must be paired with `sched_cond_ack`.
698  *
699  * NOTE: `continuation` will only be jumped to if a wakeup has not been issued
700  *
701  * parameters:
702  *      cond             atomic condition variable to synchronize on
703  *      interruptible    interruptible value to pass to assert_wait
704  *      continuation     continuation if block succeeds
705  *      parameter        parameter to pass to continuation
706  */
707 extern wait_result_t sched_cond_wait_parameter(
708 	sched_cond_atomic_t *cond,
709 	wait_interrupt_t interruptible,
710 	thread_continue_t continuation,
711 	void *parameter);
712 
713 /*
714  * sched_cond_wait:
715  *
716  * Assert wait and block on cond if no wakeup has been issued.
717  * If a wakeup has been issued on cond since the last `sched_cond_ack`, clear_wait and
718  * return `THREAD_AWAKENED`.
719  *
720  * `sched_cond_wait` must be paired with `sched_cond_ack`.
721  *
722  * NOTE: `continuation` will only be jumped to if a wakeup has not been issued
723  *
724  * parameters:
725  *      cond             atomic condition variable to synchronize on
726  *      interruptible    interruptible value to pass to assert_wait
727  *      continuation     continuation if block succeeds
728  */
729 extern wait_result_t sched_cond_wait(
730 	sched_cond_atomic_t *cond,
731 	wait_interrupt_t interruptible,
732 	thread_continue_t continuation);
733 
734 /*
735  * sched_cond_ack:
736  *
737  * Acknowledge an issued wakeup by clearing WAKEUP and setting ACTIVE (via XOR).
738  * It is the caller's responsibility to ensure that the ACTIVE bit is always low prior to calling
739  * (i.e. by calling `sched_cond_wait` prior to any rerun or block).
740  * Synchronization schemes that allow for WAKEUP bit to be reset prior to wakeup
741  * (e.g. a cancellation mechanism) should check that WAKEUP was indeed cleared.
742  *
743  * e.g.
744  * ```
745  * if (sched_cond_ack(&my_state) & SCHED_COND_WAKEUP) {
746  *     // WAKEUP bit was no longer set by the time this thread woke up
747  *     do_cancellation_policy();
748  * }
749  * ```
750  *
751  * parameters:
752  *      cond:    atomic condition variable
753  */
754 extern sched_cond_t sched_cond_ack(
755 	sched_cond_atomic_t *cond);
756 
757 #endif  /* XNU_KERNEL_PRIVATE */
758 
759 #if defined(KERNEL_PRIVATE) || SCHED_TEST_HARNESS
760 /* Set pending block hint for a particular object before we go into a wait state */
761 extern void             thread_set_pending_block_hint(
762 	thread_t                        thread,
763 	block_hint_t                    block_hint);
764 
/*
 * Option bits for the `options` argument of qos_max_parallelism().
 * NOTE(review): per-bit semantics are not described in this header.
 */
#define QOS_PARALLELISM_COUNT_LOGICAL               0x1
#define QOS_PARALLELISM_REALTIME                    0x2
#define QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE     0x4
768 
769 extern uint32_t qos_max_parallelism(int qos, uint64_t options);
770 #endif /* defined(KERNEL_PRIVATE) || SCHED_TEST_HARNESS */
771 
772 #if XNU_KERNEL_PRIVATE
773 extern void             thread_yield_with_continuation(
774 	thread_continue_t       continuation,
775 	void                            *parameter) __dead2;
776 #endif /* XNU_KERNEL_PRIVATE */
777 
778 /* Context switch */
779 extern wait_result_t    thread_block(
780 	thread_continue_t       continuation);
781 
782 extern wait_result_t    thread_block_parameter(
783 	thread_continue_t       continuation,
784 	void                            *parameter);
785 
786 /* Declare thread will wait on a particular event */
787 extern wait_result_t    assert_wait(
788 	event_t                         event,
789 	wait_interrupt_t        interruptible);
790 
791 /* Assert that the thread intends to wait with a timeout */
792 extern wait_result_t    assert_wait_timeout(
793 	event_t                         event,
794 	wait_interrupt_t        interruptible,
795 	uint32_t                        interval,
796 	uint32_t                        scale_factor);
797 
798 /* Assert that the thread intends to wait with an urgency, timeout and leeway */
799 extern wait_result_t    assert_wait_timeout_with_leeway(
800 	event_t                         event,
801 	wait_interrupt_t        interruptible,
802 	wait_timeout_urgency_t  urgency,
803 	uint32_t                        interval,
804 	uint32_t                        leeway,
805 	uint32_t                        scale_factor);
806 
807 extern wait_result_t    assert_wait_deadline(
808 	event_t                         event,
809 	wait_interrupt_t        interruptible,
810 	uint64_t                        deadline);
811 
812 /* Assert that the thread intends to wait with an urgency, deadline, and leeway */
813 extern wait_result_t    assert_wait_deadline_with_leeway(
814 	event_t                         event,
815 	wait_interrupt_t        interruptible,
816 	wait_timeout_urgency_t  urgency,
817 	uint64_t                        deadline,
818 	uint64_t                        leeway);
819 
820 
821 /* Wake up thread (or threads) waiting on a particular event */
822 extern kern_return_t    thread_wakeup_prim(
823 	event_t                         event,
824 	boolean_t                       one_thread,
825 	wait_result_t                   result);
826 
827 /* Wake up at most the given number of threads waiting on a particular event */
828 extern kern_return_t    thread_wakeup_nthreads_prim(
829 	event_t                         event,
830 	uint32_t                        nthreads,
831 	wait_result_t                   result);
832 
/* Calls thread_wakeup_prim() on event x with one_thread = FALSE and result THREAD_AWAKENED */
833 #define thread_wakeup(x)                                        \
834 	                thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
/* Calls thread_wakeup_prim() on event x with one_thread = FALSE and the caller-chosen wait result z */
835 #define thread_wakeup_with_result(x, z)         \
836 	                thread_wakeup_prim((x), FALSE, (z))
/* Calls thread_wakeup_prim() on event x with one_thread = TRUE and result THREAD_AWAKENED */
837 #define thread_wakeup_one(x)                            \
838 	                thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
839 
/* Calls thread_wakeup_nthreads_prim() on event x with the given nthreads and result THREAD_AWAKENED */
840 #define thread_wakeup_nthreads(x, nthreads) \
841 	                thread_wakeup_nthreads_prim((x), (nthreads), THREAD_AWAKENED)
/* Calls thread_wakeup_nthreads_prim() on event x with the given nthreads and the caller-chosen wait result z */
842 #define thread_wakeup_nthreads_with_result(x, nthreads, z) \
843 	                thread_wakeup_nthreads_prim((x), (nthreads), (z))
844 
845 /* Wakeup the specified thread if it is waiting on this event */
/*
 * Takes the specific thread_t to wake in addition to the event.
 * NOTE(review): the kern_return_t reported when `thread` is not waiting on
 * `event` is not visible from this header -- check the definition.
 */
846 extern kern_return_t thread_wakeup_thread(event_t event, thread_t thread);
847 
/* NOTE(review): presumably reports whether preemption is currently enabled for the caller -- confirm the scope (per-thread vs per-CPU) at the definition */
848 extern boolean_t preemption_enabled(void);
849 
850 #if defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS
851 
852 
/*
 * Build-time check: stop the compile unless at least one of
 * CONFIG_SCHED_TIMESHARE_CORE, CONFIG_SCHED_CLUTCH or CONFIG_SCHED_EDGE
 * evaluates to non-zero.
 */
853 #if   !CONFIG_SCHED_TIMESHARE_CORE && !CONFIG_SCHED_CLUTCH && !CONFIG_SCHED_EDGE
854 #error Enable at least one scheduler algorithm in osfmk/conf/MASTER.XXX
855 #endif
856 
857 /*
858  * The scheduling policy is fixed at compile-time, in order to save the performance
859  * cost of function pointer indirection that we would otherwise pay each time when
860  * making a policy-specific callout.
 *
 * SCHED(f) expands to member f of the dispatch table selected here:
 *   __AMP__ with CONFIG_SCHED_EDGE             -> sched_edge_dispatch
 *   __AMP__ without CONFIG_SCHED_EDGE          -> sched_amp_dispatch
 *   no __AMP__, with CONFIG_SCHED_CLUTCH       -> sched_clutch_dispatch
 *   no __AMP__, without CONFIG_SCHED_CLUTCH    -> sched_dualq_dispatch
861  */
862 #if __AMP__
863 
864 #if CONFIG_SCHED_EDGE
865 extern const struct sched_dispatch_table sched_edge_dispatch;
866 #define SCHED(f) (sched_edge_dispatch.f)
867 #else /* CONFIG_SCHED_EDGE */
868 extern const struct sched_dispatch_table sched_amp_dispatch;
869 #define SCHED(f) (sched_amp_dispatch.f)
870 #endif /* CONFIG_SCHED_EDGE */
871 
872 #else /* __AMP__ */
873 
874 #if CONFIG_SCHED_CLUTCH
875 extern const struct sched_dispatch_table sched_clutch_dispatch;
876 #define SCHED(f) (sched_clutch_dispatch.f)
877 #else /* CONFIG_SCHED_CLUTCH */
878 extern const struct sched_dispatch_table sched_dualq_dispatch;
879 #define SCHED(f) (sched_dualq_dispatch.f)
880 #endif /* CONFIG_SCHED_CLUTCH */
881 
882 #endif /* __AMP__ */
883 
/*
 * Table of scheduler-policy callouts and capability flags. One const
 * instance is declared per policy (sched_dualq_dispatch, sched_amp_dispatch,
 * sched_clutch_dispatch, sched_edge_dispatch); SCHED(f) resolves to member f
 * of the instance chosen at compile time.
 */
884 struct sched_dispatch_table {
885 	const char *sched_name;
886 	void    (*init)(void);                          /* Init global state */
887 	void    (*timebase_init)(void);         /* Timebase-dependent initialization */
888 	void    (*processor_init)(processor_t processor);       /* Per-processor scheduler init */
889 	void    (*pset_init)(processor_set_t pset);     /* Per-processor set scheduler init */
890 
891 	void    (*maintenance_continuation)(void);      /* Function called regularly */
892 
893 	/*
894 	 * Choose a thread of greater or equal priority from the per-processor
895 	 * runqueue for timeshare/fixed threads
896 	 */
897 	thread_t        (*choose_thread)(
898 		processor_t           processor,
899 		int                           priority,
900 		thread_t              prev_thread,
901 		ast_t reason);
902 
903 	/* True if scheduler supports stealing threads for this pset */
904 	bool    (*steal_thread_enabled)(processor_set_t pset);
905 
906 	/*
907 	 * Steal a thread from another processor in the pset so that it can run
908 	 * immediately
909 	 */
910 	thread_t        (*steal_thread)(
911 		processor_set_t         pset);
912 
913 	/*
914 	 * Compute priority for a timeshare thread based on base priority.
915 	 */
916 	int (*compute_timeshare_priority)(thread_t thread);
917 
918 	/*
919 	 * Pick the best node for a thread to run on.
920 	 */
921 	pset_node_t (*choose_node)(
922 		thread_t                      thread);
923 
924 	/*
925 	 * Pick the best processor for a thread (any kind of thread) to run on.
926 	 */
927 	processor_t     (*choose_processor)(
928 		processor_set_t                pset,
929 		processor_t                    processor,
930 		thread_t                       thread);
931 	/*
932 	 * Enqueue a timeshare or fixed priority thread onto the per-processor
933 	 * runqueue
934 	 */
935 	boolean_t (*processor_enqueue)(
936 		processor_t                    processor,
937 		thread_t                       thread,
938 		sched_options_t                options);
939 
940 	/* Migrate threads away in preparation for processor shutdown */
941 	void (*processor_queue_shutdown)(
942 		processor_t                    processor);
943 
944 	/* Remove the specific thread from the per-processor runqueue */
945 	boolean_t       (*processor_queue_remove)(
946 		processor_t             processor,
947 		thread_t                thread);
948 
949 	/*
950 	 * Does the per-processor runqueue have any timeshare or fixed priority
951 	 * threads on it? Called without pset lock held, so should
952 	 * not assume immutability while executing.
953 	 */
954 	boolean_t       (*processor_queue_empty)(processor_t            processor);
955 
956 	/*
957 	 * Would this priority trigger an urgent preemption if it's sitting
958 	 * on the per-processor runqueue?
959 	 */
960 	boolean_t       (*priority_is_urgent)(int priority);
961 
962 	/*
963 	 * Does the per-processor runqueue contain runnable threads that
964 	 * should cause the currently-running thread to be preempted?
965 	 */
966 	ast_t           (*processor_csw_check)(processor_t processor);
967 
968 	/*
969 	 * Does the per-processor runqueue contain a runnable thread
970 	 * of > or >= priority, as a preflight for choose_thread() or other
971 	 * thread selection
972 	 */
973 	boolean_t       (*processor_queue_has_priority)(processor_t             processor,
974 	    int                             priority,
975 	    boolean_t               gte);
976 
977 	/* Quantum size for the specified non-realtime thread. */
978 	uint32_t        (*initial_quantum_size)(thread_t thread);
979 
980 	/* Scheduler mode for a new thread */
981 	sched_mode_t    (*initial_thread_sched_mode)(task_t parent_task);
982 
983 	/*
984 	 * Is it safe to call update_priority, which may change a thread's
985 	 * runqueue or other state. This can be used to throttle changes
986 	 * to dynamic priority.
987 	 */
988 	boolean_t       (*can_update_priority)(thread_t thread);
989 
990 	/*
991 	 * Update both scheduled priority and other persistent state.
992 	 * Side effects may include migration to another processor's runqueue.
993 	 */
994 	void            (*update_priority)(thread_t thread);
995 
996 	/* Lower overhead update to scheduled priority and state. */
997 	void            (*lightweight_update_priority)(thread_t thread);
998 
999 	/* Callback for non-realtime threads when the quantum timer fires */
1000 	void            (*quantum_expire)(thread_t thread);
1001 
1002 	/*
1003 	 * Runnable threads on per-processor runqueue. Should only
1004 	 * be used for relative comparisons of load between processors.
1005 	 */
1006 	int                     (*processor_runq_count)(processor_t     processor);
1007 
1008 	/* Aggregate runcount statistics for per-processor runqueue */
1009 	uint64_t    (*processor_runq_stats_count_sum)(processor_t   processor);
1010 
	/*
	 * NOTE(review): named like a count but declared to return boolean_t.
	 * Confirm the intended return type with the implementations before
	 * changing it; every dispatch table initializer depends on this signature.
	 */
1011 	boolean_t       (*processor_bound_count)(processor_t processor);
1012 
1013 	void            (*thread_update_scan)(sched_update_scan_context_t scan_context);
1014 
1015 	/* Supports more than one pset */
1016 	boolean_t   multiple_psets_enabled;
1017 
1018 	/* Supports avoid-processor */
1019 	boolean_t   avoid_processor_enabled;
1020 
1021 	/* Returns true if this processor should avoid running this thread. */
1022 	bool    (*thread_avoid_processor)(processor_t processor, thread_t thread, ast_t reason);
1023 
1024 	/*
1025 	 * Invoked when a processor is about to choose the idle thread
1026 	 * Used to send IPIs to a processor which would be preferred to be idle instead.
1027 	 * Returns true if the current processor should anticipate a quick IPI reply back
1028 	 * from another core.
1029 	 * Called with pset lock held, returns with pset lock unlocked.
1030 	 */
1031 	bool    (*processor_balance)(processor_t processor, processor_set_t pset);
	/*
	 * NOTE(review): the rt_* members below carry no description in this
	 * header; presumably they are the realtime run-queue callouts -- confirm
	 * against the policy implementations.
	 */
1032 	rt_queue_t      (*rt_runq)(processor_set_t pset);
1033 	void    (*rt_init)(processor_set_t pset);
1034 	void    (*rt_queue_shutdown)(processor_t processor);
1035 	void    (*rt_runq_scan)(sched_update_scan_context_t scan_context);
1036 	int64_t (*rt_runq_count_sum)(void);
1037 	thread_t (*rt_steal_thread)(processor_set_t pset, uint64_t earliest_deadline);
1038 
1039 	uint32_t (*qos_max_parallelism)(int qos, uint64_t options);
1040 	void    (*check_spill)(processor_set_t pset, thread_t thread);
1041 	sched_ipi_type_t (*ipi_policy)(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event);
1042 	bool    (*thread_should_yield)(processor_t processor, thread_t thread);
1043 
1044 	/* Routine to update run counts */
1045 	uint32_t (*run_count_incr)(thread_t thread);
1046 	uint32_t (*run_count_decr)(thread_t thread);
1047 
1048 	/* Routine to update scheduling bucket for a thread */
1049 	void (*update_thread_bucket)(thread_t thread);
1050 
1051 	/* Routine to inform the scheduler when a new pset becomes schedulable */
1052 	void (*pset_made_schedulable)(processor_t processor, processor_set_t pset, boolean_t drop_lock);
1053 #if CONFIG_THREAD_GROUPS
1054 	/* Routine to inform the scheduler when CLPC changes a thread group recommendation */
1055 	void (*thread_group_recommendation_change)(struct thread_group *tg, cluster_type_t new_recommendation);
1056 #endif
1057 	/* Routine to inform the scheduler when all CPUs have finished initializing */
1058 	void (*cpu_init_completed)(void);
1059 	/* Routine to check if a thread is eligible to execute on a specific pset */
1060 	bool (*thread_eligible_for_pset)(thread_t thread, processor_set_t pset);
1061 };
1062 
/*
 * Declarations of the per-policy dispatch tables. sched_dualq_dispatch is
 * declared unconditionally; the others are guarded by their configuration.
 * Whichever table SCHED() selected has already been declared earlier in this
 * header, so that declaration is repeated here.
 * NOTE(review): the clutch/edge guards here use defined(CONFIG_...) while
 * the SCHED() selection tests the macro's value; the two disagree if a
 * CONFIG_ macro is ever defined as 0.
 */
1063 extern const struct sched_dispatch_table sched_dualq_dispatch;
1064 #if __AMP__
1065 extern const struct sched_dispatch_table sched_amp_dispatch;
1066 #endif
1067 
1068 #if defined(CONFIG_SCHED_CLUTCH)
1069 extern const struct sched_dispatch_table sched_clutch_dispatch;
1070 #endif
1071 
1072 #if defined(CONFIG_SCHED_EDGE)
1073 extern const struct sched_dispatch_table sched_edge_dispatch;
1074 #endif
1075 
/* NOTE(review): presumably sets the limit on "unsafe" realtime quanta to `max`; the meaning and valid range of that limit are defined outside this header */
1075 extern void sched_set_max_unsafe_rt_quanta(int max);
/* NOTE(review): presumably sets the limit on "unsafe" fixed-priority quanta to `max`; the meaning and valid range of that limit are defined outside this header */
1076 extern void sched_set_max_unsafe_fixed_quanta(int max);
1078 
1079 #endif  /* defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS */
1080 
1081 __END_DECLS
1082 
1083 #endif  /* _KERN_SCHED_PRIM_H_ */
1084