/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	sched_prim.h
 *	Author:	David Golub
 *
 *	Scheduling primitive definitions file
 *
 */

#ifndef _KERN_SCHED_PRIM_H_
#define _KERN_SCHED_PRIM_H_

#include <sys/cdefs.h>
#include <mach/boolean.h>
#include <mach/machine/vm_types.h>
#include <mach/kern_return.h>
#include <kern/clock.h>
#include <kern/kern_types.h>
#include <kern/percpu.h>
#include <kern/thread.h>
#include <kern/block_hint.h>

extern int              thread_get_current_cpuid(void);

#ifdef  MACH_KERNEL_PRIVATE

#include <kern/sched_urgency.h>
#include <kern/thread_group.h>
#include <kern/waitq.h>

/* Initialization */
extern void             sched_init(void);

extern void             sched_startup(void);

extern void             sched_timebase_init(void);

extern void             pset_rt_init(processor_set_t pset);

extern void             sched_rtlocal_init(processor_set_t pset);

extern rt_queue_t       sched_rtlocal_runq(processor_set_t pset);

extern void             sched_rtlocal_queue_shutdown(processor_t processor);

extern int64_t          sched_rtlocal_runq_count_sum(void);

extern thread_t         sched_rtlocal_steal_thread(processor_set_t stealing_pset, uint64_t earliest_deadline);

extern thread_t         sched_rt_choose_thread(processor_set_t pset);

extern void             sched_check_spill(processor_set_t pset, thread_t thread);

extern bool             sched_thread_should_yield(processor_t processor, thread_t thread);

extern bool             sched_steal_thread_DISABLED(processor_set_t pset);
extern bool             sched_steal_thread_enabled(processor_set_t pset);

/* Force a preemption point for a thread and wait for it to stop running */
extern boolean_t        thread_stop(
	thread_t        thread,
	boolean_t       until_not_runnable);

/* Release a previous stop request */
extern void                     thread_unstop(
	thread_t        thread);

/* Wait for a thread to stop running */
extern void                     thread_wait(
	thread_t        thread,
	boolean_t       until_not_runnable);

/* Unblock thread on wake up */
extern boolean_t        thread_unblock(
	thread_t                thread,
	wait_result_t   wresult);

/* Unblock and dispatch thread */
extern void thread_go(
	thread_t                thread,
	wait_result_t           wresult,
	bool                    try_handoff);

/* Check if direct handoff is allowed */
extern boolean_t
thread_allowed_for_handoff(
	thread_t         thread);

/* Handle threads at context switch */
extern void                     thread_dispatch(
	thread_t                old_thread,
	thread_t                new_thread);

/* Switch directly to a particular thread */
extern int                      thread_run(
	thread_t                        self,
	thread_continue_t       continuation,
	void                            *parameter,
	thread_t                        new_thread);

/* Resume thread with new stack */
extern __dead2 void     thread_continue(thread_t old_thread);

/* Invoke continuation */
extern __dead2 void     call_continuation(
	thread_continue_t       continuation,
	void                    *parameter,
	wait_result_t           wresult,
	boolean_t               enable_interrupts);

/*
 * Flags that can be passed to set_sched_pri
 * to skip side effects
 */
__options_decl(set_sched_pri_options_t, uint32_t, {
	SETPRI_DEFAULT  = 0x0,
	SETPRI_LAZY     = 0x1,  /* Avoid setting AST flags or sending IPIs */
});

/* Set the current scheduled priority */
extern void set_sched_pri(
	thread_t      thread,
	int16_t       priority,
	set_sched_pri_options_t options);

/* Set base priority of the specified thread */
extern void             sched_set_thread_base_priority(
	thread_t                thread,
	int                             priority);

/* Set absolute base priority of the specified thread */
extern void             sched_set_kernel_thread_priority(
	thread_t                thread,
	int                             priority);


/* Set the thread's true scheduling mode */
extern void             sched_set_thread_mode(thread_t thread,
    sched_mode_t mode);

/*
 * Set the thread's scheduling mode, taking into account that the thread may
 * have been demoted.
 */
extern void             sched_set_thread_mode_user(thread_t thread,
    sched_mode_t mode);

/*
 * Get the thread's scheduling mode, taking into account that the thread may
 * have been demoted.
 */
extern sched_mode_t     sched_get_thread_mode_user(thread_t thread);


/* Demote the true scheduler mode */
extern void             sched_thread_mode_demote(thread_t thread,
    uint32_t reason);
/* Un-demote the true scheduler mode */
extern void             sched_thread_mode_undemote(thread_t thread,
    uint32_t reason);
/* Check for a specific demotion */
extern bool             sched_thread_mode_has_demotion(thread_t thread,
    uint32_t reason);

extern void sched_thread_promote_reason(thread_t thread, uint32_t reason, uintptr_t trace_obj);
extern void sched_thread_unpromote_reason(thread_t thread, uint32_t reason, uintptr_t trace_obj);

/* Re-evaluate base priority of thread (thread locked) */
void thread_recompute_priority(thread_t thread);

/* Re-evaluate scheduled priority of thread (thread locked) */
extern void thread_recompute_sched_pri(
	thread_t thread,
	set_sched_pri_options_t options);

/* Periodic scheduler activity */
extern void             sched_init_thread(void);

/* Perform sched_tick housekeeping activities */
extern boolean_t                can_update_priority(
	thread_t                thread);

extern void             update_priority(
	thread_t                thread);

extern void             lightweight_update_priority(
	thread_t                thread);

extern void             sched_default_quantum_expire(thread_t thread);

/* Idle processor thread continuation */
extern void             idle_thread(
	void*           parameter,
	wait_result_t   result);

extern kern_return_t    idle_thread_create(
	processor_t             processor);

/* Continuation return from syscall */
extern void     thread_syscall_return(
	kern_return_t   ret);

/* Context switch */
extern wait_result_t    thread_block_reason(
	thread_continue_t       continuation,
	void                            *parameter,
	ast_t                           reason);

__options_decl(sched_options_t, uint32_t, {
	SCHED_NONE      = 0x0,
	SCHED_TAILQ     = 0x1,
	SCHED_HEADQ     = 0x2,
	SCHED_PREEMPT   = 0x4,
	SCHED_REBALANCE = 0x8,
});

/* Reschedule thread for execution */
extern void             thread_setrun(
	thread_t        thread,
	sched_options_t options);

extern processor_set_t  task_choose_pset(
	task_t                  task);

/* Bind the current thread to a particular processor */
extern processor_t      thread_bind(
	processor_t             processor);

extern void             thread_bind_during_wakeup(
	thread_t                thread,
	processor_t             processor);

extern void             thread_unbind_after_queue_shutdown(
	thread_t                thread,
	processor_t             processor);

extern bool pset_has_stealable_threads(
	processor_set_t         pset);

extern bool pset_has_stealable_rt_threads(
	processor_set_t         pset);

extern processor_set_t choose_starting_pset(
	pset_node_t  node,
	thread_t     thread,
	processor_t *processor_hint);

extern int pset_available_cpu_count(
	processor_set_t pset);

extern bool pset_is_recommended(
	processor_set_t pset);

extern pset_node_t sched_choose_node(
	thread_t     thread);

/* Choose the best processor to run a thread */
extern processor_t      choose_processor(
	processor_set_t                pset,
	processor_t                    processor,
	thread_t                       thread);

extern bool sched_SMT_balance(
	processor_t processor,
	processor_set_t pset);

extern void thread_quantum_init(
	thread_t thread,
	uint64_t now);


extern void             run_queue_init(
	run_queue_t             runq);

extern thread_t run_queue_dequeue(
	run_queue_t           runq,
	sched_options_t       options);

extern boolean_t        run_queue_enqueue(
	run_queue_t           runq,
	thread_t              thread,
	sched_options_t       options);

extern void     run_queue_remove(
	run_queue_t            runq,
	thread_t                       thread);

extern thread_t run_queue_peek(
	run_queue_t            runq);

struct sched_update_scan_context {
	uint64_t        earliest_bg_make_runnable_time;
	uint64_t        earliest_normal_make_runnable_time;
	uint64_t        earliest_rt_make_runnable_time;
	uint64_t        sched_tick_last_abstime;
};
typedef struct sched_update_scan_context *sched_update_scan_context_t;

extern void             sched_rtlocal_runq_scan(sched_update_scan_context_t scan_context);

extern void sched_pset_made_schedulable(
	processor_t processor,
	processor_set_t pset,
	boolean_t drop_lock);

extern void sched_cpu_init_completed(void);

/*
 * Enum defining the events that require IPIs. The IPI policy
 * engine decides what kind of IPI to use based on the destination
 * processor's state, the thread, and one of the following scheduling events.
 */
typedef enum {
	SCHED_IPI_EVENT_BOUND_THR   = 0x1,
	SCHED_IPI_EVENT_PREEMPT     = 0x2,
	SCHED_IPI_EVENT_SMT_REBAL   = 0x3,
	SCHED_IPI_EVENT_SPILL       = 0x4,
	SCHED_IPI_EVENT_REBALANCE   = 0x5,
	SCHED_IPI_EVENT_RT_PREEMPT  = 0x6,
} sched_ipi_event_t;


/* Enum to define various IPI types used by the scheduler */
typedef enum {
	SCHED_IPI_NONE              = 0x0,
	SCHED_IPI_IMMEDIATE         = 0x1,
	SCHED_IPI_IDLE              = 0x2,
	SCHED_IPI_DEFERRED          = 0x3,
} sched_ipi_type_t;

/* The IPI policy engine behaves in the following manner:
 * - All scheduler events which need an IPI invoke sched_ipi_action() with
 *   the appropriate destination processor, thread and event.
 * - sched_ipi_action() performs basic checks, invokes the scheduler-specific
 *   ipi_policy routine and sets pending_AST bits based on the result.
 * - Once the pset lock is dropped, the scheduler invokes the sched_ipi_perform()
 *   routine, which actually sends the appropriate IPI to the destination core.
 */
extern sched_ipi_type_t sched_ipi_action(processor_t dst, thread_t thread, sched_ipi_event_t event);
extern void sched_ipi_perform(processor_t dst, sched_ipi_type_t ipi);
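
/*
 * A minimal sketch of the two-phase flow described above (the call site shown
 * here is illustrative; see the scheduler sources for the real ones):
 * ```
 * pset_lock(pset);
 * sched_ipi_type_t ipi = sched_ipi_action(processor, thread, SCHED_IPI_EVENT_PREEMPT);
 * pset_unlock(pset);
 * sched_ipi_perform(processor, ipi);   // IPI is sent only after the pset lock drops
 * ```
 */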

/* sched_ipi_policy() is the global default IPI policy for all schedulers */
extern sched_ipi_type_t sched_ipi_policy(processor_t dst, thread_t thread,
    boolean_t dst_idle, sched_ipi_event_t event);

/* sched_ipi_deferred_policy() is the global default deferred IPI policy for all schedulers */
extern sched_ipi_type_t sched_ipi_deferred_policy(processor_set_t pset,
    processor_t dst, thread_t thread, sched_ipi_event_t event);

#if defined(CONFIG_SCHED_TIMESHARE_CORE)

extern boolean_t        thread_update_add_thread(thread_t thread);
extern void             thread_update_process_threads(void);
extern boolean_t        runq_scan(run_queue_t runq, sched_update_scan_context_t scan_context);

#if CONFIG_SCHED_CLUTCH
extern boolean_t        sched_clutch_timeshare_scan(queue_t thread_queue, uint16_t count, sched_update_scan_context_t scan_context);
#endif /* CONFIG_SCHED_CLUTCH */

extern void sched_timeshare_init(void);
extern void sched_timeshare_timebase_init(void);
extern void sched_timeshare_maintenance_continue(void);

extern boolean_t priority_is_urgent(int priority);
extern uint32_t sched_timeshare_initial_quantum_size(thread_t thread);

extern int sched_compute_timeshare_priority(thread_t thread);

#endif /* CONFIG_SCHED_TIMESHARE_CORE */

/* Remove thread from its run queue */
extern boolean_t        thread_run_queue_remove(thread_t thread);
thread_t thread_run_queue_remove_for_handoff(thread_t thread);

/* Put a thread back in the run queue after being yanked */
extern void thread_run_queue_reinsert(thread_t thread, sched_options_t options);

extern void             thread_timer_expire(
	void                    *thread,
	void                    *p1);

extern bool thread_is_eager_preempt(thread_t thread);

extern boolean_t sched_generic_direct_dispatch_to_idle_processors;

/* Set the maximum interrupt level for the thread */
__private_extern__ wait_interrupt_t thread_interrupt_level(
	wait_interrupt_t interruptible);

__private_extern__ wait_result_t thread_mark_wait_locked(
	thread_t                 thread,
	wait_interrupt_t interruptible);

/* Wake up locked thread directly, passing result */
__private_extern__ kern_return_t clear_wait_internal(
	thread_t                thread,
	wait_result_t   result);

struct sched_statistics {
	uint32_t        csw_count;
	uint32_t        preempt_count;
	uint32_t        preempted_rt_count;
	uint32_t        preempted_by_rt_count;
	uint32_t        rt_sched_count;
	uint32_t        interrupt_count;
	uint32_t        ipi_count;
	uint32_t        timer_pop_count;
	uint32_t        idle_transitions;
	uint32_t        quantum_timer_expirations;
};
PERCPU_DECL(struct sched_statistics, sched_stats);
extern bool             sched_stats_active;

extern void sched_stats_handle_csw(
	processor_t processor,
	int reasons,
	int selfpri,
	int otherpri);

extern void sched_stats_handle_runq_change(
	struct runq_stats *stats,
	int old_count);

#define SCHED_STATS_INC(field)                                                  \
MACRO_BEGIN                                                                     \
	if (__improbable(sched_stats_active)) {                                 \
	        PERCPU_GET(sched_stats)->field++;                               \
	}                                                                       \
MACRO_END
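
/*
 * Example (illustrative): bump a counter in the current CPU's per-CPU
 * statistics; the increment is compiled in but skipped at runtime unless
 * statistics collection is active.
 * ```
 * SCHED_STATS_INC(csw_count);
 * ```
 */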

#if DEBUG

#define SCHED_STATS_CSW(processor, reasons, selfpri, otherpri)                  \
MACRO_BEGIN                                                                     \
	if (__improbable(sched_stats_active)) {                                 \
	        sched_stats_handle_csw((processor),                             \
	            (reasons), (selfpri), (otherpri));                          \
	}                                                                       \
MACRO_END


#define SCHED_STATS_RUNQ_CHANGE(stats, old_count)                               \
MACRO_BEGIN                                                                     \
	if (__improbable(sched_stats_active)) {                                 \
	        sched_stats_handle_runq_change((stats), (old_count));           \
	}                                                                       \
MACRO_END

#else /* DEBUG */

#define SCHED_STATS_CSW(processor, reasons, selfpri, otherpri) do { } while (0)
#define SCHED_STATS_RUNQ_CHANGE(stats, old_count) do { } while (0)

#endif /* DEBUG */

extern uint32_t sched_debug_flags;
#define SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS           0x00000001
#define SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS   0x00000002
#define SCHED_DEBUG_FLAG_AST_CHECK_TRACEPOINTS          0x00000004

#define SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(...)                         \
MACRO_BEGIN                                                                     \
	if (__improbable(sched_debug_flags &                                    \
	    SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS)) {                           \
	        KERNEL_DEBUG_CONSTANT(__VA_ARGS__);                             \
	}                                                                       \
MACRO_END

#define SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT_IST(...)             \
MACRO_BEGIN                                                                     \
	if (__improbable(sched_debug_flags &                                    \
	    SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS)) {                   \
	        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, __VA_ARGS__);           \
	}                                                                       \
MACRO_END

#define SCHED_DEBUG_AST_CHECK_KDBG_RELEASE(...)                                 \
MACRO_BEGIN                                                                     \
	if (__improbable(sched_debug_flags &                                    \
	    SCHED_DEBUG_FLAG_AST_CHECK_TRACEPOINTS)) {                          \
	        KDBG_RELEASE(__VA_ARGS__);                                      \
	}                                                                       \
MACRO_END


/* Tells if there are "active" RT threads in the system (provided by CPU PM) */
extern void     active_rt_threads(
	boolean_t       active);

/* Returns the perfcontrol attribute for the thread */
extern perfcontrol_class_t thread_get_perfcontrol_class(
	thread_t        thread);

/* Generic routine for Non-AMP schedulers to calculate parallelism */
extern uint32_t sched_qos_max_parallelism(int qos, uint64_t options);

extern void check_monotonic_time(uint64_t ctime);

#endif /* MACH_KERNEL_PRIVATE */

__BEGIN_DECLS

#ifdef  XNU_KERNEL_PRIVATE

extern void thread_bind_cluster_type(thread_t, char cluster_type, bool soft_bind);

__options_decl(thread_bind_option_t, uint64_t, {
	/* Unbind a previously cluster bound thread */
	THREAD_UNBIND                   = 0x1,
	/*
	 * Soft bind the thread to the cluster; soft binding means the thread will be
	 * moved to an available cluster if the bound cluster is de-recommended/offline.
	 */
	THREAD_BIND_SOFT                = 0x2,
	/*
	 * Bind thread to the cluster only if it is eligible to run on that cluster. If
	 * the thread is not eligible to run on the cluster, thread_bind_cluster_id()
	 * returns KERN_INVALID_POLICY.
	 */
	THREAD_BIND_ELIGIBLE_ONLY       = 0x4,
});
extern kern_return_t thread_bind_cluster_id(thread_t thread, uint32_t cluster_id, thread_bind_option_t options);
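
/*
 * Example (illustrative): soft-bind the current thread to cluster 1, then
 * unbind it later. The cluster id passed with THREAD_UNBIND is shown as 0
 * here purely for illustration.
 * ```
 * kern_return_t kr = thread_bind_cluster_id(current_thread(), 1, THREAD_BIND_SOFT);
 * ...
 * thread_bind_cluster_id(current_thread(), 0, THREAD_UNBIND);
 * ```
 */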

extern int sched_get_rt_n_backup_processors(void);
extern void sched_set_rt_n_backup_processors(int n);

extern int sched_get_rt_deadline_epsilon(void);
extern void sched_set_rt_deadline_epsilon(int new_epsilon_us);

/* Toggles a global override to turn off CPU Throttling */
extern void     sys_override_cpu_throttle(boolean_t enable_override);

extern int sched_get_powered_cores(void);
extern void sched_set_powered_cores(int n);

/*
 ****************** Only exported until BSD stops using ********************
 */

extern void                     thread_vm_bind_group_add(void);

/* Wake up thread directly, passing result */
extern kern_return_t clear_wait(
	thread_t                thread,
	wait_result_t   result);

/* Start thread running */
extern void             thread_bootstrap_return(void) __attribute__((noreturn));

/* Return from exception (BSD-visible interface) */
extern void             thread_exception_return(void) __dead2;

#define SCHED_STRING_MAX_LENGTH (48)
/* String declaring the name of the current scheduler */
extern char sched_string[SCHED_STRING_MAX_LENGTH];

__options_decl(thread_handoff_option_t, uint32_t, {
	THREAD_HANDOFF_NONE          = 0,
	THREAD_HANDOFF_SETRUN_NEEDED = 0x1,
});

/* Remove thread from its run queue */
thread_t thread_prepare_for_handoff(thread_t thread, thread_handoff_option_t option);

/* Attempt to context switch to a specific runnable thread */
extern wait_result_t thread_handoff_deallocate(thread_t thread, thread_handoff_option_t option);

__attribute__((nonnull(2)))
extern void thread_handoff_parameter(thread_t thread,
    thread_continue_t continuation, void *parameter, thread_handoff_option_t) __dead2;
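
/*
 * Sketch of a direct handoff (illustrative): thread_handoff_deallocate()
 * consumes a reference on the target thread, so the caller takes one first.
 * ```
 * thread_reference(target);
 * wait_result_t wr = thread_handoff_deallocate(target, THREAD_HANDOFF_NONE);
 * ```
 */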

extern struct waitq     *assert_wait_queue(event_t event);

extern kern_return_t thread_wakeup_one_with_pri(event_t event, int priority);

extern thread_t thread_wakeup_identify(event_t event, int priority);

635  * sched_cond_t:
636  *
637  * A atomic condition variable used to synchronize wake/block operations on threads.
638  * Bits defined below are reserved for use by sched_prim. Remaining
639  * bits may be used by caller for additional synchronization semantics.
640  */
__options_decl(sched_cond_t, uint32_t, {
	SCHED_COND_INIT = 0x0000,    /* initialize all bits to zero (inactive and not awoken) */
	SCHED_COND_ACTIVE = 0x0001,  /* target thread is active */
	SCHED_COND_WAKEUP = 0x0002   /* wakeup has been issued for target thread */
});
typedef _Atomic sched_cond_t sched_cond_atomic_t;

/*
 * sched_cond_init:
 *
 * Initialize an atomic condition variable. Note that this does not occur atomically and should be
 * performed during thread initialization, before the condition is observable by other threads.
 */
extern void sched_cond_init(
	sched_cond_atomic_t *cond);

/*
 * sched_cond_signal:
 *
 * Wakeup the specified thread if it is waiting on this event and it has not already been issued a wakeup.
 *
 * parameters:
 *      thread    thread to awaken
 *      cond      atomic condition variable
 */
extern kern_return_t sched_cond_signal(
	sched_cond_atomic_t *cond,
	thread_t thread);

/*
 * sched_cond_wait_parameter:
 *
 * Assert wait and block on cond if no wakeup has been issued.
 * If a wakeup has been issued on cond since the last `sched_cond_ack`, clear_wait and
 * return `THREAD_AWAKENED`.
 *
 * `sched_cond_wait_parameter` must be paired with `sched_cond_ack`.
 *
 * NOTE: `continuation` will only be jumped to if a wakeup has not been issued
 *
 * parameters:
 *      cond             atomic condition variable to synchronize on
 *      interruptible    interruptible value to pass to assert_wait
 *      continuation     continuation if block succeeds
 *      parameter        parameter passed to the continuation
 */
extern wait_result_t sched_cond_wait_parameter(
	sched_cond_atomic_t *cond,
	wait_interrupt_t interruptible,
	thread_continue_t continuation,
	void *parameter);

/*
 * sched_cond_wait:
 *
 * Assert wait and block on cond if no wakeup has been issued.
 * If a wakeup has been issued on cond since the last `sched_cond_ack`, clear_wait and
 * return `THREAD_AWAKENED`.
 *
 * `sched_cond_wait` must be paired with `sched_cond_ack`.
 *
 * NOTE: `continuation` will only be jumped to if a wakeup has not been issued
 *
 * parameters:
 *      cond             atomic condition variable to synchronize on
 *      interruptible    interruptible value to pass to assert_wait
 *      continuation     continuation if block succeeds
 */
extern wait_result_t sched_cond_wait(
	sched_cond_atomic_t *cond,
	wait_interrupt_t interruptible,
	thread_continue_t continuation);

/*
 * sched_cond_ack:
 *
 * Acknowledge an issued wakeup by clearing WAKEUP and setting ACTIVE (via XOR).
 * It is the caller's responsibility to ensure that the ACTIVE bit is always low prior to calling
 * (i.e. by calling `sched_cond_wait` prior to any rerun or block).
 * Synchronization schemes that allow the WAKEUP bit to be reset prior to wakeup
 * (e.g. a cancellation mechanism) should check that WAKEUP was indeed cleared.
 *
 * e.g.
 * ```
 * if (sched_cond_ack(&my_state) & SCHED_COND_WAKEUP) {
 *     // WAKEUP bit was no longer set by the time this thread woke up
 *     do_cancellation_policy();
 * }
 * ```
 *
 * parameters:
 *      cond:    atomic condition variable
 */
extern sched_cond_t sched_cond_ack(
	sched_cond_atomic_t *cond);
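
/*
 * A sketch of the intended pairing (names and setup are illustrative): a
 * worker thread parks itself with `sched_cond_wait` and, each time it is
 * signalled, acknowledges the wakeup before processing:
 * ```
 * static sched_cond_atomic_t my_cond;   // sched_cond_init()'d at thread creation
 *
 * static void
 * worker_continuation(void *param, wait_result_t wr)
 * {
 *     for (;;) {
 *         sched_cond_ack(&my_cond);     // consume the wakeup, mark ACTIVE
 *         do_work();
 *         // Blocks and restarts via worker_continuation, unless another
 *         // wakeup was issued while do_work() ran.
 *         sched_cond_wait(&my_cond, THREAD_UNINT, worker_continuation);
 *     }
 * }
 *
 * // signalling side, with worker_thread from e.g. kernel_thread_start():
 * sched_cond_signal(&my_cond, worker_thread);
 * ```
 */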

#endif  /* XNU_KERNEL_PRIVATE */

#ifdef KERNEL_PRIVATE
/* Set pending block hint for a particular object before we go into a wait state */
extern void             thread_set_pending_block_hint(
	thread_t                        thread,
	block_hint_t                    block_hint);

#define QOS_PARALLELISM_COUNT_LOGICAL                   0x1
#define QOS_PARALLELISM_REALTIME                        0x2
#define QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE         0x4

extern uint32_t qos_max_parallelism(int qos, uint64_t options);
#endif /* KERNEL_PRIVATE */

#if XNU_KERNEL_PRIVATE
extern void             thread_yield_with_continuation(
	thread_continue_t       continuation,
	void                            *parameter) __dead2;
#endif

/* Context switch */
extern wait_result_t    thread_block(
	thread_continue_t       continuation);

extern wait_result_t    thread_block_parameter(
	thread_continue_t       continuation,
	void                            *parameter);

/* Declare thread will wait on a particular event */
extern wait_result_t    assert_wait(
	event_t                         event,
	wait_interrupt_t        interruptible);

/* Assert that the thread intends to wait with a timeout */
extern wait_result_t    assert_wait_timeout(
	event_t                         event,
	wait_interrupt_t        interruptible,
	uint32_t                        interval,
	uint32_t                        scale_factor);

/* Assert that the thread intends to wait with an urgency, timeout and leeway */
extern wait_result_t    assert_wait_timeout_with_leeway(
	event_t                         event,
	wait_interrupt_t        interruptible,
	wait_timeout_urgency_t  urgency,
	uint32_t                        interval,
	uint32_t                        leeway,
	uint32_t                        scale_factor);

extern wait_result_t    assert_wait_deadline(
	event_t                         event,
	wait_interrupt_t        interruptible,
	uint64_t                        deadline);

/* Assert that the thread intends to wait with an urgency, deadline, and leeway */
extern wait_result_t    assert_wait_deadline_with_leeway(
	event_t                         event,
	wait_interrupt_t        interruptible,
	wait_timeout_urgency_t  urgency,
	uint64_t                        deadline,
	uint64_t                        leeway);


/* Wake up thread (or threads) waiting on a particular event */
extern kern_return_t    thread_wakeup_prim(
	event_t                         event,
	boolean_t                       one_thread,
	wait_result_t                   result);

#define thread_wakeup(x)                                        \
	                thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
#define thread_wakeup_with_result(x, z)         \
	                thread_wakeup_prim((x), FALSE, (z))
#define thread_wakeup_one(x)                            \
	                thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)

/* Wakeup the specified thread if it is waiting on this event */
extern kern_return_t thread_wakeup_thread(event_t event, thread_t thread);
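
/*
 * The canonical wait/wakeup pattern built from these primitives (a sketch;
 * `my_object` and its `ready` flag are illustrative):
 * ```
 * // Waiting side: declare the wait on the event, then block.
 * wait_result_t wr = assert_wait((event_t)&my_object, THREAD_UNINT);
 * if (wr == THREAD_WAITING) {
 *         wr = thread_block(THREAD_CONTINUE_NULL);
 * }
 *
 * // Waking side: make the condition true, then wake all waiters.
 * my_object.ready = true;
 * thread_wakeup((event_t)&my_object);
 * ```
 */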

extern boolean_t preemption_enabled(void);

#ifdef MACH_KERNEL_PRIVATE

/*
 * Scheduler algorithm indirection. If only one algorithm is
 * enabled at compile-time, a direct function call is used.
 * If more than one is enabled, calls are dispatched through
 * a function pointer table.
 */

#if   !defined(CONFIG_SCHED_TRADITIONAL) && !defined(CONFIG_SCHED_PROTO) && !defined(CONFIG_SCHED_GRRR) && !defined(CONFIG_SCHED_MULTIQ) && !defined(CONFIG_SCHED_CLUTCH) && !defined(CONFIG_SCHED_EDGE)
#error Enable at least one scheduler algorithm in osfmk/conf/MASTER.XXX
#endif

#if __AMP__

#if CONFIG_SCHED_EDGE
extern const struct sched_dispatch_table sched_edge_dispatch;
#define SCHED(f) (sched_edge_dispatch.f)
#else /* CONFIG_SCHED_EDGE */
extern const struct sched_dispatch_table sched_amp_dispatch;
#define SCHED(f) (sched_amp_dispatch.f)
#endif /* CONFIG_SCHED_EDGE */

#else /* __AMP__ */

#if CONFIG_SCHED_CLUTCH
extern const struct sched_dispatch_table sched_clutch_dispatch;
#define SCHED(f) (sched_clutch_dispatch.f)
#else /* CONFIG_SCHED_CLUTCH */
extern const struct sched_dispatch_table sched_dualq_dispatch;
#define SCHED(f) (sched_dualq_dispatch.f)
#endif /* CONFIG_SCHED_CLUTCH */

#endif /* __AMP__ */
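
/*
 * Callers reach the active scheduler's entry points through the SCHED() macro
 * without knowing which dispatch table was selected at compile time, e.g.:
 * ```
 * thread_t thread = SCHED(choose_thread)(processor, MINPRI, AST_NONE);
 * uint32_t quantum = SCHED(initial_quantum_size)(thread);
 * ```
 */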

struct sched_dispatch_table {
	const char *sched_name;
	void    (*init)(void);                          /* Init global state */
	void    (*timebase_init)(void);         /* Timebase-dependent initialization */
	void    (*processor_init)(processor_t processor);       /* Per-processor scheduler init */
	void    (*pset_init)(processor_set_t pset);     /* Per-processor set scheduler init */

	void    (*maintenance_continuation)(void);      /* Function called regularly */

	/*
	 * Choose a thread of greater or equal priority from the per-processor
	 * runqueue for timeshare/fixed threads
	 */
	thread_t        (*choose_thread)(
		processor_t           processor,
		int                           priority,
		ast_t reason);

	/* True if scheduler supports stealing threads for this pset */
	bool    (*steal_thread_enabled)(processor_set_t pset);

	/*
	 * Steal a thread from another processor in the pset so that it can run
	 * immediately
	 */
	thread_t        (*steal_thread)(
		processor_set_t         pset);

	/*
	 * Compute priority for a timeshare thread based on base priority.
	 */
	int (*compute_timeshare_priority)(thread_t thread);

	/*
	 * Pick the best node for a thread to run on.
	 */
	pset_node_t (*choose_node)(
		thread_t                      thread);

	/*
	 * Pick the best processor for a thread (any kind of thread) to run on.
	 */
	processor_t     (*choose_processor)(
		processor_set_t                pset,
		processor_t                    processor,
		thread_t                       thread);
	/*
	 * Enqueue a timeshare or fixed priority thread onto the per-processor
	 * runqueue
	 */
	boolean_t (*processor_enqueue)(
		processor_t                    processor,
		thread_t                       thread,
		sched_options_t                options);

	/* Migrate threads away in preparation for processor shutdown */
	void (*processor_queue_shutdown)(
		processor_t                    processor);

	/* Remove the specific thread from the per-processor runqueue */
	boolean_t       (*processor_queue_remove)(
		processor_t             processor,
		thread_t                thread);

	/*
	 * Does the per-processor runqueue have any timeshare or fixed priority
	 * threads on it? Called without pset lock held, so should
	 * not assume immutability while executing.
	 */
	boolean_t       (*processor_queue_empty)(processor_t            processor);

	/*
	 * Would this priority trigger an urgent preemption if it's sitting
	 * on the per-processor runqueue?
	 */
	boolean_t       (*priority_is_urgent)(int priority);

	/*
	 * Does the per-processor runqueue contain runnable threads that
	 * should cause the currently-running thread to be preempted?
	 */
	ast_t           (*processor_csw_check)(processor_t processor);

	/*
	 * Does the per-processor runqueue contain a runnable thread
	 * of > or >= priority, as a preflight for choose_thread() or other
	 * thread selection?
	 */
	boolean_t       (*processor_queue_has_priority)(processor_t             processor,
	    int                             priority,
	    boolean_t               gte);

	/* Quantum size for the specified non-realtime thread. */
	uint32_t        (*initial_quantum_size)(thread_t thread);

	/* Scheduler mode for a new thread */
	sched_mode_t    (*initial_thread_sched_mode)(task_t parent_task);

	/*
	 * Is it safe to call update_priority, which may change a thread's
	 * runqueue or other state? This can be used to throttle changes
	 * to dynamic priority.
	 */
	boolean_t       (*can_update_priority)(thread_t thread);

	/*
	 * Update both scheduled priority and other persistent state.
	 * Side effects may include migration to another processor's runqueue.
	 */
	void            (*update_priority)(thread_t thread);

	/* Lower overhead update to scheduled priority and state. */
	void            (*lightweight_update_priority)(thread_t thread);

	/* Callback for non-realtime threads when the quantum timer fires */
	void            (*quantum_expire)(thread_t thread);

	/*
	 * Runnable threads on per-processor runqueue. Should only
	 * be used for relative comparisons of load between processors.
	 */
	int                     (*processor_runq_count)(processor_t     processor);

	/* Aggregate runcount statistics for per-processor runqueue */
	uint64_t    (*processor_runq_stats_count_sum)(processor_t   processor);

	boolean_t       (*processor_bound_count)(processor_t processor);

	void            (*thread_update_scan)(sched_update_scan_context_t scan_context);

	/* Supports more than one pset */
	boolean_t   multiple_psets_enabled;
	/* Supports scheduler groups */
	boolean_t   sched_groups_enabled;

	/* Supports avoid-processor */
	boolean_t   avoid_processor_enabled;

	/* Returns true if this processor should avoid running this thread. */
	bool    (*thread_avoid_processor)(processor_t processor, thread_t thread, ast_t reason);

	/*
	 * Invoked when a processor is about to choose the idle thread.
	 * Used to send IPIs to a processor which would be preferred to be idle instead.
	 * Returns true if the current processor should anticipate a quick IPI reply back
	 * from another core.
	 * Called with pset lock held, returns with pset lock unlocked.
	 */
	bool    (*processor_balance)(processor_t processor, processor_set_t pset);
	rt_queue_t      (*rt_runq)(processor_set_t pset);
	void    (*rt_init)(processor_set_t pset);
	void    (*rt_queue_shutdown)(processor_t processor);
	void    (*rt_runq_scan)(sched_update_scan_context_t scan_context);
	int64_t (*rt_runq_count_sum)(void);
	thread_t (*rt_steal_thread)(processor_set_t pset, uint64_t earliest_deadline);

	uint32_t (*qos_max_parallelism)(int qos, uint64_t options);
	void    (*check_spill)(processor_set_t pset, thread_t thread);
	sched_ipi_type_t (*ipi_policy)(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event);
	bool    (*thread_should_yield)(processor_t processor, thread_t thread);

	/* Routines to update run counts */
	uint32_t (*run_count_incr)(thread_t thread);
	uint32_t (*run_count_decr)(thread_t thread);

	/* Routine to update scheduling bucket for a thread */
	void (*update_thread_bucket)(thread_t thread);

	/* Routine to inform the scheduler when a new pset becomes schedulable */
	void (*pset_made_schedulable)(processor_t processor, processor_set_t pset, boolean_t drop_lock);
#if CONFIG_THREAD_GROUPS
	/* Routine to inform the scheduler when CLPC changes a thread group recommendation */
	void (*thread_group_recommendation_change)(struct thread_group *tg, cluster_type_t new_recommendation);
#endif
	/* Routine to inform the scheduler when all CPUs have finished initializing */
	void (*cpu_init_completed)(void);
	/* Routine to check if a thread is eligible to execute on a specific pset */
	bool (*thread_eligible_for_pset)(thread_t thread, processor_set_t pset);
};

#if defined(CONFIG_SCHED_TRADITIONAL)
extern const struct sched_dispatch_table sched_traditional_dispatch;
extern const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch;
#endif

#if defined(CONFIG_SCHED_MULTIQ)
extern const struct sched_dispatch_table sched_multiq_dispatch;
extern const struct sched_dispatch_table sched_dualq_dispatch;
#if __AMP__
extern const struct sched_dispatch_table sched_amp_dispatch;
#endif
#endif

#if defined(CONFIG_SCHED_PROTO)
extern const struct sched_dispatch_table sched_proto_dispatch;
#endif

#if defined(CONFIG_SCHED_GRRR)
extern const struct sched_dispatch_table sched_grrr_dispatch;
#endif

#if defined(CONFIG_SCHED_CLUTCH)
extern const struct sched_dispatch_table sched_clutch_dispatch;
#endif

#if defined(CONFIG_SCHED_EDGE)
extern const struct sched_dispatch_table sched_edge_dispatch;
#endif

extern void sched_set_max_unsafe_rt_quanta(int max);
extern void sched_set_max_unsafe_fixed_quanta(int max);

#endif  /* MACH_KERNEL_PRIVATE */

__END_DECLS

#endif  /* _KERN_SCHED_PRIM_H_ */