xref: /xnu-12377.81.4/osfmk/kern/sched_prim.h (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 /*
29  * @OSF_COPYRIGHT@
30  */
31 /*
32  * Mach Operating System
33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34  * All Rights Reserved.
35  *
36  * Permission to use, copy, modify and distribute this software and its
37  * documentation is hereby granted, provided that both the copyright
38  * notice and this permission notice appear in all copies of the
39  * software, derivative works or modified versions, and any portions
40  * thereof, and that both notices appear in supporting documentation.
41  *
42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45  *
46  * Carnegie Mellon requests users of this software to return to
47  *
48  *  Software Distribution Coordinator  or  [email protected]
49  *  School of Computer Science
50  *  Carnegie Mellon University
51  *  Pittsburgh PA 15213-3890
52  *
53  * any improvements or extensions that they make and grant Carnegie Mellon
54  * the rights to redistribute these changes.
55  */
56 /*
57  */
58 /*
59  *	File:	sched_prim.h
60  *	Author:	David Golub
61  *
62  *	Scheduling primitive definitions file
63  *
64  */
65 
66 #ifndef _KERN_SCHED_PRIM_H_
67 #define _KERN_SCHED_PRIM_H_
68 
69 #include <sys/cdefs.h>
70 #include <mach/boolean.h>
71 #include <mach/machine/vm_types.h>
72 #include <mach/kern_return.h>
73 #include <kern/clock.h>
74 #include <kern/kern_types.h>
75 #include <kern/percpu.h>
76 #include <kern/thread.h>
77 #include <kern/block_hint.h>
78 
79 extern int              thread_get_current_cpuid(void);
80 
81 #if XNU_KERNEL_PRIVATE
82 
83 /*
84  * The quantum length used for the Fixed and RT scheduling modes. In general
85  * the quantum length can vary, for example for background threads or by QoS.
86  */
87 uint64_t sched_get_quantum_us(void);
88 
89 #endif /* XNU_KERNEL_PRIVATE */
90 
91 #if defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS
92 
93 #include <kern/sched_urgency.h>
94 #include <kern/thread_group.h>
95 #include <kern/waitq.h>
96 
97 /* Initialization */
98 extern void             sched_init(void);
99 
100 extern void             sched_startup(void);
101 
102 extern void             sched_timebase_init(void);
103 
104 extern bool             processor_is_fast_track_candidate_for_realtime_thread(processor_set_t pset, processor_t processor);
105 
106 extern void             sched_check_spill(processor_set_t pset, thread_t thread);
107 
108 extern bool             sched_thread_should_yield(processor_t processor, thread_t thread);
109 
110 extern bool             sched_steal_thread_DISABLED(processor_set_t pset);
111 extern bool             sched_steal_thread_enabled(processor_set_t pset);
112 
113 /* Force a preemption point for a thread and wait for it to stop running */
114 extern boolean_t        thread_stop(
115 	thread_t        thread,
116 	boolean_t       until_not_runnable);
117 
118 /* Release a previous stop request */
119 extern void                     thread_unstop(
120 	thread_t        thread);
121 
122 /* Wait for a thread to stop running */
123 extern void                     thread_wait(
124 	thread_t        thread,
125 	boolean_t       until_not_runnable);
126 
127 /* Unblock thread on wake up */
128 extern boolean_t        thread_unblock(
129 	thread_t                thread,
130 	wait_result_t   wresult);
131 
132 /* Unblock and dispatch thread */
133 extern void thread_go(
134 	thread_t                thread,
135 	wait_result_t           wresult,
136 	bool                    try_handoff);
137 
138 /* Check if direct handoff is allowed */
139 extern boolean_t
140 thread_allowed_for_handoff(
141 	thread_t         thread);
142 
143 /* Handle threads at context switch */
144 extern void                     thread_dispatch(
145 	thread_t                old_thread,
146 	thread_t                new_thread);
147 
148 /* Switch directly to a particular thread */
149 extern int                      thread_run(
150 	thread_t                        self,
151 	thread_continue_t       continuation,
152 	void                            *parameter,
153 	thread_t                        new_thread);
154 
155 /* Resume thread with new stack */
156 extern __dead2 void     thread_continue(thread_t old_thread);
157 
158 /* Invoke continuation */
159 extern __dead2 void     call_continuation(
160 	thread_continue_t       continuation,
161 	void                    *parameter,
162 	wait_result_t           wresult,
163 	boolean_t               enable_interrupts);
164 
165 /*
166  * Flags that can be passed to set_sched_pri
167  * to skip side effects
168  */
169 __options_decl(set_sched_pri_options_t, uint32_t, {
170 	SETPRI_DEFAULT  = 0x0,
171 	SETPRI_LAZY     = 0x1,  /* Avoid setting AST flags or sending IPIs */
172 });
173 
174 /* Set the current scheduled priority */
175 extern void set_sched_pri(
176 	thread_t      thread,
177 	int16_t       priority,
178 	set_sched_pri_options_t options);
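/*
 * Illustrative call (a sketch; `new_pri` is a hypothetical value chosen by the
 * caller): update the scheduled priority but skip the AST/IPI side effects
 * when the caller knows the change cannot require a preemption.
 *
 * ```
 * set_sched_pri(thread, new_pri, SETPRI_LAZY);
 * ```
 */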
179 
180 /* Set base priority of the specified thread */
181 extern void             sched_set_thread_base_priority(
182 	thread_t                thread,
183 	int                             priority);
184 
185 /* Set absolute base priority of the specified thread */
186 extern void             sched_set_kernel_thread_priority(
187 	thread_t                thread,
188 	int                             priority);
189 
190 
191 /* Set the thread's true scheduling mode */
192 extern void             sched_set_thread_mode(thread_t thread,
193     sched_mode_t mode);
194 
195 /*
196  * Set the thread's scheduling mode taking into account that the thread may have
197  * been demoted.
198  */
199 extern void             sched_set_thread_mode_user(thread_t thread,
200     sched_mode_t mode);
201 
202 /*
203  * Get the thread's scheduling mode taking into account that the thread may have
204  * been demoted.
205  */
206 extern sched_mode_t     sched_get_thread_mode_user(thread_t thread);
207 
208 
209 /* Demote the true scheduler mode */
210 extern void             sched_thread_mode_demote(thread_t thread,
211     uint32_t reason);
212 /* Un-demote the true scheduler mode */
213 extern void             sched_thread_mode_undemote(thread_t thread,
214     uint32_t reason);
215 /* Check for a specific demotion */
216 extern bool             sched_thread_mode_has_demotion(thread_t thread,
217     uint32_t reason);
218 
219 extern void sched_thread_promote_reason(thread_t thread, uint32_t reason, uintptr_t trace_obj);
220 extern void sched_thread_unpromote_reason(thread_t thread, uint32_t reason, uintptr_t trace_obj);
221 
222 /* Re-evaluate base priority of thread (thread locked) */
223 void thread_recompute_priority(thread_t thread);
224 
225 /* Re-evaluate scheduled priority of thread (thread locked) */
226 extern void thread_recompute_sched_pri(
227 	thread_t thread,
228 	set_sched_pri_options_t options);
229 
230 /* Periodic scheduler activity */
231 extern void             sched_init_thread(void);
232 
233 /* Perform sched_tick housekeeping activities */
234 extern boolean_t                can_update_priority(
235 	thread_t                thread);
236 
237 extern void             update_priority(
238 	thread_t                thread);
239 
240 extern void             lightweight_update_priority(
241 	thread_t                thread);
242 
243 extern void             sched_default_quantum_expire(thread_t thread);
244 
245 /* Idle processor thread continuation */
246 extern void             idle_thread(
247 	void*           parameter,
248 	wait_result_t   result);
249 
250 extern void idle_thread_create(
251 	processor_t             processor,
252 	thread_continue_t       continuation);
253 
254 /* Continuation return from syscall */
255 extern void     thread_syscall_return(
256 	kern_return_t   ret);
257 
258 /* Context switch */
259 extern wait_result_t    thread_block_reason(
260 	thread_continue_t       continuation,
261 	void                            *parameter,
262 	ast_t                           reason);
263 
264 __options_decl(sched_options_t, uint32_t, {
265 	SCHED_NONE      = 0x0,
266 	SCHED_TAILQ     = 0x1,
267 	SCHED_HEADQ     = 0x2,
268 	SCHED_PREEMPT   = 0x4,
269 	SCHED_REBALANCE = 0x8,
270 	SCHED_STIR_POT  = 0x10,
271 });
272 
273 /* Reschedule thread for execution */
274 extern void             thread_setrun(
275 	thread_t        thread,
276 	sched_options_t options);
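/*
 * Illustrative use of the options above (a sketch, not a fixed calling
 * convention): a wakeup path might requeue a thread at the tail of its
 * runqueue and request a preemption check on the chosen processor.
 *
 * ```
 * thread_setrun(thread, SCHED_TAILQ | SCHED_PREEMPT);
 * ```
 */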
277 
278 extern processor_set_t  task_choose_pset(
279 	task_t                  task);
280 
281 /* Bind the current thread to a particular processor */
282 extern processor_t      thread_bind(
283 	processor_t             processor);
284 
285 extern void             thread_bind_during_wakeup(
286 	thread_t                thread,
287 	processor_t             processor);
288 
289 extern void             thread_unbind_after_queue_shutdown(
290 	thread_t                thread,
291 	processor_t             processor);
292 
293 extern bool pset_has_stealable_threads(
294 	processor_set_t         pset);
295 
296 extern processor_set_t choose_starting_pset(
297 	pset_node_t  node,
298 	thread_t     thread,
299 	processor_t *processor_hint);
300 
301 extern int pset_available_cpu_count(
302 	processor_set_t pset);
303 
304 extern bool pset_is_recommended(
305 	processor_set_t pset);
306 
307 extern bool pset_type_is_recommended(
308 	processor_set_t pset);
309 
310 extern pset_node_t sched_choose_node(
311 	thread_t     thread);
312 
313 #if CONFIG_SCHED_SMT
314 extern processor_t      choose_processor_smt(
315 	processor_set_t                pset,
316 	processor_t                    processor,
317 	thread_t                       thread,
318 	sched_options_t               *options);
319 #else /* !CONFIG_SCHED_SMT */
320 /* Choose the best processor to run a thread */
321 extern processor_t      choose_processor(
322 	processor_set_t                pset,
323 	processor_t                    processor,
324 	thread_t                       thread,
325 	sched_options_t               *options);
326 #endif /* !CONFIG_SCHED_SMT */
327 
328 extern bool sched_SMT_balance(
329 	processor_t processor,
330 	processor_set_t pset);
331 
332 extern void thread_quantum_init(
333 	thread_t thread,
334 	uint64_t now);
335 
336 
337 extern void             run_queue_init(
338 	run_queue_t             runq);
339 
340 extern thread_t run_queue_dequeue(
341 	run_queue_t           runq,
342 	sched_options_t       options);
343 
344 extern boolean_t        run_queue_enqueue(
345 	run_queue_t           runq,
346 	thread_t              thread,
347 	sched_options_t       options);
348 
349 extern void     run_queue_remove(
350 	run_queue_t            runq,
351 	thread_t                       thread);
352 
353 extern thread_t run_queue_peek(
354 	run_queue_t            runq);
355 
356 struct sched_update_scan_context {
357 	uint64_t        earliest_bg_make_runnable_time;
358 	uint64_t        earliest_normal_make_runnable_time;
359 	uint64_t        earliest_rt_make_runnable_time;
360 	uint64_t        sched_tick_last_abstime;
361 };
362 typedef struct sched_update_scan_context *sched_update_scan_context_t;
363 
364 struct pulled_thread_queue;
365 extern void sched_pset_made_schedulable(
366 	processor_set_t pset);
367 
368 extern void sched_cpu_init_completed(void);
369 
370 extern void sched_update_pset_avg_execution_time(processor_set_t pset, uint64_t execution_time, uint64_t curtime, sched_bucket_t sched_bucket);
371 extern void sched_update_pset_load_average(processor_set_t pset, uint64_t curtime);
372 
373 /*
374  * Enum to define various events which need IPIs. The IPI policy
375  * engine decides what kind of IPI to use based on destination
376  * processor state, thread and one of the following scheduling events.
377  */
378 typedef enum {
379 	SCHED_IPI_EVENT_BOUND_THR   = 0x1,
380 	SCHED_IPI_EVENT_PREEMPT     = 0x2,
381 	SCHED_IPI_EVENT_SMT_REBAL   = 0x3,
382 	SCHED_IPI_EVENT_SPILL       = 0x4,
383 	SCHED_IPI_EVENT_REBALANCE   = 0x5,
384 	SCHED_IPI_EVENT_RT_PREEMPT  = 0x6,
385 } sched_ipi_event_t;
386 
387 
388 /* Enum to define various IPI types used by the scheduler */
389 typedef enum {
390 	SCHED_IPI_NONE              = 0x0,
391 	SCHED_IPI_IMMEDIATE         = 0x1,
392 	SCHED_IPI_IDLE              = 0x2,
393 	SCHED_IPI_DEFERRED          = 0x3,
394 } sched_ipi_type_t;
395 
396 /* The IPI policy engine behaves in the following manner:
397  * - All scheduler events which need an IPI invoke sched_ipi_action() with
398  *   the appropriate destination processor, thread and event.
399  * - sched_ipi_action() performs basic checks, invokes the scheduler specific
400  *   ipi_policy routine and sets pending_AST bits based on the result.
401  * - Once the pset lock is dropped, the scheduler invokes sched_ipi_perform()
402  *   routine which actually sends the appropriate IPI to the destination core.
403  */
404 extern sched_ipi_type_t sched_ipi_action(processor_t dst, thread_t thread, sched_ipi_event_t event);
405 extern void sched_ipi_perform(processor_t dst, sched_ipi_type_t ipi);
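/*
 * A minimal sketch of that flow (locking and processor selection simplified;
 * the event shown is just one example):
 *
 * ```
 * // With the pset lock held, decide which IPI (if any) the event needs and
 * // record it in the destination processor's pending AST state.
 * sched_ipi_type_t ipi = sched_ipi_action(processor, thread, SCHED_IPI_EVENT_PREEMPT);
 * pset_unlock(pset);
 * // Only after the pset lock is dropped is the IPI actually delivered.
 * sched_ipi_perform(processor, ipi);
 * ```
 */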
406 
407 /* sched_ipi_policy() is the global default IPI policy for all schedulers */
408 extern sched_ipi_type_t sched_ipi_policy(processor_t dst, thread_t thread,
409     boolean_t dst_idle, sched_ipi_event_t event);
410 
411 /* sched_ipi_deferred_policy() is the global default deferred IPI policy for all schedulers */
412 extern sched_ipi_type_t sched_ipi_deferred_policy(processor_set_t pset,
413     processor_t dst, thread_t thread, sched_ipi_event_t event);
414 
415 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
416 
417 extern boolean_t        thread_update_add_thread(thread_t thread);
418 extern void             thread_update_process_threads(void);
419 extern boolean_t        runq_scan(run_queue_t runq, sched_update_scan_context_t scan_context);
420 
421 #if CONFIG_SCHED_CLUTCH
422 extern boolean_t        sched_clutch_timeshare_scan(queue_t thread_queue, uint16_t count, sched_update_scan_context_t scan_context);
423 #endif /* CONFIG_SCHED_CLUTCH */
424 
425 extern void sched_timeshare_init(void);
426 extern void sched_timeshare_timebase_init(void);
427 extern void sched_timeshare_maintenance_continue(void);
428 
429 
430 
431 extern boolean_t priority_is_urgent(int priority);
432 extern uint32_t sched_timeshare_initial_quantum_size(thread_t thread);
433 
434 extern int sched_compute_timeshare_priority(thread_t thread);
435 
436 #endif /* CONFIG_SCHED_TIMESHARE_CORE */
437 
438 /* Remove thread from its run queue */
439 extern boolean_t        thread_run_queue_remove(thread_t thread);
440 thread_t thread_run_queue_remove_for_handoff(thread_t thread);
441 
442 /* Put a thread back in the run queue after being yanked */
443 extern void thread_run_queue_reinsert(thread_t thread, sched_options_t options);
444 
445 extern void             thread_timer_expire(
446 	void                    *thread,
447 	void                    *p1);
448 
449 extern bool thread_is_eager_preempt(thread_t thread);
450 
451 extern boolean_t sched_generic_direct_dispatch_to_idle_processors;
452 
453 /* Set the maximum interrupt level for the thread */
454 __private_extern__ wait_interrupt_t thread_interrupt_level(
455 	wait_interrupt_t interruptible);
456 
457 __private_extern__ wait_result_t thread_mark_wait_locked(
458 	thread_t                 thread,
459 	wait_interrupt_t interruptible);
460 
461 /* Wake up locked thread directly, passing result */
462 __private_extern__ kern_return_t clear_wait_internal(
463 	thread_t                thread,
464 	wait_result_t   result);
465 
466 struct sched_statistics {
467 	uint32_t        csw_count;
468 	uint32_t        preempt_count;
469 	uint32_t        preempted_rt_count;
470 	uint32_t        preempted_by_rt_count;
471 	uint32_t        rt_sched_count;
472 	uint32_t        interrupt_count;
473 	uint32_t        ipi_count;
474 	uint32_t        timer_pop_count;
475 	uint32_t        idle_transitions;
476 	uint32_t        quantum_timer_expirations;
477 };
478 PERCPU_DECL(struct sched_statistics, sched_stats);
479 extern bool             sched_stats_active;
480 
481 extern void sched_stats_handle_csw(
482 	processor_t processor,
483 	int reasons,
484 	int selfpri,
485 	int otherpri);
486 
487 extern void sched_stats_handle_runq_change(
488 	struct runq_stats *stats,
489 	int old_count);
490 
491 #define SCHED_STATS_INC(field)                                                  \
492 MACRO_BEGIN                                                                     \
493 	if (__improbable(sched_stats_active)) {                                 \
494 	        PERCPU_GET(sched_stats)->field++;                               \
495 	}                                                                       \
496 MACRO_END
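/*
 * Usage sketch: bump one of the per-CPU counters in struct sched_statistics
 * only when stats collection is enabled, e.g. on a context switch:
 *
 * ```
 * SCHED_STATS_INC(csw_count);
 * ```
 */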
497 
498 #if DEBUG
499 
500 #define SCHED_STATS_CSW(processor, reasons, selfpri, otherpri)                  \
501 MACRO_BEGIN                                                                     \
502 	if (__improbable(sched_stats_active)) {                                 \
503 	        sched_stats_handle_csw((processor),                             \
504 	            (reasons), (selfpri), (otherpri));                          \
505 	}                                                                       \
506 MACRO_END
507 
508 
509 #define SCHED_STATS_RUNQ_CHANGE(stats, old_count)                               \
510 MACRO_BEGIN                                                                     \
511 	if (__improbable(sched_stats_active)) {                                 \
512 	        sched_stats_handle_runq_change((stats), (old_count));           \
513 	}                                                                       \
514 MACRO_END
515 
516 #else /* DEBUG */
517 
518 #define SCHED_STATS_CSW(processor, reasons, selfpri, otherpri) do { } while (0)
519 #define SCHED_STATS_RUNQ_CHANGE(stats, old_count) do { } while (0)
520 
521 #endif /* DEBUG */
522 
523 extern uint32_t sched_debug_flags;
524 #define SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS           0x00000001
525 #define SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS   0x00000002
526 #define SCHED_DEBUG_FLAG_AST_CHECK_TRACEPOINTS   0x00000004
527 
528 #define SCHED_DEBUG_PLATFORM_KERNEL_DEBUG_CONSTANT(...)                         \
529 MACRO_BEGIN                                                                     \
530 	if (__improbable(sched_debug_flags &                                    \
531 	    SCHED_DEBUG_FLAG_PLATFORM_TRACEPOINTS)) {                           \
532 	        KERNEL_DEBUG_CONSTANT(__VA_ARGS__);                             \
533 	}                                                                       \
534 MACRO_END
535 
536 #define SCHED_DEBUG_CHOOSE_PROCESSOR_KERNEL_DEBUG_CONSTANT_IST(...)             \
537 MACRO_BEGIN                                                                     \
538 	if (__improbable(sched_debug_flags &                                    \
539 	    SCHED_DEBUG_FLAG_CHOOSE_PROCESSOR_TRACEPOINTS)) {                   \
540 	        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, __VA_ARGS__);           \
541 	}                                                                       \
542 MACRO_END
543 
544 #define SCHED_DEBUG_AST_CHECK_KDBG_RELEASE(...)                                 \
545 MACRO_BEGIN                                                                     \
546 	if (__improbable(sched_debug_flags &                                    \
547 	    SCHED_DEBUG_FLAG_AST_CHECK_TRACEPOINTS)) {                          \
548 	        KDBG_RELEASE(__VA_ARGS__);                                      \
549 	}                                                                       \
550 MACRO_END
551 
552 
553 /* Tells if there are "active" RT threads in the system (provided by CPU PM) */
554 extern void     active_rt_threads(
555 	boolean_t       active);
556 
557 /* Returns the perfcontrol attribute for the thread */
558 extern perfcontrol_class_t thread_get_perfcontrol_class(
559 	thread_t        thread);
560 
561 /* Generic routine for Non-AMP schedulers to calculate parallelism */
562 extern uint32_t sched_qos_max_parallelism(int qos, uint64_t options);
563 
564 extern void check_monotonic_time(uint64_t ctime);
565 
566 #endif /* defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS */
567 
568 __BEGIN_DECLS
569 
570 #ifdef  XNU_KERNEL_PRIVATE
571 
572 extern kern_return_t thread_soft_bind_cluster_type(thread_t, char cluster_type);
573 
574 __options_decl(thread_bind_option_t, uint64_t, {
575 	/* Unbind a previously cluster bound thread */
576 	THREAD_UNBIND                   = 0x1,
577 	/*
578 	 * Bind thread to the cluster only if it is eligible to run on that cluster. If
579 	 * the thread is not eligible to run on the cluster, thread_soft_bind_cluster_id()
580 	 * returns KERN_INVALID_POLICY.
581 	 */
582 	THREAD_BIND_ELIGIBLE_ONLY       = 0x2,
583 });
584 extern kern_return_t thread_soft_bind_cluster_id(thread_t thread, uint32_t cluster_id, thread_bind_option_t options);
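/*
 * Illustrative use of the options above (a sketch; `cluster_id` is whatever
 * cluster the caller has in mind): bind only if the thread is eligible for
 * that cluster, and leave it unbound otherwise.
 *
 * ```
 * kern_return_t kr = thread_soft_bind_cluster_id(thread, cluster_id,
 *     THREAD_BIND_ELIGIBLE_ONLY);
 * if (kr == KERN_INVALID_POLICY) {
 *     // Thread cannot run on that cluster; no binding was established.
 * }
 * ```
 */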
585 
586 extern int sched_get_rt_n_backup_processors(void);
587 extern void sched_set_rt_n_backup_processors(int n);
588 
589 extern int sched_get_rt_deadline_epsilon(void);
590 extern void sched_set_rt_deadline_epsilon(int new_epsilon_us);
591 
592 /* Toggles a global override to turn off CPU Throttling */
593 extern void     sys_override_cpu_throttle(boolean_t enable_override);
594 
595 extern int sched_get_powered_cores(void);
596 extern void sched_set_powered_cores(int n);
597 
598 uint64_t sched_sysctl_get_recommended_cores(void);
599 
600 /*
601  ****************** Only exported until BSD stops using ********************
602  */
603 
604 extern void                     thread_vm_bind_group_add(void);
605 
606 /* Wake up thread directly, passing result */
607 extern kern_return_t clear_wait(
608 	thread_t                thread,
609 	wait_result_t   result);
610 
611 /* Start thread running */
612 extern void             thread_bootstrap_return(void) __attribute__((noreturn));
613 
614 /* Return from exception (BSD-visible interface) */
615 extern void             thread_exception_return(void) __dead2;
616 
617 #define SCHED_STRING_MAX_LENGTH (48)
618 /* String declaring the name of the current scheduler */
619 extern char sched_string[SCHED_STRING_MAX_LENGTH];
620 
621 __options_decl(thread_handoff_option_t, uint32_t, {
622 	THREAD_HANDOFF_NONE          = 0,
623 	THREAD_HANDOFF_SETRUN_NEEDED = 0x1,
624 });
625 
626 /* Remove thread from its run queue */
627 thread_t thread_prepare_for_handoff(thread_t thread, thread_handoff_option_t option);
628 
629 /* Attempt to context switch to a specific runnable thread */
630 extern wait_result_t thread_handoff_deallocate(thread_t thread, thread_handoff_option_t option);
631 
632 __attribute__((nonnull(2)))
633 extern void thread_handoff_parameter(thread_t thread,
634     thread_continue_t continuation, void *parameter, thread_handoff_option_t) __dead2;
635 
636 extern struct waitq     *assert_wait_queue(event_t event);
637 
638 /*
639  * sched_cond_t:
640  *
641  * An atomic condition variable used to synchronize wake/block operations on threads.
642  * Bits defined below are reserved for use by sched_prim. Remaining
643  * bits may be used by caller for additional synchronization semantics.
644  */
645 __options_decl(sched_cond_t, uint32_t, {
646 	SCHED_COND_INIT = 0x0000,    /* initialize all bits to zero (inactive and not awoken) */
647 	SCHED_COND_ACTIVE = 0x0001,  /* target thread is active */
648 	SCHED_COND_WAKEUP = 0x0002   /* wakeup has been issued for target thread */
649 });
650 typedef _Atomic sched_cond_t sched_cond_atomic_t;
651 
652 /*
653  * sched_cond_init:
654  *
655  * Initialize an atomic condition variable. Note that this does not occur atomically and should be
656  * performed during thread initialization, before the condition is observable by other threads.
657  */
658 extern void sched_cond_init(
659 	sched_cond_atomic_t *cond);
660 
661 /*
662  * sched_cond_signal:
663  *
664  * Wake up the specified thread if it is waiting on this event and it has not already been issued a wakeup.
665  *
666  * parameters:
667  *      thread    thread to awaken
668  *      cond      atomic condition variable
669  */
670 extern kern_return_t sched_cond_signal(
671 	sched_cond_atomic_t *cond,
672 	thread_t thread);
673 
674 /*
675  * sched_cond_wait_parameter:
676  *
677  * Assert wait and block on cond if no wakeup has been issued.
678  * If a wakeup has been issued on cond since the last `sched_cond_ack`, clear_wait and
679  * return `THREAD_AWAKENED`.
680  *
681  * `sched_cond_wait_parameter` must be paired with `sched_cond_ack`.
682  *
683  * NOTE: `continuation` will only be jumped to if a wakeup has not been issued
684  *
685  * parameters:
686  *      cond             atomic condition variable to synchronize on
687  *      interruptible    interruptible value to pass to assert_wait
688  *      continuation     continuation if block succeeds
689  *      parameter
690  *      parameter        parameter to pass to the continuation
691 extern wait_result_t sched_cond_wait_parameter(
692 	sched_cond_atomic_t *cond,
693 	wait_interrupt_t interruptible,
694 	thread_continue_t continuation,
695 	void *parameter);
696 
697 /*
698  * sched_cond_wait:
699  *
700  * Assert wait and block on cond if no wakeup has been issued.
701  * If a wakeup has been issued on cond since the last `sched_cond_ack`, clear_wait and
702  * return `THREAD_AWAKENED`.
703  *
704  * `sched_cond_wait` must be paired with `sched_cond_ack`.
705  *
706  * NOTE: `continuation` will only be jumped to if a wakeup has not been issued
707  *
708  * parameters:
709  *      cond             atomic condition variable to synchronize on
710  *      interruptible    interruptible value to pass to assert_wait
711  *      continuation     continuation if block succeeds
712  */
713 extern wait_result_t sched_cond_wait(
714 	sched_cond_atomic_t *cond,
715 	wait_interrupt_t interruptible,
716 	thread_continue_t continuation);
717 
718 /*
719  * sched_cond_ack:
720  *
721  * Acknowledge an issued wakeup by clearing WAKEUP and setting ACTIVE (via XOR).
722  * It is the caller's responsibility to ensure that the ACTIVE bit is always low prior to calling
723  * (i.e. by calling `sched_cond_wait` prior to any rerun or block).
724  * Synchronization schemes that allow for the WAKEUP bit to be reset prior to wakeup
725  * (e.g. a cancellation mechanism) should check that WAKEUP was indeed cleared.
726  *
727  * e.g.
728  * ```
729  * if (sched_cond_ack(&my_state) & SCHED_COND_WAKEUP) {
730  *     // WAKEUP bit was no longer set by the time this thread woke up
731  *     do_cancellation_policy();
732  * }
733  * ```
734  *
735  * parameters:
736  *      cond:    atomic condition variable
737  */
738 extern sched_cond_t sched_cond_ack(
739 	sched_cond_atomic_t *cond);
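/*
 * Putting the sched_cond_* primitives together, a worker thread built on a
 * continuation might look like the following sketch (`worker_continue` and
 * the work-draining step are hypothetical):
 *
 * ```
 * static void
 * worker_continue(void *parameter, wait_result_t wr)
 * {
 *     sched_cond_atomic_t *cond = parameter;
 *     for (;;) {
 *         // Acknowledge the wakeup that made us runnable (clears WAKEUP,
 *         // sets ACTIVE), drain any pending work, then block until the
 *         // next sched_cond_signal() from another thread.
 *         sched_cond_ack(cond);
 *         // ... process pending work ...
 *         sched_cond_wait_parameter(cond, THREAD_UNINT, worker_continue, cond);
 *     }
 * }
 * ```
 */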
740 
741 #endif  /* XNU_KERNEL_PRIVATE */
742 
743 #if defined(KERNEL_PRIVATE) || SCHED_TEST_HARNESS
744 /* Set pending block hint for a particular object before we go into a wait state */
745 extern void             thread_set_pending_block_hint(
746 	thread_t                        thread,
747 	block_hint_t                    block_hint);
748 
749 #define QOS_PARALLELISM_COUNT_LOGICAL   0x1
750 #define QOS_PARALLELISM_REALTIME        0x2
751 #define QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE              0x4
752 
753 extern uint32_t qos_max_parallelism(int qos, uint64_t options);
754 #endif /* defined(KERNEL_PRIVATE) || SCHED_TEST_HARNESS */
755 
756 #if XNU_KERNEL_PRIVATE
757 extern void             thread_yield_with_continuation(
758 	thread_continue_t       continuation,
759 	void                            *parameter) __dead2;
760 #endif /* XNU_KERNEL_PRIVATE */
761 
762 /* Context switch */
763 extern wait_result_t    thread_block(
764 	thread_continue_t       continuation);
765 
766 extern wait_result_t    thread_block_parameter(
767 	thread_continue_t       continuation,
768 	void                            *parameter);
769 
770 /* Declare thread will wait on a particular event */
771 extern wait_result_t    assert_wait(
772 	event_t                         event,
773 	wait_interrupt_t        interruptible);
774 
775 /* Assert that the thread intends to wait with a timeout */
776 extern wait_result_t    assert_wait_timeout(
777 	event_t                         event,
778 	wait_interrupt_t        interruptible,
779 	uint32_t                        interval,
780 	uint32_t                        scale_factor);
781 
782 /* Assert that the thread intends to wait with an urgency, timeout and leeway */
783 extern wait_result_t    assert_wait_timeout_with_leeway(
784 	event_t                         event,
785 	wait_interrupt_t        interruptible,
786 	wait_timeout_urgency_t  urgency,
787 	uint32_t                        interval,
788 	uint32_t                        leeway,
789 	uint32_t                        scale_factor);
790 
791 extern wait_result_t    assert_wait_deadline(
792 	event_t                         event,
793 	wait_interrupt_t        interruptible,
794 	uint64_t                        deadline);
795 
796 /* Assert that the thread intends to wait with an urgency, deadline, and leeway */
797 extern wait_result_t    assert_wait_deadline_with_leeway(
798 	event_t                         event,
799 	wait_interrupt_t        interruptible,
800 	wait_timeout_urgency_t  urgency,
801 	uint64_t                        deadline,
802 	uint64_t                        leeway);
803 
804 
805 /* Wake up thread (or threads) waiting on a particular event */
806 extern kern_return_t    thread_wakeup_prim(
807 	event_t                         event,
808 	boolean_t                       one_thread,
809 	wait_result_t                   result);
810 
811 /* Wake up at most the given number of threads waiting on a particular event */
812 extern kern_return_t    thread_wakeup_nthreads_prim(
813 	event_t                         event,
814 	uint32_t                        nthreads,
815 	wait_result_t                   result);
816 
817 #define thread_wakeup(x) \
818 	thread_wakeup_prim((x), FALSE, THREAD_AWAKENED)
819 #define thread_wakeup_with_result(x, z) \
820 	thread_wakeup_prim((x), FALSE, (z))
821 #define thread_wakeup_one(x) \
822 	thread_wakeup_prim((x), TRUE, THREAD_AWAKENED)
823 
824 #define thread_wakeup_nthreads(x, nthreads) \
825 	thread_wakeup_nthreads_prim((x), (nthreads), THREAD_AWAKENED)
826 #define thread_wakeup_nthreads_with_result(x, nthreads, z) \
827 	thread_wakeup_nthreads_prim((x), (nthreads), (z))
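/*
 * The canonical wait/wakeup pattern built from the primitives above (a
 * sketch; `&some_object` and `work_is_ready` stand in for the caller's own
 * event address and condition):
 *
 * ```
 * // Waiter: declare intent to wait, re-check the condition, then block.
 * assert_wait((event_t)&some_object, THREAD_UNINT);
 * if (work_is_ready) {
 *     // Condition already satisfied; withdraw the wait instead of blocking.
 *     clear_wait(current_thread(), THREAD_AWAKENED);
 * } else {
 *     thread_block(THREAD_CONTINUE_NULL);
 * }
 *
 * // Waker: publish the state change, then wake anyone waiting on the event.
 * thread_wakeup((event_t)&some_object);
 * ```
 */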
828 
829 /* Wake up the specified thread if it is waiting on this event */
830 extern kern_return_t thread_wakeup_thread(event_t event, thread_t thread);
831 
832 extern boolean_t preemption_enabled(void);
833 
834 #if defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS
835 
836 
837 #if   !CONFIG_SCHED_TIMESHARE_CORE && !CONFIG_SCHED_CLUTCH && !CONFIG_SCHED_EDGE
838 #error Enable at least one scheduler algorithm in osfmk/conf/MASTER.XXX
839 #endif
840 
841 /*
842  * The scheduling policy is fixed at compile-time, in order to save the performance
843  * cost of the function-pointer indirection we would otherwise pay on each
844  * policy-specific callout.
845  */
846 #if __AMP__
847 
848 #if CONFIG_SCHED_EDGE
849 extern const struct sched_dispatch_table sched_edge_dispatch;
850 #define SCHED(f) (sched_edge_dispatch.f)
851 #else /* CONFIG_SCHED_EDGE */
852 extern const struct sched_dispatch_table sched_amp_dispatch;
853 #define SCHED(f) (sched_amp_dispatch.f)
854 #endif /* CONFIG_SCHED_EDGE */
855 
856 #else /* __AMP__ */
857 
858 #if CONFIG_SCHED_CLUTCH
859 extern const struct sched_dispatch_table sched_clutch_dispatch;
860 #define SCHED(f) (sched_clutch_dispatch.f)
861 #else /* CONFIG_SCHED_CLUTCH */
862 extern const struct sched_dispatch_table sched_dualq_dispatch;
863 #define SCHED(f) (sched_dualq_dispatch.f)
864 #endif /* CONFIG_SCHED_CLUTCH */
865 
866 #endif /* __AMP__ */
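/*
 * Policy-specific callouts go through the SCHED() macro; for example, a
 * caller might pick a processor via the selected dispatch table (an
 * illustrative callout, mirroring the choose_processor entry below):
 *
 * ```
 * sched_options_t options = SCHED_NONE;
 * processor_t chosen = SCHED(choose_processor)(pset, processor, thread, &options);
 * ```
 */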
867 
868 struct sched_dispatch_table {
869 	const char *sched_name;
870 	void    (*init)(void);                          /* Init global state */
871 	void    (*timebase_init)(void);         /* Timebase-dependent initialization */
872 	void    (*processor_init)(processor_t processor);       /* Per-processor scheduler init */
873 	void    (*pset_init)(processor_set_t pset);     /* Per-processor set scheduler init */
874 
875 	void    (*maintenance_continuation)(void);      /* Function called regularly */
876 
877 	/*
878 	 * Choose a thread of greater or equal priority from the per-processor
879 	 * runqueue for timeshare/fixed threads
880 	 */
881 	thread_t        (*choose_thread)(
882 		processor_t           processor,
883 		int                           priority,
884 		thread_t              prev_thread,
885 		ast_t reason);
886 
887 	/* True if scheduler supports stealing threads for this pset */
888 	bool    (*steal_thread_enabled)(processor_set_t pset);
889 
890 	/*
891 	 * Steal a thread from another processor in the pset so that it can run
892 	 * immediately
893 	 */
894 	thread_t        (*steal_thread)(
895 		processor_set_t         pset);
896 
897 	/*
898 	 * Compute priority for a timeshare thread based on base priority.
899 	 */
900 	int (*compute_timeshare_priority)(thread_t thread);
901 
902 	/*
903 	 * Pick the best node for a thread to run on.
904 	 */
905 	pset_node_t (*choose_node)(
906 		thread_t                      thread);
907 
908 	/*
909 	 * Pick the best processor for a thread (any kind of thread) to run on.
910 	 */
911 	processor_t     (*choose_processor)(
912 		processor_set_t                pset,
913 		processor_t                    processor,
914 		thread_t                       thread,
915 		sched_options_t               *options);
916 
917 	/*
918 	 * Enqueue a timeshare or fixed priority thread onto the per-processor
919 	 * runqueue
920 	 */
921 	boolean_t (*processor_enqueue)(
922 		processor_t                    processor,
923 		thread_t                       thread,
924 		sched_options_t                options);
925 
926 	/* Migrate threads away in preparation for processor shutdown */
927 	void (*processor_queue_shutdown)(
928 		processor_t                    processor,
929 		struct pulled_thread_queue * threadq);
930 
931 	/* Remove the specific thread from the per-processor runqueue */
932 	boolean_t       (*processor_queue_remove)(
933 		processor_t             processor,
934 		thread_t                thread);
935 
936 	/*
937 	 * Does the per-processor runqueue have any timeshare or fixed priority
938 	 * threads on it? Called without pset lock held, so should
939 	 * not assume immutability while executing.
940 	 */
941 	boolean_t       (*processor_queue_empty)(processor_t            processor);
942 
943 	/*
944 	 * Would this priority trigger an urgent preemption if it's sitting
945 	 * on the per-processor runqueue?
946 	 */
947 	boolean_t       (*priority_is_urgent)(int priority);
948 
949 	/*
950 	 * Does the per-processor runqueue contain runnable threads that
951 	 * should cause the currently-running thread to be preempted?
952 	 */
953 	ast_t           (*processor_csw_check)(processor_t processor);
954 
955 	/*
956 	 * Does the per-processor runqueue contain a runnable thread
957 	 * of > or >= priority, as a preflight for choose_thread() or other
958 	 * thread selection
959 	 */
960 	boolean_t       (*processor_queue_has_priority)(processor_t             processor,
961 	    int                             priority,
962 	    boolean_t               gte);
963 
964 	/* Quantum size for the specified non-realtime thread. */
965 	uint32_t        (*initial_quantum_size)(thread_t thread);
966 
967 	/* Scheduler mode for a new thread */
968 	sched_mode_t    (*initial_thread_sched_mode)(task_t parent_task);
969 
970 	/*
971 	 * Is it safe to call update_priority, which may change a thread's
972 	 * runqueue or other state. This can be used to throttle changes
973 	 * to dynamic priority.
974 	 */
975 	boolean_t       (*can_update_priority)(thread_t thread);
976 
977 	/*
978 	 * Update both scheduled priority and other persistent state.
979 	 * Side effects may include migration to another processor's runqueue.
980 	 */
981 	void            (*update_priority)(thread_t thread);
982 
983 	/* Lower overhead update to scheduled priority and state. */
984 	void            (*lightweight_update_priority)(thread_t thread);
985 
986 	/* Callback for non-realtime threads when the quantum timer fires */
987 	void            (*quantum_expire)(thread_t thread);
988 
989 	/*
990 	 * Runnable threads on per-processor runqueue. Should only
991 	 * be used for relative comparisons of load between processors.
992 	 */
993 	int                     (*processor_runq_count)(processor_t     processor);
994 
995 	/* Aggregate runcount statistics for per-processor runqueue */
996 	uint64_t    (*processor_runq_stats_count_sum)(processor_t   processor);
997 
998 	boolean_t       (*processor_bound_count)(processor_t processor);
999 
1000 	void            (*thread_update_scan)(sched_update_scan_context_t scan_context);
1001 
1002 	/* Supports more than one pset */
1003 	boolean_t   multiple_psets_enabled;
1004 
1005 	/* Supports avoid-processor */
1006 	boolean_t   avoid_processor_enabled;
1007 
1008 	/* Returns true if this processor should avoid running this thread. */
1009 	bool    (*thread_avoid_processor)(processor_t processor, thread_t thread, ast_t reason);
1010 
1011 	/*
1012 	 * Invoked when a processor is about to choose the idle thread
1013 	 * Used to send IPIs to a processor which would be preferred to be idle instead.
1014 	 * Returns true if the current processor should anticipate a quick IPI reply back
1015 	 * from another core.
1016 	 * Called with pset lock held, returns with pset lock unlocked.
1017 	 */
1018 	bool    (*processor_balance)(processor_t processor, processor_set_t pset);
1019 
1020 	processor_t (*rt_choose_processor)(processor_set_t starting_pset, processor_t starting_processor, thread_t thread);
1021 	thread_t    (*rt_steal_thread)(processor_set_t stealing_pset);
1022 	void    (*rt_init_pset)(processor_set_t pset);
1023 	void    (*rt_init_completed)(void);
1024 	void    (*rt_queue_shutdown)(processor_t processor, struct pulled_thread_queue * threadq);
1025 	void    (*rt_runq_scan)(sched_update_scan_context_t scan_context);
1026 	int64_t (*rt_runq_count_sum)(void);
1027 
1028 	uint32_t (*qos_max_parallelism)(int qos, uint64_t options);
1029 	void    (*check_spill)(processor_set_t pset, thread_t thread);
1030 	sched_ipi_type_t (*ipi_policy)(processor_t dst, thread_t thread, boolean_t dst_idle, sched_ipi_event_t event);
1031 	bool    (*thread_should_yield)(processor_t processor, thread_t thread);
1032 
1033 	/* Routine to update run counts */
1034 	uint32_t (*run_count_incr)(thread_t thread);
1035 	uint32_t (*run_count_decr)(thread_t thread);
1036 
1037 	/* Routine to update scheduling bucket for a thread */
1038 	void (*update_thread_bucket)(thread_t thread);
1039 
1040 	/* Routine to inform the scheduler when a new pset becomes schedulable */
1041 	void (*pset_made_schedulable)(processor_set_t pset);
1042 #if CONFIG_THREAD_GROUPS
1043 	/* Routine to inform the scheduler when CLPC changes a thread group recommendation */
1044 	void (*thread_group_recommendation_change)(struct thread_group *tg, cluster_type_t new_recommendation);
1045 #endif
1046 	/* Routine to inform the scheduler when all CPUs have finished initializing */
1047 	void (*cpu_init_completed)(void);
1048 	/* Routine to check if a thread is eligible to execute on a specific pset */
1049 	bool (*thread_eligible_for_pset)(thread_t thread, processor_set_t pset);
1050 	/* Routine to update the load average for a pset after enqueueing or committing to run a new thread */
1051 	void (*update_pset_load_average)(processor_set_t pset, uint64_t curtime);
1052 	/* Routine to update average execution time metrics for a pset after a thread exits core */
1053 	void (*update_pset_avg_execution_time)(processor_set_t pset, uint64_t execution_time, uint64_t curtime, sched_bucket_t sched_bucket);
1054 };
1055 
1056 extern const struct sched_dispatch_table sched_dualq_dispatch;
1057 #if __AMP__
1058 extern const struct sched_dispatch_table sched_amp_dispatch;
1059 #endif
1060 
1061 #if defined(CONFIG_SCHED_CLUTCH)
1062 extern const struct sched_dispatch_table sched_clutch_dispatch;
1063 #endif
1064 
1065 #if defined(CONFIG_SCHED_EDGE)
1066 extern const struct sched_dispatch_table sched_edge_dispatch;
1067 #endif
1068 
1069 extern void sched_set_max_unsafe_rt_quanta(int max);
1070 extern void sched_set_max_unsafe_fixed_quanta(int max);
1071 
1072 #endif  /* defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS */
1073 
1074 __END_DECLS
1075 
1076 #endif  /* _KERN_SCHED_PRIM_H_ */
1077