1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 /*
60 * processor.h: Processor and processor-related definitions.
61 */
62
63 #ifndef _KERN_PROCESSOR_H_
64 #define _KERN_PROCESSOR_H_
65
66 #include <mach/boolean.h>
67 #include <mach/kern_return.h>
68 #include <kern/kern_types.h>
69
70 #include <sys/cdefs.h>
71
72 #if defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS
73 #include <kern/bits.h>
74 #include <kern/sched_common.h>
75 #include <kern/sched_urgency.h>
76 #include <mach/sfi_class.h>
77 #endif /* defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS */
78
79 #ifdef MACH_KERNEL_PRIVATE
80 #include <mach/mach_types.h>
81 #include <kern/ast.h>
82 #include <kern/cpu_number.h>
83 #include <kern/smp.h>
84 #include <kern/simple_lock.h>
85 #include <kern/locks.h>
86 #include <kern/percpu.h>
87 #include <kern/queue.h>
88 #include <kern/recount.h>
89 #include <kern/sched.h>
90 #include <kern/timer.h>
91 #include <kern/sched_clutch.h>
92 #include <kern/timer_call.h>
93 #include <kern/assert.h>
94 #include <machine/limits.h>
95 #endif
96
97 __BEGIN_DECLS __ASSUME_PTR_ABI_SINGLE_BEGIN
98
99 #if defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS
100
101 /*
102 * Processor state is accessed by locking the scheduling lock
103 * for the assigned processor set.
104 *
105 * --- PENDING_OFFLINE <
106 * / \
107 * _/ \
108 * OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING
109 * \_________________^ ^ ^______/ /
110 * \__________________/
111 *
112 * The transition from offline to start and idle to dispatching
113 * is externally driven as a directive. However these
114 * are paired with a handshake by the processor itself
115 * to indicate that it has completed a transition of indeterminate
116 * length (for example, the DISPATCHING->RUNNING or START->RUNNING
117 * transitions must occur on the processor itself).
118 *
119 * The boot processor has some special cases, and skips the START state,
120 * since it has already bootstrapped and is ready to context switch threads.
121 *
122 * When a processor is in DISPATCHING or RUNNING state, the current_pri,
123 * current_thmode, and deadline fields should be set, so that other
124 * processors can evaluate if it is an appropriate candidate for preemption.
125 */
126 #if defined(CONFIG_SCHED_DEFERRED_AST)
127 /*
128 * --- PENDING_OFFLINE <
129 * / \
130 * _/ \
131 * OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING
132 * \_________________^ ^ ^______/ ^_____ / /
133 * \__________________/
134 *
135 * A DISPATCHING processor may be put back into IDLE, if another
136 * processor determines that the target processor will have nothing to do
137 * upon reaching the RUNNING state. This is racy, but if the target
138 * responds and becomes RUNNING, it will not break the processor state
139 * machine.
140 *
141 * This change allows us to cancel an outstanding signal/AST on a processor
142 * (if such an operation is supported through hardware or software), and
143 * push the processor back into the IDLE state as a power optimization.
144 */
145 #endif /* defined(CONFIG_SCHED_DEFERRED_AST) */
146
/*
 * Scheduling state of a processor.
 *
 * Value 1 is deliberately unassigned: it used to be PROCESSOR_SHUTDOWN
 * ("going off-line, but schedulable") and is no longer used.
 * PROCESSOR_STATE_LEN is one past the highest state value, so it can size
 * arrays indexed by state.
 */
typedef enum {
	PROCESSOR_OFF_LINE        = 0,  /* Not booted or off-line */
	PROCESSOR_START           = 2,  /* Being started */
	PROCESSOR_PENDING_OFFLINE = 3,  /* Going off-line, not schedulable */
	PROCESSOR_IDLE            = 4,  /* Idle (available) */
	PROCESSOR_DISPATCHING     = 5,  /* Dispatching (idle -> active) */
	PROCESSOR_RUNNING         = 6,  /* Normal execution */
	PROCESSOR_STATE_LEN       = (PROCESSOR_RUNNING + 1)
} processor_state_t;
157
/*
 * Cluster type of a processor set.
 *
 * The AMP E/P types only exist on __AMP__ builds. MAX_PSET_TYPES is the
 * enumerator following the last type, i.e. 1 without __AMP__ and 3 with it.
 */
typedef enum {
	PSET_SMP   = 0,
#if __AMP__
	PSET_AMP_E = 1,
	PSET_AMP_P = 2,
#endif /* __AMP__ */
	MAX_PSET_TYPES,
} pset_cluster_type_t;
166
167 #if __AMP__
168
169 #define MAX_AMP_CLUSTER_TYPES (MAX_PSET_TYPES - 1)
170
/* Policy selecting how a perfctl_class is chosen across AMP clusters. */
typedef enum {
	/* static policy: set at boot */
	SCHED_PERFCTL_POLICY_DEFAULT,
	/* dynamic policy: perfctl_class follows thread group across amp clusters */
	SCHED_PERFCTL_POLICY_FOLLOW_GROUP,
	/* dynamic policy: limits perfctl_class to amp e cluster */
	SCHED_PERFCTL_POLICY_RESTRICT_E,
} sched_perfctl_class_policy_t;
176
177 extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_util;
178 extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_bg;
179
180 #endif /* __AMP__ */
181
182 typedef bitmap_t cpumap_t;
183
184 #if __arm64__
185
186 extern cluster_type_t pset_cluster_type_to_cluster_type(pset_cluster_type_t pset_cluster_type);
187 extern pset_cluster_type_t cluster_type_to_pset_cluster_type(cluster_type_t cluster_type);
188
/*
 * pset_execution_time_t
 *
 * Tracks the average execution time of threads on a pset together with the
 * time of the last update. The average is updated from contexts that do not
 * hold the pset lock, so both 64-bit fields are overlaid on a single 128-bit
 * value that can be updated atomically with a double-wide RMW loop.
 */
typedef union {
	/* Field view */
	struct {
		uint64_t pset_avg_thread_execution_time;
		uint64_t pset_execution_time_last_update;
	};
	/* Packed view covering both fields at once */
	unsigned __int128 pset_execution_time_packed;
} pset_execution_time_t;
204
205 #endif /* __arm64__ */
206
207 struct processor_set {
208 int pset_id;
209 int online_processor_count;
210 int cpu_set_low, cpu_set_hi;
211 int cpu_set_count;
212 int last_chosen;
213
214 #if CONFIG_SCHED_EDGE
215 uint64_t pset_load_average[TH_BUCKET_SCHED_MAX];
216 /*
217 * Count of threads running or enqueued on the cluster (not including threads enqueued in a processor-bound runq).
218 * Updated atomically per scheduling bucket, around the same time as pset_load_average
219 */
220 uint32_t pset_runnable_depth[TH_BUCKET_SCHED_MAX];
221 #else /* !CONFIG_SCHED_EDGE */
222 uint64_t load_average;
223 #endif /* CONFIG_SCHED_EDGE */
224 uint64_t pset_load_last_update;
225 cpumap_t cpu_bitmask;
226 cpumap_t recommended_bitmask;
227 cpumap_t cpu_state_map[PROCESSOR_STATE_LEN];
228 #if CONFIG_SCHED_SMT
229 cpumap_t primary_map;
230 #endif /* CONFIG_SCHED_SMT */
231 cpumap_t realtime_map;
232 cpumap_t cpu_available_map;
233
234 #define SCHED_PSET_TLOCK (1)
235 #if defined(SCHED_PSET_TLOCK)
236 /* TODO: reorder struct for temporal cache locality */
237 __attribute__((aligned(128))) lck_ticket_t sched_lock;
238 #else /* SCHED_PSET_TLOCK*/
239 __attribute__((aligned(128))) lck_spin_t sched_lock; /* lock for above */
240 #endif /* SCHED_PSET_TLOCK*/
241
242 struct run_queue pset_runq; /* runq for this processor set, used by the amp and dualq scheduler policies */
243 struct rt_queue rt_runq; /* realtime runq for this processor set */
244 /*
245 * stealable_rt_threads_earliest_deadline stores the earliest deadline of
246 * the rt_runq if this pset has stealable RT threads, and RT_DEADLINE_NONE
247 * otherwise.
248 *
249 * It can only be read outside of the pset lock in sched_rt_steal_thread as
250 * a hint for which pset to lock. It must be re-checked under the lock
251 * before relying on its value to dequeue a thread.
252 *
253 * Updates are made under the pset lock by pset_update_rt_stealable_state.
254 */
255 _Atomic uint64_t stealable_rt_threads_earliest_deadline;
256 #if CONFIG_SCHED_CLUTCH
257 struct sched_clutch_root pset_clutch_root; /* clutch hierarchy root */
258 #endif /* CONFIG_SCHED_CLUTCH */
259
260 /* CPUs that have been sent an unacknowledged remote AST for scheduling purposes */
261 cpumap_t pending_AST_URGENT_cpu_mask;
262 cpumap_t pending_AST_PREEMPT_cpu_mask;
263 #if defined(CONFIG_SCHED_DEFERRED_AST)
264 /*
265 * A separate mask, for ASTs that we may be able to cancel. This is dependent on
266 * some level of support for requesting an AST on a processor, and then quashing
267 * that request later.
268 *
269 * The purpose of this field (and the associated codepaths) is to infer when we
270 * no longer need a processor that is DISPATCHING to come up, and to prevent it
271 * from coming out of IDLE if possible. This should serve to decrease the number
272 * of spurious ASTs in the system, and let processors spend longer periods in
273 * IDLE.
274 */
275 cpumap_t pending_deferred_AST_cpu_mask;
276 #endif /* defined(CONFIG_SCHED_DEFERRED_AST) */
277 cpumap_t pending_spill_cpu_mask;
278 cpumap_t rt_pending_spill_cpu_mask;
279
280 struct ipc_port * pset_self; /* port for operations */
281 struct ipc_port * pset_name_self; /* port for information */
282
283 processor_set_t pset_list; /* chain of associated psets */
284 pset_node_t node;
285 uint32_t pset_cluster_id;
286
287 /*
288 * Currently the scheduler uses a mix of pset_cluster_type_t & cluster_type_t
289 * for recommendations etc. It might be useful to unify these as a single type.
290 */
291 pset_cluster_type_t pset_cluster_type;
292 /*
293 * For scheduler use only:
294 * The type that this pset will be treated like for scheduling purposes
295 */
296 cluster_type_t pset_type;
297
298 #if CONFIG_SCHED_EDGE
299 cpumap_t cpu_running_foreign;
300 cpumap_t cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_COUNT];
301 sched_bucket_t cpu_running_buckets[MAX_CPUS];
302
303 bitmap_t foreign_psets[BITMAP_LEN(MAX_PSETS)];
304 bitmap_t native_psets[BITMAP_LEN(MAX_PSETS)];
305 bitmap_t local_psets[BITMAP_LEN(MAX_PSETS)];
306 bitmap_t remote_psets[BITMAP_LEN(MAX_PSETS)];
307 pset_execution_time_t pset_execution_time[TH_BUCKET_SCHED_MAX];
308 uint64_t pset_cluster_shared_rsrc_load[CLUSTER_SHARED_RSRC_TYPE_COUNT];
309 _Atomic sched_clutch_edge sched_edges[MAX_PSETS][TH_BUCKET_SCHED_MAX];
310 sched_pset_search_order_t spill_search_order[TH_BUCKET_SCHED_MAX];
311 /*
312 * Recommended width of threads (one per core) or shared resource threads
313 * (one per cluster), if this is the preferred pset.
314 */
315 uint8_t max_parallel_cores[TH_BUCKET_SCHED_MAX];
316 uint8_t max_parallel_clusters[TH_BUCKET_SCHED_MAX];
317 #endif /* CONFIG_SCHED_EDGE */
318
319 #if __AMP__
320 /* Writes to sched_rt_* fields are guarded by sched_available_cores_lock to
321 * prevent concurrent updates. Reads are not guaranteed to be consistent
322 * except atomicity of specific fields, as noted below */
323
324 /* sched_rt_edges controls realtime thread scheduling policies like migration and steal. */
325 sched_clutch_edge sched_rt_edges[MAX_PSETS];
326 sched_pset_search_order_t sched_rt_spill_search_order; /* should be stored/accessed atomically */
327 #if CONFIG_SCHED_EDGE
328 sched_pset_search_order_t sched_rt_steal_search_order; /* should be stored/accessed atomically */
329 #endif /* CONFIG_SCHED_EDGE */
330 #endif /* __AMP__ */
331 cpumap_t perfcontrol_cpu_preferred_bitmask;
332 cpumap_t perfcontrol_cpu_migration_bitmask;
333 int cpu_preferred_last_chosen;
334 #if CONFIG_SCHED_SMT
335 bool is_SMT; /* pset contains SMT processors */
336 #endif /* CONFIG_SCHED_SMT */
337 };
338
339 /* Boot (and default) pset */
340 extern struct processor_set pset0;
341
342 typedef bitmap_t pset_map_t;
343
344 struct pset_node {
345 processor_set_t psets; /* list of associated psets */
346
347 pset_node_t node_list; /* chain of associated nodes */
348
349 pset_cluster_type_t pset_cluster_type; /* Same as the type of all psets in this node */
350
351 pset_map_t pset_map; /* map of associated psets */
352 _Atomic pset_map_t pset_idle_map; /* psets with at least one IDLE CPU */
353 _Atomic pset_map_t pset_non_rt_map; /* psets with at least one available CPU not running a realtime thread */
354 #if CONFIG_SCHED_SMT
355 _Atomic pset_map_t pset_non_rt_primary_map;/* psets with at least one available primary CPU not running a realtime thread */
356 #endif /* CONFIG_SCHED_SMT */
357 _Atomic pset_map_t pset_recommended_map; /* psets with at least one recommended processor */
358 };
359
/*
 * Boot pset node.
 *
 * pset_node0 is declared exactly once per configuration: on __AMP__ builds it
 * is an alias for the first element of pset_nodes[], otherwise it is a
 * standalone object. (An additional unconditional extern would, on __AMP__
 * builds, declare an object that the macro below immediately shadows.)
 */
#if __AMP__

/* Boot pset node */
#define pset_node0 (pset_nodes[0])
extern struct pset_node pset_nodes[MAX_AMP_CLUSTER_TYPES];
extern pset_node_t pset_node_for_pset_cluster_type(pset_cluster_type_t pset_cluster_type);

#else /* !__AMP__ */

/* Boot pset node and head of the pset node linked list */
extern struct pset_node pset_node0;

#endif /* !__AMP__ */
376
377 extern queue_head_t tasks, threads, corpse_tasks;
378 extern int tasks_count, terminated_tasks_count, threads_count, terminated_threads_count;
379 decl_lck_mtx_data(extern, tasks_threads_lock);
380 decl_lck_mtx_data(extern, tasks_corpse_lock);
381
382 /*
383 * The terminated tasks queue should only be inspected elsewhere by stackshot.
384 */
385 extern queue_head_t terminated_tasks;
386
387 extern queue_head_t terminated_threads;
388
389 /*
390 * Valid state transitions:
391 * not booted -> starting
392 * starting -> started not running
393 * starting -> started not waited
394 * started not running | not waited -> running
395 * running -> begin shutdown
396 * begin shutdown -> pending offline
397 * pending offline -> system sleep
398 * system sleep -> running
399 * pending offline -> cpu offline -> fully offline
400 * fully offline -> starting
401 */
402 __enum_closed_decl(processor_offline_state_t, uint8_t, {
403 /* Before it's ever booted */
404 PROCESSOR_OFFLINE_NOT_BOOTED = 0,
405
406 /* cpu_start is going to be sent */
407 PROCESSOR_OFFLINE_STARTING = 1,
408
409 /* cpu_start has been sent, but it hasn't started up yet */
410 PROCESSOR_OFFLINE_STARTED_NOT_RUNNING = 2,
411
412 /* processor has started up and began running, but nobody has wait-for-start-ed it */
413 PROCESSOR_OFFLINE_STARTED_NOT_WAITED = 3,
414
415 /* processor is running and someone confirmed this with wait for start, no state change operations are in flight */
416 PROCESSOR_OFFLINE_RUNNING = 4, /* This is the 'normal' state */
417
418 /* someone is working on asking to shut this processor down */
419 PROCESSOR_OFFLINE_BEGIN_SHUTDOWN = 5,
420
421 /* this processor has started itself on its way to offline */
422 PROCESSOR_OFFLINE_PENDING_OFFLINE = 6,
423
424 /* another processor has confirmed the processor has powered down */
425 PROCESSOR_OFFLINE_CPU_OFFLINE = 7,
426
427 /* cluster power has been disabled for this processor if it's going to be */
428 PROCESSOR_OFFLINE_FULLY_OFFLINE = 8, /* This is the finished powering down state */
429
430 /* This processor is the boot processor, and it's in the final system sleep */
431 PROCESSOR_OFFLINE_FINAL_SYSTEM_SLEEP = 9,
432
433 PROCESSOR_OFFLINE_MAX = 10,
434 });
435
436 /* Locked under the sched_available_cores_lock */
437 extern cpumap_t processor_offline_state_map[PROCESSOR_OFFLINE_MAX];
438
439
440 struct processor {
441 processor_state_t state; /* See above */
442 #if CONFIG_SCHED_SMT
443 bool is_SMT;
444 bool current_is_NO_SMT; /* cached TH_SFLAG_NO_SMT of current thread */
445 #endif /* CONFIG_SCHED_SMT */
446 bool is_recommended;
447 bool current_is_bound; /* current thread is bound to this processor */
448 bool current_is_eagerpreempt;/* current thread is TH_SFLAG_EAGERPREEMPT */
449 bool pending_nonurgent_preemption; /* RUNNING_TIMER_PREEMPT is armed */
450 struct thread *active_thread; /* thread running on processor */
451 struct thread *idle_thread; /* this processor's idle thread. */
452 struct thread *startup_thread;
453
454 processor_set_t processor_set; /* assigned set */
455
456 /*
457 * XXX All current_* fields should be grouped together, as they're
458 * updated at the same time.
459 */
460 int current_pri; /* priority of current thread */
461 sfi_class_id_t current_sfi_class; /* SFI class of current thread */
462 perfcontrol_class_t current_perfctl_class; /* Perfcontrol class for current thread */
463 /*
464 * The cluster type recommended for the current thread, used by AMP scheduler
465 */
466 pset_cluster_type_t current_recommended_pset_type;
467 thread_urgency_t current_urgency; /* cached urgency of current thread */
468
469 #if CONFIG_THREAD_GROUPS
470 struct thread_group *current_thread_group; /* thread_group of current thread */
471 #endif /* CONFIG_THREAD_GROUPS */
472 int starting_pri; /* priority of current thread as it was when scheduled */
473 int cpu_id; /* platform numeric id */
474
475 uint64_t quantum_end; /* time when current quantum ends */
476 uint64_t last_dispatch; /* time of last dispatch */
477
478 #if KPERF
479 uint64_t kperf_last_sample_time; /* time of last kperf sample */
480 #endif /* KPERF */
481
482 uint64_t deadline; /* for next realtime thread */
483 bool first_timeslice; /* has the quantum expired since context switch */
484
485 bool must_idle; /* Needs to be forced idle as next selected thread is allowed on this processor */
486 bool next_idle_short; /* Expecting a response IPI soon, so the next idle period is likely very brief */
487
488 #if !SCHED_TEST_HARNESS
489 bool running_timers_active; /* whether the running timers should fire */
490 struct timer_call running_timers[RUNNING_TIMER_MAX];
491 #endif /* !SCHED_TEST_HARNESS */
492
493 struct run_queue runq; /* runq for this processor */
494
495 #if !SCHED_TEST_HARNESS
496 struct recount_processor pr_recount;
497 #endif /* !SCHED_TEST_HARNESS */
498
499 #if CONFIG_SCHED_SMT
500 /*
501 * Pointer to primary processor for secondary SMT processors, or a
502 * pointer to ourselves for primaries or non-SMT.
503 */
504 processor_t processor_primary;
505 processor_t processor_secondary;
506 #endif /* CONFIG_SCHED_SMT */
507 struct ipc_port *processor_self; /* port for operations */
508
509 processor_t processor_list; /* all existing processors */
510
511 uint64_t timer_call_ttd; /* current timer call time-to-deadline */
512 processor_reason_t last_startup_reason;
513 processor_reason_t last_shutdown_reason;
514 processor_reason_t last_recommend_reason;
515 processor_reason_t last_derecommend_reason;
516
517 /* locked by processor_start_state_lock */
518 bool processor_instartup; /* between dostartup and up */
519
520 /* Locked by the processor_updown_lock */
521 bool processor_booted; /* Has gone through processor_boot */
522
523 /* Locked by sched_available_cores_lock */
524 bool shutdown_temporary; /* Shutdown should be transparent to user - don't update CPU counts */
525 bool processor_online; /* between mark-online and mark-offline, tracked in sched_online_processors */
526
527 bool processor_inshutdown; /* is the processor between processor_shutdown and processor_startup */
528 processor_offline_state_t processor_offline_state;
529
530 #if CONFIG_SCHED_EDGE
531 _Atomic int stir_the_pot_inbox_cpu; /* ID of P-core available to be preempted for stir-the-pot */
532 #endif /* CONFIG_SCHED_EDGE */
533 };
534
535 extern bool sched_all_cpus_offline(void);
536 extern void sched_assert_not_last_online_cpu(int cpu_id);
537
538 extern processor_t processor_list;
539 decl_simple_lock_data(extern, processor_list_lock);
540
541 decl_simple_lock_data(extern, processor_start_state_lock);
542
543 /*
544 * Maximum number of CPUs supported by the scheduler. bits.h bitmap macros
545 * need to be used to support greater than 64.
546 */
547 #define MAX_SCHED_CPUS 64
548 extern processor_t __single processor_array[MAX_SCHED_CPUS]; /* array indexed by cpuid */
549 extern processor_set_t __single pset_array[MAX_PSETS]; /* array indexed by pset_id */
550
551 extern uint32_t processor_avail_count;
552 extern uint32_t processor_avail_count_user;
553 #if CONFIG_SCHED_SMT
554 extern uint32_t primary_processor_avail_count_user;
555 #endif /* CONFIG_SCHED_SMT */
556
/*
 * Iteration helpers. Each macro declares its own loop variable and expands
 * its map/node argument once per iteration, so pass side-effect-free
 * expressions. Macro arguments are parenthesized uniformly wherever they are
 * used as expressions.
 */

/* Visit every CPU id set in cpumap, in the order given by lsb_first()/lsb_next(). */
#define cpumap_foreach(cpu_id, cpumap) \
	for (int cpu_id = lsb_first((cpumap)); \
	    (cpu_id) >= 0; \
	    cpu_id = lsb_next((cpumap), (cpu_id)))

/* Walk the pset node list starting at pset_node0, following node_list until NULL. */
#define foreach_node(node) \
	for (pset_node_t node = &pset_node0; (node) != NULL; node = (node)->node_list)

/* Visit every pset id set in (node)->pset_map. */
#define foreach_pset_id(pset_id, node) \
	for (int pset_id = lsb_first((node)->pset_map); \
	    (pset_id) >= 0; \
	    pset_id = lsb_next((node)->pset_map, (pset_id)))
569
570 cpumap_t pset_available_cpumap(processor_set_t pset);
571
572 /*
573 * All of the operations on a processor that change the processor count
574 * published to userspace and kernel.
575 */
576 __enum_closed_decl(processor_mode_t, uint8_t, {
577 PCM_RECOMMENDED = 0, /* processor->is_recommended */
578 PCM_TEMPORARY = 1, /* processor->shutdown_temporary */
579 PCM_ONLINE = 2, /* processor->processor_online */
580 });
581
582 extern void sched_processor_change_mode_locked(processor_t processor, processor_mode_t pcm_mode, bool value);
583
584 extern processor_t current_processor(void);
585
586 #if !SCHED_TEST_HARNESS
587
588 #define master_processor PERCPU_GET_MASTER(processor)
589 PERCPU_DECL(struct processor, processor);
590
591 /* Lock macros, always acquired and released with interrupts disabled (splsched()) */
592
593 extern lck_grp_t pset_lck_grp;
594
#if defined(SCHED_PSET_TLOCK)
/* sched_lock is a ticket lock */
#define pset_lock_init(p)       lck_ticket_init(&(p)->sched_lock, &pset_lck_grp)
#define pset_lock(p)            lck_ticket_lock(&(p)->sched_lock, &pset_lck_grp)
#define pset_unlock(p)          lck_ticket_unlock(&(p)->sched_lock)
#define pset_assert_locked(p)   lck_ticket_assert_owned(&(p)->sched_lock)
#else /* !SCHED_PSET_TLOCK */
/* sched_lock is a spin lock */
#define pset_lock_init(p)       lck_spin_init(&(p)->sched_lock, &pset_lck_grp, NULL)
#define pset_lock(p)            lck_spin_lock_grp(&(p)->sched_lock, &pset_lck_grp)
#define pset_unlock(p)          lck_spin_unlock(&(p)->sched_lock)
#define pset_assert_locked(p)   LCK_SPIN_ASSERT(&(p)->sched_lock, LCK_ASSERT_OWNED)
#endif /* SCHED_PSET_TLOCK */
606
607 inline static processor_set_t
change_locked_pset(processor_set_t current_pset,processor_set_t new_pset)608 change_locked_pset(processor_set_t current_pset, processor_set_t new_pset)
609 {
610 if (current_pset != new_pset) {
611 pset_unlock(current_pset);
612 pset_lock(new_pset);
613 }
614
615 return new_pset;
616 }
617
618 extern lck_spin_t pset_node_lock;
619
620 #endif /* !SCHED_TEST_HARNESS */
621
622 extern void processor_bootstrap(void);
623
624 extern void processor_init(
625 processor_t processor,
626 int cpu_id,
627 processor_set_t processor_set);
628
629 #if CONFIG_SCHED_SMT
630 extern void processor_set_primary(
631 processor_t processor,
632 processor_t primary);
633 #endif /* CONFIG_SCHED_SMT */
634
635 extern void
636 processor_update_offline_state(processor_t processor, processor_offline_state_t new_state);
637 extern void
638 processor_update_offline_state_locked(processor_t processor, processor_offline_state_t new_state);
639
640 extern void processor_doshutdown(
641 processor_t processor,
642 bool is_final_system_sleep);
643
644 __enum_closed_decl(processor_start_kind_t, uint8_t, {
645 PROCESSOR_FIRST_BOOT = 0,
646 PROCESSOR_BEFORE_ENTERING_SLEEP = 1,
647 PROCESSOR_WAKE_FROM_SLEEP = 2,
648 PROCESSOR_CLUSTER_POWERDOWN_SUSPEND = 3,
649 PROCESSOR_CLUSTER_POWERDOWN_RESUME = 4,
650 PROCESSOR_POWERED_CORES_CHANGE = 5,
651 });
652
653 extern void processor_wait_for_start(
654 processor_t processor,
655 processor_start_kind_t start_kind);
656
657 extern kern_return_t processor_start_from_user(
658 processor_t processor);
659 extern kern_return_t processor_start_from_kext(
660 processor_t processor);
661 extern kern_return_t processor_exit_from_kext(
662 processor_t processor);
663
664
665 extern void processor_start_reason(
666 processor_t processor,
667 processor_reason_t reason);
668 extern void processor_exit_reason(
669 processor_t processor,
670 processor_reason_t reason,
671 bool is_system_sleep);
672
673 extern kern_return_t sched_processor_exit_user(processor_t processor);
674 extern kern_return_t sched_processor_start_user(processor_t processor);
675
676 extern bool sched_mark_processor_online(processor_t processor, processor_reason_t reason);
677 extern void sched_mark_processor_offline(processor_t processor, bool is_final_system_sleep);
678
679 #if !SCHED_TEST_HARNESS
680
681 extern lck_mtx_t cluster_powerdown_lock;
682 extern lck_mtx_t processor_updown_lock;
683
684 extern bool sched_is_in_sleep(void);
685 extern bool sched_is_cpu_init_completed(void);
686
687 extern void processor_queue_shutdown(
688 processor_t processor);
689
690 extern processor_set_t processor_pset(
691 processor_t processor);
692
693 extern pset_node_t pset_node_root(void);
694
695 extern processor_set_t pset_create(
696 cluster_type_t cluster_type,
697 uint32_t pset_cluster_id,
698 int pset_id);
699
700 extern void pset_init(
701 processor_set_t pset,
702 pset_node_t node);
703
704 extern processor_set_t pset_find(
705 uint32_t cluster_id,
706 processor_set_t default_pset);
707
708 extern kern_return_t processor_info_count(
709 processor_flavor_t flavor,
710 mach_msg_type_number_t *count);
711
712 extern void processor_cpu_load_info(
713 processor_t processor,
714 natural_t ticks[static CPU_STATE_MAX]);
715
716 extern void machine_run_count(
717 uint32_t count);
718
719 #if defined(__x86_64__)
720 extern processor_t machine_choose_processor(
721 processor_set_t pset,
722 processor_t processor);
723 #endif /* __x86_64__ */
724
725 #endif /* !SCHED_TEST_HARNESS */
726
727 inline static processor_set_t
next_pset(processor_set_t pset)728 next_pset(processor_set_t pset)
729 {
730 pset_map_t map = pset->node->pset_map;
731
732 int pset_id = lsb_next(map, pset->pset_id);
733 if (pset_id == -1) {
734 pset_id = lsb_first(map);
735 }
736
737 return pset_array[pset_id];
738 }
739
740 #define PSET_THING_TASK 0
741 #define PSET_THING_THREAD 1
742
743 extern pset_cluster_type_t recommended_pset_type(
744 thread_t thread);
745
746 extern void processor_state_update_idle(
747 processor_t processor);
748
749 extern void processor_state_update_from_thread(
750 processor_t processor,
751 thread_t thread,
752 boolean_t pset_lock_held);
753
754 #define PSET_LOAD_NUMERATOR_SHIFT 16
755 #define PSET_LOAD_FRACTIONAL_SHIFT 4
756
757 #if CONFIG_SCHED_EDGE
758
759 extern cluster_type_t pset_type_for_id(uint32_t cluster_id);
760 extern uint64_t sched_pset_cluster_shared_rsrc_load(processor_set_t pset, cluster_shared_rsrc_type_t shared_rsrc_type);
761
762 /*
763 * The Edge scheduler uses average scheduling latency as the metric for making
764 * thread migration decisions. One component of avg scheduling latency is the load
765 * average on the cluster.
766 *
767 * Load Average Fixed Point Arithmetic
768 *
769 * The load average is maintained as a 24.8 fixed point arithmetic value for precision.
770 * When multiplied by the average execution time, it needs to be rounded up (based on
771 * the most significant bit of the fractional part) for better accuracy. After rounding
772 * up, the whole number part of the value is used as the actual load value for
773 * migrate/steal decisions.
774 */
775 #define SCHED_PSET_LOAD_EWMA_FRACTION_BITS 8
776 #define SCHED_PSET_LOAD_EWMA_ROUND_BIT (1 << (SCHED_PSET_LOAD_EWMA_FRACTION_BITS - 1))
777 #define SCHED_PSET_LOAD_EWMA_FRACTION_MASK ((1 << SCHED_PSET_LOAD_EWMA_FRACTION_BITS) - 1)
778
779 inline static int
sched_get_pset_load_average(processor_set_t pset,sched_bucket_t sched_bucket)780 sched_get_pset_load_average(processor_set_t pset, sched_bucket_t sched_bucket)
781 {
782 uint64_t load_average = os_atomic_load(&pset->pset_load_average[sched_bucket], relaxed);
783 uint64_t avg_execution_time = os_atomic_load(&pset->pset_execution_time[sched_bucket].pset_avg_thread_execution_time, relaxed);
784 /*
785 * Since a load average of 0 indicates an idle cluster, don't allow an average
786 * execution time less than 1us to cause a cluster to appear idle.
787 */
788 avg_execution_time = MAX(avg_execution_time, 1ULL);
789 return (int)(((load_average + SCHED_PSET_LOAD_EWMA_ROUND_BIT) >> SCHED_PSET_LOAD_EWMA_FRACTION_BITS) * avg_execution_time);
790 }
791
792 #else /* CONFIG_SCHED_EDGE */
793 inline static int
sched_get_pset_load_average(processor_set_t pset,__unused sched_bucket_t sched_bucket)794 sched_get_pset_load_average(processor_set_t pset, __unused sched_bucket_t sched_bucket)
795 {
796 return (int)pset->load_average >> (PSET_LOAD_NUMERATOR_SHIFT - PSET_LOAD_FRACTIONAL_SHIFT);
797 }
798 #endif /* CONFIG_SCHED_EDGE */
799
800 extern void sched_update_pset_load_average(processor_set_t pset, uint64_t curtime);
801 extern void sched_update_pset_avg_execution_time(processor_set_t pset, uint64_t delta, uint64_t curtime, sched_bucket_t sched_bucket);
802
803 inline static void
pset_update_processor_state(processor_set_t pset,processor_t processor,uint new_state)804 pset_update_processor_state(processor_set_t pset, processor_t processor, uint new_state)
805 {
806 pset_assert_locked(pset);
807
808 uint old_state = processor->state;
809 uint cpuid = (uint)processor->cpu_id;
810
811 assert(processor->processor_set == pset);
812 assert(bit_test(pset->cpu_bitmask, cpuid));
813
814 assert(old_state < PROCESSOR_STATE_LEN);
815 assert(new_state < PROCESSOR_STATE_LEN);
816
817 processor->state = new_state;
818
819 bit_clear(pset->cpu_state_map[old_state], cpuid);
820 bit_set(pset->cpu_state_map[new_state], cpuid);
821
822 if (bit_test(pset->cpu_available_map, cpuid) && (new_state < PROCESSOR_IDLE)) {
823 /* No longer available for scheduling */
824 bit_clear(pset->cpu_available_map, cpuid);
825 } else if (!bit_test(pset->cpu_available_map, cpuid) && (new_state >= PROCESSOR_IDLE)) {
826 /* Newly available for scheduling */
827 bit_set(pset->cpu_available_map, cpuid);
828 }
829
830 if ((old_state == PROCESSOR_RUNNING) || (new_state == PROCESSOR_RUNNING)) {
831 sched_update_pset_load_average(pset, 0);
832 if (new_state == PROCESSOR_RUNNING) {
833 assert(processor == current_processor());
834 }
835 }
836 if ((old_state == PROCESSOR_IDLE) || (new_state == PROCESSOR_IDLE)) {
837 if (new_state == PROCESSOR_IDLE) {
838 bit_clear(pset->realtime_map, cpuid);
839 }
840
841 pset_node_t node = pset->node;
842
843 if (bit_count(node->pset_map) == 1) {
844 /* Node has only a single pset, so skip node pset map updates */
845 return;
846 }
847
848 if (new_state == PROCESSOR_IDLE) {
849 #if CONFIG_SCHED_SMT
850 if (processor->processor_primary == processor) {
851 if (!bit_test(atomic_load(&node->pset_non_rt_primary_map), pset->pset_id)) {
852 atomic_bit_set(&node->pset_non_rt_primary_map, pset->pset_id, memory_order_relaxed);
853 }
854 }
855 #endif /* CONFIG_SCHED_SMT */
856 if (!bit_test(atomic_load(&node->pset_non_rt_map), pset->pset_id)) {
857 atomic_bit_set(&node->pset_non_rt_map, pset->pset_id, memory_order_relaxed);
858 }
859 if (!bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
860 atomic_bit_set(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
861 }
862 } else {
863 cpumap_t idle_map = pset->cpu_state_map[PROCESSOR_IDLE];
864 if (idle_map == 0) {
865 /* No more IDLE CPUs */
866 if (bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
867 atomic_bit_clear(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
868 }
869 }
870 }
871 }
872 }
873
/*
 * Extern declaration of the lock sched_available_cores_lock; its storage is
 * defined elsewhere.
 * NOTE(review): presumably protects the scheduler's available-cores state --
 * confirm at the definition site.
 */
decl_simple_lock_data(extern, sched_available_cores_lock);
875
876 #endif /* defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS */
877
878 #ifdef KERNEL_PRIVATE
879
/* Private KPI */
/*
 * Look up the processor_t for a CPU number.
 * NOTE(review): behavior for an out-of-range `cpu` is not visible from this
 * header -- confirm against the implementation.
 */
extern processor_t cpu_to_processor(int cpu);
882
/*!
 * @function sched_enable_acc_rail
 * @abstract Enable shared voltage rail for a single ACC block.
 * @param die_id 0-based die number indicating which die the ACC is on.
 * @param die_cluster_id 0 for the first cluster on the die, 1 for the second, ...
 * @discussion Called from the PMGR driver. On systems where ANE and PACC
 *             share a voltage rail, the PMGR driver calls into XNU prior to
 *             accessing the ANE hardware, to ensure that the ANE block
 *             is powered. This call blocks until the rail has been enabled,
 *             and it must be called from a schedulable context.
 *
 *             This should not be called on systems without a shared ANE/ACC rail.
 *             The caller is responsible for knowing which die/cluster needs to
 *             be forced on, in order to allow access to the ANE block.
 */
extern void sched_enable_acc_rail(unsigned int die_id, unsigned int die_cluster_id);
899
/*!
 * @function sched_disable_acc_rail
 * @abstract Disable voltage rail for a single ACC block.
 * @param die_id 0-based die number indicating which die the ACC is on.
 * @param die_cluster_id 0 for the first cluster on the die, 1 for the second, ...
 * @discussion Tells XNU that the shared ACC voltage rail can be safely disabled.
 *             This may or may not cut voltage immediately. Must be called from a
 *             schedulable context.
 *
 *             See sched_enable_acc_rail for the call that enables the rail.
 */
extern void sched_disable_acc_rail(unsigned int die_id, unsigned int die_cluster_id);
910
/*
 * Private KPI with CLPC
 *
 * Update the scheduler with the set of cores that should be used to dispatch new threads.
 * Non-recommended cores can still be used to field interrupts or run bound threads.
 * This should be called with interrupts enabled and no scheduler locks held.
 */
/* All-ones 64-bit masks: every bit set. */
#define ALL_CORES_RECOMMENDED (~(uint64_t)0)
#define ALL_CORES_POWERED (~(uint64_t)0)

/*
 * Two entry points for updating the recommended-cores mask: the first takes a
 * 32-bit mask only; the second takes a 64-bit mask plus a reason and flags.
 * NOTE(review): valid `flags` values are not visible from this header.
 */
extern void sched_perfcontrol_update_recommended_cores(uint32_t recommended_cores);
extern void sched_perfcontrol_update_recommended_cores_reason(uint64_t recommended_cores, processor_reason_t reason, uint32_t flags);

/* Request a change to the powered cores mask that CLPC wants. Does not block waiting for completion. */
extern void sched_perfcontrol_update_powered_cores(uint64_t powered_cores, processor_reason_t reason, uint32_t flags);

/* Reevaluate the thread placement decision on cpu_id and force a preemption if necessary. */
extern bool sched_perfcontrol_check_oncore_thread_preemption(uint64_t flags, int cpu_id);
929
930 #endif /* KERNEL_PRIVATE */
931
932 #ifdef XNU_KERNEL_PRIVATE
933
/*
 * Globals defined outside this header.
 * NOTE(review): meanings below are inferred from the names only -- confirm at
 * the definitions.
 *   support_bootcpu_shutdown - presumably whether the boot CPU may be shut down.
 *   enable_processor_exit    - presumably whether processor exit is permitted.
 *   processor_count          - presumably the number of processors known to the kernel.
 */
extern bool support_bootcpu_shutdown;
extern bool enable_processor_exit;
extern unsigned int processor_count;
937
/* SMT controls; only declared when CONFIG_SCHED_SMT is set. */
#if CONFIG_SCHED_SMT
/* NOTE(review): presumably a tunable selecting whether SMT is used -- confirm at the definition. */
extern int sched_enable_smt;

/* Takes the desired on/off setting and reports the outcome as a kern_return_t. */
extern kern_return_t enable_smt_processors(bool enable);
#endif /* CONFIG_SCHED_SMT */
943
/*
 * Sleep and cluster-powerdown hooks, implemented outside this header.
 *
 * The override/restore and suspend/resume functions come in pairs by name.
 * The *_from_user variants report their outcome as a kern_return_t, while the
 * plain suspend/resume variants return nothing.
 *
 * NOTE(review): whether suspend/resume calls nest, and what value
 * get_cluster_powerdown_user_suspended() returns, cannot be determined from
 * these declarations -- confirm against the implementation.
 */
extern void sched_override_available_cores_for_sleep(void);
extern void sched_restore_available_cores_after_sleep(void);
extern bool processor_should_kprintf(processor_t processor, bool starting);
extern void suspend_cluster_powerdown(void);
extern void resume_cluster_powerdown(void);
extern kern_return_t suspend_cluster_powerdown_from_user(void);
extern kern_return_t resume_cluster_powerdown_from_user(void);
extern int get_cluster_powerdown_user_suspended(void);
952
953 extern void processor_wake(
954 processor_t processor);
955 extern void processor_sleep(
956 processor_t processor);
957 extern void processor_boot(
958 processor_t processor);
959 extern kern_return_t processor_exit_from_user(
960 processor_t processor);
961
962 #endif /* XNU_KERNEL_PRIVATE */
963
964 __ASSUME_PTR_ABI_SINGLE_END __END_DECLS
965
966 #endif /* _KERN_PROCESSOR_H_ */
967