/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  [email protected]
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

/*
 *	processor.h:	Processor and processor-related definitions.
 */

#ifndef _KERN_PROCESSOR_H_
#define _KERN_PROCESSOR_H_

#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <kern/kern_types.h>

#include <sys/cdefs.h>

#if defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS
#include <kern/sched_urgency.h>
#include <mach/sfi_class.h>
#endif /* defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS */

#ifdef  MACH_KERNEL_PRIVATE
#include <mach/mach_types.h>
#include <kern/ast.h>
#include <kern/cpu_number.h>
#include <kern/smp.h>
#include <kern/simple_lock.h>
#include <kern/locks.h>
#include <kern/percpu.h>
#include <kern/queue.h>
#include <kern/recount.h>
#include <kern/sched.h>
#include <kern/timer.h>
#include <kern/sched_clutch.h>
#include <kern/timer_call.h>
#include <kern/assert.h>
#include <machine/limits.h>
#endif

__BEGIN_DECLS __ASSUME_PTR_ABI_SINGLE_BEGIN

#if defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS

/*
 *	Processor state is accessed by locking the scheduling lock
 *	for the assigned processor set.
 *
 *           --- PENDING_OFFLINE <
 *          /                     \
 *        _/                      \
 *  OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING
 *         \_________________^   ^ ^______/           /
 *                                \__________________/
 *
 *  The transitions from OFF_LINE to START and from IDLE to DISPATCHING
 *  are externally driven directives. However, each is paired with a
 *  handshake by the processor itself to indicate that it has completed
 *  a transition of indeterminate length (for example, the
 *  DISPATCHING -> RUNNING or START -> RUNNING transitions must occur
 *  on the processor itself).
 *
 *  The boot processor is a special case: it skips the START state,
 *  since it has already bootstrapped and is ready to context switch threads.
 *
 *  When a processor is in the DISPATCHING or RUNNING state, the current_pri,
 *  current_thmode, and deadline fields should be set, so that other
 *  processors can evaluate whether it is an appropriate candidate for preemption.
 */
#if defined(CONFIG_SCHED_DEFERRED_AST)
/*
 *           --- PENDING_OFFLINE <
 *          /                     \
 *        _/                      \
 *  OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING
 *         \_________________^   ^ ^______/ ^_____ /  /
 *                                \__________________/
 *
 *  A DISPATCHING processor may be put back into IDLE, if another
 *  processor determines that the target processor will have nothing to do
 *  upon reaching the RUNNING state.  This is racy, but if the target
 *  responds and becomes RUNNING, it will not break the processor state
 *  machine.
 *
 *  This change allows us to cancel an outstanding signal/AST on a processor
 *  (if such an operation is supported through hardware or software), and
 *  push the processor back into the IDLE state as a power optimization.
 */
#endif /* defined(CONFIG_SCHED_DEFERRED_AST) */

typedef enum {
	PROCESSOR_OFF_LINE        = 0,    /* Not booted or off-line */
	/* PROCESSOR_SHUTDOWN     = 1,    Going off-line, but schedulable. No longer used. */
	PROCESSOR_START           = 2,    /* Being started */
	PROCESSOR_PENDING_OFFLINE = 3,    /* Going off-line, not schedulable */
	PROCESSOR_IDLE            = 4,    /* Idle (available) */
	PROCESSOR_DISPATCHING     = 5,    /* Dispatching (idle -> active) */
	PROCESSOR_RUNNING         = 6,    /* Normal execution */
	PROCESSOR_STATE_LEN       = (PROCESSOR_RUNNING + 1)
} processor_state_t;
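
/*
 * Illustrative sketch (one reading of the diagram above, not scheduler
 * code): the pictured transitions encoded as a lookup table of the kind
 * a debug assertion might use. The boot-processor shortcut
 * OFF_LINE -> RUNNING and, under CONFIG_SCHED_DEFERRED_AST, the
 * DISPATCHING -> IDLE edge are included.
 */
#if 0 /* example only, not compiled */
static bool
processor_state_transition_pictured(processor_state_t old_state,
    processor_state_t new_state)
{
	static const uint64_t valid_next[PROCESSOR_STATE_LEN] = {
		[PROCESSOR_OFF_LINE]        = BIT(PROCESSOR_START) | BIT(PROCESSOR_RUNNING),
		[PROCESSOR_START]           = BIT(PROCESSOR_RUNNING),
		[PROCESSOR_PENDING_OFFLINE] = BIT(PROCESSOR_OFF_LINE),
		[PROCESSOR_IDLE]            = BIT(PROCESSOR_DISPATCHING) | BIT(PROCESSOR_RUNNING),
		[PROCESSOR_DISPATCHING]     = BIT(PROCESSOR_RUNNING) | BIT(PROCESSOR_IDLE),
		[PROCESSOR_RUNNING]         = BIT(PROCESSOR_IDLE) | BIT(PROCESSOR_PENDING_OFFLINE),
	};
	return bit_test(valid_next[old_state], new_state);
}
#endif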

typedef enum {
	PSET_SMP    = 0,
#if __AMP__
	PSET_AMP_E  = 1,
	PSET_AMP_P  = 2,
#endif /* __AMP__ */
} pset_cluster_type_t;

#if __AMP__

typedef enum {
	SCHED_PERFCTL_POLICY_DEFAULT,           /*  static policy: set at boot */
	SCHED_PERFCTL_POLICY_FOLLOW_GROUP,      /* dynamic policy: perfctl_class follows thread group across amp clusters */
	SCHED_PERFCTL_POLICY_RESTRICT_E,        /* dynamic policy: limits perfctl_class to amp e cluster */
} sched_perfctl_class_policy_t;

extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_util;
extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_bg;

#endif /* __AMP__ */

typedef bitmap_t cpumap_t;

#if __arm64__

extern pset_cluster_type_t cluster_type_to_pset_cluster_type(cluster_type_t cluster_type);
extern pset_node_t cluster_type_to_pset_node(cluster_type_t cluster_type);

/*
 * pset_execution_time_t
 *
 * The pset_execution_time_t type is used to maintain the average
 * execution time of threads on a pset. Since the avg. execution time is
 * updated from contexts where the pset lock is not held, it uses a
 * double-wide RMW loop to update these values atomically.
 */
typedef union {
	struct {
		uint64_t        pset_avg_thread_execution_time;
		uint64_t        pset_execution_time_last_update;
	};
	unsigned __int128       pset_execution_time_packed;
} pset_execution_time_t;
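
/*
 * Illustrative sketch of such a double-wide RMW update, mirroring the
 * os_atomic_rmw_loop() pattern; the real update logic lives in the
 * scheduler (see sched_update_pset_avg_execution_time() below), and the
 * parameter names and averaging here are made up for the example.
 */
#if 0 /* example only, not compiled */
static void
pset_execution_time_update_sketch(pset_execution_time_t *stats,
    uint64_t sample, uint64_t now)
{
	pset_execution_time_t old_value, new_value;
	/* Retry until both fields publish in a single 128-bit store. */
	os_atomic_rmw_loop(&stats->pset_execution_time_packed,
	    old_value.pset_execution_time_packed,
	    new_value.pset_execution_time_packed, relaxed, {
		/* Illustrative blend of the old average with the new sample. */
		new_value.pset_avg_thread_execution_time =
		    (old_value.pset_avg_thread_execution_time + sample) / 2;
		new_value.pset_execution_time_last_update = now;
	});
}
#endif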

#endif /* __arm64__ */

struct processor_set {
	int                     pset_id;
	int                     online_processor_count;
	int                     cpu_set_low, cpu_set_hi;
	int                     cpu_set_count;
	int                     last_chosen;

#if CONFIG_SCHED_EDGE
	uint64_t                pset_load_average[TH_BUCKET_SCHED_MAX];
	/*
	 * Count of threads running or enqueued on the cluster (not including
	 * threads enqueued in a processor-bound runq). Updated atomically per
	 * scheduling bucket, around the same time as pset_load_average.
	 */
	uint32_t                pset_runnable_depth[TH_BUCKET_SCHED_MAX];
#else /* !CONFIG_SCHED_EDGE */
	uint64_t                load_average;
#endif /* CONFIG_SCHED_EDGE */
	uint64_t                pset_load_last_update;
	cpumap_t                cpu_bitmask;
	cpumap_t                recommended_bitmask;
	cpumap_t                cpu_state_map[PROCESSOR_STATE_LEN];
#if CONFIG_SCHED_SMT
	cpumap_t                primary_map;
#endif /* CONFIG_SCHED_SMT */
	cpumap_t                realtime_map;
	cpumap_t                cpu_available_map;

#define SCHED_PSET_TLOCK (1)
#if     defined(SCHED_PSET_TLOCK)
/* TODO: reorder struct for temporal cache locality */
	__attribute__((aligned(128))) lck_ticket_t      sched_lock;
#else /* SCHED_PSET_TLOCK */
	__attribute__((aligned(128))) lck_spin_t        sched_lock;     /* lock for above */
#endif /* SCHED_PSET_TLOCK */

	struct run_queue        pset_runq;      /* runq for this processor set, used by the amp and dualq scheduler policies */
	struct rt_queue         rt_runq;        /* realtime runq for this processor set */
	uint64_t                stealable_rt_threads_earliest_deadline; /* if this pset has stealable RT threads, the earliest deadline; else UINT64_MAX */
#if CONFIG_SCHED_CLUTCH
	struct sched_clutch_root pset_clutch_root; /* clutch hierarchy root */
#endif /* CONFIG_SCHED_CLUTCH */

	/* CPUs that have been sent an unacknowledged remote AST for scheduling purposes */
	cpumap_t                pending_AST_URGENT_cpu_mask;
	cpumap_t                pending_AST_PREEMPT_cpu_mask;
#if defined(CONFIG_SCHED_DEFERRED_AST)
	/*
	 * A separate mask, for ASTs that we may be able to cancel.  This is dependent on
	 * some level of support for requesting an AST on a processor, and then quashing
	 * that request later.
	 *
	 * The purpose of this field (and the associated codepaths) is to infer when we
	 * no longer need a processor that is DISPATCHING to come up, and to prevent it
	 * from coming out of IDLE if possible.  This should serve to decrease the number
	 * of spurious ASTs in the system, and let processors spend longer periods in
	 * IDLE.
	 */
	cpumap_t                pending_deferred_AST_cpu_mask;
#endif /* defined(CONFIG_SCHED_DEFERRED_AST) */
	cpumap_t                pending_spill_cpu_mask;
	cpumap_t                rt_pending_spill_cpu_mask;

	struct ipc_port *       pset_self;              /* port for operations */
	struct ipc_port *       pset_name_self;         /* port for information */

	processor_set_t         pset_list;              /* chain of associated psets */
	pset_node_t             node;
	uint32_t                pset_cluster_id;

	/*
	 * Currently the scheduler uses a mix of pset_cluster_type_t & cluster_type_t
	 * for recommendations etc. It might be useful to unify these as a single type.
	 */
	pset_cluster_type_t     pset_cluster_type;
	/*
	 * For scheduler use only:
	 * The type that this pset will be treated like for scheduling purposes
	 */
	cluster_type_t          pset_type;

#if CONFIG_SCHED_EDGE
	cpumap_t                cpu_running_foreign;
	cpumap_t                cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_COUNT];
	sched_bucket_t          cpu_running_buckets[MAX_CPUS];

	bitmap_t                foreign_psets[BITMAP_LEN(MAX_PSETS)];
	bitmap_t                native_psets[BITMAP_LEN(MAX_PSETS)];
	bitmap_t                local_psets[BITMAP_LEN(MAX_PSETS)];
	bitmap_t                remote_psets[BITMAP_LEN(MAX_PSETS)];
	sched_clutch_edge       sched_edges[MAX_PSETS];
	pset_execution_time_t   pset_execution_time[TH_BUCKET_SCHED_MAX];
	uint64_t                pset_cluster_shared_rsrc_load[CLUSTER_SHARED_RSRC_TYPE_COUNT];
#endif /* CONFIG_SCHED_EDGE */
	cpumap_t                perfcontrol_cpu_preferred_bitmask;
	cpumap_t                perfcontrol_cpu_migration_bitmask;
	int                     cpu_preferred_last_chosen;
#if CONFIG_SCHED_SMT
	bool                    is_SMT;                 /* pset contains SMT processors */
#endif /* CONFIG_SCHED_SMT */
};

/* Boot (and default) pset */
extern struct processor_set     pset0;

typedef bitmap_t pset_map_t;

struct pset_node {
	processor_set_t         psets;                  /* list of associated psets */

	pset_node_t             nodes;                  /* list of associated subnodes */
	pset_node_t             node_list;              /* chain of associated nodes */

	pset_cluster_type_t     pset_cluster_type;      /* Same as the type of all psets in this node */

	pset_map_t              pset_map;               /* map of associated psets */
	_Atomic pset_map_t      pset_idle_map;          /* psets with at least one IDLE CPU */
	_Atomic pset_map_t      pset_non_rt_map;        /* psets with at least one available CPU not running a realtime thread */
#if CONFIG_SCHED_SMT
	_Atomic pset_map_t      pset_non_rt_primary_map;/* psets with at least one available primary CPU not running a realtime thread */
#endif /* CONFIG_SCHED_SMT */
	_Atomic pset_map_t      pset_recommended_map;   /* psets with at least one recommended processor */
};

/* Boot pset node and head of the pset node linked list */
extern struct pset_node pset_node0;

#if __AMP__
extern pset_node_t ecore_node;
extern pset_node_t pcore_node;
#endif /* __AMP__ */

extern queue_head_t tasks, threads, corpse_tasks;
extern int tasks_count, terminated_tasks_count, threads_count, terminated_threads_count;
decl_lck_mtx_data(extern, tasks_threads_lock);
decl_lck_mtx_data(extern, tasks_corpse_lock);

/*
 * The terminated tasks queue should only be inspected elsewhere by stackshot.
 */
extern queue_head_t terminated_tasks;

extern queue_head_t terminated_threads;

/*
 * Valid state transitions:
 * not booted -> starting
 * starting -> started not running
 * starting -> started not waited
 * started not running | not waited -> running
 * running -> begin shutdown
 * begin shutdown -> pending offline
 * pending offline -> system sleep
 * system sleep -> running
 * pending offline -> cpu offline -> fully offline
 * fully offline -> starting
 */
__enum_closed_decl(processor_offline_state_t, uint8_t, {
	/* Before it's ever booted */
	PROCESSOR_OFFLINE_NOT_BOOTED            = 0,

	/* cpu_start is going to be sent */
	PROCESSOR_OFFLINE_STARTING              = 1,

	/* cpu_start has been sent, but it hasn't started up yet */
	PROCESSOR_OFFLINE_STARTED_NOT_RUNNING   = 2,

	/* processor has started up and begun running, but nobody has wait-for-start-ed it */
	PROCESSOR_OFFLINE_STARTED_NOT_WAITED    = 3,

	/* processor is running and someone confirmed this with wait-for-start; no state change operations are in flight */
	PROCESSOR_OFFLINE_RUNNING               = 4,  /* This is the 'normal' state */

	/* someone is working on asking to shut this processor down */
	PROCESSOR_OFFLINE_BEGIN_SHUTDOWN        = 5,

	/* this processor has started itself on its way to offline */
	PROCESSOR_OFFLINE_PENDING_OFFLINE       = 6,

	/* another processor has confirmed that this processor has powered down */
	PROCESSOR_OFFLINE_CPU_OFFLINE           = 7,

	/* cluster power has been disabled for this processor, if it's going to be */
	PROCESSOR_OFFLINE_FULLY_OFFLINE         = 8, /* This is the finished powering down state */

	/* This processor is the boot processor, and it's in the final system sleep */
	PROCESSOR_OFFLINE_FINAL_SYSTEM_SLEEP    = 9,

	PROCESSOR_OFFLINE_MAX                   = 10,
});

/* Locked under the sched_available_cores_lock */
extern cpumap_t processor_offline_state_map[PROCESSOR_OFFLINE_MAX];
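
/*
 * Illustrative use of the map (sched_available_cores_lock assumed held):
 * count the CPUs that have finished powering down.
 */
#if 0 /* example only, not compiled */
int fully_offline_count =
    bit_count(processor_offline_state_map[PROCESSOR_OFFLINE_FULLY_OFFLINE]);
#endif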


struct processor {
	processor_state_t       state;                  /* See above */
#if CONFIG_SCHED_SMT
	bool                    is_SMT;
	bool                    current_is_NO_SMT;      /* cached TH_SFLAG_NO_SMT of current thread */
#endif /* CONFIG_SCHED_SMT */
	bool                    is_recommended;
	bool                    current_is_bound;       /* current thread is bound to this processor */
	bool                    current_is_eagerpreempt;/* current thread is TH_SFLAG_EAGERPREEMPT */
	bool                    pending_nonurgent_preemption; /* RUNNING_TIMER_PREEMPT is armed */
	struct thread          *active_thread;          /* thread running on processor */
	struct thread          *idle_thread;            /* this processor's idle thread. */
	struct thread          *startup_thread;

	processor_set_t         processor_set;  /* assigned set */

	/*
	 * XXX All current_* fields should be grouped together, as they're
	 * updated at the same time.
	 */
	int                     current_pri;            /* priority of current thread */
	sfi_class_id_t          current_sfi_class;      /* SFI class of current thread */
	perfcontrol_class_t     current_perfctl_class;  /* Perfcontrol class for current thread */
	/*
	 * The cluster type recommended for the current thread, used by the AMP scheduler
	 */
	pset_cluster_type_t     current_recommended_pset_type;
	thread_urgency_t        current_urgency;        /* cached urgency of current thread */

#if CONFIG_THREAD_GROUPS
	struct thread_group    *current_thread_group;   /* thread_group of current thread */
#endif /* CONFIG_THREAD_GROUPS */
	int                     starting_pri;           /* priority of current thread as it was when scheduled */
	int                     cpu_id;                 /* platform numeric id */

	uint64_t                quantum_end;            /* time when current quantum ends */
	uint64_t                last_dispatch;          /* time of last dispatch */

#if KPERF
	uint64_t                kperf_last_sample_time; /* time of last kperf sample */
#endif /* KPERF */

	uint64_t                deadline;               /* for next realtime thread */
	bool                    first_timeslice;        /* has the quantum expired since context switch */

	bool                    must_idle;              /* Needs to be forced idle as next selected thread is allowed on this processor */
	bool                    next_idle_short;        /* Expecting a response IPI soon, so the next idle period is likely very brief */

#if !SCHED_TEST_HARNESS
	bool                    running_timers_active;  /* whether the running timers should fire */
	struct timer_call       running_timers[RUNNING_TIMER_MAX];
#endif /* !SCHED_TEST_HARNESS */

	struct run_queue        runq;                   /* runq for this processor */

#if !SCHED_TEST_HARNESS
	struct recount_processor pr_recount;
#endif /* !SCHED_TEST_HARNESS */

#if CONFIG_SCHED_SMT
	/*
	 * Pointer to primary processor for secondary SMT processors, or a
	 * pointer to ourselves for primaries or non-SMT.
	 */
	processor_t             processor_primary;
	processor_t             processor_secondary;
#endif /* CONFIG_SCHED_SMT */
	struct ipc_port        *processor_self;         /* port for operations */

	processor_t             processor_list;         /* all existing processors */

	uint64_t                timer_call_ttd;         /* current timer call time-to-deadline */
	processor_reason_t      last_startup_reason;
	processor_reason_t      last_shutdown_reason;
	processor_reason_t      last_recommend_reason;
	processor_reason_t      last_derecommend_reason;

	/* Locked by processor_start_state_lock */
	bool                    processor_instartup;    /* between dostartup and up */

	/* Locked by the processor_updown_lock */
	bool                    processor_booted;       /* Has gone through processor_boot */

	/* Locked by sched_available_cores_lock */
	bool                    shutdown_temporary;     /* Shutdown should be transparent to user - don't update CPU counts */
	bool                    processor_online;       /* between mark-online and mark-offline, tracked in sched_online_processors */

	bool                    processor_inshutdown;   /* is the processor between processor_shutdown and processor_startup */
	processor_offline_state_t processor_offline_state;
};

extern bool sched_all_cpus_offline(void);
extern void sched_assert_not_last_online_cpu(int cpu_id);

extern processor_t processor_list;
decl_simple_lock_data(extern, processor_list_lock);

decl_simple_lock_data(extern, processor_start_state_lock);

/*
 * Maximum number of CPUs supported by the scheduler.  The bitmap macros
 * in bits.h need to be used to support more than 64.
 */
#define MAX_SCHED_CPUS          64
extern processor_t     __single processor_array[MAX_SCHED_CPUS];    /* array indexed by cpuid */
extern processor_set_t __single pset_array[MAX_PSETS];              /* array indexed by pset_id */
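
/*
 * Illustrative sketch: cpumap_t is a single 64-bit bitmap_t, so per-CPU
 * walks can use the same lsb_first()/lsb_next() helpers that next_pset()
 * below uses for pset maps. Hypothetical loop visiting every IDLE CPU in
 * a pset (pset lock assumed held):
 */
#if 0 /* example only, not compiled */
cpumap_t idle_map = pset->cpu_state_map[PROCESSOR_IDLE];
for (int cpuid = lsb_first(idle_map); cpuid >= 0;
    cpuid = lsb_next(idle_map, cpuid)) {
	processor_t idle_processor = processor_array[cpuid];
	/* ... examine idle_processor ... */
}
#endif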

extern uint32_t                 processor_avail_count;
extern uint32_t                 processor_avail_count_user;
#if CONFIG_SCHED_SMT
extern uint32_t                 primary_processor_avail_count_user;
#endif /* CONFIG_SCHED_SMT */

/*
 * All of the operations on a processor that change the processor count
 * published to userspace and kernel.
 */
__enum_closed_decl(processor_mode_t, uint8_t, {
	PCM_RECOMMENDED = 0, /* processor->is_recommended */
	PCM_TEMPORARY   = 1, /* processor->shutdown_temporary */
	PCM_ONLINE      = 2, /* processor->processor_online */
});

extern void sched_processor_change_mode_locked(processor_t processor, processor_mode_t pcm_mode, bool value);
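
/*
 * Illustrative call (hypothetical caller, sched_available_cores_lock held
 * per the _locked suffix): clear a processor's online bit so the published
 * CPU counts stop including it.
 */
#if 0 /* example only, not compiled */
sched_processor_change_mode_locked(processor, PCM_ONLINE, false);
#endif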

extern processor_t      current_processor(void);

#if !SCHED_TEST_HARNESS

#define master_processor PERCPU_GET_MASTER(processor)
PERCPU_DECL(struct processor, processor);

/* Lock macros, always acquired and released with interrupts disabled (splsched()) */

extern lck_grp_t pset_lck_grp;

#if defined(SCHED_PSET_TLOCK)
#define pset_lock_init(p)               lck_ticket_init(&(p)->sched_lock, &pset_lck_grp)
#define pset_lock(p)                    lck_ticket_lock(&(p)->sched_lock, &pset_lck_grp)
#define pset_unlock(p)                  lck_ticket_unlock(&(p)->sched_lock)
#define pset_assert_locked(p)           lck_ticket_assert_owned(&(p)->sched_lock)
#else /* SCHED_PSET_TLOCK */
#define pset_lock_init(p)               lck_spin_init(&(p)->sched_lock, &pset_lck_grp, NULL)
#define pset_lock(p)                    lck_spin_lock_grp(&(p)->sched_lock, &pset_lck_grp)
#define pset_unlock(p)                  lck_spin_unlock(&(p)->sched_lock)
#define pset_assert_locked(p)           LCK_SPIN_ASSERT(&(p)->sched_lock, LCK_ASSERT_OWNED)
#endif /* !SCHED_PSET_TLOCK */
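
/*
 * Illustrative usage of the macros above (hypothetical caller): per the
 * comment, the pset lock is always taken and dropped at splsched().
 */
#if 0 /* example only, not compiled */
spl_t s = splsched();           /* disable interrupts */
pset_lock(pset);
/* ... examine or update pset scheduling state ... */
pset_unlock(pset);
splx(s);                        /* restore previous interrupt level */
#endif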

extern lck_spin_t       pset_node_lock;

#endif /* !SCHED_TEST_HARNESS */

extern void             processor_bootstrap(void);

extern void             processor_init(
	processor_t             processor,
	int                     cpu_id,
	processor_set_t         processor_set);

#if CONFIG_SCHED_SMT
extern void             processor_set_primary(
	processor_t             processor,
	processor_t             primary);
#endif /* CONFIG_SCHED_SMT */

extern void
processor_update_offline_state(processor_t processor, processor_offline_state_t new_state);
extern void
processor_update_offline_state_locked(processor_t processor, processor_offline_state_t new_state);

extern void processor_doshutdown(
	processor_t             processor,
	bool                    is_final_system_sleep);

__enum_closed_decl(processor_start_kind_t, uint8_t, {
	PROCESSOR_FIRST_BOOT = 0,
	PROCESSOR_BEFORE_ENTERING_SLEEP = 1,
	PROCESSOR_WAKE_FROM_SLEEP = 2,
	PROCESSOR_CLUSTER_POWERDOWN_SUSPEND = 3,
	PROCESSOR_CLUSTER_POWERDOWN_RESUME = 4,
	PROCESSOR_POWERED_CORES_CHANGE = 5,
});

extern void             processor_wait_for_start(
	processor_t             processor,
	processor_start_kind_t  start_kind);

extern kern_return_t    processor_start_from_user(
	processor_t             processor);
extern kern_return_t    processor_start_from_kext(
	processor_t             processor);
extern kern_return_t    processor_exit_from_kext(
	processor_t             processor);


extern void processor_start_reason(
	processor_t             processor,
	processor_reason_t      reason);
extern void processor_exit_reason(
	processor_t             processor,
	processor_reason_t      reason,
	bool                    is_system_sleep);

extern kern_return_t sched_processor_exit_user(processor_t processor);
extern kern_return_t sched_processor_start_user(processor_t processor);

extern bool sched_mark_processor_online(processor_t processor, processor_reason_t reason);
extern void sched_mark_processor_offline(processor_t processor, bool is_final_system_sleep);

#if !SCHED_TEST_HARNESS

extern lck_mtx_t cluster_powerdown_lock;
extern lck_mtx_t processor_updown_lock;

extern bool sched_is_in_sleep(void);
extern bool sched_is_cpu_init_completed(void);

extern void             processor_queue_shutdown(
	processor_t             processor);

extern processor_set_t  processor_pset(
	processor_t             processor);

extern pset_node_t      pset_node_root(void);

extern processor_set_t  pset_create(
	pset_node_t             node,
	pset_cluster_type_t     pset_type,
	uint32_t                pset_cluster_id,
	int                     pset_id);

extern void             pset_init(
	processor_set_t         pset,
	pset_node_t             node);

extern processor_set_t  pset_find(
	uint32_t                cluster_id,
	processor_set_t         default_pset);

extern kern_return_t    processor_info_count(
	processor_flavor_t      flavor,
	mach_msg_type_number_t  *count);

extern void processor_cpu_load_info(
	processor_t processor,
	natural_t ticks[static CPU_STATE_MAX]);

extern void             machine_run_count(
	uint32_t                count);

#if defined(__x86_64__)
extern processor_t      machine_choose_processor(
	processor_set_t         pset,
	processor_t             processor);
#endif /* __x86_64__ */

#endif /* !SCHED_TEST_HARNESS */

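/*
 * Return the next pset in this pset's node, wrapping around to the
 * node's first pset after the last one.
 */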
inline static processor_set_t
next_pset(processor_set_t pset)
{
	pset_map_t map = pset->node->pset_map;

	int pset_id = lsb_next(map, pset->pset_id);
	if (pset_id == -1) {
		pset_id = lsb_first(map);
	}

	return pset_array[pset_id];
}

#define PSET_THING_TASK         0
#define PSET_THING_THREAD       1

extern pset_cluster_type_t recommended_pset_type(
	thread_t                thread);

extern void             processor_state_update_idle(
	processor_t             processor);

extern void             processor_state_update_from_thread(
	processor_t             processor,
	thread_t                thread,
	boolean_t               pset_lock_held);

#define PSET_LOAD_NUMERATOR_SHIFT   16
#define PSET_LOAD_FRACTIONAL_SHIFT   4

#if CONFIG_SCHED_EDGE

extern cluster_type_t pset_type_for_id(uint32_t cluster_id);
extern uint64_t sched_pset_cluster_shared_rsrc_load(processor_set_t pset, cluster_shared_rsrc_type_t shared_rsrc_type);

/*
 * The Edge scheduler uses average scheduling latency as the metric for making
 * thread migration decisions. One component of avg scheduling latency is the load
 * average on the cluster.
 *
 * Load Average Fixed Point Arithmetic
 *
 * The load average is maintained as a 24.8 fixed point arithmetic value for precision.
 * When multiplied by the average execution time, it needs to be rounded up (based on
 * the most significant bit of the fractional part) for better accuracy. After rounding
 * up, the whole number part of the value is used as the actual load value for
 * migrate/steal decisions.
 */
#define SCHED_PSET_LOAD_EWMA_FRACTION_BITS 8
#define SCHED_PSET_LOAD_EWMA_ROUND_BIT     (1 << (SCHED_PSET_LOAD_EWMA_FRACTION_BITS - 1))
#define SCHED_PSET_LOAD_EWMA_FRACTION_MASK ((1 << SCHED_PSET_LOAD_EWMA_FRACTION_BITS) - 1)
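
/*
 * Worked example (illustrative numbers): a 24.8 load average of 0x280
 * is 2.5. Adding SCHED_PSET_LOAD_EWMA_ROUND_BIT (0x80) gives 0x300, and
 * shifting right by SCHED_PSET_LOAD_EWMA_FRACTION_BITS produces a
 * rounded whole-number load of 3, which is then scaled by the average
 * execution time in sched_get_pset_load_average() below.
 */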

inline static int
sched_get_pset_load_average(processor_set_t pset, sched_bucket_t sched_bucket)
{
	uint64_t load_average = os_atomic_load(&pset->pset_load_average[sched_bucket], relaxed);
	uint64_t avg_execution_time = os_atomic_load(&pset->pset_execution_time[sched_bucket].pset_avg_thread_execution_time, relaxed);
	/*
	 * Since a load average of 0 indicates an idle cluster, don't allow an average
	 * execution time less than 1us to cause a cluster to appear idle.
	 */
	avg_execution_time = MAX(avg_execution_time, 1ULL);
	return (int)(((load_average + SCHED_PSET_LOAD_EWMA_ROUND_BIT) >> SCHED_PSET_LOAD_EWMA_FRACTION_BITS) * avg_execution_time);
}

#else /* CONFIG_SCHED_EDGE */
inline static int
sched_get_pset_load_average(processor_set_t pset, __unused sched_bucket_t sched_bucket)
{
	return (int)pset->load_average >> (PSET_LOAD_NUMERATOR_SHIFT - PSET_LOAD_FRACTIONAL_SHIFT);
}
#endif /* CONFIG_SCHED_EDGE */

extern void sched_update_pset_load_average(processor_set_t pset, uint64_t curtime);
extern void sched_update_pset_avg_execution_time(processor_set_t pset, uint64_t delta, uint64_t curtime, sched_bucket_t sched_bucket);

inline static void
pset_update_processor_state(processor_set_t pset, processor_t processor, uint new_state)
{
	pset_assert_locked(pset);

	uint old_state = processor->state;
	uint cpuid = (uint)processor->cpu_id;

	assert(processor->processor_set == pset);
	assert(bit_test(pset->cpu_bitmask, cpuid));

	assert(old_state < PROCESSOR_STATE_LEN);
	assert(new_state < PROCESSOR_STATE_LEN);

	processor->state = new_state;

	bit_clear(pset->cpu_state_map[old_state], cpuid);
	bit_set(pset->cpu_state_map[new_state], cpuid);

	if (bit_test(pset->cpu_available_map, cpuid) && (new_state < PROCESSOR_IDLE)) {
		/* No longer available for scheduling */
		bit_clear(pset->cpu_available_map, cpuid);
	} else if (!bit_test(pset->cpu_available_map, cpuid) && (new_state >= PROCESSOR_IDLE)) {
		/* Newly available for scheduling */
		bit_set(pset->cpu_available_map, cpuid);
	}

	if ((old_state == PROCESSOR_RUNNING) || (new_state == PROCESSOR_RUNNING)) {
		sched_update_pset_load_average(pset, 0);
		if (new_state == PROCESSOR_RUNNING) {
			assert(processor == current_processor());
		}
	}
	if ((old_state == PROCESSOR_IDLE) || (new_state == PROCESSOR_IDLE)) {
		if (new_state == PROCESSOR_IDLE) {
			bit_clear(pset->realtime_map, cpuid);
		}

		pset_node_t node = pset->node;

		if (bit_count(node->pset_map) == 1) {
			/* Node has only a single pset, so skip node pset map updates */
			return;
		}

		if (new_state == PROCESSOR_IDLE) {
#if CONFIG_SCHED_SMT
			if (processor->processor_primary == processor) {
				if (!bit_test(atomic_load(&node->pset_non_rt_primary_map), pset->pset_id)) {
					atomic_bit_set(&node->pset_non_rt_primary_map, pset->pset_id, memory_order_relaxed);
				}
			}
#endif /* CONFIG_SCHED_SMT */
			if (!bit_test(atomic_load(&node->pset_non_rt_map), pset->pset_id)) {
				atomic_bit_set(&node->pset_non_rt_map, pset->pset_id, memory_order_relaxed);
			}
			if (!bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
				atomic_bit_set(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
			}
		} else {
			cpumap_t idle_map = pset->cpu_state_map[PROCESSOR_IDLE];
			if (idle_map == 0) {
				/* No more IDLE CPUs */
				if (bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
					atomic_bit_clear(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
				}
			}
		}
	}
}

decl_simple_lock_data(extern, sched_available_cores_lock);

#endif  /* defined(MACH_KERNEL_PRIVATE) || SCHED_TEST_HARNESS */

#ifdef KERNEL_PRIVATE

/* Private KPI */
extern processor_t      cpu_to_processor(int cpu);

/*!
 * @function              sched_enable_acc_rail
 * @abstract              Enable the shared voltage rail for a single ACC block.
 * @param die_id          0-based die number indicating which die the ACC is on.
 * @param die_cluster_id  0 for the first cluster on the die, 1 for the second, ...
 * @discussion            Called from the PMGR driver.  On systems where ANE and PACC
 *                        share a voltage rail, the PMGR driver calls into XNU prior to
 *                        accessing the ANE hardware, to ensure that the ANE block
 *                        is powered.  This will block until the rail has been enabled,
 *                        and it must be called from a schedulable context.
 *
 *                        This should not be called on systems without a shared ANE/ACC rail.
 *                        The caller is responsible for knowing which die/cluster needs to
 *                        be forced on, in order to allow access to the ANE block.
 */
extern void sched_enable_acc_rail(unsigned int die_id, unsigned int die_cluster_id);

/*!
 * @function              sched_disable_acc_rail
 * @abstract              Disable the shared voltage rail for a single ACC block.
 * @param die_id          0-based die number indicating which die the ACC is on.
 * @param die_cluster_id  0 for the first cluster on the die, 1 for the second, ...
 * @discussion            Tells XNU that the shared ACC voltage rail can be safely disabled.
 *                        This may or may not cut voltage immediately.  Must be called from a
 *                        schedulable context.
 */
extern void sched_disable_acc_rail(unsigned int die_id, unsigned int die_cluster_id);
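
/*
 * Illustrative pairing (hypothetical PMGR-style caller, schedulable
 * context): bracket ANE hardware access with the rail calls above.
 */
#if 0 /* example only, not compiled */
sched_enable_acc_rail(0 /* die_id */, 1 /* die_cluster_id */);
/* ... safely access the ANE block on die 0, cluster 1 ... */
sched_disable_acc_rail(0 /* die_id */, 1 /* die_cluster_id */);
#endif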

/*
 * Private KPI with CLPC
 *
 * Update the scheduler with the set of cores that should be used to dispatch new threads.
 * Non-recommended cores can still be used to field interrupts or run bound threads.
 * This should be called with interrupts enabled and no scheduler locks held.
 */
#define ALL_CORES_RECOMMENDED   (~(uint64_t)0)
#define ALL_CORES_POWERED       (~(uint64_t)0)

extern void sched_perfcontrol_update_recommended_cores(uint32_t recommended_cores);
extern void sched_perfcontrol_update_recommended_cores_reason(uint64_t recommended_cores, processor_reason_t reason, uint32_t flags);

/* Request a change to the powered cores mask that CLPC wants.  Does not block waiting for completion. */
extern void sched_perfcontrol_update_powered_cores(uint64_t powered_cores, processor_reason_t reason, uint32_t flags);
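
/*
 * Illustrative calls (hypothetical CLPC-style caller, interrupts enabled,
 * no scheduler locks held): recommend only CPUs 0-3 for dispatching new
 * threads, then restore all cores.
 */
#if 0 /* example only, not compiled */
sched_perfcontrol_update_recommended_cores(0x0000000F);
/* ... later ... */
sched_perfcontrol_update_recommended_cores((uint32_t)ALL_CORES_RECOMMENDED);
#endif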

#endif /* KERNEL_PRIVATE */

#ifdef XNU_KERNEL_PRIVATE

extern bool support_bootcpu_shutdown;
extern bool enable_processor_exit;
extern unsigned int processor_count;

#if CONFIG_SCHED_SMT
extern int sched_enable_smt;

extern kern_return_t    enable_smt_processors(bool enable);
#endif /* CONFIG_SCHED_SMT */

extern void sched_override_available_cores_for_sleep(void);
extern void sched_restore_available_cores_after_sleep(void);
extern bool processor_should_kprintf(processor_t processor, bool starting);
extern void suspend_cluster_powerdown(void);
extern void resume_cluster_powerdown(void);
extern kern_return_t suspend_cluster_powerdown_from_user(void);
extern kern_return_t resume_cluster_powerdown_from_user(void);
extern int get_cluster_powerdown_user_suspended(void);

extern void processor_wake(
	processor_t             processor);
extern void processor_sleep(
	processor_t             processor);
extern void processor_boot(
	processor_t             processor);
extern kern_return_t    processor_exit_from_user(
	processor_t             processor);

#endif /* XNU_KERNEL_PRIVATE */

__ASSUME_PTR_ABI_SINGLE_END __END_DECLS

#endif  /* _KERN_PROCESSOR_H_ */