/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

/*
 *	processor.h:	Processor and processor-related definitions.
 */

#ifndef _KERN_PROCESSOR_H_
#define _KERN_PROCESSOR_H_

#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <kern/kern_types.h>

#include <sys/cdefs.h>

#ifdef  MACH_KERNEL_PRIVATE
#include <mach/mach_types.h>
#include <kern/ast.h>
#include <kern/cpu_number.h>
#include <kern/smp.h>
#include <kern/simple_lock.h>
#include <kern/locks.h>
#include <kern/percpu.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_urgency.h>
#include <kern/timer.h>
#include <mach/sfi_class.h>
#include <kern/sched_clutch.h>
#include <kern/timer_call.h>
#include <kern/assert.h>
#include <machine/limits.h>
#endif

__BEGIN_DECLS __ASSUME_PTR_ABI_SINGLE_BEGIN

#ifdef  MACH_KERNEL_PRIVATE

/*
 *	Processor state is accessed by locking the scheduling lock
 *	for the assigned processor set.
 *
 *           -------------------- SHUTDOWN
 *          /                     ^     ^
 *        _/                      |      \
 *  OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING
 *         \_________________^   ^ ^______/           /
 *                                \__________________/
 *
 *  Most of these state transitions are externally driven as a
 *  directive (for instance, telling an IDLE processor to start
 *  coming out of the idle state to run a thread). However, these
 *  are typically paired with a handshake by the processor itself
 *  to indicate that it has completed a transition of indeterminate
 *  length (for example, the DISPATCHING->RUNNING or START->RUNNING
 *  transitions must occur on the processor itself).
 *
 *  The boot processor has some special cases, and skips the START state,
 *  since it has already bootstrapped and is ready to context switch threads.
 *
 *  When a processor is in DISPATCHING or RUNNING state, the current_pri,
 *  current_thmode, and deadline fields should be set, so that other
 *  processors can evaluate if it is an appropriate candidate for preemption.
 */
#if defined(CONFIG_SCHED_DEFERRED_AST)
/*
 *           -------------------- SHUTDOWN
 *          /                     ^     ^
 *        _/                      |      \
 *  OFF_LINE ---> START ---> RUNNING ---> IDLE ---> DISPATCHING
 *         \_________________^   ^ ^______/ ^_____ /  /
 *                                \__________________/
 *
 *  A DISPATCHING processor may be put back into IDLE, if another
 *  processor determines that the target processor will have nothing to do
 *  upon reaching the RUNNING state.  This is racy, but if the target
 *  responds and becomes RUNNING, it will not break the processor state
 *  machine.
 *
 *  This change allows us to cancel an outstanding signal/AST on a processor
 *  (if such an operation is supported through hardware or software), and
 *  push the processor back into the IDLE state as a power optimization.
 */
#endif

typedef enum {
	PROCESSOR_OFF_LINE      = 0,    /* Not available */
	PROCESSOR_SHUTDOWN      = 1,    /* Going off-line */
	PROCESSOR_START         = 2,    /* Being started */
	PROCESSOR_UNUSED        = 3,    /* Formerly Inactive (unavailable) */
	PROCESSOR_IDLE          = 4,    /* Idle (available) */
	PROCESSOR_DISPATCHING   = 5,    /* Dispatching (idle -> active) */
	PROCESSOR_RUNNING       = 6,    /* Normal execution */
	PROCESSOR_STATE_LEN     = (PROCESSOR_RUNNING + 1)
} processor_state_t;
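
/*
 * Illustrative sketch (an assumption, simplified from the actual wakeup
 * path in the scheduler): waking an IDLE processor to run a thread follows
 * the IDLE -> DISPATCHING edge in the diagram above, under the pset lock;
 * the target processor then completes the DISPATCHING -> RUNNING handshake
 * itself once it picks up a thread.
 *
 *	pset_lock(pset);
 *	if (processor->state == PROCESSOR_IDLE) {
 *		pset_update_processor_state(pset, processor, PROCESSOR_DISPATCHING);
 *		pset_unlock(pset);
 *		machine_signal_idle(processor);	// one possible way to signal the CPU
 *	} else {
 *		pset_unlock(pset);
 *	}
 */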

typedef enum {
	PSET_SMP,
#if __AMP__
	PSET_AMP_E,
	PSET_AMP_P,
#endif
} pset_cluster_type_t;

#if __AMP__

typedef enum {
	SCHED_PERFCTL_POLICY_DEFAULT,           /*  static policy: set at boot */
	SCHED_PERFCTL_POLICY_FOLLOW_GROUP,      /* dynamic policy: perfctl_class follows thread group across amp clusters */
	SCHED_PERFCTL_POLICY_RESTRICT_E,        /* dynamic policy: limits perfctl_class to amp e cluster */
} sched_perfctl_class_policy_t;

extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_util;
extern _Atomic sched_perfctl_class_policy_t sched_perfctl_policy_bg;

#endif /* __AMP__ */

typedef bitmap_t cpumap_t;

#if __arm64__

/*
 * pset_execution_time_t
 *
 * The pset_execution_time_t type is used to maintain the average
 * execution time of threads on a pset. Since the avg. execution time is
 * updated from contexts where the pset lock is not held, it uses a
 * double-wide RMW loop to update these values atomically.
 */
typedef union {
	struct {
		uint64_t        pset_avg_thread_execution_time;
		uint64_t        pset_execution_time_last_update;
	};
	unsigned __int128       pset_execution_time_packed;
} pset_execution_time_t;
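
/*
 * A minimal sketch of the double-wide RMW pattern described above (an
 * assumption; the real update logic lives in
 * sched_update_pset_avg_execution_time() and derives `new_avg` from an
 * EWMA; `stats` here refers to a pset's CONFIG_SCHED_EDGE
 * pset_execution_time[] entry declared below):
 *
 *	pset_execution_time_t old_value, new_value;
 *	do {
 *		old_value.pset_execution_time_packed =
 *		    os_atomic_load(&stats->pset_execution_time_packed, relaxed);
 *		new_value.pset_avg_thread_execution_time = new_avg;
 *		new_value.pset_execution_time_last_update = curtime;
 *	} while (!os_atomic_cmpxchg(&stats->pset_execution_time_packed,
 *	    old_value.pset_execution_time_packed,
 *	    new_value.pset_execution_time_packed, relaxed));
 */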

#endif /* __arm64__ */

struct processor_set {
	int                     pset_id;
	int                     online_processor_count;
	int                     cpu_set_low, cpu_set_hi;
	int                     cpu_set_count;
	int                     last_chosen;

	uint64_t                load_average;
	uint64_t                pset_load_average[TH_BUCKET_SCHED_MAX];
	uint64_t                pset_load_last_update;
	cpumap_t                cpu_bitmask;
	cpumap_t                recommended_bitmask;
	cpumap_t                cpu_state_map[PROCESSOR_STATE_LEN];
	cpumap_t                primary_map;
	cpumap_t                realtime_map;

#define SCHED_PSET_TLOCK (1)
#if     defined(SCHED_PSET_TLOCK)
/* TODO: reorder struct for temporal cache locality */
	__attribute__((aligned(128))) lck_ticket_t      sched_lock;
#else /* SCHED_PSET_TLOCK */
	__attribute__((aligned(128))) lck_spin_t        sched_lock;     /* lock for above */
#endif /* SCHED_PSET_TLOCK */

#if defined(CONFIG_SCHED_TRADITIONAL) || defined(CONFIG_SCHED_MULTIQ)
	struct run_queue        pset_runq;      /* runq for this processor set */
#endif
	struct rt_queue         rt_runq;        /* realtime runq for this processor set */
	uint64_t                stealable_rt_threads_earliest_deadline; /* if this pset has stealable RT threads, the earliest deadline; else UINT64_MAX */
#if CONFIG_SCHED_CLUTCH
	struct sched_clutch_root pset_clutch_root; /* clutch hierarchy root */
#endif /* CONFIG_SCHED_CLUTCH */

#if defined(CONFIG_SCHED_TRADITIONAL)
	int                     pset_runq_bound_count;
	/* # of threads in runq bound to any processor in pset */
#endif

	/* CPUs that have been sent an unacknowledged remote AST for scheduling purposes */
	cpumap_t                pending_AST_URGENT_cpu_mask;
	cpumap_t                pending_AST_PREEMPT_cpu_mask;
#if defined(CONFIG_SCHED_DEFERRED_AST)
	/*
	 * A separate mask, for ASTs that we may be able to cancel.  This is dependent on
	 * some level of support for requesting an AST on a processor, and then quashing
	 * that request later.
	 *
	 * The purpose of this field (and the associated codepaths) is to infer when we
	 * no longer need a processor that is DISPATCHING to come up, and to prevent it
	 * from coming out of IDLE if possible.  This should serve to decrease the number
	 * of spurious ASTs in the system, and let processors spend longer periods in
	 * IDLE.
	 */
	cpumap_t                pending_deferred_AST_cpu_mask;
#endif
	cpumap_t                pending_spill_cpu_mask;
	cpumap_t                rt_pending_spill_cpu_mask;

	struct ipc_port *       pset_self;              /* port for operations */
	struct ipc_port *       pset_name_self;         /* port for information */

	processor_set_t         pset_list;              /* chain of associated psets */
	pset_node_t             node;
	uint32_t                pset_cluster_id;

	/*
	 * Currently the scheduler uses a mix of pset_cluster_type_t & cluster_type_t
	 * for recommendations etc. It might be useful to unify these as a single type.
	 */
	pset_cluster_type_t     pset_cluster_type;
	cluster_type_t          pset_type;

#if CONFIG_SCHED_EDGE
	cpumap_t                cpu_running_foreign;
	cpumap_t                cpu_running_cluster_shared_rsrc_thread[CLUSTER_SHARED_RSRC_TYPE_COUNT];
	sched_bucket_t          cpu_running_buckets[MAX_CPUS];

	bitmap_t                foreign_psets[BITMAP_LEN(MAX_PSETS)];
	bitmap_t                native_psets[BITMAP_LEN(MAX_PSETS)];
	bitmap_t                local_psets[BITMAP_LEN(MAX_PSETS)];
	bitmap_t                remote_psets[BITMAP_LEN(MAX_PSETS)];
	sched_clutch_edge       sched_edges[MAX_PSETS];
	pset_execution_time_t   pset_execution_time[TH_BUCKET_SCHED_MAX];
	uint64_t                pset_cluster_shared_rsrc_load[CLUSTER_SHARED_RSRC_TYPE_COUNT];
#endif /* CONFIG_SCHED_EDGE */
	bool                    is_SMT;                 /* pset contains SMT processors */
};

extern struct processor_set     pset0;

typedef bitmap_t pset_map_t;

struct pset_node {
	processor_set_t         psets;                  /* list of associated psets */

	pset_node_t             nodes;                  /* list of associated subnodes */
	pset_node_t             node_list;              /* chain of associated nodes */

	pset_node_t             parent;

	pset_cluster_type_t     pset_cluster_type;      /* Same as the type of all psets in this node */

	pset_map_t              pset_map;               /* map of associated psets */
	_Atomic pset_map_t      pset_idle_map;          /* psets with at least one IDLE CPU */
	_Atomic pset_map_t      pset_idle_primary_map;  /* psets with at least one IDLE primary CPU */
	_Atomic pset_map_t      pset_non_rt_map;        /* psets with at least one available CPU not running a realtime thread */
	_Atomic pset_map_t      pset_non_rt_primary_map;/* psets with at least one available primary CPU not running a realtime thread */
};

extern struct pset_node pset_node0;
#if __AMP__
extern struct pset_node pset_node1;
extern pset_node_t ecore_node;
extern pset_node_t pcore_node;
#endif

extern queue_head_t tasks, threads, corpse_tasks;
extern int tasks_count, terminated_tasks_count, threads_count, terminated_threads_count;
decl_lck_mtx_data(extern, tasks_threads_lock);
decl_lck_mtx_data(extern, tasks_corpse_lock);

/*
 * The terminated tasks queue should only be inspected elsewhere by stackshot.
 */
extern queue_head_t terminated_tasks;

extern queue_head_t terminated_threads;

struct processor {
	processor_state_t       state;                  /* See above */
	bool                    is_SMT;
	bool                    is_recommended;
	bool                    current_is_NO_SMT;      /* cached TH_SFLAG_NO_SMT of current thread */
	bool                    current_is_bound;       /* current thread is bound to this processor */
	bool                    current_is_eagerpreempt;/* current thread is TH_SFLAG_EAGERPREEMPT */
	struct thread          *active_thread;          /* thread running on processor */
	struct thread          *idle_thread;            /* this processor's idle thread. */
	struct thread          *startup_thread;

	processor_set_t         processor_set;  /* assigned set */

	/*
	 * XXX All current_* fields should be grouped together, as they're
	 * updated at the same time.
	 */
	int                     current_pri;            /* priority of current thread */
	sfi_class_id_t          current_sfi_class;      /* SFI class of current thread */
	perfcontrol_class_t     current_perfctl_class;  /* Perfcontrol class for current thread */
	/*
	 * The cluster type recommended for the current thread.
	 */
	pset_cluster_type_t     current_recommended_pset_type;
	thread_urgency_t        current_urgency;        /* cached urgency of current thread */

#if CONFIG_SCHED_TRADITIONAL
	int                     runq_bound_count;       /* # of threads bound to this processor */
#endif /* CONFIG_SCHED_TRADITIONAL */

#if CONFIG_THREAD_GROUPS
	struct thread_group    *current_thread_group;   /* thread_group of current thread */
#endif
	int                     starting_pri;           /* priority of current thread as it was when scheduled */
	int                     cpu_id;                 /* platform numeric id */

	uint64_t                quantum_end;            /* time when current quantum ends */
	uint64_t                last_dispatch;          /* time of last dispatch */

#if KPERF
	uint64_t                kperf_last_sample_time; /* time of last kperf sample */
#endif /* KPERF */

	uint64_t                deadline;               /* for next realtime thread */
	bool                    first_timeslice;        /* has the quantum expired since context switch */

	bool                    processor_offlined;     /* has the processor been explicitly processor_offline'ed */
	bool                    must_idle;              /* Needs to be forced idle as next selected thread is allowed on this processor */

	bool                    running_timers_active;  /* whether the running timers should fire */
	struct timer_call       running_timers[RUNNING_TIMER_MAX];

#if CONFIG_SCHED_TRADITIONAL || CONFIG_SCHED_MULTIQ
	struct run_queue        runq;                   /* runq for this processor */
#endif /* CONFIG_SCHED_TRADITIONAL || CONFIG_SCHED_MULTIQ */

#if CONFIG_SCHED_GRRR
	struct grrr_run_queue   grrr_runq;              /* Group Ratio Round-Robin runq */
#endif /* CONFIG_SCHED_GRRR */

	/*
	 * Pointer to primary processor for secondary SMT processors, or a
	 * pointer to ourselves for primaries or non-SMT.
	 */
	processor_t             processor_primary;
	processor_t             processor_secondary;
	struct ipc_port        *processor_self;         /* port for operations */

	processor_t             processor_list;         /* all existing processors */

	/* Processor state statistics */
	timer_data_t            idle_state;
	timer_data_t            system_state;
	timer_data_t            user_state;

	timer_t                 current_state;          /* points to processor's idle, system, or user state timer */

	/* Thread execution timers */
	timer_t                 thread_timer;           /* points to current thread's user or system timer */
	timer_t                 kernel_timer;           /* points to current thread's system_timer */

	uint64_t                timer_call_ttd;         /* current timer call time-to-deadline */
};

extern processor_t processor_list;
decl_simple_lock_data(extern, processor_list_lock);

/*
 * Maximum number of CPUs supported by the scheduler.  bits.h bitmap macros
 * need to be used to support greater than 64.
 */
#define MAX_SCHED_CPUS          64
extern processor_t     __single processor_array[MAX_SCHED_CPUS];    /* array indexed by cpuid */
extern processor_set_t __single pset_array[MAX_PSETS];              /* array indexed by pset_id */

extern uint32_t                 processor_avail_count;
extern uint32_t                 processor_avail_count_user;
extern uint32_t                 primary_processor_avail_count;
extern uint32_t                 primary_processor_avail_count_user;

#define master_processor PERCPU_GET_MASTER(processor)
PERCPU_DECL(struct processor, processor);

extern processor_t      current_processor(void);

/* Lock macros, always acquired and released with interrupts disabled (splsched()) */

extern lck_grp_t pset_lck_grp;

#if defined(SCHED_PSET_TLOCK)
#define pset_lock_init(p)               lck_ticket_init(&(p)->sched_lock, &pset_lck_grp)
#define pset_lock(p)                    lck_ticket_lock(&(p)->sched_lock, &pset_lck_grp)
#define pset_unlock(p)                  lck_ticket_unlock(&(p)->sched_lock)
#define pset_assert_locked(p)           lck_ticket_assert_owned(&(p)->sched_lock)
#else /* SCHED_PSET_TLOCK */
#define pset_lock_init(p)               lck_spin_init(&(p)->sched_lock, &pset_lck_grp, NULL)
#define pset_lock(p)                    lck_spin_lock_grp(&(p)->sched_lock, &pset_lck_grp)
#define pset_unlock(p)                  lck_spin_unlock(&(p)->sched_lock)
#define pset_assert_locked(p)           LCK_SPIN_ASSERT(&(p)->sched_lock, LCK_ASSERT_OWNED)
#endif /* !SCHED_PSET_TLOCK */
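
/*
 * A minimal usage sketch (illustrative, following the convention stated
 * above rather than any specific call site):
 *
 *	spl_t s = splsched();	// raise to scheduler level, disabling interrupts
 *	pset_lock(pset);
 *	... examine or update pset scheduling state ...
 *	pset_unlock(pset);
 *	splx(s);		// restore the previous interrupt level
 */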

extern lck_spin_t       pset_node_lock;

extern void             processor_bootstrap(void);

extern void             processor_init(
	processor_t             processor,
	int                     cpu_id,
	processor_set_t         processor_set);

extern void             processor_set_primary(
	processor_t             processor,
	processor_t             primary);

extern kern_return_t    processor_shutdown(
	processor_t             processor);

extern kern_return_t    processor_start_from_user(
	processor_t             processor);
extern kern_return_t    processor_exit_from_user(
	processor_t             processor);

extern kern_return_t    sched_processor_enable(
	processor_t             processor,
	boolean_t               enable);

extern void             processor_queue_shutdown(
	processor_t             processor);

extern processor_set_t  processor_pset(
	processor_t             processor);

extern pset_node_t      pset_node_root(void);

extern processor_set_t  pset_create(
	pset_node_t             node,
	pset_cluster_type_t     pset_type,
	uint32_t                pset_cluster_id,
	int                     pset_id);

extern void             pset_init(
	processor_set_t         pset,
	pset_node_t             node);

extern processor_set_t  pset_find(
	uint32_t                cluster_id,
	processor_set_t         default_pset);

extern kern_return_t    processor_info_count(
	processor_flavor_t      flavor,
	mach_msg_type_number_t  *count);

#define pset_deallocate(x)
#define pset_reference(x)

extern void             machine_run_count(
	uint32_t                count);

extern processor_t      machine_choose_processor(
	processor_set_t         pset,
	processor_t             processor);

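/*
 * Returns the next pset in the owning node's pset map after the given
 * pset, wrapping around to the map's first pset when the end is reached.
 */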
inline static processor_set_t
next_pset(processor_set_t pset)
{
	pset_map_t map = pset->node->pset_map;

	int pset_id = lsb_next(map, pset->pset_id);
	if (pset_id == -1) {
		pset_id = lsb_first(map);
	}

	return pset_array[pset_id];
}

#define PSET_THING_TASK         0
#define PSET_THING_THREAD       1

extern pset_cluster_type_t recommended_pset_type(
	thread_t                thread);
#if CONFIG_THREAD_GROUPS
extern pset_cluster_type_t thread_group_pset_recommendation(
	struct thread_group     *tg,
	cluster_type_t          recommendation);
#endif /* CONFIG_THREAD_GROUPS */

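/*
 * Returns true if the pset has at least one CPU in the recommended set,
 * false otherwise (including for a NULL pset).
 */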
inline static bool
pset_is_recommended(processor_set_t pset)
{
	if (!pset) {
		return false;
	}
	return (pset->recommended_bitmask & pset->cpu_bitmask) != 0;
}

extern void             processor_state_update_idle(
	processor_t             processor);

extern void             processor_state_update_from_thread(
	processor_t             processor,
	thread_t                thread,
	boolean_t               pset_lock_held);

extern void             processor_state_update_explicit(
	processor_t             processor,
	int                     pri,
	sfi_class_id_t          sfi_class,
	pset_cluster_type_t     pset_type,
	perfcontrol_class_t     perfctl_class,
	thread_urgency_t        urgency,
	sched_bucket_t          bucket);

#define PSET_LOAD_NUMERATOR_SHIFT   16
#define PSET_LOAD_FRACTIONAL_SHIFT   4
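
/*
 * An assumption, inferred from the shifts: load_average is maintained with
 * PSET_LOAD_NUMERATOR_SHIFT (16) fractional bits, so the non-Edge
 * sched_get_pset_load_average() below shifts right by 16 - 4 = 12, leaving
 * a result with PSET_LOAD_FRACTIONAL_SHIFT (4) fractional bits.
 */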

#if CONFIG_SCHED_EDGE

extern cluster_type_t pset_type_for_id(uint32_t cluster_id);
extern uint64_t sched_pset_cluster_shared_rsrc_load(processor_set_t pset, cluster_shared_rsrc_type_t shared_rsrc_type);

/*
 * The Edge scheduler uses average scheduling latency as the metric for making
 * thread migration decisions. One component of avg scheduling latency is the load
 * average on the cluster.
 *
 * Load Average Fixed Point Arithmetic
 *
 * The load average is maintained as a 24.8 fixed point arithmetic value for precision.
 * When multiplied by the average execution time, it needs to be rounded up (based on
 * the most significant bit of the fractional part) for better accuracy. After rounding
 * up, the whole number part of the value is used as the actual load value for
 * migrate/steal decisions.
 */
#define SCHED_PSET_LOAD_EWMA_FRACTION_BITS 8
#define SCHED_PSET_LOAD_EWMA_ROUND_BIT     (1 << (SCHED_PSET_LOAD_EWMA_FRACTION_BITS - 1))
#define SCHED_PSET_LOAD_EWMA_FRACTION_MASK ((1 << SCHED_PSET_LOAD_EWMA_FRACTION_BITS) - 1)
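
/*
 * Worked example (illustrative values): a 24.8 load average of 0x280
 * represents 2.5; adding SCHED_PSET_LOAD_EWMA_ROUND_BIT (0x80) gives 0x300,
 * and shifting right by SCHED_PSET_LOAD_EWMA_FRACTION_BITS (8) rounds the
 * load to 3, which sched_get_pset_load_average() below then scales by the
 * pset's average thread execution time.
 */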

inline static int
sched_get_pset_load_average(processor_set_t pset, sched_bucket_t sched_bucket)
{
	uint64_t load_average = os_atomic_load(&pset->pset_load_average[sched_bucket], relaxed);
	return (int)(((load_average + SCHED_PSET_LOAD_EWMA_ROUND_BIT) >> SCHED_PSET_LOAD_EWMA_FRACTION_BITS) *
	       pset->pset_execution_time[sched_bucket].pset_avg_thread_execution_time);
}

#else /* CONFIG_SCHED_EDGE */
inline static int
sched_get_pset_load_average(processor_set_t pset, __unused sched_bucket_t sched_bucket)
{
	return (int)pset->load_average >> (PSET_LOAD_NUMERATOR_SHIFT - PSET_LOAD_FRACTIONAL_SHIFT);
}
#endif /* CONFIG_SCHED_EDGE */

extern void sched_update_pset_load_average(processor_set_t pset, uint64_t curtime);
extern void sched_update_pset_avg_execution_time(processor_set_t pset, uint64_t delta, uint64_t curtime, sched_bucket_t sched_bucket);

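/*
 * Moves a processor between scheduler states, keeping the pset's per-state
 * CPU bitmaps and the node-level idle/non-realtime pset maps consistent.
 * Must be called with the pset lock held.
 */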
inline static void
pset_update_processor_state(processor_set_t pset, processor_t processor, uint new_state)
{
	pset_assert_locked(pset);

	uint old_state = processor->state;
	uint cpuid = (uint)processor->cpu_id;

	assert(processor->processor_set == pset);
	assert(bit_test(pset->cpu_bitmask, cpuid));

	assert(old_state < PROCESSOR_STATE_LEN);
	assert(new_state < PROCESSOR_STATE_LEN);

	processor->state = new_state;

	bit_clear(pset->cpu_state_map[old_state], cpuid);
	bit_set(pset->cpu_state_map[new_state], cpuid);

	if ((old_state == PROCESSOR_RUNNING) || (new_state == PROCESSOR_RUNNING)) {
		sched_update_pset_load_average(pset, 0);
		if (new_state == PROCESSOR_RUNNING) {
			assert(processor == current_processor());
		}
	}
	if ((old_state == PROCESSOR_IDLE) || (new_state == PROCESSOR_IDLE)) {
		if (new_state == PROCESSOR_IDLE) {
			bit_clear(pset->realtime_map, cpuid);
		}

		pset_node_t node = pset->node;

		if (bit_count(node->pset_map) == 1) {
			/* Node has only a single pset, so skip node pset map updates */
			return;
		}

		if (new_state == PROCESSOR_IDLE) {
			if (processor->processor_primary == processor) {
				if (!bit_test(atomic_load(&node->pset_non_rt_primary_map), pset->pset_id)) {
					atomic_bit_set(&node->pset_non_rt_primary_map, pset->pset_id, memory_order_relaxed);
				}
				if (!bit_test(atomic_load(&node->pset_idle_primary_map), pset->pset_id)) {
					atomic_bit_set(&node->pset_idle_primary_map, pset->pset_id, memory_order_relaxed);
				}
			}
			if (!bit_test(atomic_load(&node->pset_non_rt_map), pset->pset_id)) {
				atomic_bit_set(&node->pset_non_rt_map, pset->pset_id, memory_order_relaxed);
			}
			if (!bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
				atomic_bit_set(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
			}
		} else {
			cpumap_t idle_map = pset->cpu_state_map[PROCESSOR_IDLE];
			if (idle_map == 0) {
				/* No more IDLE CPUs */
				if (bit_test(atomic_load(&node->pset_idle_map), pset->pset_id)) {
					atomic_bit_clear(&node->pset_idle_map, pset->pset_id, memory_order_relaxed);
				}
			}
			if (processor->processor_primary == processor) {
				idle_map &= pset->primary_map;
				if (idle_map == 0) {
					/* No more IDLE primary CPUs */
					if (bit_test(atomic_load(&node->pset_idle_primary_map), pset->pset_id)) {
						atomic_bit_clear(&node->pset_idle_primary_map, pset->pset_id, memory_order_relaxed);
					}
				}
			}
		}
	}
}

#else   /* MACH_KERNEL_PRIVATE */

extern void             pset_deallocate(
	processor_set_t         pset);

extern void             pset_reference(
	processor_set_t         pset);

#endif  /* MACH_KERNEL_PRIVATE */
#ifdef KERNEL_PRIVATE

extern unsigned int     processor_count;
extern processor_t      cpu_to_processor(int cpu);

extern kern_return_t    enable_smt_processors(bool enable);

#endif /* KERNEL_PRIVATE */

__ASSUME_PTR_ABI_SINGLE_END __END_DECLS

#endif  /* _KERN_PROCESSOR_H_ */