/*
 * Copyright (c) 2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <stdint.h>

#include <kern/bits.h>
#include <kern/kern_types.h>
#include <kern/processor.h>
#include <kern/sched_clutch.h>
#include <kern/sched_common.h>
#include <kern/sched_prim.h>
#include <kern/sched_rt.h>
#include <kern/thread.h>
#include <kern/queue.h>

#include <sys/kdebug_kernel.h>

#include <os/atomic_private.h>

#include <machine/machine_routines.h>

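/*
 * Shorthand for the kdebug tracepoints emitted throughout this file; these
 * are release-build trace events, so the spill/steal tracepoints below are
 * available outside of development kernels.
 */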
#define KTRC KDBG_RELEASE

#pragma mark - Constants and Tunables

#include <kern/startup.h>

/*
 * Tunables controlling how xnu initializes the realtime matrix. CLPC can
 * override their effects with sched_perfcontrol interfaces.
 */

TUNABLE(unsigned int, sched_rt_spill_policy, "sched_rt_spill_policy", 1);

TUNABLE(unsigned, sched_rt_steal_policy, "sched_rt_steal_policy", 2);

uint32_t rt_deadline_epsilon;
uint32_t rt_constraint_threshold;
/* epsilon for comparing RT deadlines */
int rt_deadline_epsilon_us = 100;
uint32_t max_rt_quantum;
uint32_t min_rt_quantum;
int sched_allow_rt_smt = 1;
int sched_rt_runq_strict_priority = false;
int
sched_get_rt_deadline_epsilon(void)
{
	return rt_deadline_epsilon_us;
}

void
sched_set_rt_deadline_epsilon(int new_epsilon_us)
{
	rt_deadline_epsilon_us = new_epsilon_us;

	uint64_t abstime;
	clock_interval_to_absolutetime_interval(rt_deadline_epsilon_us, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && ((rt_deadline_epsilon_us == 0) || (uint32_t)abstime != 0));
	rt_deadline_epsilon = (uint32_t)abstime;
}
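
/*
 * Worked example: with the default rt_deadline_epsilon_us of 100, the call
 * above converts 100 us into mach absolute time units. On a hypothetical
 * 24 MHz timebase that would be 2400 absolute-time ticks, so two deadlines
 * within 2400 ticks of each other compare as effectively equal for EDF
 * purposes.
 */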

#pragma mark - Initialization

void
sched_realtime_timebase_init(void)
{
	uint64_t abstime;

	/* smallest rt computation (50 us) */
	clock_interval_to_absolutetime_interval(50, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	min_rt_quantum = (uint32_t)abstime;

	/* maximum rt computation (50 ms) */
	clock_interval_to_absolutetime_interval(
		50, 1000 * NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	max_rt_quantum = (uint32_t)abstime;

	/* constraint threshold for sending backup IPIs (4 ms) */
	clock_interval_to_absolutetime_interval(4, NSEC_PER_MSEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	rt_constraint_threshold = (uint32_t)abstime;

	/* epsilon for comparing deadlines */
	sched_set_rt_deadline_epsilon(rt_deadline_epsilon_us);
}

#if CONFIG_SCHED_EDGE
/* forward-declare config utility */
static void
sched_rt_config_pset_push(processor_set_t pset);
#endif /* CONFIG_SCHED_EDGE */

static void
rt_init_completed(void)
{
	/* Realtime spill/steal are only supported on platforms with the edge scheduler. */
#if CONFIG_SCHED_EDGE
	/* Hold sched_available_cores_lock to prevent multiple concurrent matrix updates. */
	spl_t s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);
	for (pset_id_t src_pset_id = 0; src_pset_id < sched_num_psets; src_pset_id++) {
		processor_set_t src_pset = pset_array[src_pset_id];
		assert3p(src_pset, !=, PROCESSOR_SET_NULL); /* all psets should be initialized */

		/* For each cluster, set all its outgoing edge parameters */
		for (pset_id_t dst_pset_id = 0; dst_pset_id < sched_num_psets; dst_pset_id++) {
			if (dst_pset_id == src_pset_id) {
				continue;
			}
			processor_set_t dst_pset = pset_array[dst_pset_id];
			assert3p(dst_pset, !=, PROCESSOR_SET_NULL); /* all psets should be initialized */

			bool clusters_homogenous = (src_pset->pset_type == dst_pset->pset_type);
			if (clusters_homogenous) {
				/* Default realtime policy: spill allowed among homogeneous psets. */
				sched_rt_config_set((pset_id_t) src_pset_id, (pset_id_t) dst_pset_id, (sched_clutch_edge) {
					.sce_migration_allowed = true,
					.sce_steal_allowed = true,
					.sce_migration_weight = 0,
				});
			} else {
				/* Default realtime policy: disallow spill among heterogeneous psets. */
				sched_rt_config_set((pset_id_t) src_pset_id, (pset_id_t) dst_pset_id, (sched_clutch_edge) {
					.sce_migration_allowed = false,
					.sce_steal_allowed = false,
					.sce_migration_weight = 0,
				});
			}
		}
	}

	for (pset_id_t pset_id = 0; pset_id < sched_num_psets; pset_id++) {
		sched_rt_config_pset_push(pset_array[pset_id]);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);
#endif /* CONFIG_SCHED_EDGE */
}
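
/*
 * Illustrative default matrix for a hypothetical two-cluster AMP system
 * (pset 0 = P-cores, pset 1 = E-cores), as produced by the loops above:
 *
 *                 dst 0 (P)                 dst 1 (E)
 *   src 0 (P)     (no self-edge)            migration/steal disallowed
 *   src 1 (E)     migration/steal disallowed  (no self-edge)
 *
 * If the two psets were instead the same type, both off-diagonal edges
 * would allow migration and steal with weight 0. CLPC can later overwrite
 * any of these edges through sched_rt_matrix_set().
 */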

static void
pset_rt_init(processor_set_t pset)
{
	for (int pri = BASEPRI_RTQUEUES; pri <= MAXPRI; pri++) {
		int i = pri - BASEPRI_RTQUEUES;
		rt_queue_pri_t *rqi = &pset->rt_runq.rt_queue_pri[i];
		queue_init(&rqi->pri_queue);
		rqi->pri_count = 0;
		rqi->pri_earliest_deadline = RT_DEADLINE_NONE;
		rqi->pri_constraint = RT_CONSTRAINT_NONE;
	}
	os_atomic_init(&pset->stealable_rt_threads_earliest_deadline, RT_DEADLINE_NONE);

	rt_queue_t rt_runq = &pset->rt_runq;
	os_atomic_init(&rt_runq->count, 0);
	os_atomic_init(&rt_runq->earliest_deadline, RT_DEADLINE_NONE);
	os_atomic_init(&rt_runq->constraint, RT_CONSTRAINT_NONE);
	os_atomic_init(&rt_runq->ed_index, NOPRI);
	bzero(&rt_runq->bitmap, sizeof(rt_runq->bitmap));
	bzero(&rt_runq->runq_stats, sizeof(rt_runq->runq_stats));

#if __AMP__
	/*
	 * Initialize spill/steal search orders as invalid to prevent spill/steal
	 * before the matrix is configured.
	 */
	bzero(pset->sched_rt_edges, sizeof(pset->sched_rt_edges));
	for (pset_id_t i = 0; i < MAX_PSETS - 1; i++) {
		pset->sched_rt_spill_search_order.spso_search_order[i] = PSET_ID_INVALID;
#if CONFIG_SCHED_EDGE
		pset->sched_rt_steal_search_order.spso_search_order[i] = PSET_ID_INVALID;
#endif /* CONFIG_SCHED_EDGE */
	}
#endif /* __AMP__ */
}

#pragma mark - Realtime Scheduler/CLPC interface

#if CONFIG_SCHED_EDGE
void
sched_rt_config_set(
	uint8_t src_pset,
	uint8_t dst_pset,
	sched_clutch_edge edge_config)
{
	assert(src_pset != dst_pset || !edge_config.sce_migration_allowed); /* No self-edges. */
	os_atomic_store(&pset_array[src_pset]->sched_rt_edges[dst_pset], edge_config, relaxed);
}

sched_clutch_edge
sched_rt_config_get(
	uint8_t src_pset,
	uint8_t dst_pset)
{
	return os_atomic_load(&pset_array[src_pset]->sched_rt_edges[dst_pset], relaxed);
}

void
sched_rt_matrix_get(
	sched_clutch_edge *edge_matrix,
	bool *edge_requests,
	uint64_t num_psets)
{
	uint64_t edge_index = 0;
	for (uint8_t src_pset = 0; src_pset < num_psets; src_pset++) {
		for (uint8_t dst_pset = 0; dst_pset < num_psets; dst_pset++) {
			if (edge_requests[edge_index]) {
				edge_matrix[edge_index] = sched_rt_config_get(src_pset, dst_pset);
			}
			edge_index++;
		}
	}
}

/*
 * sched_rt_config_pset_push()
 *
 * After using sched_rt_config_set() to update edge tunables outgoing from a particular source
 * pset, this function should be called in order to propagate the updates to derived metadata for
 * the pset, such as search orders for outgoing spill and steal.
 */
static void
sched_rt_config_pset_push(processor_set_t pset)
{
	assert3u(pset->pset_id, <, UINT8_MAX);

	sched_pset_search_order_sort_data_t spill_datas[MAX_PSETS - 1], steal_datas[MAX_PSETS - 1];
	uint num_spill_datas = 0, num_steal_datas = 0;
	for (pset_id_t other_pset_id = 0; other_pset_id < sched_num_psets; other_pset_id++) {
		if (pset->pset_id == other_pset_id) {
			continue; /* No self-edges. */
		}
		/* Spill */
		sched_clutch_edge out_edge = sched_rt_config_get((pset_id_t)pset->pset_cluster_id, other_pset_id);
		if (out_edge.sce_migration_allowed) {
			spill_datas[num_spill_datas++] = (sched_pset_search_order_sort_data_t) {
				.spsosd_src_pset = pset,
				.spsosd_migration_weight = out_edge.sce_migration_weight,
				.spsosd_dst_pset_id = other_pset_id
			};
		}
		/* Steal */
		sched_clutch_edge in_edge = sched_rt_config_get(other_pset_id, (pset_id_t)pset->pset_cluster_id);
		if (in_edge.sce_steal_allowed) {
			steal_datas[num_steal_datas++] = (sched_pset_search_order_sort_data_t) {
				.spsosd_src_pset = pset,
				.spsosd_migration_weight = in_edge.sce_migration_weight,
				.spsosd_dst_pset_id = other_pset_id,
			};
		}
	}
	sched_pset_search_order_compute(&pset->sched_rt_spill_search_order, spill_datas, num_spill_datas, sched_edge_search_order_weight_then_locality_cmp);
	sched_pset_search_order_compute(&pset->sched_rt_steal_search_order, steal_datas, num_steal_datas, sched_edge_search_order_weight_then_locality_cmp);
}
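
/*
 * Sketch of the derived metadata, assuming a hypothetical three-pset system
 * where pset 0 has outgoing edges to psets 1 and 2 with different migration
 * weights: after sched_rt_config_pset_push(pset_array[0]), the spill search
 * order ranks psets 1 and 2 by migration weight with locality as the
 * tie-breaker (per sched_edge_search_order_weight_then_locality_cmp), and
 * the steal search order is built the same way from incoming edges that
 * have sce_steal_allowed set.
 */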

void
sched_rt_matrix_set(
	sched_clutch_edge *rt_matrix,
	bool *edge_changes,
	uint64_t num_psets)
{
	/* Hold sched_available_cores_lock to prevent multiple concurrent matrix updates. */
	spl_t s = splsched();
	simple_lock(&sched_available_cores_lock, LCK_GRP_NULL);

	for (uint8_t src_pset_id = 0; src_pset_id < num_psets; src_pset_id++) {
		for (uint8_t dst_pset_id = 0; dst_pset_id < num_psets; dst_pset_id++) {
			const uint64_t rt_matrix_index = src_pset_id * num_psets + dst_pset_id;
			if (edge_changes[rt_matrix_index]) {
				sched_rt_config_set(src_pset_id, dst_pset_id, rt_matrix[rt_matrix_index]);
			}
		}
	}

	for (pset_id_t pset_id = 0; pset_id < num_psets; pset_id++) {
		sched_rt_config_pset_push(pset_array[pset_id]);
	}

	simple_unlock(&sched_available_cores_lock);
	splx(s);
}
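
/*
 * Hypothetical CLPC-style usage sketch: enable realtime spill from pset 0
 * to pset 1 only. Only the flagged edge is written; every pset's search
 * orders are then recomputed under sched_available_cores_lock.
 *
 *	sched_clutch_edge matrix[MAX_PSETS * MAX_PSETS] = {{ 0 }};
 *	bool changes[MAX_PSETS * MAX_PSETS] = { false };
 *	uint64_t n = sched_num_psets;
 *	matrix[0 * n + 1] = (sched_clutch_edge) {
 *		.sce_migration_allowed = true,
 *		.sce_steal_allowed = true,
 *		.sce_migration_weight = 0,
 *	};
 *	changes[0 * n + 1] = true;
 *	sched_rt_matrix_set(matrix, changes, n);
 */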
#endif /* CONFIG_SCHED_EDGE */

#pragma mark - Scheduler Callouts

#if CONFIG_SCHED_SMT
/*
 * SMT-aware callout for rt_choose_processor.
 */
processor_t
sched_rtlocal_choose_processor_smt(
	processor_set_t starting_pset,
	processor_t processor,
	thread_t thread)
{
	processor_set_t nset = PROCESSOR_SET_NULL;
	processor_set_t pset = starting_pset;
	pset_node_t node = pset->node;

	processor_t lc_processor = processor;
	integer_t lowest_count = INT_MAX;
	if (lc_processor != PROCESSOR_NULL) {
		lowest_count = SCHED(processor_runq_count)(processor);
	}

	bool include_ast_urgent_pending_cpus = false;
	cpumap_t ast_urgent_pending;
try_again:
	ast_urgent_pending = 0;
	int consider_secondaries = (!pset->is_SMT) || (bit_count(node->pset_map) == 1) || (node->pset_non_rt_primary_map == 0) || include_ast_urgent_pending_cpus;
	for (; consider_secondaries < 2; consider_secondaries++) {
		pset = change_locked_pset(pset, starting_pset);
		do {
			cpumap_t available_map = pset_available_cpumap(pset);
			if (available_map == 0) {
				goto no_available_cpus;
			}

			processor = pset_choose_processor_for_realtime_thread_smt(pset, PROCESSOR_NULL, consider_secondaries, false);
			if (processor) {
				return processor;
			}

			if (consider_secondaries) {
				processor = pset_choose_furthest_deadline_processor_for_realtime_thread(pset, thread->sched_pri, thread->realtime.deadline, PROCESSOR_NULL, false, include_ast_urgent_pending_cpus);
				if (processor) {
					/*
					 * Instead of looping through all the psets to find the global
					 * furthest deadline processor, preempt the first candidate found.
					 * The preempted thread will then find any other available far deadline
					 * processors to preempt.
					 */
					return processor;
				}

				ast_urgent_pending |= pset->pending_AST_URGENT_cpu_mask;

				if (rt_runq_count(pset) < lowest_count) {
					int cpuid = bit_first(available_map);
					assert(cpuid >= 0);
					lc_processor = processor_array[cpuid];
					lowest_count = rt_runq_count(pset);
				}
			}

no_available_cpus:
			nset = next_pset(pset);

			if (nset != starting_pset) {
				pset = change_locked_pset(pset, nset);
			}
		} while (nset != starting_pset);
	}

	/* Shortcut for single-pset nodes */
	if (bit_count(node->pset_map) == 1) {
		if (lc_processor) {
			pset_assert_locked(lc_processor->processor_set);
			return lc_processor;
		}
	} else {
		if (ast_urgent_pending && !include_ast_urgent_pending_cpus) {
			/* See the comment in pset_choose_furthest_deadline_processor_for_realtime_thread() */
			include_ast_urgent_pending_cpus = true;
			goto try_again;
		}
	}

	processor = lc_processor;

	if (processor) {
		pset = change_locked_pset(pset, processor->processor_set);
		/* Check that chosen processor is still usable */
		cpumap_t available_map = pset_available_cpumap(pset);
		if (bit_test(available_map, processor->cpu_id)) {
			return processor;
		}

		/* processor is no longer usable */
		processor = PROCESSOR_NULL;
	}

	pset_assert_locked(pset);
	pset_unlock(pset);
	return PROCESSOR_NULL;
}
#else /* !CONFIG_SCHED_SMT */
/*
 * Called with thread and starting_pset locked. The returned processor's pset is
 * locked on return.
 */
processor_t
sched_rt_choose_processor(
	const processor_set_t starting_pset,
	processor_t processor,
	thread_t thread)
{
	assert3u(thread->sched_pri, >=, BASEPRI_RTQUEUES);
	assert3u(thread->sched_pri, <=, MAXPRI);

	/*
	 * In choose_starting_pset, we found a good candidate pset for this thread.
	 * Now, we pick the best processor to preempt, if there is one. It is also
	 * possible that conditions have changed and the thread should spill to
	 * another pset.
	 */

	processor_set_t pset = starting_pset; /* Lock is held on this pset. */
	pset_assert_locked(pset);

#if __AMP__
	/*
	 * If there are processors with outstanding urgent preemptions, we consider
	 * them in a second pass. While we are changing pset locks here, it is
	 * possible a processor may resolve its outstanding urgent preemption and
	 * become eligible to run this thread. See comment in
	 * pset_choose_furthest_deadline_processor_for_realtime_thread().
	 */
	bool found_ast_urgent_pending = false; /* Tracks whether any (eligible) processors have pending urgent ASTs. */
	for (int include_ast_urgent_pending_cpus = 0; include_ast_urgent_pending_cpus < 2; include_ast_urgent_pending_cpus++) {
		if (include_ast_urgent_pending_cpus && !found_ast_urgent_pending) {
			break; /* Skip the second pass. */
		}

		sched_pset_iterate_state_t istate = SCHED_PSET_ITERATE_STATE_INIT;
		while (sched_iterate_psets_ordered(starting_pset, &starting_pset->sched_rt_spill_search_order, ~0, &istate)) {
			/* Switch to the next pset. We need to check for null psets because
			 * we do not use acquire/release semantics for the spill order. */
			processor_set_t nset = pset_array[istate.spis_pset_id];
			if (__improbable(nset == PROCESSOR_SET_NULL)) {
				continue;
			}
			pset = change_locked_pset(pset, nset);

			processor = pset_choose_processor_for_realtime_thread(pset, PROCESSOR_NULL, false);
			if (processor != PROCESSOR_NULL) {
				/* We found a candidate processor on this pset to wake or preempt. */
				pset_assert_locked(processor->processor_set);
				return processor;
			}

			/* TODO <rdar://140219824>: Policy question of EDF vs targeting idle cores on another pset. */
			processor = pset_choose_furthest_deadline_processor_for_realtime_thread(pset, thread->sched_pri, thread->realtime.deadline, PROCESSOR_NULL, false, include_ast_urgent_pending_cpus);
			if (processor) {
				/*
				 * Instead of looping through all the psets to find the global
				 * furthest deadline processor, preempt the first candidate found.
				 * The preempted thread will then find any other available far deadline
				 * processors to preempt.
				 */
				pset_assert_locked(processor->processor_set);
				return processor;
			}

			found_ast_urgent_pending = found_ast_urgent_pending || (pset->pending_AST_URGENT_cpu_mask != 0);
		}
	}

	/*
	 * There was no obvious (idle or non-realtime) processor to run the thread.
	 * Instead, do EDF scheduling again on starting_pset, putting the thread on
	 * the run queue if there is no processor to preempt.
	 */

	pset = change_locked_pset(pset, starting_pset);
#endif /* __AMP__ */

	/* Check (again, for AMP systems) that there is no lower-priority or idle processor. */
	processor = pset_choose_processor_for_realtime_thread(pset, PROCESSOR_NULL, false);
	if (processor != PROCESSOR_NULL) {
		/* We found a candidate processor on this pset to wake or preempt. */
		pset_assert_locked(processor->processor_set);
		return processor;
	}

	processor = pset_choose_furthest_deadline_processor_for_realtime_thread(pset, thread->sched_pri, thread->realtime.deadline, PROCESSOR_NULL, false, true);
	if (processor == PROCESSOR_NULL) {
		/* Choose an arbitrary available and recommended processor from the pset.
		 * It won't get preempted anyway, since this thread has a later
		 * deadline. */
		int processor_id = lsb_first(pset_available_cpumap(pset));

		/* starting_pset had available, recommended processors coming into
		 * rt_choose_processor(), but that might have changed after dropping the
		 * pset lock. If there are no such processors, bail out here and let
		 * sched_edge_migrate_candidate() find a better starting pset. */
		if (processor_id < 0) {
			pset_unlock(pset);
			return PROCESSOR_NULL;
		}

		processor = processor_array[processor_id];
	}

	pset_assert_locked(processor->processor_set);
	return processor;
}
#endif /* !CONFIG_SCHED_SMT */

#if CONFIG_SCHED_EDGE
/*
 * Called with stealing_pset locked and returns with stealing_pset locked,
 * but the lock will have been dropped and retaken if a thread is returned.
 * The lock may have been temporarily dropped even if no thread is returned.
 */
thread_t
sched_rt_steal_thread(processor_set_t stealing_pset)
{
	uint64_t earliest_deadline = rt_runq_earliest_deadline(stealing_pset);
	processor_set_t pset = stealing_pset;

	/* Continue searching until no steal candidates are found in a single iteration. */
	while (true) {
		processor_set_t target_pset = NULL;
		uint64_t target_deadline;
		if (__improbable(os_sub_overflow(earliest_deadline, rt_deadline_epsilon, &target_deadline))) {
			target_deadline = 0;
		}
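
		/*
		 * target_deadline is the stealing pset's own earliest deadline
		 * minus the comparison epsilon (saturating at 0), so a remote
		 * thread only qualifies as a steal candidate if it beats the
		 * local queue by more than rt_deadline_epsilon.
		 */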

		sched_pset_iterate_state_t istate = SCHED_PSET_ITERATE_STATE_INIT;
		while (sched_iterate_psets_ordered(stealing_pset, &stealing_pset->sched_rt_steal_search_order, ~BIT(stealing_pset->pset_id), &istate)) {
			assert3s(istate.spis_pset_id, !=, stealing_pset->pset_id); /* stealing_pset's runqueue is drained by sched_rt_choose_processor */
			const processor_set_t nset = pset_array[istate.spis_pset_id];
			/* Check for null because we do not use acquire/release semantics for steal order. */
			if (__improbable(nset == PROCESSOR_SET_NULL)) {
				continue;
			}
			uint64_t nset_deadline = os_atomic_load(&nset->stealable_rt_threads_earliest_deadline, relaxed);
			if (nset_deadline < target_deadline) {
				target_pset = nset;
				target_deadline = nset_deadline;
			}
		}

		if (target_pset != PROCESSOR_SET_NULL) {
			assert3u(target_deadline, !=, RT_DEADLINE_NONE);

			/* target_pset is a candidate for steal. Check again under its pset lock. */

			pset = change_locked_pset(pset, target_pset);
			if (os_atomic_load(&pset->stealable_rt_threads_earliest_deadline, relaxed) <= target_deadline) {
				/* Steal the next thread from target_pset's runqueue. */
				thread_t new_thread = rt_runq_dequeue(&pset->rt_runq);
				pset_update_rt_stealable_state(pset);
				KTRC(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_STEAL) | DBG_FUNC_NONE, (uintptr_t)thread_tid(new_thread), pset->pset_id, pset->cpu_set_low, 0);

				pset = change_locked_pset(pset, stealing_pset);
				return new_thread;
			} else {
				/* Failed to steal (another pset stole first). Try again. */
				KTRC(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_STEAL) | DBG_FUNC_NONE, (uintptr_t)thread_tid(THREAD_NULL), pset->pset_id, pset->cpu_set_low, 1);
				pset = change_locked_pset(pset, stealing_pset);
				/* Update earliest_deadline in case it changed while the stealing_pset lock was not held. */
				earliest_deadline = rt_runq_earliest_deadline(pset);
				continue;
			}
		} else {
			/* No steal candidates, stop searching. */
			break;
		}
	}
	/* No stealable threads, return with stealing_pset locked. */
	pset = change_locked_pset(pset, stealing_pset);
	return THREAD_NULL;
}
#endif /* CONFIG_SCHED_EDGE */

/*
 * processor's pset is locked, may drop and retake the lock
 */
thread_t
sched_rt_choose_thread(processor_t processor)
{
	processor_set_t pset = processor->processor_set;
	pset_assert_locked(pset);

	if (SCHED(rt_steal_thread) != NULL) {
		do {
			rt_clear_pending_spill(processor, 2);
			thread_t new_thread = SCHED(rt_steal_thread)(pset);
			/* pset lock may have been dropped and retaken, is currently locked */
			pset_assert_locked(pset);
			if (new_thread != THREAD_NULL) {
				/* Spill might have been set if the pset lock was dropped in steal. */
				rt_clear_pending_spill(processor, 3);
				return new_thread;
			}
		} while (bit_test(pset->rt_pending_spill_cpu_mask, processor->cpu_id));
	}
	rt_clear_pending_spill(processor, 5);

	if (rt_runq_count(pset) > 0) {
		thread_t new_thread = rt_runq_dequeue(&pset->rt_runq);
		assert(new_thread != THREAD_NULL);
		pset_update_rt_stealable_state(pset);
		return new_thread;
	}

	return THREAD_NULL;
}

void
sched_rt_init_pset(processor_set_t pset)
{
	pset_rt_init(pset);
}

void
sched_rt_init_completed(void)
{
	rt_init_completed();
}

void
sched_rt_queue_shutdown(processor_t processor, struct pulled_thread_queue *threadq)
{
	processor_set_t pset = processor->processor_set;

	pset_lock(pset);

	/* We only need to migrate threads if this is the last active or last recommended processor in the pset */
	if (bit_count(pset_available_cpumap(pset)) > 0) {
		pset_unlock(pset);
		return;
	}

	while (rt_runq_count(pset) > 0) {
		thread_t thread = rt_runq_dequeue(&pset->rt_runq);
		pulled_thread_queue_enqueue(threadq, thread);
	}
	SCHED(update_pset_load_average)(pset, 0);
	pset_update_rt_stealable_state(pset);
	pset_unlock(pset);
}

/*
 * Assumes RT lock is not held, and acquires splsched/rt_lock itself.
 * Also records tracepoints for pset bitmasks under the pset lock.
 */
void
sched_rt_runq_scan(sched_update_scan_context_t scan_context)
{
	thread_t thread;

	pset_node_t node = &pset_node0;
	processor_set_t pset = node->psets;

	spl_t s = splsched();
	do {
		while (pset != NULL) {
			pset_lock(pset);

			bitmap_t *map = pset->rt_runq.bitmap;
			for (int i = bitmap_first(map, NRTQS); i >= 0; i = bitmap_next(map, i)) {
				rt_queue_pri_t *rt_runq = &pset->rt_runq.rt_queue_pri[i];

				qe_foreach_element_safe(thread, &rt_runq->pri_queue, runq_links) {
					if (thread->last_made_runnable_time < scan_context->earliest_rt_make_runnable_time) {
						scan_context->earliest_rt_make_runnable_time = thread->last_made_runnable_time;
					}
				}
			}

			KTRC(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_PSET_BITMASKS),
			    pset->pset_id,
			    pset->recommended_bitmask,
			    pset->perfcontrol_cpu_migration_bitmask,
			    pset->perfcontrol_cpu_preferred_bitmask);

			pset_unlock(pset);

			pset = pset->pset_list;
		}
	} while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));
	splx(s);
}

int64_t
sched_rt_runq_count_sum(void)
{
	pset_node_t node = &pset_node0;
	processor_set_t pset = node->psets;
	int64_t count = 0;

	do {
		while (pset != NULL) {
			count += pset->rt_runq.runq_stats.count_sum;

			pset = pset->pset_list;
		}
	} while (((node = node->node_list) != NULL) && ((pset = node->psets) != NULL));

	return count;
}

#pragma mark - Utilities

uint64_t
rt_deadline_add(uint64_t d, uint64_t e)
{
	uint64_t sum;
	return os_add_overflow(d, e, &sum) ? RT_DEADLINE_NONE : sum;
}
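
/*
 * Saturating add used for deadline arithmetic: any sum that would wrap past
 * UINT64_MAX clamps to RT_DEADLINE_NONE instead, so an overflowed deadline
 * sorts as "no deadline" (the least urgent value) rather than wrapping
 * around to look extremely urgent.
 */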

cpumap_t
pset_available_but_not_running_rt_threads_cpumap(processor_set_t pset)
{
	cpumap_t avail_map = pset_available_cpumap(pset);
#if CONFIG_SCHED_SMT
	if (!sched_allow_rt_smt) {
		/*
		 * Secondary CPUs are not allowed to run RT threads, so
		 * only primary CPUs should be included
		 */
		avail_map &= pset->primary_map;
	}
#endif /* CONFIG_SCHED_SMT */

	return avail_map & ~pset->realtime_map;
}

/* pset is locked */
static processor_t
pset_choose_next_processor_for_realtime_thread(processor_set_t pset, int max_pri, uint64_t minimum_deadline, processor_t skip_processor, bool consider_secondaries)
{
	(void) consider_secondaries;
	bool skip_spills = true;
	bool include_ast_urgent_pending_cpus = false;

#if CONFIG_SCHED_SMT
	processor_t next_processor = pset_choose_processor_for_realtime_thread_smt(pset, skip_processor, consider_secondaries, skip_spills);
#else /* CONFIG_SCHED_SMT */
	processor_t next_processor = pset_choose_processor_for_realtime_thread(pset, skip_processor, skip_spills);
#endif /* CONFIG_SCHED_SMT */
	if (next_processor != PROCESSOR_NULL) {
		return next_processor;
	}

	next_processor = pset_choose_furthest_deadline_processor_for_realtime_thread(pset, max_pri, minimum_deadline, skip_processor, skip_spills, include_ast_urgent_pending_cpus);
	return next_processor;
}

#if CONFIG_SCHED_EDGE
/*
 * Realtime Steal Utilities
 *
 * Realtime steal is only supported on platforms with the edge scheduler.
 */

/* Update realtime stealable state. */
void
pset_update_rt_stealable_state(processor_set_t pset)
{
	pset_assert_locked(pset);
	if (rt_pset_has_stealable_threads(pset)) {
		os_atomic_store(&pset->stealable_rt_threads_earliest_deadline, rt_runq_earliest_deadline(pset), relaxed);
	} else {
		os_atomic_store(&pset->stealable_rt_threads_earliest_deadline, RT_DEADLINE_NONE, relaxed);
	}
}

bool
rt_pset_has_stealable_threads(processor_set_t pset)
{
	cpumap_t avail_map = pset_available_but_not_running_rt_threads_cpumap(pset);

	return rt_runq_count(pset) > bit_count(avail_map);
}
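
/*
 * Example: a pset with 4 available CPUs, 3 of which are already running
 * realtime threads, has bit_count(avail_map) == 1 after masking, so its
 * queued realtime threads become stealable as soon as more than one thread
 * is waiting on the runqueue.
 */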

/*
 * Returns the next processor to IPI for a migrating realtime thread. Realtime
 * spill is only supported with the edge scheduler.
 *
 * Expects starting_pset to be locked. Returns false if starting_pset was never
 * unlocked; otherwise, returns true with no lock held.
 */
bool
rt_choose_next_processor_for_spill_IPI(
	processor_set_t starting_pset,
	processor_t chosen_processor,
	processor_t *result_processor,
	sched_ipi_type_t *result_ipi_type
	)
{
	assert3p(starting_pset, !=, PROCESSOR_SET_NULL);
	assert3p(chosen_processor, !=, PROCESSOR_NULL);

	uint64_t earliest_deadline = rt_runq_earliest_deadline(starting_pset);
	int max_pri = rt_runq_priority(starting_pset);
	__kdebug_only uint64_t spill_tid = thread_tid(rt_runq_first(&starting_pset->rt_runq));
	processor_set_t pset = starting_pset; /* lock is held on this pset */
	processor_t next_rt_processor = PROCESSOR_NULL;
	/* Optimization so the caller can avoid unnecessary lock-takes if there are no psets eligible for spill: */
	bool starting_pset_was_unlocked = false;

	cpumap_t candidate_map = ~BIT(starting_pset->pset_id); /* exclude starting_pset itself */
	sched_pset_iterate_state_t istate = SCHED_PSET_ITERATE_STATE_INIT;
	while (sched_iterate_psets_ordered(starting_pset, &starting_pset->sched_rt_spill_search_order, candidate_map, &istate)) {
		assert3u(starting_pset->pset_id, !=, istate.spis_pset_id);
		/* Check for null pset because we do not use acquire/release semantics for spill order. */
		processor_set_t nset = pset_array[istate.spis_pset_id];
		if (__improbable(nset == PROCESSOR_SET_NULL)) {
			continue;
		}

		/* Make sure the candidate pset is allowed to steal threads from starting_pset's runqueue. */
		sched_clutch_edge edge = sched_rt_config_get((pset_id_t) starting_pset->pset_id, (pset_id_t) istate.spis_pset_id);
		if (istate.spis_pset_id != starting_pset->pset_id && edge.sce_steal_allowed == false) {
			continue;
		}
		pset = change_locked_pset(pset, nset);
		starting_pset_was_unlocked = true;

		next_rt_processor = pset_choose_next_processor_for_realtime_thread(pset, max_pri, earliest_deadline, chosen_processor, true);
		if (next_rt_processor != PROCESSOR_NULL) {
			break;
		}
	}

	if (next_rt_processor != PROCESSOR_NULL) {
		if (pset != starting_pset) {
			if (bit_set_if_clear(pset->rt_pending_spill_cpu_mask, next_rt_processor->cpu_id)) {
				KTRC(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_SIGNAL_SPILL) | DBG_FUNC_START,
				    next_rt_processor->cpu_id, pset->rt_pending_spill_cpu_mask, starting_pset->cpu_set_low, spill_tid);
			}
		}
		*result_ipi_type = sched_ipi_action(next_rt_processor, NULL, SCHED_IPI_EVENT_RT_PREEMPT);
		*result_processor = next_rt_processor;
	}
	if (starting_pset_was_unlocked) {
		pset_unlock(pset);
		return true;
	} else {
		return false;
	}
}
#endif /* CONFIG_SCHED_EDGE */

bool
rt_pset_needs_a_followup_IPI(processor_set_t pset)
{
	int nbackup_cpus = 0;

	if (rt_runq_is_low_latency(pset)) {
		nbackup_cpus = sched_rt_n_backup_processors;
	}

	int rt_rq_count = rt_runq_count(pset);

	return (rt_rq_count > 0) && ((rt_rq_count + nbackup_cpus - bit_count(pset->pending_AST_URGENT_cpu_mask)) > 0);
}
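
/*
 * Example: with 2 queued realtime threads, a low-latency queue, and
 * sched_rt_n_backup_processors == 1, a followup IPI is warranted until at
 * least 3 CPUs in the pset have urgent ASTs pending (2 + 1 - 3 == 0).
 */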

/*
 * Returns the next processor to IPI as a followup for low-latency realtime
 * threads on the runqueue.
 *
 * pset should be locked, and stays locked the whole time.
 */
void
rt_choose_next_processor_for_followup_IPI(
	processor_set_t pset,
	processor_t chosen_processor,
	processor_t *result_processor,
	sched_ipi_type_t *result_ipi_type)
{
	uint64_t earliest_deadline = rt_runq_earliest_deadline(pset);
	int max_pri = rt_runq_priority(pset);
	processor_t next_rt_processor = pset_choose_next_processor_for_realtime_thread(pset, max_pri, earliest_deadline, chosen_processor, true);
	if (next_rt_processor != PROCESSOR_NULL) {
		*result_ipi_type = sched_ipi_action(next_rt_processor, NULL, SCHED_IPI_EVENT_RT_PREEMPT);
		*result_processor = next_rt_processor;
	}
}

#if CONFIG_SCHED_SMT
extern int sched_avoid_cpu0;
extern int sched_allow_rt_smt;

/* pset is locked */
processor_t
pset_choose_processor_for_realtime_thread_smt(processor_set_t pset, processor_t skip_processor, bool consider_secondaries, bool skip_spills)
{
#if defined(__x86_64__)
	bool avoid_cpu0 = sched_avoid_cpu0 && bit_test(pset->cpu_bitmask, 0);
#else
	const bool avoid_cpu0 = false;
#endif
	cpumap_t cpu_map;

try_again:
	cpu_map = pset_available_cpumap(pset) & ~pset->pending_AST_URGENT_cpu_mask & ~pset->realtime_map;
	if (skip_processor) {
		bit_clear(cpu_map, skip_processor->cpu_id);
	}
	if (skip_spills) {
		cpu_map &= ~pset->rt_pending_spill_cpu_mask;
	}

	if (avoid_cpu0 && (sched_avoid_cpu0 == 2)) {
		bit_clear(cpu_map, 0);
	}

	cpumap_t primary_map = cpu_map & pset->primary_map;
	if (avoid_cpu0) {
		primary_map = bit_ror64(primary_map, 1);
	}
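
	/*
	 * Worked example of the rotation trick: with avoid_cpu0 set, rotating
	 * the map right by 1 moves cpu0's bit to position 63, so lsb_first()
	 * naturally prefers every other primary CPU first; the (rotid + 1) & 63
	 * below undoes the rotation to recover the real cpu id.
	 */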

	int rotid = lsb_first(primary_map);
	if (rotid >= 0) {
		int cpuid = avoid_cpu0 ? ((rotid + 1) & 63) : rotid;

		processor_t processor = processor_array[cpuid];

		return processor;
	}

	if (!pset->is_SMT || !sched_allow_rt_smt || !consider_secondaries) {
		goto out;
	}

	if (avoid_cpu0 && (sched_avoid_cpu0 == 2)) {
		/* Also avoid cpu1 */
		bit_clear(cpu_map, 1);
	}

	/* Consider secondary processors whose primary is actually running a realtime thread */
	cpumap_t secondary_map = cpu_map & ~pset->primary_map & (pset->realtime_map << 1);
	if (avoid_cpu0) {
		/* Also avoid cpu1 */
		secondary_map = bit_ror64(secondary_map, 2);
	}
	rotid = lsb_first(secondary_map);
	if (rotid >= 0) {
		int cpuid = avoid_cpu0 ? ((rotid + 2) & 63) : rotid;

		processor_t processor = processor_array[cpuid];

		return processor;
	}

	/* Consider all remaining secondary processors */
	secondary_map = cpu_map & ~pset->primary_map;
	if (avoid_cpu0) {
		/* Also avoid cpu1 */
		secondary_map = bit_ror64(secondary_map, 2);
	}
	rotid = lsb_first(secondary_map);
	if (rotid >= 0) {
		int cpuid = avoid_cpu0 ? ((rotid + 2) & 63) : rotid;

		processor_t processor = processor_array[cpuid];

		return processor;
	}

	/*
	 * One would hope the compiler could optimize this away when
	 * avoid_cpu0 is const bool false, but it still complains about the
	 * assignment in that case.
	 */
	if (avoid_cpu0 && (sched_avoid_cpu0 == 2)) {
#if defined(__x86_64__)
		avoid_cpu0 = false;
#else
		assert(0);
#endif
		goto try_again;
	}

out:
	if (skip_processor) {
		return PROCESSOR_NULL;
	}

	/*
	 * If we didn't find an obvious processor to choose, but there are still more CPUs
	 * not already running realtime threads than realtime threads in the realtime run queue,
	 * this thread belongs in this pset, so choose some other processor in this pset
	 * to ensure the thread is enqueued here.
	 */
	cpumap_t non_realtime_map = pset_available_cpumap(pset) & pset->primary_map & ~pset->realtime_map;
	if (bit_count(non_realtime_map) > rt_runq_count(pset)) {
		cpu_map = non_realtime_map;
		assert(cpu_map != 0);
		int cpuid = bit_first(cpu_map);
		assert(cpuid >= 0);
		return processor_array[cpuid];
	}

	if (!pset->is_SMT || !sched_allow_rt_smt || !consider_secondaries) {
		goto skip_secondaries;
	}

	non_realtime_map = pset_available_cpumap(pset) & ~pset->realtime_map;
	if (bit_count(non_realtime_map) > rt_runq_count(pset)) {
		cpu_map = non_realtime_map;
		assert(cpu_map != 0);
		int cpuid = bit_first(cpu_map);
		assert(cpuid >= 0);
		return processor_array[cpuid];
	}

skip_secondaries:
	return PROCESSOR_NULL;
}
#else /* !CONFIG_SCHED_SMT */
/* pset is locked */
processor_t
pset_choose_processor_for_realtime_thread(processor_set_t pset, processor_t skip_processor, bool skip_spills)
{
	cpumap_t cpu_map = pset_available_cpumap(pset) & ~pset->pending_AST_URGENT_cpu_mask & ~pset->realtime_map;
	if (skip_processor) {
		bit_clear(cpu_map, skip_processor->cpu_id);
	}
	if (skip_spills) {
		cpu_map &= ~pset->rt_pending_spill_cpu_mask;
	}

	int rotid = lsb_first(cpu_map);
	if (rotid >= 0) {
		return processor_array[rotid];
	}

	/*
	 * If we didn't find an obvious processor to choose, but there are still more CPUs
	 * not already running realtime threads than realtime threads in the realtime run queue,
	 * this thread belongs in this pset, so choose some other processor in this pset
	 * to ensure the thread is enqueued here.
	 */
	cpumap_t non_realtime_map = pset_available_cpumap(pset) & ~pset->realtime_map;
	if (bit_count(non_realtime_map) > rt_runq_count(pset)) {
		cpu_map = non_realtime_map;
		assert(cpu_map != 0);
		int cpuid = bit_first(cpu_map);
		assert(cpuid >= 0);
		return processor_array[cpuid];
	}

	return PROCESSOR_NULL;
}
#endif /* !CONFIG_SCHED_SMT */

/*
 * Choose the processor with (1) the lowest priority less than max_pri and (2) the furthest deadline for that priority.
 * If all available processors are at max_pri, choose the furthest deadline that is greater than minimum_deadline.
 *
 * pset is locked.
 */
processor_t
pset_choose_furthest_deadline_processor_for_realtime_thread(processor_set_t pset, int max_pri, uint64_t minimum_deadline, processor_t skip_processor, bool skip_spills, bool include_ast_urgent_pending_cpus)
{
	uint64_t furthest_deadline = rt_deadline_add(minimum_deadline, rt_deadline_epsilon);
	processor_t fd_processor = PROCESSOR_NULL;
	int lowest_priority = max_pri;

	cpumap_t cpu_map = pset_available_cpumap(pset) & ~pset->pending_AST_URGENT_cpu_mask;
	if (skip_processor) {
		bit_clear(cpu_map, skip_processor->cpu_id);
	}
	if (skip_spills) {
		cpu_map &= ~pset->rt_pending_spill_cpu_mask;
	}

	for (int cpuid = bit_first(cpu_map); cpuid >= 0; cpuid = bit_next(cpu_map, cpuid)) {
		processor_t processor = processor_array[cpuid];

		if (processor->current_pri > lowest_priority) {
			continue;
		}

		if (processor->current_pri < lowest_priority) {
			lowest_priority = processor->current_pri;
			furthest_deadline = processor->deadline;
			fd_processor = processor;
			continue;
		}

		if (processor->deadline > furthest_deadline) {
			furthest_deadline = processor->deadline;
			fd_processor = processor;
		}
	}

	if (fd_processor) {
		return fd_processor;
	}

	/*
	 * There is a race condition possible when there are multiple processor sets.
	 * choose_processor() takes pset lock A, sees the pending_AST_URGENT_cpu_mask set for a processor in that set, and finds no suitable candidate CPU,
	 * so it drops pset lock A and tries to take pset lock B. Meanwhile the pending_AST_URGENT_cpu_mask CPU is looking for a thread to run and holds
	 * pset lock B. It doesn't find any threads (because the candidate thread isn't yet on any run queue), so it drops lock B, takes lock A again to clear
	 * the pending_AST_URGENT_cpu_mask bit, and keeps running the current (far deadline) thread. choose_processor() now has lock B and can only find
	 * the lowest count processor in set B, so it enqueues the thread on set B's run queue but doesn't IPI anyone. (The lowest count includes all threads,
	 * near and far deadlines, so it will prefer a low count of earlier deadlines to a high count of far deadlines, which is suboptimal for EDF scheduling.
	 * To make a better choice we would need to know how many threads with earlier deadlines than the candidate thread exist on each pset's run queue.
	 * But even if we chose the better run queue, we still wouldn't send an IPI in this case.)
	 *
	 * The mitigation is to also look for suitable CPUs that have their pending_AST_URGENT_cpu_mask bit set where there are no earlier deadline threads
	 * on the run queue of that pset.
	 */
	if (include_ast_urgent_pending_cpus && (rt_runq_earliest_deadline(pset) > furthest_deadline)) {
		cpu_map = pset_available_cpumap(pset) & pset->pending_AST_URGENT_cpu_mask;
		assert(skip_processor == PROCESSOR_NULL);
		assert(skip_spills == false);

		for (int cpuid = bit_first(cpu_map); cpuid >= 0; cpuid = bit_next(cpu_map, cpuid)) {
			processor_t processor = processor_array[cpuid];

			if (processor->current_pri > lowest_priority) {
				continue;
			}

			if (processor->current_pri < lowest_priority) {
				lowest_priority = processor->current_pri;
				furthest_deadline = processor->deadline;
				fd_processor = processor;
				continue;
			}

			if (processor->deadline > furthest_deadline) {
				furthest_deadline = processor->deadline;
				fd_processor = processor;
			}
		}
	}

	return fd_processor;
}

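/*
 * Clear this processor's pending-spill bit, if set. The reason argument is
 * only recorded in the tracepoint (callers pass small distinct codes so the
 * trace shows which path cleared the bit); it has no scheduling effect.
 */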
bool
rt_clear_pending_spill(processor_t processor, int reason)
{
	processor_set_t pset = processor->processor_set;
	if (bit_clear_if_set(pset->rt_pending_spill_cpu_mask, processor->cpu_id)) {
		KTRC(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RT_SIGNAL_SPILL) | DBG_FUNC_END, processor->cpu_id, pset->rt_pending_spill_cpu_mask, 0, reason);
		return true;
	} else {
		return false;
	}
}

#pragma mark - Realtime Runqueues

#if DEBUG || SCHED_TEST_HARNESS
void
check_rt_runq_consistency(rt_queue_t rt_run_queue, thread_t thread)
{
	__assert_only bitmap_t *map = rt_run_queue->bitmap;

	uint64_t earliest_deadline = RT_DEADLINE_NONE;
	uint32_t constraint = RT_CONSTRAINT_NONE;
	int ed_index = NOPRI;
	int count = 0;
	bool found_thread = false;

	for (int pri = BASEPRI_RTQUEUES; pri <= MAXPRI; pri++) {
		int i = pri - BASEPRI_RTQUEUES;
		rt_queue_pri_t *rt_runq = &rt_run_queue->rt_queue_pri[i];
		queue_t queue = &rt_runq->pri_queue;
		queue_entry_t iter;
		int n = 0;
		uint64_t previous_deadline = 0;
		qe_foreach(iter, queue) {
			thread_t iter_thread = qe_element(iter, struct thread, runq_links);
			assert_thread_magic(iter_thread);
			if (iter_thread == thread) {
				found_thread = true;
			}
			assert(iter_thread->sched_pri == (i + BASEPRI_RTQUEUES));
			assert(iter_thread->realtime.deadline < RT_DEADLINE_NONE);
			assert(iter_thread->realtime.constraint < RT_CONSTRAINT_NONE);
			assert(previous_deadline <= iter_thread->realtime.deadline);
			n++;
			if (iter == queue_first(queue)) {
				assert(rt_runq->pri_earliest_deadline == iter_thread->realtime.deadline);
				assert(rt_runq->pri_constraint == iter_thread->realtime.constraint);
			}
			previous_deadline = iter_thread->realtime.deadline;
		}
		assert(n == rt_runq->pri_count);
		if (n == 0) {
			assert(bitmap_test(map, i) == false);
			assert(rt_runq->pri_earliest_deadline == RT_DEADLINE_NONE);
			assert(rt_runq->pri_constraint == RT_CONSTRAINT_NONE);
		} else {
			assert(bitmap_test(map, i) == true);
		}
		if (rt_runq->pri_earliest_deadline < earliest_deadline) {
			earliest_deadline = rt_runq->pri_earliest_deadline;
			constraint = rt_runq->pri_constraint;
			ed_index = i;
		}
		count += n;
	}
	assert(os_atomic_load_wide(&rt_run_queue->earliest_deadline, relaxed) == earliest_deadline);
	assert(os_atomic_load(&rt_run_queue->count, relaxed) == count);
	assert(os_atomic_load(&rt_run_queue->constraint, relaxed) == constraint);
	assert(os_atomic_load(&rt_run_queue->ed_index, relaxed) == ed_index);
	if (thread) {
		assert(found_thread);
	}
}
#endif /* DEBUG || SCHED_TEST_HARNESS */

static bool
rt_runq_enqueue(rt_queue_t rt_run_queue, thread_t thread, processor_t processor)
{
	int pri = thread->sched_pri;
	assert((pri >= BASEPRI_RTQUEUES) && (pri <= MAXPRI));
	int i = pri - BASEPRI_RTQUEUES;
	rt_queue_pri_t *rt_runq = &rt_run_queue->rt_queue_pri[i];
	bitmap_t *map = rt_run_queue->bitmap;

	bitmap_set(map, i);

	queue_t queue = &rt_runq->pri_queue;
	uint64_t deadline = thread->realtime.deadline;
	bool preempt = false;
	bool earliest = false;
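
	/*
	 * preempt: the new thread became the head of its priority queue, so the
	 * caller should consider preempting a processor for it.
	 * earliest: the new thread has the earliest deadline at this priority,
	 * which may require updating the runqueue-wide cached deadline below.
	 */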

	if (queue_empty(queue)) {
		enqueue_tail(queue, &thread->runq_links);
		preempt = true;
		earliest = true;
		rt_runq->pri_earliest_deadline = deadline;
		rt_runq->pri_constraint = thread->realtime.constraint;
	} else {
		/* Insert into rt_runq in thread deadline order */
		queue_entry_t iter;
		qe_foreach(iter, queue) {
			thread_t iter_thread = qe_element(iter, struct thread, runq_links);
			assert_thread_magic(iter_thread);

			if (deadline < iter_thread->realtime.deadline) {
				if (iter == queue_first(queue)) {
					preempt = true;
					earliest = true;
					rt_runq->pri_earliest_deadline = deadline;
					rt_runq->pri_constraint = thread->realtime.constraint;
				}
				insque(&thread->runq_links, queue_prev(iter));
				break;
			} else if (iter == queue_last(queue)) {
				enqueue_tail(queue, &thread->runq_links);
				break;
			}
		}
	}
	if (earliest && (deadline < os_atomic_load_wide(&rt_run_queue->earliest_deadline, relaxed))) {
		os_atomic_store_wide(&rt_run_queue->earliest_deadline, deadline, relaxed);
		os_atomic_store(&rt_run_queue->constraint, thread->realtime.constraint, relaxed);
		os_atomic_store(&rt_run_queue->ed_index, pri - BASEPRI_RTQUEUES, relaxed);
	}

	SCHED_STATS_RUNQ_CHANGE(&rt_run_queue->runq_stats, os_atomic_load(&rt_run_queue->count, relaxed));
	rt_runq->pri_count++;
	os_atomic_inc(&rt_run_queue->count, relaxed);

	thread_set_runq_locked(thread, processor);

	CHECK_RT_RUNQ_CONSISTENCY(rt_run_queue, thread);

	return preempt;
}

uint64_t
rt_runq_earliest_deadline(processor_set_t pset)
{
	return os_atomic_load_wide(&pset->rt_runq.earliest_deadline, relaxed);
}

/*
 * rt_runq_insert:
 *
 * Enqueue a thread for realtime execution.
 */
bool
rt_runq_insert(processor_t processor, processor_set_t pset, thread_t thread)
{
	pset_assert_locked(pset);

	bool preempt = rt_runq_enqueue(&pset->rt_runq, thread, processor);
	pset_update_rt_stealable_state(pset);

	return preempt;
}

int
rt_runq_count(processor_set_t pset)
{
	return os_atomic_load(&pset->rt_runq.count, relaxed);
}

int
rt_runq_priority(processor_set_t pset)
{
	pset_assert_locked(pset);
	rt_queue_t rt_run_queue = &pset->rt_runq;

	bitmap_t *map = rt_run_queue->bitmap;
	int i = bitmap_first(map, NRTQS);
	assert(i < NRTQS);

	if (i >= 0) {
		return i + BASEPRI_RTQUEUES;
	}

	return i;
}

bool
rt_runq_is_low_latency(processor_set_t pset)
{
	return os_atomic_load(&pset->rt_runq.constraint, relaxed) <= rt_constraint_threshold;
}
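
/*
 * Example: rt_constraint_threshold is initialized to 4 ms of absolute time
 * in sched_realtime_timebase_init(), so a runqueue whose cached constraint
 * (that of its earliest-deadline thread) is, say, 2 ms reports low latency
 * and may trigger backup IPIs via rt_pset_needs_a_followup_IPI().
 */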

thread_t
rt_runq_dequeue(rt_queue_t rt_run_queue)
{
	bitmap_t *map = rt_run_queue->bitmap;
	int i = bitmap_first(map, NRTQS);
	assert((i >= 0) && (i < NRTQS));

	rt_queue_pri_t *rt_runq = &rt_run_queue->rt_queue_pri[i];

	if (!sched_rt_runq_strict_priority) {
		int ed_index = os_atomic_load(&rt_run_queue->ed_index, relaxed);
		if (ed_index != i) {
			assert((ed_index >= 0) && (ed_index < NRTQS));
			rt_queue_pri_t *ed_runq = &rt_run_queue->rt_queue_pri[ed_index];

			thread_t ed_thread = qe_queue_first(&ed_runq->pri_queue, struct thread, runq_links);
			thread_t hi_thread = qe_queue_first(&rt_runq->pri_queue, struct thread, runq_links);

			if (ed_thread->realtime.computation + hi_thread->realtime.computation + rt_deadline_epsilon < hi_thread->realtime.constraint) {
				/* choose the earliest deadline thread */
				rt_runq = ed_runq;
				i = ed_index;
			}
		}
	}
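
	/*
	 * Example of the non-strict policy above: if the highest-priority
	 * waiter needs 2 ms of computation with a 10 ms constraint, and the
	 * earliest-deadline thread at a lower priority needs 3 ms, then
	 * 3 ms + 2 ms + epsilon < 10 ms holds and the earlier deadline wins;
	 * the high-priority thread can still meet its constraint afterwards.
	 */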

	assert(rt_runq->pri_count > 0);
	uint64_t earliest_deadline = RT_DEADLINE_NONE;
	uint32_t constraint = RT_CONSTRAINT_NONE;
	int ed_index = NOPRI;
	thread_t new_thread = qe_dequeue_head(&rt_runq->pri_queue, struct thread, runq_links);
	SCHED_STATS_RUNQ_CHANGE(&rt_run_queue->runq_stats, os_atomic_load(&rt_run_queue->count, relaxed));
	if (--rt_runq->pri_count > 0) {
		thread_t next_rt = qe_queue_first(&rt_runq->pri_queue, struct thread, runq_links);
		assert(next_rt != THREAD_NULL);
		earliest_deadline = next_rt->realtime.deadline;
		constraint = next_rt->realtime.constraint;
		ed_index = i;
	} else {
		bitmap_clear(map, i);
	}
	rt_runq->pri_earliest_deadline = earliest_deadline;
	rt_runq->pri_constraint = constraint;

	for (i = bitmap_first(map, NRTQS); i >= 0; i = bitmap_next(map, i)) {
		rt_runq = &rt_run_queue->rt_queue_pri[i];
		if (rt_runq->pri_earliest_deadline < earliest_deadline) {
			earliest_deadline = rt_runq->pri_earliest_deadline;
			constraint = rt_runq->pri_constraint;
			ed_index = i;
		}
	}
	os_atomic_store_wide(&rt_run_queue->earliest_deadline, earliest_deadline, relaxed);
	os_atomic_store(&rt_run_queue->constraint, constraint, relaxed);
	os_atomic_store(&rt_run_queue->ed_index, ed_index, relaxed);
	os_atomic_dec(&rt_run_queue->count, relaxed);

	thread_clear_runq(new_thread);

	CHECK_RT_RUNQ_CONSISTENCY(rt_run_queue, THREAD_NULL);

	return new_thread;
}

thread_t
rt_runq_first(rt_queue_t rt_run_queue)
{
	bitmap_t *map = rt_run_queue->bitmap;
	int i = bitmap_first(map, NRTQS);
	if (i < 0) {
		return THREAD_NULL;
	}
	rt_queue_pri_t *rt_runq = &rt_run_queue->rt_queue_pri[i];
	thread_t next_rt = qe_queue_first(&rt_runq->pri_queue, struct thread, runq_links);

	return next_rt;
}

void
rt_runq_remove(rt_queue_t rt_run_queue, thread_t thread)
{
	CHECK_RT_RUNQ_CONSISTENCY(rt_run_queue, thread);

	int pri = thread->sched_pri;
	assert((pri >= BASEPRI_RTQUEUES) && (pri <= MAXPRI));
	int i = pri - BASEPRI_RTQUEUES;
	rt_queue_pri_t *rt_runq = &rt_run_queue->rt_queue_pri[i];
	bitmap_t *map = rt_run_queue->bitmap;

	assert(rt_runq->pri_count > 0);
	uint64_t earliest_deadline = RT_DEADLINE_NONE;
	uint32_t constraint = RT_CONSTRAINT_NONE;
	int ed_index = NOPRI;
	remqueue(&thread->runq_links);
	SCHED_STATS_RUNQ_CHANGE(&rt_run_queue->runq_stats, os_atomic_load(&rt_run_queue->count, relaxed));
	if (--rt_runq->pri_count > 0) {
		thread_t next_rt = qe_queue_first(&rt_runq->pri_queue, struct thread, runq_links);
		earliest_deadline = next_rt->realtime.deadline;
		constraint = next_rt->realtime.constraint;
		ed_index = i;
	} else {
		bitmap_clear(map, i);
	}
	rt_runq->pri_earliest_deadline = earliest_deadline;
	rt_runq->pri_constraint = constraint;

	for (i = bitmap_first(map, NRTQS); i >= 0; i = bitmap_next(map, i)) {
		rt_runq = &rt_run_queue->rt_queue_pri[i];
		if (rt_runq->pri_earliest_deadline < earliest_deadline) {
			earliest_deadline = rt_runq->pri_earliest_deadline;
			constraint = rt_runq->pri_constraint;
			ed_index = i;
		}
	}
	os_atomic_store_wide(&rt_run_queue->earliest_deadline, earliest_deadline, relaxed);
	os_atomic_store(&rt_run_queue->constraint, constraint, relaxed);
	os_atomic_store(&rt_run_queue->ed_index, ed_index, relaxed);
	os_atomic_dec(&rt_run_queue->count, relaxed);

	thread_clear_runq_locked(thread);

	CHECK_RT_RUNQ_CONSISTENCY(rt_run_queue, THREAD_NULL);
}