1 // Copyright (c) 2024 Apple Inc. All rights reserved.
2
3 #include "sched_test_harness/sched_policy_darwintest.h"
4 #include "sched_test_harness/sched_edge_harness.h"
5
6 T_GLOBAL_META(T_META_NAMESPACE("xnu.scheduler"),
7 T_META_RADAR_COMPONENT_NAME("xnu"),
8 T_META_RADAR_COMPONENT_VERSION("scheduler"),
9 T_META_RUN_CONCURRENTLY(true),
10 T_META_OWNER("m_zinn"));
11
12 static mach_timebase_info_data_t timebase_info;
13
14 uint64_t
nanos_to_abs(uint64_t nanos)15 nanos_to_abs(uint64_t nanos)
16 {
17 static mach_timebase_info_data_t timebase = {};
18
19 if (timebase.numer == 0 || timebase.denom == 0) {
20 kern_return_t kr;
21
22 kr = mach_timebase_info(&timebase_info);
23 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
24
25 timebase = timebase_info;
26 }
27 return nanos * timebase.denom / timebase.numer;
28 }
29
30 SCHED_POLICY_T_DECL(rt_migration_cluster_bound,
31 "Verify that cluster-bound realtime threads always choose the bound "
32 "cluster except when its derecommended")
33 {
34 int ret;
35 init_migration_harness(dual_die);
36 struct thread_group *tg = create_tg(0);
37 test_thread_t threads[dual_die.num_psets];
38 for (int i = 0; i < dual_die.num_psets; i++) {
39 threads[i] = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
40 set_thread_cluster_bound(threads[i], i);
41 }
42 for (int i = 0; i < dual_die.num_psets; i++) {
43 set_current_processor(pset_id_to_cpu_id(i));
44 for (int j = 0; j < dual_die.num_psets; j++) {
45 ret = choose_pset_for_thread_expect(threads[j], j);
46 T_QUIET; T_EXPECT_TRUE(ret, "Expecting the bound cluster");
47 }
48 }
49 SCHED_POLICY_PASS("Cluster bound chooses bound cluster");
50 /* Derecommend the bound cluster */
51 for (int i = 0; i < dual_die.num_psets; i++) {
52 set_pset_derecommended(i);
53 int replacement_pset = -1;
54 for (int j = 0; j < dual_die.num_psets; j++) {
55 /* Find the first homogenous cluster and mark it as idle so we choose it */
56 if ((i != j) && (dual_die.psets[i].cpu_type == dual_die.psets[j].cpu_type)) {
57 replacement_pset = j;
58 break;
59 }
60 }
61 ret = choose_pset_for_thread_expect(threads[i], replacement_pset);
62 T_QUIET; T_EXPECT_TRUE(ret, "Expecting the idle pset when the bound cluster is derecommended");
63 /* Restore pset conditions */
64 set_pset_recommended(i);
65 }
66 SCHED_POLICY_PASS("Cluster binding is soft");
67 }
68
69 SCHED_POLICY_T_DECL(rt_choose_processor,
70 "Verify the realtime spill policy")
71 {
72 test_hw_topology_t topo = dual_die;
73 init_migration_harness(topo);
74
75 uint64_t start = mach_absolute_time();
76
77 const uint64_t period = 0;
78 const uint64_t computation = nanos_to_abs(5000000ULL); /* 5ms */
79 const uint64_t constraint = nanos_to_abs(10000000ULL); /* 10ms */
80 const bool preemptible = false;
81 const uint8_t priority_offset = 0;
82
83 struct thread_group *tg = create_tg(0);
84 thread_t thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
85 set_thread_sched_mode(thread, TH_MODE_REALTIME);
86 const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(10000000ULL /* 10ms */));
87 set_thread_realtime(thread, period, computation, constraint, preemptible, priority_offset, deadline);
88
89 test_thread_t earlier_threads[topo.total_cpus] = {};
90 for (int i = 0; i < topo.total_cpus; i++) {
91 earlier_threads[i] = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
92 set_thread_sched_mode(earlier_threads[i], TH_MODE_REALTIME);
93 const uint64_t early_deadline = rt_deadline_add(start, nanos_to_abs(5000000) /* 5ms */);
94 set_thread_realtime(earlier_threads[i], period, computation, constraint, preemptible, priority_offset, early_deadline);
95 }
96
97 test_thread_t later_thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
98 set_thread_sched_mode(later_thread, TH_MODE_REALTIME);
99 const uint64_t late_deadline = rt_deadline_add(start, nanos_to_abs(20000000ULL) /* 20ms */);
100 set_thread_realtime(later_thread, period, computation, constraint, preemptible, priority_offset, late_deadline);
101
102 for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
103 set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_FIXPRI, preferred_pset_id);
104 sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
105
106 /* Unloaded system. Expect to choose the preferred pset. */
107 choose_pset_for_thread_expect(thread, preferred_pset_id);
108
109 /*
110 * Load the preferred pset with earlier-deadline threads. Should cause
111 * the thread to spill (since the die has multiple clusters of each
112 * performance type).
113 */
114 for (int i = 0; i < topo.psets[preferred_pset_id].num_cpus; i++) {
115 int cpu_id = pset_id_to_cpu_id(preferred_pset_id) + i;
116 cpu_set_thread_current(cpu_id, earlier_threads[i]);
117 }
118 int chosen = choose_pset_for_thread(thread);
119 T_QUIET; T_EXPECT_GE(chosen, 0, "chose a valid cluster");
120 T_QUIET; T_EXPECT_NE(chosen, preferred_pset_id, "chose an unloaded cluster");
121 T_QUIET; T_EXPECT_EQ(topo.psets[chosen].cpu_type, topo.psets[preferred_pset_id].cpu_type, "chose a pset of the same performance type");
122
123 /* Replace the first earlier-deadline thread with a later-deadline thread. Should cause the thread to preempt. */
124 cpu_set_thread_current(pset_id_to_cpu_id(preferred_pset_id), later_thread);
125 chosen = choose_pset_for_thread(thread);
126 T_QUIET; T_EXPECT_EQ(chosen, preferred_pset_id, "preempting later-deadline thread");
127
128 /* Load all psets of the same performance type with early-deadline threads. Expected preferred pset to be chosen. */
129 for (int i = 0; i < topo.num_psets; i++) {
130 if (topo.psets[i].cpu_type != topo.psets[preferred_pset_id].cpu_type) {
131 continue;
132 }
133 for (int j = 0; j < topo.psets[i].num_cpus; j++) {
134 int cpu_id = pset_id_to_cpu_id(i) + j;
135 cpu_set_thread_current(cpu_id, earlier_threads[cpu_id]);
136 }
137 }
138 choose_pset_for_thread_expect(thread, preferred_pset_id);
139
140 /* Clean up */
141 for (int i = 0; i < topo.total_cpus; i++) {
142 cpu_clear_thread_current(i);
143 }
144
145 sched_policy_pop_metadata(/* preferred_pset_id */);
146 }
147
148 SCHED_POLICY_PASS("sched_rt_choose_processor selects the right pset");
149 }
150
151 SCHED_POLICY_T_DECL(rt_spill_order, "Verify computed realtime spill orders.")
152 {
153 init_migration_harness(dual_die);
154
155 /* Test setup: reset all edges. */
156 for (uint src_id = 0; src_id < dual_die.num_psets; src_id++) {
157 for (uint dst_id = 0; dst_id < dual_die.num_psets; dst_id++) {
158 sched_rt_config_set(src_id, dst_id, (sched_clutch_edge) {});
159 }
160 }
161
162 /* First test: create edges from pset 5 to psets 0-3. */
163 for (unsigned i = 0; i < 4; i++) {
164 sched_rt_config_set(5, i, (sched_clutch_edge) {
165 .sce_migration_allowed = 1,
166 .sce_steal_allowed = 0,
167 .sce_migration_weight = i % 3 /* create ties to test die-locality */
168 });
169 }
170 /* Disallow spill from 5 to 4, despite being the same perf level. */
171 sched_rt_config_set(5, 4, (sched_clutch_edge) {
172 .sce_migration_allowed = 0,
173 .sce_steal_allowed = 0,
174 .sce_migration_weight = 0
175 });
176
177 rt_pset_recompute_spill_order(5);
178
179 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 0), 3, "spso_search_order[0] == 3");
180 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 1), 0, "spso_search_order[1] == 0");
181 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 2), 1, "spso_search_order[2] == 1");
182 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 3), 2, "spso_search_order[3] == 2");
183 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 4), PSET_ID_INVALID, "spso_search_order[4] == PSET_ID_INVALID");
184
185 /* Second test: create edges from 0 to psets 1, 2, 4, and 5. */
186 sched_rt_config_set(0, 1, (sched_clutch_edge) {
187 .sce_migration_allowed = 1,
188 .sce_steal_allowed = 0,
189 .sce_migration_weight = 2
190 });
191 sched_rt_config_set(0, 2, (sched_clutch_edge) {
192 .sce_migration_allowed = 1,
193 .sce_steal_allowed = 0,
194 .sce_migration_weight = 1
195 });
196 sched_rt_config_set(0, 4, (sched_clutch_edge) {
197 .sce_migration_allowed = 1,
198 .sce_steal_allowed = 0,
199 .sce_migration_weight = 0
200 });
201 sched_rt_config_set(0, 5, (sched_clutch_edge) {
202 .sce_migration_allowed = 1,
203 .sce_steal_allowed = 0,
204 .sce_migration_weight = 1
205 });
206
207 rt_pset_recompute_spill_order(0);
208
209 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 0), 4, "spso_search_order[0] == 4");
210 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 1), 2, "spso_search_order[1] == 2");
211 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 2), 5, "spso_search_order[2] == 5");
212 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 3), 1, "spso_search_order[3] == 1");
213 T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 4), PSET_ID_INVALID, "spso_search_order[4] == PSET_ID_INVALID");
214
215 SCHED_POLICY_PASS("Realtime spill orders are computed correctly.");
216 }
217
218 SCHED_POLICY_T_DECL(rt_thread_avoid_processor,
219 "Verify that thread_avoid_processor is correct for realtime threads")
220 {
221 int ret;
222 test_hw_topology_t topo = dual_die;
223 init_migration_harness(topo);
224 struct thread_group *tg = create_tg(0);
225 thread_t thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
226
227 /* Iterate conditions with different preferred psets and pset loads */
228 for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
229 set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_FIXPRI, preferred_pset_id);
230 sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
231
232 /* Where the thread proactively wants to go */
233 int chosen_pset = choose_pset_for_thread(thread);
234 T_QUIET; T_EXPECT_EQ(preferred_pset_id, chosen_pset, "Thread should choose un-loaded preferred pset %s",
235 sched_policy_dump_metadata());
236
237 /* Thread generally should not avoid a processor in its chosen pset */
238 for (int c = 0; c < topo.psets[chosen_pset].num_cpus; c++) {
239 int avoid_cpu_id = pset_id_to_cpu_id(chosen_pset) + c;
240 sched_policy_push_metadata("avoid_cpu_id", avoid_cpu_id);
241 ret = thread_avoid_processor_expect(thread, avoid_cpu_id, false, false);
242 T_QUIET; T_EXPECT_TRUE(ret, "Thread should not want to leave processor in just chosen pset %s",
243 sched_policy_dump_metadata());
244 sched_policy_pop_metadata();
245 }
246
247 /* Thread should avoid processor if not allowed to run on the pset */
248 for (int c = 0; c < topo.total_cpus; c++) {
249 sched_clutch_edge edge = sched_rt_config_get(preferred_pset_id, cpu_id_to_pset_id(c));
250 if (cpu_id_to_pset_id(c) != preferred_pset_id && !(edge.sce_migration_allowed || edge.sce_steal_allowed)) {
251 sched_policy_push_metadata("avoid_non_preferred_cpu_id", c);
252 ret = thread_avoid_processor_expect(thread, c, false, true);
253 T_QUIET; T_EXPECT_TRUE(ret, "Thread should avoid processor in non-preferred pset to get to idle "
254 "preferred pset %s", sched_policy_dump_metadata());
255 sched_policy_pop_metadata();
256 }
257 }
258
259 sched_policy_pop_metadata();
260 }
261 SCHED_POLICY_PASS("thread_avoid_processor works for realtime threads");
262 }
263
264 static thread_t
create_realtime_thread_with_deadline(uint64_t deadline_nanos)265 create_realtime_thread_with_deadline(uint64_t deadline_nanos)
266 {
267 test_thread_t thread = create_thread(
268 TH_BUCKET_FIXPRI,
269 create_tg(0) /* realtime policies don't consider thread groups */,
270 BASEPRI_RTQUEUES);
271 set_thread_sched_mode(thread, TH_MODE_REALTIME);
272 set_thread_realtime(
273 thread,
274 0,
275 (uint32_t) nanos_to_abs(5000000ULL /* 5ms */),
276 (uint32_t) nanos_to_abs(10000000ULL /* 10ms */),
277 false,
278 0,
279 nanos_to_abs(deadline_nanos));
280 return thread;
281 }
282
283 static void
fill_all_cpus_with_realtime_threads(uint64_t deadline_nanos)284 fill_all_cpus_with_realtime_threads(uint64_t deadline_nanos)
285 {
286 for (int i = 0; i < get_hw_topology().total_cpus; i++) {
287 cpu_set_thread_current(i, create_realtime_thread_with_deadline(deadline_nanos));
288 }
289 }
290
SCHED_POLICY_T_DECL(rt_choose_thread, "Verify realtime thread selection policy and mechanism")
{
	int ret;
	test_hw_topology_t topo = dual_die;
	init_migration_harness(topo);

	/* Two deadline values: "deadline" is strictly earlier than "later_deadline". */
	const uint64_t start = mach_absolute_time();
	const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(5000000)); /* start + 5ms */
	const uint64_t later_deadline = rt_deadline_add(start, nanos_to_abs(6000000)); /* start + 6ms */

	/* Keep every CPU occupied so that enqueued threads stay on runqueues. */
	fill_all_cpus_with_realtime_threads(later_deadline);

	/* One of these threads will be on the stealing pset runqueue: */
	test_thread_t later_deadline_thread = create_realtime_thread_with_deadline(later_deadline);
	test_thread_t earlier_deadline_thread = create_realtime_thread_with_deadline(deadline);

	/* And this thread will be on another runqueue: */
	test_thread_t stealable_thread = create_realtime_thread_with_deadline(deadline);

	/* Check that sched_rt_choose_thread obeys the steal policies configured by
	 * the realtime matrix. A pset should only steal if the thread's deadline
	 * is earlier than that of any thread on the pset's runqueue. */

	/* Exercise every ordered (stealing pset, other pset) pair in the topology. */
	for (uint stealing_pset_id = 0; stealing_pset_id < topo.num_psets; stealing_pset_id++) {
		sched_policy_push_metadata("stealing_pset", stealing_pset_id);
		for (uint off = 1; off < topo.num_psets; off++) {
			uint other_pset_id = (stealing_pset_id + off) % topo.num_psets;
			sched_policy_push_metadata("other_pset", other_pset_id);

			/* The early-deadline stealable thread sits on the remote runqueue. */
			enqueue_thread(pset_target(other_pset_id), stealable_thread);

			/* Local thread has the same deadline as the remote one: a tie
			 * should favor the local runqueue (no pointless migration). */
			enqueue_thread(pset_target(stealing_pset_id), earlier_deadline_thread);
			ret = dequeue_thread_expect(pset_target(stealing_pset_id), earlier_deadline_thread);
			T_QUIET; T_ASSERT_TRUE(ret, "when deadlines are equal, prefer thread from local runqueue %s", sched_policy_dump_metadata());

			/* Now the local queue only holds a deadline later than the remote thread's. */
			enqueue_thread(pset_target(stealing_pset_id), later_deadline_thread);
			if (topo.psets[other_pset_id].cpu_type == topo.psets[stealing_pset_id].cpu_type) {
				/* Same perf type: the harness config allows stealing, so the
				 * earlier-deadline remote thread should win over the local one. */
				T_QUIET; T_ASSERT_TRUE(sched_rt_config_get(other_pset_id, stealing_pset_id).sce_steal_allowed, "steal allowed between psets of the same type %s", sched_policy_dump_metadata());

				ret = dequeue_thread_expect(pset_target(stealing_pset_id), stealable_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "steal because the other pset has an earlier-deadline thread %s", sched_policy_dump_metadata());

				/* The remote queue is now drained; the local thread is next. */
				ret = dequeue_thread_expect(pset_target(stealing_pset_id), later_deadline_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "take thread from local runqueue because no earlier-deadline threads on other psets %s", sched_policy_dump_metadata());
			} else {
				/* Different perf type: stealing is disallowed, so the local
				 * later-deadline thread must be chosen despite the remote one. */
				T_QUIET; T_ASSERT_FALSE(sched_rt_config_get(other_pset_id, stealing_pset_id).sce_steal_allowed, "steal disallowed between psets of different types %s", sched_policy_dump_metadata());

				ret = dequeue_thread_expect(pset_target(stealing_pset_id), later_deadline_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "take later-deadline thread because policy disallows steal %s", sched_policy_dump_metadata());

				/* Drain the remote runqueue so the next iteration starts clean. */
				ret = dequeue_thread_expect(pset_target(other_pset_id), stealable_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "removed stealable thread %s", sched_policy_dump_metadata());
			}
			sched_policy_pop_metadata(/* other_pset */);
		}
		sched_policy_pop_metadata(/* stealing_pset */);
	}

	SCHED_POLICY_PASS("Verified realtime thread selection");
}
351
SCHED_POLICY_T_DECL(rt_followup_ipi, "Verify that followup IPIs are sent when there are stealable realtime threads and idle processors")
{
	int ret;
	test_hw_topology_t topo = dual_die;
	init_migration_harness(topo);

	const uint64_t start = mach_absolute_time();
	const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(5000000)); /* start + 5ms */

	/* Keep every CPU occupied so enqueued threads stay on runqueues. */
	fill_all_cpus_with_realtime_threads(deadline);

	/* This thread is used to load a runqueue. */
	test_thread_t thread = create_realtime_thread_with_deadline(deadline);

	/* Exercise every ordered (target_cpu, idle_cpu) pair in the topology. */
	for (int target_cpu = 0; target_cpu < topo.total_cpus; target_cpu++) {
		sched_policy_push_metadata("target_cpu", target_cpu);
		for (int idle_cpu = 0; idle_cpu < topo.total_cpus; idle_cpu++) {
			if (target_cpu == idle_cpu) {
				continue;
			}

			sched_policy_push_metadata("idle_cpu", idle_cpu);
			/* Put a stealable thread on target_cpu's runqueue. */
			enqueue_thread(cpu_target(target_cpu), thread);
			/* Make idle_cpu idle; remember its thread so it can be restored below. */
			test_thread_t saved_idle_thread = cpu_clear_thread_current(idle_cpu);

			/* idle_cpu is now "idle," now simulate thread_select() on target_cpu: */
			/* (clear-then-set of the same current thread triggers the harness's
			 * thread_select path without otherwise changing state) */
			cpu_set_thread_current(target_cpu, cpu_clear_thread_current(target_cpu));

			/* That should result in a deferred followup IPI, if spill is allowed between target_cpu and idle_cpu. */
			/* NOTE(review): same cpu_type is used as a proxy for "spill allowed"
			 * here — assumes the harness's default realtime matrix; confirm if
			 * the default edges ever change. */
			if (topo.psets[cpu_id_to_pset_id(idle_cpu)].cpu_type == topo.psets[cpu_id_to_pset_id(target_cpu)].cpu_type) {
				ret = ipi_expect(idle_cpu, TEST_IPI_DEFERRED);
				T_QUIET; T_ASSERT_TRUE(ret, "should send a followup IPI %s", sched_policy_dump_metadata());
			}

			/* Clean up for the next iteration. */
			ret = dequeue_thread_expect(cpu_target(target_cpu), thread);
			T_QUIET; T_ASSERT_TRUE(ret, "cleaning up %s", sched_policy_dump_metadata());
			cpu_set_thread_current(idle_cpu, saved_idle_thread);
			sched_policy_pop_metadata(/* idle_cpu */);
		}
		sched_policy_pop_metadata(/* target_cpu */);
	}

	SCHED_POLICY_PASS("Realtime followup IPIs work");
}
397