/* xref: /xnu-12377.81.4/tests/sched/rt_migration.c (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796) */
1 // Copyright (c) 2024 Apple Inc.  All rights reserved.
2 
3 #include "sched_test_harness/sched_policy_darwintest.h"
4 #include "sched_test_harness/sched_edge_harness.h"
5 
6 T_GLOBAL_META(T_META_NAMESPACE("xnu.scheduler"),
7     T_META_RADAR_COMPONENT_NAME("xnu"),
8     T_META_RADAR_COMPONENT_VERSION("scheduler"),
9     T_META_RUN_CONCURRENTLY(true),
10     T_META_OWNER("m_zinn"));
11 
12 static mach_timebase_info_data_t timebase_info;
13 
14 uint64_t
nanos_to_abs(uint64_t nanos)15 nanos_to_abs(uint64_t nanos)
16 {
17 	static mach_timebase_info_data_t timebase = {};
18 
19 	if (timebase.numer == 0 || timebase.denom == 0) {
20 		kern_return_t kr;
21 
22 		kr = mach_timebase_info(&timebase_info);
23 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
24 
25 		timebase = timebase_info;
26 	}
27 	return nanos * timebase.denom / timebase.numer;
28 }
29 
30 SCHED_POLICY_T_DECL(rt_migration_cluster_bound,
31     "Verify that cluster-bound realtime threads always choose the bound "
32     "cluster except when its derecommended")
33 {
34 	int ret;
35 	init_migration_harness(dual_die);
36 	struct thread_group *tg = create_tg(0);
37 	test_thread_t threads[dual_die.num_psets];
38 	for (int i = 0; i < dual_die.num_psets; i++) {
39 		threads[i] = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
40 		set_thread_cluster_bound(threads[i], i);
41 	}
42 	for (int i = 0; i < dual_die.num_psets; i++) {
43 		set_current_processor(pset_id_to_cpu_id(i));
44 		for (int j = 0; j < dual_die.num_psets; j++) {
45 			ret = choose_pset_for_thread_expect(threads[j], j);
46 			T_QUIET; T_EXPECT_TRUE(ret, "Expecting the bound cluster");
47 		}
48 	}
49 	SCHED_POLICY_PASS("Cluster bound chooses bound cluster");
50 	/* Derecommend the bound cluster */
51 	for (int i = 0; i < dual_die.num_psets; i++) {
52 		set_pset_derecommended(i);
53 		int replacement_pset = -1;
54 		for (int j = 0; j < dual_die.num_psets; j++) {
55 			/* Find the first homogenous cluster and mark it as idle so we choose it */
56 			if ((i != j) && (dual_die.psets[i].cpu_type == dual_die.psets[j].cpu_type)) {
57 				replacement_pset = j;
58 				break;
59 			}
60 		}
61 		ret = choose_pset_for_thread_expect(threads[i], replacement_pset);
62 		T_QUIET; T_EXPECT_TRUE(ret, "Expecting the idle pset when the bound cluster is derecommended");
63 		/* Restore pset conditions */
64 		set_pset_recommended(i);
65 	}
66 	SCHED_POLICY_PASS("Cluster binding is soft");
67 }
68 
69 SCHED_POLICY_T_DECL(rt_choose_processor,
70     "Verify the realtime spill policy")
71 {
72 	test_hw_topology_t topo = dual_die;
73 	init_migration_harness(topo);
74 
75 	uint64_t start = mach_absolute_time();
76 
77 	const uint64_t period = 0;
78 	const uint64_t computation = nanos_to_abs(5000000ULL); /* 5ms */
79 	const uint64_t constraint = nanos_to_abs(10000000ULL); /* 10ms */
80 	const bool preemptible = false;
81 	const uint8_t priority_offset = 0;
82 
83 	struct thread_group *tg = create_tg(0);
84 	thread_t thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
85 	set_thread_sched_mode(thread, TH_MODE_REALTIME);
86 	const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(10000000ULL /* 10ms */));
87 	set_thread_realtime(thread, period, computation, constraint, preemptible, priority_offset, deadline);
88 
89 	test_thread_t earlier_threads[topo.total_cpus] = {};
90 	for (int i = 0; i < topo.total_cpus; i++) {
91 		earlier_threads[i] = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
92 		set_thread_sched_mode(earlier_threads[i], TH_MODE_REALTIME);
93 		const uint64_t early_deadline = rt_deadline_add(start, nanos_to_abs(5000000) /* 5ms */);
94 		set_thread_realtime(earlier_threads[i], period, computation, constraint, preemptible, priority_offset, early_deadline);
95 	}
96 
97 	test_thread_t later_thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
98 	set_thread_sched_mode(later_thread, TH_MODE_REALTIME);
99 	const uint64_t late_deadline = rt_deadline_add(start, nanos_to_abs(20000000ULL) /* 20ms */);
100 	set_thread_realtime(later_thread, period, computation, constraint, preemptible, priority_offset, late_deadline);
101 
102 	for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
103 		set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_FIXPRI, preferred_pset_id);
104 		sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
105 
106 		/* Unloaded system. Expect to choose the preferred pset. */
107 		choose_pset_for_thread_expect(thread, preferred_pset_id);
108 
109 		/*
110 		 * Load the preferred pset with earlier-deadline threads. Should cause
111 		 * the thread to spill (since the die has multiple clusters of each
112 		 * performance type).
113 		 */
114 		for (int i = 0; i < topo.psets[preferred_pset_id].num_cpus; i++) {
115 			int cpu_id = pset_id_to_cpu_id(preferred_pset_id) + i;
116 			cpu_set_thread_current(cpu_id, earlier_threads[i]);
117 		}
118 		int chosen = choose_pset_for_thread(thread);
119 		T_QUIET; T_EXPECT_GE(chosen, 0, "chose a valid cluster");
120 		T_QUIET; T_EXPECT_NE(chosen, preferred_pset_id, "chose an unloaded cluster");
121 		T_QUIET; T_EXPECT_EQ(topo.psets[chosen].cpu_type, topo.psets[preferred_pset_id].cpu_type, "chose a pset of the same performance type");
122 
123 		/* Replace the first earlier-deadline thread with a later-deadline thread. Should cause the thread to preempt. */
124 		cpu_set_thread_current(pset_id_to_cpu_id(preferred_pset_id), later_thread);
125 		chosen = choose_pset_for_thread(thread);
126 		T_QUIET; T_EXPECT_EQ(chosen, preferred_pset_id, "preempting later-deadline thread");
127 
128 		/* Load all psets of the same performance type with early-deadline threads. Expected preferred pset to be chosen. */
129 		for (int i = 0; i < topo.num_psets; i++) {
130 			if (topo.psets[i].cpu_type != topo.psets[preferred_pset_id].cpu_type) {
131 				continue;
132 			}
133 			for (int j = 0; j < topo.psets[i].num_cpus; j++) {
134 				int cpu_id = pset_id_to_cpu_id(i) + j;
135 				cpu_set_thread_current(cpu_id, earlier_threads[cpu_id]);
136 			}
137 		}
138 		choose_pset_for_thread_expect(thread, preferred_pset_id);
139 
140 		/* Clean up */
141 		for (int i = 0; i < topo.total_cpus; i++) {
142 			cpu_clear_thread_current(i);
143 		}
144 
145 		sched_policy_pop_metadata(/* preferred_pset_id */);
146 	}
147 
148 	SCHED_POLICY_PASS("sched_rt_choose_processor selects the right pset");
149 }
150 
151 SCHED_POLICY_T_DECL(rt_spill_order, "Verify computed realtime spill orders.")
152 {
153 	init_migration_harness(dual_die);
154 
155 	/* Test setup: reset all edges. */
156 	for (uint src_id = 0; src_id < dual_die.num_psets; src_id++) {
157 		for (uint dst_id = 0; dst_id < dual_die.num_psets; dst_id++) {
158 			sched_rt_config_set(src_id, dst_id, (sched_clutch_edge) {});
159 		}
160 	}
161 
162 	/* First test: create edges from pset 5 to psets 0-3. */
163 	for (unsigned i = 0; i < 4; i++) {
164 		sched_rt_config_set(5, i, (sched_clutch_edge) {
165 			.sce_migration_allowed = 1,
166 			.sce_steal_allowed = 0,
167 			.sce_migration_weight = i % 3 /* create ties to test die-locality */
168 		});
169 	}
170 	/* Disallow spill from 5 to 4, despite being the same perf level. */
171 	sched_rt_config_set(5, 4, (sched_clutch_edge) {
172 		.sce_migration_allowed = 0,
173 		.sce_steal_allowed = 0,
174 		.sce_migration_weight = 0
175 	});
176 
177 	rt_pset_recompute_spill_order(5);
178 
179 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 0), 3, "spso_search_order[0] == 3");
180 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 1), 0, "spso_search_order[1] == 0");
181 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 2), 1, "spso_search_order[2] == 1");
182 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 3), 2, "spso_search_order[3] == 2");
183 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 4), PSET_ID_INVALID, "spso_search_order[4] == PSET_ID_INVALID");
184 
185 	/* Second test: create edges from 0 to psets 1, 2, 4, and 5. */
186 	sched_rt_config_set(0, 1, (sched_clutch_edge) {
187 		.sce_migration_allowed = 1,
188 		.sce_steal_allowed = 0,
189 		.sce_migration_weight = 2
190 	});
191 	sched_rt_config_set(0, 2, (sched_clutch_edge) {
192 		.sce_migration_allowed = 1,
193 		.sce_steal_allowed = 0,
194 		.sce_migration_weight = 1
195 	});
196 	sched_rt_config_set(0, 4, (sched_clutch_edge) {
197 		.sce_migration_allowed = 1,
198 		.sce_steal_allowed = 0,
199 		.sce_migration_weight = 0
200 	});
201 	sched_rt_config_set(0, 5, (sched_clutch_edge) {
202 		.sce_migration_allowed = 1,
203 		.sce_steal_allowed = 0,
204 		.sce_migration_weight = 1
205 	});
206 
207 	rt_pset_recompute_spill_order(0);
208 
209 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 0), 4, "spso_search_order[0] == 4");
210 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 1), 2, "spso_search_order[1] == 2");
211 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 2), 5, "spso_search_order[2] == 5");
212 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 3), 1, "spso_search_order[3] == 1");
213 	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 4), PSET_ID_INVALID, "spso_search_order[4] == PSET_ID_INVALID");
214 
215 	SCHED_POLICY_PASS("Realtime spill orders are computed correctly.");
216 }
217 
218 SCHED_POLICY_T_DECL(rt_thread_avoid_processor,
219     "Verify that thread_avoid_processor is correct for realtime threads")
220 {
221 	int ret;
222 	test_hw_topology_t topo = dual_die;
223 	init_migration_harness(topo);
224 	struct thread_group *tg = create_tg(0);
225 	thread_t thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
226 
227 	/* Iterate conditions with different preferred psets and pset loads */
228 	for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
229 		set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_FIXPRI, preferred_pset_id);
230 		sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
231 
232 		/* Where the thread proactively wants to go */
233 		int chosen_pset = choose_pset_for_thread(thread);
234 		T_QUIET; T_EXPECT_EQ(preferred_pset_id, chosen_pset, "Thread should choose un-loaded preferred pset %s",
235 		    sched_policy_dump_metadata());
236 
237 		/* Thread generally should not avoid a processor in its chosen pset */
238 		for (int c = 0; c < topo.psets[chosen_pset].num_cpus; c++) {
239 			int avoid_cpu_id = pset_id_to_cpu_id(chosen_pset) + c;
240 			sched_policy_push_metadata("avoid_cpu_id", avoid_cpu_id);
241 			ret = thread_avoid_processor_expect(thread, avoid_cpu_id, false, false);
242 			T_QUIET; T_EXPECT_TRUE(ret, "Thread should not want to leave processor in just chosen pset %s",
243 			    sched_policy_dump_metadata());
244 			sched_policy_pop_metadata();
245 		}
246 
247 		/* Thread should avoid processor if not allowed to run on the pset */
248 		for (int c = 0; c < topo.total_cpus; c++) {
249 			sched_clutch_edge edge = sched_rt_config_get(preferred_pset_id, cpu_id_to_pset_id(c));
250 			if (cpu_id_to_pset_id(c) != preferred_pset_id && !(edge.sce_migration_allowed || edge.sce_steal_allowed)) {
251 				sched_policy_push_metadata("avoid_non_preferred_cpu_id", c);
252 				ret = thread_avoid_processor_expect(thread, c, false, true);
253 				T_QUIET; T_EXPECT_TRUE(ret, "Thread should avoid processor in non-preferred pset to get to idle "
254 				    "preferred pset %s", sched_policy_dump_metadata());
255 				sched_policy_pop_metadata();
256 			}
257 		}
258 
259 		sched_policy_pop_metadata();
260 	}
261 	SCHED_POLICY_PASS("thread_avoid_processor works for realtime threads");
262 }
263 
264 static thread_t
create_realtime_thread_with_deadline(uint64_t deadline_nanos)265 create_realtime_thread_with_deadline(uint64_t deadline_nanos)
266 {
267 	test_thread_t thread = create_thread(
268 		TH_BUCKET_FIXPRI,
269 		create_tg(0) /* realtime policies don't consider thread groups */,
270 		BASEPRI_RTQUEUES);
271 	set_thread_sched_mode(thread, TH_MODE_REALTIME);
272 	set_thread_realtime(
273 		thread,
274 		0,
275 		(uint32_t) nanos_to_abs(5000000ULL /* 5ms */),
276 		(uint32_t) nanos_to_abs(10000000ULL /* 10ms */),
277 		false,
278 		0,
279 		nanos_to_abs(deadline_nanos));
280 	return thread;
281 }
282 
283 static void
fill_all_cpus_with_realtime_threads(uint64_t deadline_nanos)284 fill_all_cpus_with_realtime_threads(uint64_t deadline_nanos)
285 {
286 	for (int i = 0; i < get_hw_topology().total_cpus; i++) {
287 		cpu_set_thread_current(i, create_realtime_thread_with_deadline(deadline_nanos));
288 	}
289 }
290 
SCHED_POLICY_T_DECL(rt_choose_thread, "Verify realtime thread selection policy and mechanism")
{
	int ret;
	test_hw_topology_t topo = dual_die;
	init_migration_harness(topo);

	const uint64_t start = mach_absolute_time();
	const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(5000000)); /* start + 5ms */
	const uint64_t later_deadline = rt_deadline_add(start, nanos_to_abs(6000000)); /* start + 6ms */

	/* Keep every CPU busy so thread selection is decided by runqueue
	 * contents rather than by idle processors. */
	fill_all_cpus_with_realtime_threads(later_deadline);

	/* One of these threads will be on the stealing pset runqueue: */
	test_thread_t later_deadline_thread = create_realtime_thread_with_deadline(later_deadline);
	test_thread_t earlier_deadline_thread = create_realtime_thread_with_deadline(deadline);

	/* And this thread will be on another runqueue: */
	test_thread_t stealable_thread = create_realtime_thread_with_deadline(deadline);

	/* Check that sched_rt_choose_thread obeys the steal policies configured by
	 * the realtime matrix. A pset should only steal if the thread's deadline
	 * is earlier than that of any thread on the pset's runqueue. */

	/* Exercise every ordered (stealing pset, other pset) pair. */
	for (uint stealing_pset_id = 0; stealing_pset_id < topo.num_psets; stealing_pset_id++) {
		sched_policy_push_metadata("stealing_pset", stealing_pset_id);
		for (uint off = 1; off < topo.num_psets; off++) {
			uint other_pset_id = (stealing_pset_id + off) % topo.num_psets;
			sched_policy_push_metadata("other_pset", other_pset_id);

			/* Candidate for stealing, sitting on the other pset's runqueue. */
			enqueue_thread(pset_target(other_pset_id), stealable_thread);

			/* Tie case: local thread's deadline equals the stealable
			 * thread's deadline, so the local one must win. */
			enqueue_thread(pset_target(stealing_pset_id), earlier_deadline_thread);
			ret = dequeue_thread_expect(pset_target(stealing_pset_id), earlier_deadline_thread);
			T_QUIET; T_ASSERT_TRUE(ret, "when deadlines are equal, prefer thread from local runqueue %s", sched_policy_dump_metadata());

			/* Now the local runqueue holds only a later-deadline thread,
			 * so stealing is the better choice — if the edge allows it. */
			enqueue_thread(pset_target(stealing_pset_id), later_deadline_thread);
			if (topo.psets[other_pset_id].cpu_type == topo.psets[stealing_pset_id].cpu_type) {
				T_QUIET; T_ASSERT_TRUE(sched_rt_config_get(other_pset_id, stealing_pset_id).sce_steal_allowed, "steal allowed between psets of the same type %s", sched_policy_dump_metadata());

				ret = dequeue_thread_expect(pset_target(stealing_pset_id), stealable_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "steal because the other pset has an earlier-deadline thread %s", sched_policy_dump_metadata());

				ret = dequeue_thread_expect(pset_target(stealing_pset_id), later_deadline_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "take thread from local runqueue because no earlier-deadline threads on other psets %s", sched_policy_dump_metadata());
			} else {
				T_QUIET; T_ASSERT_FALSE(sched_rt_config_get(other_pset_id, stealing_pset_id).sce_steal_allowed, "steal disallowed between psets of different types %s", sched_policy_dump_metadata());

				ret = dequeue_thread_expect(pset_target(stealing_pset_id), later_deadline_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "take later-deadline thread because policy disallows steal %s", sched_policy_dump_metadata());

				/* Drain the stealable thread so the next iteration starts
				 * from empty runqueues. */
				ret = dequeue_thread_expect(pset_target(other_pset_id), stealable_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "removed stealable thread %s", sched_policy_dump_metadata());
			}
			sched_policy_pop_metadata(/* other_pset */);
		}
		sched_policy_pop_metadata(/* stealing_pset */);
	}

	SCHED_POLICY_PASS("Verified realtime thread selection");
}
351 
SCHED_POLICY_T_DECL(rt_followup_ipi, "Verify that followup IPIs are sent when there are stealable realtime threads and idle processors")
{
	int ret;
	test_hw_topology_t topo = dual_die;
	init_migration_harness(topo);

	const uint64_t start = mach_absolute_time();
	const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(5000000)); /* start + 5ms */

	/* Start from a fully-loaded system: every CPU runs a realtime thread. */
	fill_all_cpus_with_realtime_threads(deadline);

	/* This thread is used to load a runqueue. */
	test_thread_t thread = create_realtime_thread_with_deadline(deadline);

	/* For each (busy target CPU, idle CPU) pair, check the followup-IPI
	 * decision made during thread selection on the target CPU. */
	for (int target_cpu = 0; target_cpu < topo.total_cpus; target_cpu++) {
		sched_policy_push_metadata("target_cpu", target_cpu);
		for (int idle_cpu = 0; idle_cpu < topo.total_cpus; idle_cpu++) {
			if (target_cpu == idle_cpu) {
				continue;
			}

			sched_policy_push_metadata("idle_cpu", idle_cpu);
			/* Leave a stealable thread on target_cpu's runqueue and make
			 * idle_cpu idle by removing its current thread (saved so it
			 * can be restored after this iteration). */
			enqueue_thread(cpu_target(target_cpu), thread);
			test_thread_t saved_idle_thread = cpu_clear_thread_current(idle_cpu);

			/* idle_cpu is now "idle," now simulate thread_select() on target_cpu: */
			/* (clear-then-set of the same thread re-runs the selection path
			 * without changing what target_cpu is running) */
			cpu_set_thread_current(target_cpu, cpu_clear_thread_current(target_cpu));

			/* That should result in a deferred followup IPI, if spill is allowed between target_cpu and idle_cpu. */
			if (topo.psets[cpu_id_to_pset_id(idle_cpu)].cpu_type == topo.psets[cpu_id_to_pset_id(target_cpu)].cpu_type) {
				ret = ipi_expect(idle_cpu, TEST_IPI_DEFERRED);
				T_QUIET; T_ASSERT_TRUE(ret, "should send a followup IPI %s", sched_policy_dump_metadata());
			}

			/* Clean up for the next iteration. */
			ret = dequeue_thread_expect(cpu_target(target_cpu), thread);
			T_QUIET; T_ASSERT_TRUE(ret, "cleaning up %s", sched_policy_dump_metadata());
			cpu_set_thread_current(idle_cpu, saved_idle_thread);
			sched_policy_pop_metadata(/* idle_cpu */);
		}
		sched_policy_pop_metadata(/* target_cpu */);
	}

	SCHED_POLICY_PASS("Realtime followup IPIs work");
}
397