1 // Copyright (c) 2024 Apple Inc.  All rights reserved.
2 
3 #include "sched_test_harness/sched_policy_darwintest.h"
4 #include "sched_test_harness/sched_edge_harness.h"
5 
6 T_GLOBAL_META(T_META_NAMESPACE("xnu.scheduler"),
7     T_META_RADAR_COMPONENT_NAME("xnu"),
8     T_META_RADAR_COMPONENT_VERSION("scheduler"),
9     T_META_RUN_CONCURRENTLY(true),
10     T_META_OWNER("emily_peterson"));
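
/*
 * Unit tests for the Edge scheduler's thread-migration policies: cluster
 * binding, should-yield decisions, stir-the-pot swaps, IPI selection,
 * rebalance/steal behavior, and pset search order, all driven through the
 * mock topologies provided by the sched_edge_harness.
 */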
11 
12 SCHED_POLICY_T_DECL(migration_cluster_bound,
13     "Verify that cluster-bound threads always choose the bound "
14     "cluster except when its derecommended")
15 {
16 	int ret;
17 	init_migration_harness(dual_die);
18 	struct thread_group *tg = create_tg(0);
19 	test_thread_t threads[dual_die.num_psets];
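	/* Per-bucket pset load-average values used below (harness units): idle, lightly loaded, and heavily loaded. */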
20 	int idle_load = 0;
21 	int low_load = 100000;
22 	int high_load = 10000000;
23 	for (int i = 0; i < dual_die.num_psets; i++) {
24 		threads[i] = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
25 		set_thread_cluster_bound(threads[i], i);
26 		set_pset_load_avg(i, TH_BUCKET_SHARE_DF, low_load);
27 	}
28 	for (int i = 0; i < dual_die.num_psets; i++) {
29 		set_current_processor(pset_id_to_cpu_id(i));
30 		for (int j = 0; j < dual_die.num_psets; j++) {
31 			/* Add extra load to the bound cluster, so we're definitely not just idle short-circuiting */
32 			set_pset_load_avg(j, TH_BUCKET_SHARE_DF, high_load);
33 			ret = choose_pset_for_thread_expect(threads[j], j);
34 			T_QUIET; T_EXPECT_TRUE(ret, "Expecting the bound cluster");
35 			set_pset_load_avg(j, TH_BUCKET_SHARE_DF, low_load);
36 		}
37 	}
38 	SCHED_POLICY_PASS("Cluster bound chooses bound cluster");
39 	/* Derecommend the bound cluster */
40 	for (int i = 0; i < dual_die.num_psets; i++) {
41 		set_pset_derecommended(i);
42 		int replacement_pset = -1;
43 		for (int j = 0; j < dual_die.num_psets; j++) {
44 			/* Find the first homogeneous cluster and mark it as idle so we choose it */
45 			if ((i != j) && (dual_die.psets[i].cpu_type == dual_die.psets[j].cpu_type)) {
46 				replacement_pset = j;
47 				set_pset_load_avg(replacement_pset, TH_BUCKET_SHARE_DF, idle_load);
48 				break;
49 			}
50 		}
51 		ret = choose_pset_for_thread_expect(threads[i], replacement_pset);
52 		T_QUIET; T_EXPECT_TRUE(ret, "Expecting the idle pset when the bound cluster is derecommended");
53 		/* Restore pset conditions */
54 		set_pset_recommended(i);
55 		set_pset_load_avg(replacement_pset, TH_BUCKET_SHARE_DF, low_load);
56 	}
57 	SCHED_POLICY_PASS("Cluster binding is soft");
58 }
59 
60 SCHED_POLICY_T_DECL(migration_should_yield,
61     "Verify that we only yield if there's a \"good enough\" thread elsewhere "
62     "to switch to")
63 {
64 	int ret;
65 	init_migration_harness(basic_amp);
66 	struct thread_group *tg = create_tg(0);
67 	test_thread_t yielder = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
68 	int p_pset = 0;
69 	int p_cpu = pset_id_to_cpu_id(p_pset);
70 	cpu_set_thread_current(p_cpu, yielder);
71 	ret = cpu_check_should_yield(p_cpu, false);
72 	T_QUIET; T_EXPECT_TRUE(ret, "No thread present to yield to");
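	/*
	 * The last tracepoint argument carries the should-yield reason code; the
	 * expectations in this test exercise 4 (SCHED_EDGE_YIELD_DISALLOW),
	 * 0 (SCHED_EDGE_YIELD_RUNQ_NONEMPTY), 2 (SCHED_EDGE_YIELD_FOREIGN_RUNNING),
	 * and 3 (SCHED_EDGE_YIELD_STEAL_POSSIBLE).
	 */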
73 	ret = tracepoint_expect(EDGE_SHOULD_YIELD, get_thread_tid(yielder), p_pset, 0, 4);
74 	T_QUIET; T_EXPECT_TRUE(ret, "SCHED_EDGE_YIELD_DISALLOW");
75 
76 	test_thread_t background = create_thread(TH_BUCKET_SHARE_BG, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_BG]);
77 	enqueue_thread(pset_target(p_pset), background);
78 	ret = cpu_check_should_yield(p_cpu, true);
79 	T_QUIET; T_EXPECT_TRUE(ret, "Should yield to a low priority thread on the current runqueue");
80 	ret = tracepoint_expect(EDGE_SHOULD_YIELD, get_thread_tid(yielder), p_pset, 0, 0);
81 	T_QUIET; T_EXPECT_TRUE(ret, "SCHED_EDGE_YIELD_RUNQ_NONEMPTY");
82 	SCHED_POLICY_PASS("Basic yield behavior on single pset");
83 
84 	int e_pset = 1;
85 	int e_cpu = pset_id_to_cpu_id(e_pset);
86 	ret = dequeue_thread_expect(pset_target(p_pset), background);
87 	T_QUIET; T_EXPECT_TRUE(ret, "Only background thread in runqueue");
88 	set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_SHARE_BG, e_pset);
89 	enqueue_thread(pset_target(e_pset), background);
90 	ret = cpu_check_should_yield(p_cpu, false);
91 	T_QUIET; T_EXPECT_TRUE(ret, "Should not yield in order to running rebalance native thread");
92 	ret = tracepoint_expect(EDGE_SHOULD_YIELD, get_thread_tid(yielder), p_cpu, 0, 4);
93 	T_QUIET; T_EXPECT_TRUE(ret, "SCHED_EDGE_YIELD_DISALLOW");
94 
95 	ret = dequeue_thread_expect(pset_target(e_pset), background);
96 	T_QUIET; T_EXPECT_TRUE(ret, "Only background thread in runqueue");
97 	set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_SHARE_BG, p_pset);
98 	cpu_set_thread_current(e_cpu, background);
99 	ret = cpu_check_should_yield(p_cpu, true);
100 	T_QUIET; T_EXPECT_TRUE(ret, "Should yield in order to running rebalance foreign thread");
101 	ret = tracepoint_expect(EDGE_SHOULD_YIELD, get_thread_tid(yielder), p_cpu, 0, 2);
102 	T_QUIET; T_EXPECT_TRUE(ret, "SCHED_EDGE_YIELD_FOREIGN_RUNNING");
103 
104 	enqueue_thread(pset_target(p_pset), background);
105 	cpu_set_thread_current(e_cpu, yielder);
106 	ret = cpu_check_should_yield(e_cpu, true);
107 	T_QUIET; T_EXPECT_TRUE(ret, "Should yield in order to steal thread");
108 	ret = tracepoint_expect(EDGE_SHOULD_YIELD, get_thread_tid(yielder), e_pset, 0, 3);
109 	T_QUIET; T_EXPECT_TRUE(ret, "SCHED_EDGE_YIELD_STEAL_POSSIBLE");
110 	SCHED_POLICY_PASS("Thread yields in order to steal from other psets");
111 }
112 
113 SCHED_POLICY_T_DECL(migration_stir_the_pot_basic,
114     "Verify stir-the-pot succeeds to rotate threads across P and E-cores after"
115     "their respective quanta have expired")
116 {
117 	int ret;
118 	init_migration_harness(basic_amp);
119 
120 	struct thread_group *tg = create_tg(0);
121 	test_thread_t starts_p = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
122 	test_thread_t starts_e = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
123 	test_thread_t other_p_thread = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
124 	int p_cpu = 0;
125 	int e_cpu = 2;
126 	int other_e_cpu = 3;
127 	int other_p_cpu = 1;
128 	cpu_set_thread_current(p_cpu, starts_p);
129 	cpu_set_thread_current(e_cpu, starts_e);
130 	cpu_set_thread_current(other_p_cpu, other_p_thread);
131 	int p_pset = 0;
132 	int e_pset = 1;
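	/*
	 * Assumes basic_amp numbers CPUs pset-by-pset, so CPUs 0-1 sit in the
	 * P-pset (pset 0) and CPUs 2-3 in the E-pset (pset 1), matching the
	 * hard-coded p_cpu/e_cpu values above.
	 */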
133 
134 	/* Thread on low core type "pays its dues" */
135 	cpu_expire_quantum(e_cpu);
136 
137 	/* Thread on high core type should locate swap candidate */
138 	cpu_expire_quantum(p_cpu);
139 	ret = ipi_expect(e_cpu, TEST_IPI_IMMEDIATE);
140 	T_QUIET; T_EXPECT_TRUE(ret, "Should have found stir-the-pot candidate with expired quantum");
141 
142 	/* Thread on low core type should respond to IPI by preempting... */
143 	ret = thread_avoid_processor_expect(starts_e, e_cpu, false, true);
144 	T_QUIET; T_EXPECT_TRUE(ret, "Thread should preempt to get on P-core");
145 
146 	/* (Simulate as if we are switching to another quantum-expired thread) */
147 	test_thread_t other_expired_thread = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
148 	cpu_set_thread_current(other_e_cpu, other_expired_thread);
149 	cpu_expire_quantum(other_e_cpu);
150 	cpu_clear_thread_current(other_e_cpu);
151 	cpu_set_thread_current(e_cpu, other_expired_thread);
152 
153 	/* ...and choosing the corresponding P-core for swap */
154 	ret = choose_pset_for_thread_expect(starts_e, p_pset);
155 	T_QUIET; T_EXPECT_TRUE(ret, "Should choose P-cores despite no idle cores there");
156 
157 	/* Upon arrival, thread swapping in should preempt its predecessor */
158 	enqueue_thread(pset_target(p_pset), starts_e);
159 	ret = cpu_check_preempt_current(p_cpu, true);
160 	T_QUIET; T_EXPECT_TRUE(ret, "P-core should preempt quantum expired thread");
161 
162 	/* ...and preempted thread on P-core should spill down to E, completing the swap */
163 	ret = dequeue_thread_expect(pset_target(p_pset), starts_e);
164 	T_QUIET; T_ASSERT_TRUE(ret, "starts_e was enqueued on P");
165 	cpu_set_thread_current(p_cpu, starts_e);
166 	ret = choose_pset_for_thread_expect(starts_p, e_pset);
167 	T_QUIET; T_EXPECT_TRUE(ret, "starts_p spilled to E, completing swap");
168 
169 	/*
170 	 * And a second swap should be initiated for the other E-expired thread
171 	 * that switched on-core afterwards.
172 	 */
173 	cpu_expire_quantum(other_p_cpu);
174 	ret = ipi_expect(e_cpu, TEST_IPI_IMMEDIATE);
175 	T_QUIET; T_EXPECT_TRUE(ret, "Should have found stir-the-pot candidate with expired quantum");
176 
177 	SCHED_POLICY_PASS("Stir-the-pot successfully initiated by P-core and completed");
178 
179 	/* Clean-up and reset to initial conditions */
180 	cpu_set_thread_current(p_cpu, starts_p);
181 	cpu_set_thread_current(e_cpu, starts_e);
182 	cpu_set_thread_current(other_p_cpu, other_p_thread);
183 	cpu_set_thread_current(other_e_cpu, other_expired_thread);
184 
185 	/* Now P-core expires quantum first */
186 	cpu_expire_quantum(p_cpu);
187 
188 	/* Thread on E-core "pays its dues" and responds to self-message by preempting */
189 	cpu_expire_quantum(e_cpu);
190 	ret = thread_avoid_processor_expect(starts_e, e_cpu, false, true);
191 	T_QUIET; T_EXPECT_TRUE(ret, "Thread should preempt to get on P-core");
192 
193 	/* ...and choosing the corresponding P-core for swap */
194 	cpu_clear_thread_current(e_cpu);
195 	ret = choose_pset_for_thread_expect(starts_e, p_pset);
196 	T_QUIET; T_EXPECT_TRUE(ret, "Should choose P-cores despite no idle cores there");
197 
198 	/* Upon arrival, thread swapping in should preempt its predecessor */
199 	enqueue_thread(pset_target(p_pset), starts_e);
200 	ret = cpu_check_preempt_current(p_cpu, true);
201 	T_QUIET; T_EXPECT_TRUE(ret, "P-core should preempt quantum expired thread");
202 
203 	/* ...and preempted thread on P-core should spill down to E, completing the swap */
204 	ret = dequeue_thread_expect(pset_target(p_pset), starts_e);
205 	T_QUIET; T_ASSERT_TRUE(ret, "starts_e was enqueued on P");
206 	cpu_set_thread_current(p_cpu, starts_e);
207 	ret = choose_pset_for_thread_expect(starts_p, e_pset);
208 	T_QUIET; T_EXPECT_TRUE(ret, "starts_p spilled to E, completing swap");
209 
210 	SCHED_POLICY_PASS("Stir-the-pot successfully initiated by E-core and completed");
211 }
212 
213 SCHED_POLICY_T_DECL(migration_ipi_policy,
214     "Verify we send the right type of IPI in different cross-core preemption scenarios")
215 {
216 	int ret;
217 	init_migration_harness(dual_die);
218 	struct thread_group *tg = create_tg(0);
219 	thread_t thread = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
220 	int dst_pcore = 3;
221 	int src_pcore = 0;
222 
223 	set_current_processor(src_pcore);
224 	cpu_send_ipi_for_thread(dst_pcore, thread, TEST_IPI_EVENT_PREEMPT);
225 	ret = ipi_expect(dst_pcore, TEST_IPI_IDLE);
226 	T_QUIET; T_EXPECT_TRUE(ret, "Idle CPU");
227 
228 	thread_t core_busy = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
229 	cpu_set_thread_current(dst_pcore, core_busy);
230 	set_current_processor(src_pcore);
231 	cpu_send_ipi_for_thread(dst_pcore, thread, TEST_IPI_EVENT_PREEMPT);
232 	ret = ipi_expect(dst_pcore, TEST_IPI_IMMEDIATE);
233 	T_QUIET; T_EXPECT_TRUE(ret, "Should immediate IPI to preempt on P-core");
234 	SCHED_POLICY_PASS("Immediate IPIs to preempt P-cores");
235 
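	/* Assumes that, in the dual_die topology, CPU 13 belongs to E-cluster 5. */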
236 	int dst_ecore = 13;
237 	int ecluster_id = 5;
238 	set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_SHARE_DF, ecluster_id);
239 	set_current_processor(src_pcore);
240 	cpu_send_ipi_for_thread(dst_ecore, thread, TEST_IPI_EVENT_PREEMPT);
241 	ret = ipi_expect(dst_ecore, TEST_IPI_IDLE);
242 	T_QUIET; T_EXPECT_TRUE(ret, "Idle CPU");
243 
244 	cpu_set_thread_current(dst_ecore, core_busy);
245 	set_current_processor(src_pcore);
246 	cpu_send_ipi_for_thread(dst_ecore, thread, TEST_IPI_EVENT_PREEMPT);
247 	ret = ipi_expect(dst_ecore, TEST_IPI_IMMEDIATE);
248 	T_QUIET; T_EXPECT_TRUE(ret, "Should immediate IPI to preempt for E->E");
249 	SCHED_POLICY_PASS("Immediate IPIs to cluster homogeneous with preferred");
250 }
251 
252 SCHED_POLICY_T_DECL(migration_max_parallelism,
253     "Verify we report expected values for recommended width of parallel workloads")
254 {
255 	int ret;
256 	init_migration_harness(dual_die);
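	/* These counts mirror the dual_die topology assumed by this test: 4 P-clusters of 4 cores and 2 E-clusters of 2 cores. */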
257 	uint32_t num_pclusters = 4;
258 	uint32_t num_pcores = 4 * num_pclusters;
259 	uint32_t num_eclusters = 2;
260 	uint32_t num_ecores = 2 * num_eclusters;
261 	for (thread_qos_t qos = THREAD_QOS_UNSPECIFIED; qos < THREAD_QOS_LAST; qos++) {
262 		for (int shared_rsrc = 0; shared_rsrc < 2; shared_rsrc++) {
263 			for (int rt = 0; rt < 2; rt++) {
264 				uint64_t options = 0;
265 				uint32_t expected_width = 0;
266 				if (shared_rsrc) {
267 					options |= QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE;
268 				}
269 				if (rt) {
270 					options |= QOS_PARALLELISM_REALTIME;
271 					/* Recommend P-width */
272 					expected_width = shared_rsrc ? num_pclusters : num_pcores;
273 				} else if (qos == THREAD_QOS_BACKGROUND || qos == THREAD_QOS_MAINTENANCE) {
274 					/* Recommend E-width */
275 					expected_width = shared_rsrc ? num_eclusters : num_ecores;
276 				} else {
277 					/* Recommend full width */
278 					expected_width = shared_rsrc ? (num_eclusters + num_pclusters) : (num_pcores + num_ecores);
279 				}
280 				ret = max_parallelism_expect(qos, options, expected_width);
281 				T_QUIET; T_EXPECT_TRUE(ret, "Unexpected width for QoS %d shared_rsrc %d RT %d",
282 				    qos, shared_rsrc, rt);
283 			}
284 		}
285 	}
286 	SCHED_POLICY_PASS("Correct recommended parallel width for all configurations");
287 }
288 
289 SCHED_POLICY_T_DECL(migration_rebalance_basic, "Verify that basic rebalance steal and "
290     "running rebalance mechanisms kick in")
291 {
292 	int ret;
293 	test_hw_topology_t topo = SCHED_POLICY_DEFAULT_TOPO;
294 	init_migration_harness(topo);
295 	int sched_bucket = TH_BUCKET_SHARE_DF;
296 	struct thread_group *tg = create_tg(0);
297 	thread_t thread = create_thread(sched_bucket, tg, root_bucket_to_highest_pri[sched_bucket]);
298 
299 	for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
300 		set_tg_sched_bucket_preferred_pset(tg, sched_bucket, preferred_pset_id);
301 		sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
302 		for (int running_on_pset_id = 0; running_on_pset_id < topo.num_psets; running_on_pset_id++) {
303 			/* Running rebalance */
304 			int running_on_cpu = pset_id_to_cpu_id(running_on_pset_id);
305 			cpu_set_thread_current(running_on_cpu, thread);
306 			sched_policy_push_metadata("running_on_pset_id", running_on_pset_id);
307 			for (int c = 0; c < topo.total_cpus; c++) {
308 				sched_policy_push_metadata("evaluate_cpu", c);
309 				int evaluate_pset = cpu_id_to_pset_id(c);
310 				bool want_rebalance = cpu_processor_balance(c);
311 				if (evaluate_pset == running_on_pset_id) {
312 					T_QUIET; T_EXPECT_FALSE(want_rebalance, "should be no thread available for rebalance %s",
313 					    sched_policy_dump_metadata());
314 					sched_policy_pop_metadata();
315 					continue;
316 				}
317 				bool should_rebalance = (topo.psets[evaluate_pset].cpu_type == topo.psets[preferred_pset_id].cpu_type) &&
318 				    (topo.psets[running_on_pset_id].cpu_type != topo.psets[preferred_pset_id].cpu_type);
319 				T_QUIET; T_EXPECT_EQ(want_rebalance, should_rebalance, "should rebalance to move thread to preferred type "
320 				    "if not there already %s", sched_policy_dump_metadata());
321 				if (should_rebalance) {
322 					ret = tracepoint_expect(EDGE_REBAL_RUNNING, 0, c, running_on_cpu, 0);
323 					T_QUIET; T_EXPECT_TRUE(ret, "EDGE_REBAL_RUNNING tracepoint");
324 					ret = thread_avoid_processor_expect(thread, running_on_cpu, false, true);
325 					T_QUIET; T_EXPECT_TRUE(ret, "thread will preempt in response to running rebalance IPI %s",
326 					    sched_policy_dump_metadata());
327 					/* Try loading all other cores of the preferred type, forcing this decision to find the idle one */
328 					for (int p = 0; p < topo.num_psets; p++) {
329 						if ((topo.psets[p].cpu_type == topo.psets[preferred_pset_id].cpu_type) &&
330 						    (p != evaluate_pset)) {
331 							set_pset_load_avg(p, sched_bucket, 10000000);
332 						}
333 					}
334 					ret = thread_avoid_processor_expect(thread, running_on_cpu, false, true);
335 					T_QUIET; T_EXPECT_TRUE(ret, "...even if all other cores (except rebalancer) are full %s",
336 					    sched_policy_dump_metadata());
337 					/* Unload cores for clean-up */
338 					for (int p = 0; p < topo.num_psets; p++) {
339 						if ((topo.psets[p].cpu_type == topo.psets[preferred_pset_id].cpu_type) &&
340 						    (p != evaluate_pset)) {
341 							set_pset_load_avg(p, sched_bucket, 0);
342 						}
343 					}
344 				}
345 				sched_policy_pop_metadata();
346 			}
347 			cpu_clear_thread_current(running_on_cpu);
348 			sched_policy_pop_metadata();
349 
350 			/* Rebalance steal */
351 			int enqueued_pset = running_on_pset_id;
352 			enqueue_thread(pset_target(enqueued_pset), thread);
353 			sched_policy_push_metadata("enqueued_pset", enqueued_pset);
354 			for (int c = 0; c < topo.total_cpus; c++) {
355 				sched_policy_push_metadata("evaluate_cpu", c);
356 				int evaluate_pset = cpu_id_to_pset_id(c);
357 				if ((topo.psets[evaluate_pset].cpu_type != topo.psets[enqueued_pset].cpu_type) &&
358 				    ((topo.psets[enqueued_pset].cpu_type != TEST_CPU_TYPE_PERFORMANCE) ||
359 				    (topo.psets[preferred_pset_id].cpu_type != TEST_CPU_TYPE_PERFORMANCE))) {
360 					/* Only evaluate steal between mismatching cluster types and where spill is not allowed */
361 					thread_t stolen_thread = cpu_steal_thread(c);
362 					bool should_rebalance_steal = (topo.psets[evaluate_pset].cpu_type == topo.psets[preferred_pset_id].cpu_type) &&
363 					    (topo.psets[enqueued_pset].cpu_type != topo.psets[preferred_pset_id].cpu_type);
364 					bool did_rebalance_steal = (stolen_thread == thread);
365 					if (stolen_thread != NULL) {
366 						T_QUIET; T_EXPECT_EQ(stolen_thread, thread, "should only be one thread to steal?");
367 					}
368 					T_QUIET; T_EXPECT_EQ(did_rebalance_steal, should_rebalance_steal, "should rebalance steal to move "
369 					    "thread to preferred type if not already there %s", sched_policy_dump_metadata());
370 					if (did_rebalance_steal) {
371 						ret = tracepoint_expect(EDGE_REBAL_RUNNABLE, 0, evaluate_pset, enqueued_pset, 0);
372 						T_QUIET; T_EXPECT_TRUE(ret, "EDGE_REBAL_RUNNABLE tracepoint");
373 						/* Put back stolen thread */
374 						enqueue_thread(pset_target(enqueued_pset), thread);
375 					}
376 				}
377 				sched_policy_pop_metadata();
378 			}
379 
380 			ret = dequeue_thread_expect(pset_target(enqueued_pset), thread);
381 			T_QUIET; T_EXPECT_TRUE(ret, "thread correctly where we left it");
382 			sched_policy_pop_metadata();
383 		}
384 		sched_policy_pop_metadata();
385 	}
386 	SCHED_POLICY_PASS("Rebalance mechanisms kicking in!");
387 }
388 
389 static test_pset_t two_of_each_psets[6] = {
390 	{
391 		.cpu_type = TEST_CPU_TYPE_EFFICIENCY,
392 		.num_cpus = 2,
393 		.cluster_id = 0,
394 		.die_id = 0,
395 	},
396 	{
397 		.cpu_type = TEST_CPU_TYPE_PERFORMANCE,
398 		.num_cpus = 2,
399 		.cluster_id = 1,
400 		.die_id = 0,
401 	},
402 	{
403 		.cpu_type = TEST_CPU_TYPE_EFFICIENCY,
404 		.num_cpus = 2,
405 		.cluster_id = 2,
406 		.die_id = 1,
407 	},
408 	{
409 		.cpu_type = TEST_CPU_TYPE_PERFORMANCE,
410 		.num_cpus = 2,
411 		.cluster_id = 3,
412 		.die_id = 1,
413 	},
414 };
415 test_hw_topology_t two_of_each = {
416 	.psets = &two_of_each_psets[0],
417 	.num_psets = 4,
418 	.total_cpus = 8,
419 };
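
/*
 * Assuming the harness numbers CPUs pset-by-pset, two_of_each maps CPUs 0-1 to
 * E-cluster 0, CPUs 2-3 to P-cluster 1, CPUs 4-5 to E-cluster 2, and CPUs 6-7
 * to P-cluster 3.
 */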
420 
421 static void
422 clear_threads_from_topo(void)
423 {
424 	test_hw_topology_t topo = get_hw_topology();
425 	int pset_first_cpu = 0;
426 	for (int p = 0; p < topo.num_psets; p++) {
427 		while (!runqueue_empty(pset_target(p))) {
428 			(void)dequeue_thread_expect(pset_target(p), (test_thread_t)0xc0ffee);
429 		}
430 		for (int b = 0; b < TH_BUCKET_SCHED_MAX; b++) {
431 			set_pset_load_avg(p, b, 0);
432 		}
433 		for (int c = pset_first_cpu; c < pset_first_cpu + topo.psets[p].num_cpus; c++) {
434 			cpu_clear_thread_current(c);
435 		}
436 		pset_first_cpu += topo.psets[p].num_cpus;
437 	}
438 }
439 
440 typedef enum {
441 	enqueued = 0,
442 	running = 1,
443 	thread_type_max = 2,
444 } thread_type_t;
445 
446 typedef enum {
447 	e_recc = 0,
448 	p_recc = 1,
449 	recc_type_max = 2,
450 } recc_type_t;
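
/*
 * Tests below index their thread matrices as threads[thread_type][pset][recc_type]:
 * whether the thread was left enqueued in a runqueue or set running on a CPU,
 * which pset it was placed on, and whether its thread group prefers an E- or
 * P-cluster.
 */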
451 
452 static char *
453 thread_recc_to_core_type_char(recc_type_t recc)
454 {
455 	switch (recc) {
456 	case e_recc:
457 		return "E";
458 	case p_recc:
459 		return "P";
460 	default:
461 		assert(false);
462 	}
463 }
464 
465 static char
466 pset_id_to_core_type_char(int pset_id)
467 {
468 	return test_cpu_type_to_char(get_hw_topology().psets[pset_id].cpu_type);
469 }
470 
471 static void
472 no_steal_expect(int stealing_pset, char *explanation)
473 {
474 	test_thread_t no_steal = cpu_steal_thread(pset_id_to_cpu_id(stealing_pset));
475 	T_EXPECT_NULL(no_steal, "No thread stolen because: %s (%p)", explanation, no_steal);
476 }
477 
478 /*
479  * For convenience when handling arrays with one test thread per each
480  * possible recommendation type, map the recommendation type to an
481  * index in such an array.
482  */
483 static int
484 recc_type_to_ind(recc_type_t recc)
485 {
486 	return (int)recc;
487 }
488 
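/*
 * Expect stealing_pset to pull the enqueued thread with the given
 * recommendation out of stolen_from_pset via the foreign rebalance-steal path
 * (EDGE_REBAL_RUNNABLE tracepoint).
 */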
489 static void
490 foreign_steal_expect(int stealing_pset, int stolen_from_pset,
491     test_thread_t thread_candidates_matrix[thread_type_max][4][recc_type_max],
492     recc_type_t thread_recommendation)
493 {
494 	int ret;
495 	test_thread_t thread = cpu_steal_thread(pset_id_to_cpu_id(stealing_pset));
496 	char stealing_type = pset_id_to_core_type_char(stealing_pset);
497 	char stolen_type = pset_id_to_core_type_char(stolen_from_pset);
498 	char *recc_type = thread_recc_to_core_type_char(thread_recommendation);
499 	T_EXPECT_EQ(thread, thread_candidates_matrix[enqueued][stolen_from_pset][recc_type_to_ind(thread_recommendation)],
500 	    "%c (%d) rebalance-steals %s-recommended from %c (%d)", stealing_type, stealing_pset,
501 	    recc_type, stolen_type, stolen_from_pset);
502 	ret = tracepoint_expect(EDGE_REBAL_RUNNABLE,
503 	    get_thread_tid(thread_candidates_matrix[enqueued][stolen_from_pset][recc_type_to_ind(thread_recommendation)]),
504 	    stealing_pset, stolen_from_pset, 0);
505 	T_QUIET; T_EXPECT_TRUE(ret, "EDGE_REBAL_RUNNABLE %c->%c %s-recommended tracepoint",
506 	    stolen_type, stealing_type, recc_type);
507 }
508 
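/*
 * Same shape as foreign_steal_expect(), but the thread is expected to arrive
 * via the native work-steal path (EDGE_STEAL tracepoint).
 */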
509 static void
510 work_steal_expect(int stealing_pset, int stolen_from_pset,
511     test_thread_t thread_candidates_matrix[thread_type_max][4][recc_type_max],
512     recc_type_t thread_recommendation)
513 {
514 	int ret;
515 	test_thread_t thread = cpu_steal_thread(pset_id_to_cpu_id(stealing_pset));
516 	char stealing_type = pset_id_to_core_type_char(stealing_pset);
517 	char stolen_type = pset_id_to_core_type_char(stolen_from_pset);
518 	char *recc_type = thread_recc_to_core_type_char(thread_recommendation);
519 	T_EXPECT_EQ(thread, thread_candidates_matrix[enqueued][stolen_from_pset][recc_type_to_ind(thread_recommendation)],
520 	    "%c (%d) work-steals %s-recommended from %c (%d)", stealing_type, stealing_pset,
521 	    recc_type, stolen_type, stolen_from_pset);
522 	ret = tracepoint_expect(EDGE_STEAL,
523 	    get_thread_tid(thread_candidates_matrix[enqueued][stolen_from_pset][recc_type_to_ind(thread_recommendation)]),
524 	    stealing_pset, stolen_from_pset, 0);
525 	T_QUIET; T_EXPECT_TRUE(ret, "EDGE_STEAL %c->%c %s-recommended tracepoint",
526 	    stolen_type, stealing_type, recc_type);
527 }
528 
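/*
 * Expect rebalancing_pset to decide to rebalance and to emit a running-rebalance
 * tracepoint (EDGE_REBAL_RUNNING) targeting each CPU in target_cpus.
 */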
529 static void
530 running_rebalance_expect(int rebalancing_pset, char *target_name,
531     int num_target_cpus, int *target_cpus)
532 {
533 	int ret;
534 	char rebalancing_type = pset_id_to_core_type_char(rebalancing_pset);
535 	bool want_rebalance = cpu_processor_balance(pset_id_to_cpu_id(rebalancing_pset));
536 	T_EXPECT_TRUE(want_rebalance, "Send running rebalance %s->%c IPIs",
537 	    target_name, rebalancing_type);
538 	for (int i = 0; i < num_target_cpus; i++) {
539 		ret = tracepoint_expect(EDGE_REBAL_RUNNING, 0, pset_id_to_cpu_id(rebalancing_pset),
540 		    target_cpus[i], 0);
541 		T_QUIET; T_EXPECT_TRUE(ret, "EDGE_REBAL_RUNNING %s->%c IPI tracepoint %d",
542 		    target_name, rebalancing_type, i);
543 	}
544 }
545 
546 SCHED_POLICY_T_DECL(migration_steal_order, "Verify that steal policy steps "
547     "happen in the right order")
548 {
549 	int sched_bucket = TH_BUCKET_SHARE_DF;
550 	init_migration_harness(two_of_each);
551 	for (int config = 0; config < 2; config++) {
552 		/*
553 		 * Enqueue one thread of each recommendation type on each pset,
554 		 * and set one thread of each recommendation type on each pset
555 		 * running on a core.
556 		 */
557 		struct thread_group *p_tg = create_tg(0);
558 		int p_pset = 1;
559 		set_tg_sched_bucket_preferred_pset(p_tg, sched_bucket, p_pset);
560 		struct thread_group *e_tg = create_tg(0);
561 		int e_pset = 0;
562 		set_tg_sched_bucket_preferred_pset(e_tg, sched_bucket, e_pset);
563 		test_thread_t threads[thread_type_max][4][recc_type_max];
564 		for (int p = 0; p < two_of_each.num_psets; p++) {
565 			for (recc_type_t r = 0; r < recc_type_max; r++) {
566 				threads[enqueued][p][r] = create_thread(sched_bucket, (r == e_recc) ? e_tg : p_tg,
567 				    root_bucket_to_highest_pri[sched_bucket]);
568 				enqueue_thread(pset_target(p), threads[enqueued][p][r]);
569 				T_LOG("Enqueued thread %p on pset %d, recc %d", threads[enqueued][p][r], p, r);
570 				threads[running][p][r] = create_thread(sched_bucket, (r == e_recc) ? e_tg : p_tg,
571 				    root_bucket_to_highest_pri[sched_bucket]);
572 				int run_cpu_id = pset_id_to_cpu_id(p) + r;
573 				cpu_set_thread_current(run_cpu_id, threads[running][p][r]);
574 			}
575 		}
576 		int other_p_pset = 3;
577 		int other_e_pset = 2;
578 		if (config == 0) {
579 			/* ~~~~~ P-core steal/idle path ~~~~~ */
580 			/* 1. Foreign rebalance steal */
581 			foreign_steal_expect(other_p_pset, e_pset, threads, p_recc);
582 			foreign_steal_expect(other_p_pset, other_e_pset, threads, p_recc);
583 			/* 2. Native work-steal */
584 			work_steal_expect(other_p_pset, p_pset, threads, p_recc);
585 			/* 3. Running rebalance */
586 			no_steal_expect(other_p_pset, "Want to perform running rebalance");
587 			running_rebalance_expect(other_p_pset, "E", 2,
588 			    (int[]){pset_id_to_cpu_id(e_pset) + p_recc, pset_id_to_cpu_id(other_e_pset) + p_recc});
589 			cpu_clear_thread_current(pset_id_to_cpu_id(e_pset) + p_recc);
590 			cpu_clear_thread_current(pset_id_to_cpu_id(other_e_pset) + p_recc);
591 			/* 4. Work-steal from anywhere allowed */
592 			no_steal_expect(other_p_pset, "Nothing left a P-core wants to steal");
593 			SCHED_POLICY_PASS("Verified steal order steps for stealing P-core");
594 		} else {
595 			/* ~~~~~ E-core steal/idle path ~~~~~ */
596 			/* 1. Foreign rebalance steal */
597 			/* Foreign pset search starts with highest id */
598 			foreign_steal_expect(other_e_pset, p_pset, threads, e_recc);
599 			foreign_steal_expect(other_e_pset, other_p_pset, threads, e_recc);
600 			/* 2. Native work-steal */
601 			work_steal_expect(other_e_pset, e_pset, threads, e_recc);
602 			work_steal_expect(other_e_pset, e_pset, threads, p_recc);
603 			/* 3. Running rebalance */
604 			no_steal_expect(other_e_pset, "Want to perform running rebalance");
605 			running_rebalance_expect(other_e_pset, "P", 2,
606 			    (int[]){pset_id_to_cpu_id(p_pset) + e_recc, pset_id_to_cpu_id(other_p_pset) + e_recc});
607 			cpu_clear_thread_current(pset_id_to_cpu_id(p_pset) + e_recc);
608 			cpu_clear_thread_current(pset_id_to_cpu_id(other_p_pset) + e_recc);
609 			/* 4. Work-steal from anywhere allowed */
610 			for (int i = 0; i < 2; i++) {
611 				int src_pset = (i == 0) ? other_p_pset : p_pset;
612 				no_steal_expect(other_e_pset, "Non-zero edge (P->E) steal requires excess "
613 				    "threads in the runqueue");
614 				cpu_set_thread_current(pset_id_to_cpu_id(src_pset) + e_recc,
615 				    create_thread(sched_bucket, p_tg, root_bucket_to_highest_pri[sched_bucket]));
616 				work_steal_expect(other_e_pset, src_pset, threads, p_recc);
617 			}
618 			no_steal_expect(other_e_pset, "Nothing left of interest to steal");
619 			SCHED_POLICY_PASS("Verified steal order steps for stealing E-core");
620 		}
621 		clear_threads_from_topo();
622 	}
623 }
624 
625 static bool shush = false;
626 
627 static void
628 work_steal_expect_simple(int stealing_pset, int stolen_from_pset,
629     test_thread_t stolen_thread, char *msg)
630 {
631 	int ret;
632 	test_thread_t found_thread = cpu_steal_thread(pset_id_to_cpu_id(stealing_pset));
633 	if (shush) {
634 		T_QUIET;
635 	}
636 	T_EXPECT_EQ(found_thread, stolen_thread, msg);
637 	ret = tracepoint_expect(EDGE_STEAL, get_thread_tid(stolen_thread), stealing_pset, stolen_from_pset, 0);
638 	T_QUIET; T_EXPECT_TRUE(ret, "EDGE_STEAL tracepoint for %s", msg);
639 }
640 
641 SCHED_POLICY_T_DECL(migration_steal_only_excess_by_qos, "Verify that steal logic "
642     "only steals across hetergeneous psets when there are excess threads at that QoS")
643 {
644 	init_migration_harness(dual_die);
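	/* Pset roles assumed from the dual_die topology: pset 0 is an E-cluster, psets 1 and 2 are P-clusters. */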
645 	int p_pset = 1;
646 	int p_pset_cpus = get_hw_topology().psets[p_pset].num_cpus;
647 	int e_pset = 0;
648 	int other_p_pset = 2;
649 
650 	/* Load P-pset core-by-core until there's an excess thread for E-pset to steal */
651 	test_thread_t default_threads[p_pset_cpus + 1];
652 	struct thread_group *tg = create_tg(0);
653 	set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_SHARE_DF, p_pset);
654 	for (int i = 0; i < p_pset_cpus + 1; i++) {
655 		default_threads[i] = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
656 	}
657 	for (int i = 0; i < p_pset_cpus; i++) {
658 		enqueue_thread(pset_target(p_pset), default_threads[i]);
659 		increment_mock_time_us(5); // Stagger enqueue times so threads dequeue in FIFO order
660 		no_steal_expect(e_pset, "No excess threads yet");
661 	}
662 	enqueue_thread(pset_target(p_pset), default_threads[p_pset_cpus]);
663 	work_steal_expect_simple(e_pset, p_pset, default_threads[0], "P->E Excess thread stolen");
664 	no_steal_expect(e_pset, "Back to no excess threads");
665 	/* Allow P-pset to swipe up non-excess threads */
666 	for (int i = 1; i < p_pset_cpus + 1; i++) {
667 		work_steal_expect_simple(other_p_pset, p_pset, default_threads[i],
668 		    "Homogenous (P->P) can steal non-excess threads");
669 	}
670 	no_steal_expect(other_p_pset, "All threads stolen already");
671 	SCHED_POLICY_PASS("Heterogenous psets only steal excess threads, while homogeneous steal any");
672 	clear_threads_from_topo();
673 
674 	/* Enqueue "pyramid" of threads at different QoSes */
675 	test_thread_t per_qos_threads[TH_BUCKET_SCHED_MAX];
676 	for (int bucket = 0; bucket < TH_BUCKET_SCHED_MAX; bucket++) {
677 		set_tg_sched_bucket_preferred_pset(tg, bucket, p_pset);
678 		per_qos_threads[bucket] = create_thread(bucket, tg, root_bucket_to_highest_pri[bucket]);
679 		if (bucket == 0) {
680 			set_thread_sched_mode(per_qos_threads[bucket], TH_MODE_FIXED);
681 		}
682 	}
683 	for (int bucket = 0; bucket < TH_BUCKET_SCHED_MAX; bucket++) {
684 		enqueue_thread(pset_target(p_pset), per_qos_threads[bucket]);
685 		if (bucket < p_pset_cpus) {
686 			no_steal_expect(e_pset, "No excess threads yet");
687 		}
688 	}
689 	for (int qos_with_excess = p_pset_cpus; qos_with_excess < TH_BUCKET_SCHED_MAX; qos_with_excess++) {
690 		work_steal_expect_simple(e_pset, p_pset, per_qos_threads[qos_with_excess],
691 		    "Steal from highest QoS with non-idle load");
692 	}
693 	SCHED_POLICY_PASS("Heterogeneous psets only steal from excess QoSes");
694 }
695 
696 static test_pset_t pair_p_psets[2] = {
697 	{
698 		.cpu_type = TEST_CPU_TYPE_PERFORMANCE,
699 		.num_cpus = 1,
700 		.cluster_id = 0,
701 		.die_id = 0,
702 	},
703 	{
704 		.cpu_type = TEST_CPU_TYPE_PERFORMANCE,
705 		.num_cpus = 1,
706 		.cluster_id = 1,
707 		.die_id = 0,
708 	},
709 };
710 test_hw_topology_t pair_p = {
711 	.psets = &pair_p_psets[0],
712 	.num_psets = 2,
713 	.total_cpus = 2,
714 };
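
/*
 * pair_p: two single-CPU P-clusters, so pset_id_to_cpu_id() identifies the lone
 * CPU in each cluster and steals between the two psets are easy to reason about.
 */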
715 
716 SCHED_POLICY_T_DECL(migration_steal_no_cluster_bound,
717     "Verify that cluster-bound threads do not get stolen to a different pset")
718 {
719 	init_migration_harness(pair_p);
720 	int load_multiplier = 10;
721 	int loaded_pset = 0;
722 	int idle_pset = 1;
723 	int num_bound_threads = pair_p.psets[loaded_pset].num_cpus * load_multiplier;
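	/*
	 * Three affinity flavors are exercised, none of which should be stolen to
	 * another pset: hard cluster binding (eBound), plus shared-resource threads
	 * presumably in native-first (eNativeFirst) and round-robin (eRoundRobin)
	 * modes, per the edge_set_thread_shared_rsrc() calls below.
	 */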
724 	enum { eBound = 0, eNativeFirst = 1, eRoundRobin = 2, eMax = 3 } bound_type;
725 	test_thread_t bound_threads[eMax][num_bound_threads];
726 	struct thread_group *tg = create_tg(0);
727 	for (bound_type = 0; bound_type < eMax; bound_type++) {
728 		for (int i = 0; i < num_bound_threads; i++) {
729 			bound_threads[bound_type][i] = create_thread(TH_BUCKET_SHARE_DF, tg,
730 			    root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
731 			switch (bound_type) {
732 			case eBound:
733 				set_thread_cluster_bound(bound_threads[bound_type][i], loaded_pset);
734 				break;
735 			case eNativeFirst:
736 				edge_set_thread_shared_rsrc(bound_threads[bound_type][i], true);
737 				break;
738 			case eRoundRobin:
739 				edge_set_thread_shared_rsrc(bound_threads[bound_type][i], false);
740 				break;
741 			default:
742 				T_QUIET; T_ASSERT_FAIL("Invalid bound case");
743 			}
744 			increment_mock_time_us(5); // Stagger enqueue times to preserve FIFO order
745 			enqueue_thread(pset_target(loaded_pset), bound_threads[bound_type][i]);
746 		}
747 		no_steal_expect(idle_pset, "Refuse to steal cluster bound threads");
748 	}
749 	test_thread_t unbound_thread = create_thread(TH_BUCKET_SHARE_DF, tg,
750 	    root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
751 	increment_mock_time_us(5);
752 	enqueue_thread(pset_target(loaded_pset), unbound_thread);
753 	work_steal_expect_simple(idle_pset, loaded_pset, unbound_thread,
754 	    "Pluck out the unbound thread to steal");
755 	no_steal_expect(idle_pset, "Still refuse to steal cluster bound threads");
756 	SCHED_POLICY_PASS("Cluster bound threads cannot be stolen");
757 }
758 
759 SCHED_POLICY_T_DECL(migration_steal_highest_pri,
760     "Verify that higher priority threads are stolen first, across silos")
761 {
762 	init_migration_harness(pair_p);
763 	int idle_pset = 0;
764 	int loaded_pset = 1;
765 	int max_pri_to_subtract = 4;
766 	int high_bucket = TH_BUCKET_SHARE_FG;
767 	int low_bucket = TH_BUCKET_SHARE_BG;
768 	int num_buckets = low_bucket - high_bucket + 1;
769 	int num_silos = 2;
770 	int num_threads = num_silos * num_buckets * (max_pri_to_subtract + 1);
771 	test_thread_t threads[num_threads];
772  #define silo_bucket_pri_to_ind(silo, bucket, sub_pri) \
773 	(silo * (num_buckets * (max_pri_to_subtract + 1)) + \
774 	            (bucket - high_bucket) * ((max_pri_to_subtract + 1)) + sub_pri)
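	/*
	 * Flattens (silo, bucket, sub_pri) into an index: silos are laid out
	 * contiguously, then buckets from high_bucket through low_bucket, then
	 * priority offsets 0..max_pri_to_subtract. For example, silo 1 at
	 * high_bucket with sub_pri 0 lands at index num_buckets * (max_pri_to_subtract + 1).
	 */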
775 	/* Create a bunch of threads for the different silos, buckets, and priority values */
776 	for (int s = 0; s < num_silos; s++) {
777 		struct thread_group *silo_tg = create_tg(0);
778 		for (int b = high_bucket; b <= low_bucket; b++) {
779 			set_tg_sched_bucket_preferred_pset(silo_tg, b, s);
780 			for (int p = 0; p <= max_pri_to_subtract; p++) {
781 				threads[silo_bucket_pri_to_ind(s, b, p)] =
782 				    create_thread(b, silo_tg, root_bucket_to_highest_pri[b] - p);
783 			}
784 		}
785 	}
786 	/* Despite enqueueing in a random order, the threads should be stolen out in priority order */
787 	int rand_seed = 777777;
788 	enqueue_threads_arr_rand_order(pset_target(loaded_pset), rand_seed, num_threads, threads);
789 	shush = true; // Quiet work_steal_expect_simple()'s expects
790 	for (int b = high_bucket; b <= low_bucket; b++) {
791 		for (int p = 0; p <= max_pri_to_subtract; p++) {
792 			for (int s = 0; s < num_silos; s++) {
793 				T_QUIET; work_steal_expect_simple(idle_pset, loaded_pset,
794 				    threads[silo_bucket_pri_to_ind(s, b, p)], "Higher pri threads stolen first");
795 			}
796 		}
797 	}
798 	shush = false;
799 	no_steal_expect(idle_pset, "Already stole all the threads");
800 	SCHED_POLICY_PASS("Higher priority threads stolen first across silos");
801 }
802 
803 SCHED_POLICY_T_DECL(migration_harmonious_chosen_pset,
804     "Verify that different migration mechanisms agree about where a thread "
805     "should be, given current system conditions")
806 {
807 	int ret;
808 	test_hw_topology_t topo = SCHED_POLICY_DEFAULT_TOPO;
809 	init_migration_harness(topo);
810 	int sched_bucket = TH_BUCKET_SHARE_DF;
811 	struct thread_group *tg = create_tg(0);
812 	thread_t thread = create_thread(sched_bucket, tg, root_bucket_to_highest_pri[sched_bucket]);
813 	int max_load_threads = 20;
814 	test_thread_t load_threads[max_load_threads];
815 	for (int i = 0; i < max_load_threads; i++) {
816 		load_threads[i] = create_thread(sched_bucket, tg, root_bucket_to_highest_pri[sched_bucket]);
817 	}
818 
819 	/* Iterate conditions with different preferred psets and pset loads */
820 	for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
821 		set_tg_sched_bucket_preferred_pset(tg, sched_bucket, preferred_pset_id);
822 		sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
823 		for (int loaded_pset_id = 0; loaded_pset_id < topo.num_psets; loaded_pset_id++) {
824 			/* Load the loaded_pset */
825 			enqueue_threads_arr(pset_target(loaded_pset_id), max_load_threads, load_threads);
826 			bool preferred_is_idle = preferred_pset_id != loaded_pset_id;
827 			sched_policy_push_metadata("loaded_pset_id", loaded_pset_id);
828 
829 			/* Where the thread proactively wants to go */
830 			int chosen_pset = choose_pset_for_thread(thread);
831 			bool chose_the_preferred_pset = chosen_pset == preferred_pset_id;
832 			if (preferred_is_idle) {
833 				T_QUIET; T_EXPECT_TRUE(chose_the_preferred_pset, "Should always choose the preferred pset if idle %s",
834 				    sched_policy_dump_metadata());
835 			}
836 
837 			/* Thread generally should not avoid a processor in its chosen pset */
838 			for (int c = 0; c < topo.psets[chosen_pset].num_cpus; c++) {
839 				int avoid_cpu_id = pset_id_to_cpu_id(chosen_pset) + c;
840 				sched_policy_push_metadata("avoid_cpu_id", avoid_cpu_id);
841 				ret = thread_avoid_processor_expect(thread, avoid_cpu_id, false, false);
842 				T_QUIET; T_EXPECT_TRUE(ret, "Thread should not want to leave processor in just chosen pset %s",
843 				    sched_policy_dump_metadata());
844 				sched_policy_pop_metadata();
845 			}
846 
847 			/* Extra assertions we can make based on the preferred pset being idle */
848 			if (preferred_is_idle) {
849 				/* Thread should avoid processor in non-preferred pset to get to the idle preferred pset */
850 				for (int c = 0; c < topo.total_cpus; c++) {
851 					if (cpu_id_to_pset_id(c) != preferred_pset_id) {
852 						sched_policy_push_metadata("avoid_non_preferred_cpu_id", c);
853 						ret = thread_avoid_processor_expect(thread, c, false, true);
854 						T_QUIET; T_EXPECT_TRUE(ret, "Thread should avoid processor in non-preferred pset to get to idle "
855 						    "preferred pset %s", sched_policy_dump_metadata());
856 						sched_policy_pop_metadata();
857 					}
858 				}
859 			}
860 
861 			/* Other cores should not want to rebalance the running thread away from its chosen pset */
862 			int chosen_cpu = pset_id_to_cpu_id(chosen_pset);
863 			cpu_set_thread_current(chosen_cpu, thread);
864 			for (int c = 0; c < topo.total_cpus; c++) {
865 				if ((cpu_id_to_pset_id(c) != chosen_pset) && (cpu_id_to_pset_id(c) != loaded_pset_id)) {
866 					sched_policy_push_metadata("stealing_cpu_id", c);
867 					thread_t stolen_thread = cpu_steal_thread(c);
868 					if (stolen_thread != NULL) {
869 						T_QUIET; T_EXPECT_NE(stolen_thread, thread, "Should not steal back thread from its chosen_pset %s",
870 						    sched_policy_dump_metadata());
871 						if (stolen_thread != thread) {
872 							/* Put back the stolen load thread */
873 							enqueue_thread(pset_target(loaded_pset_id), stolen_thread);
874 						}
875 					}
876 					bool want_rebalance = cpu_processor_balance(c);
877 					T_QUIET; T_EXPECT_FALSE(want_rebalance, "Should not rebalance thread away from its chosen_pset %s",
878 					    sched_policy_dump_metadata());
879 					sched_policy_pop_metadata();
880 				}
881 			}
882 
883 			(void)dequeue_threads_expect_ordered_arr(pset_target(loaded_pset_id), max_load_threads, load_threads);
884 			clear_threads_from_topo();
885 			for (int pset = 0; pset < topo.num_psets; pset++) {
886 				T_QUIET; T_EXPECT_TRUE(runqueue_empty(pset_target(pset)), "pset %d wasn't cleared at the end of test "
887 				    "scenario %s", pset, sched_policy_dump_metadata());
888 			}
889 			sched_policy_pop_metadata();
890 		}
891 		sched_policy_pop_metadata();
892 	}
893 	SCHED_POLICY_PASS("Policy is harmonious on the subject of a thread's chosen pset");
894 }
895 
896 SCHED_POLICY_T_DECL(migration_search_order,
897     "Verify that we iterate psets for spill and steal in the expected order")
898 {
899 	int ret;
900 	init_migration_harness(dual_die);
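	/*
	 * expected_orders[src][i] is the i-th pset expected to be visited when
	 * searching from pset src with every pset eligible; each row starts with
	 * src itself.
	 */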
901 	int expected_orders[6][6] = {
902 		{0, 3, 1, 2, 4, 5},
903 		{1, 2, 4, 5, 0, 3},
904 		{2, 1, 4, 5, 0, 3},
905 		{3, 0, 4, 5, 1, 2},
906 		{4, 5, 1, 2, 3, 0},
907 		{5, 4, 1, 2, 3, 0},
908 	};
909 	for (int src_pset_id = 0; src_pset_id < dual_die.num_psets; src_pset_id++) {
910 		ret = iterate_pset_search_order_expect(src_pset_id, UINT64_MAX, 0, expected_orders[src_pset_id], dual_die.num_psets);
911 		T_QUIET; T_EXPECT_EQ(ret, -1, "Mismatched search order at ind %d for src_pset_id %d",
912 		    ret, src_pset_id);
913 	}
914 	SCHED_POLICY_PASS("Search order sorts on migration weight, then locality, then pset id");
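	/* Candidate masks are assumed to be bit-per-pset (bit i set => pset i is eligible): p_mask selects psets 1, 2, 4, 5 and e_mask selects psets 0, 3. */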
915 	uint64_t p_mask = 0b110110;
916 	int expected_p_orders[6][6] = {
917 		{1, 2, 4, 5, -1, -1},
918 		{1, 2, 4, 5, -1, -1},
919 		{2, 1, 4, 5, -1, -1},
920 		{4, 5, 1, 2, -1, -1},
921 		{4, 5, 1, 2, -1, -1},
922 		{5, 4, 1, 2, -1, -1},
923 	};
924 	uint64_t e_mask = 0b001001;
925 	int expected_e_orders[6][6] = {
926 		{0, 3, -1, -1, -1, -1},
927 		{0, 3, -1, -1, -1, -1},
928 		{0, 3, -1, -1, -1, -1},
929 		{3, 0, -1, -1, -1, -1},
930 		{3, 0, -1, -1, -1, -1},
931 		{3, 0, -1, -1, -1, -1},
932 	};
933 	for (int i = 0; i < 2; i++) {
934 		for (int src_pset_id = 0; src_pset_id < dual_die.num_psets; src_pset_id++) {
935 			uint64_t mask = (i == 0) ? p_mask : e_mask;
936 			int *expected_order_masked = (i == 0) ? expected_p_orders[src_pset_id] : expected_e_orders[src_pset_id];
937 			ret = iterate_pset_search_order_expect(src_pset_id, mask, 0, expected_order_masked, dual_die.num_psets);
938 			T_QUIET; T_EXPECT_EQ(ret, -1, "Mismatched masked search order at ind %d for src_pset_id %d",
939 			    ret, src_pset_id);
940 		}
941 	}
942 	SCHED_POLICY_PASS("Search order traversal respects candidate mask");
943 }
944