xref: /xnu-12377.41.6/tests/sched/edge_migration.c (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
// Copyright (c) 2024 Apple Inc.  All rights reserved.

#include "sched_test_harness/sched_policy_darwintest.h"
#include "sched_test_harness/sched_edge_harness.h"

T_GLOBAL_META(T_META_NAMESPACE("xnu.scheduler"),
    T_META_RADAR_COMPONENT_NAME("xnu"),
    T_META_RADAR_COMPONENT_VERSION("scheduler"),
    T_META_RUN_CONCURRENTLY(true),
    T_META_OWNER("emily_peterson"));

SCHED_POLICY_T_DECL(migration_cluster_bound,
    "Verify that cluster-bound threads always choose the bound "
    "cluster except when it's derecommended")
{
	int ret;
	init_migration_harness(dual_die);
	struct thread_group *tg = create_tg(0);
	test_thread_t threads[dual_die.num_psets];
	int idle_load = 0;
	int low_load = 100000;
	int high_load = 10000000;
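	/* Bind one thread to each cluster and seed every pset with the same non-idle load */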
	for (int i = 0; i < dual_die.num_psets; i++) {
		threads[i] = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
		set_thread_cluster_bound(threads[i], i);
		set_pset_load_avg(i, TH_BUCKET_SHARE_DF, low_load);
	}
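	/* From every CPU, each bound thread should still choose its bound cluster, even when that cluster carries extra load */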
	for (int i = 0; i < dual_die.num_psets; i++) {
		set_current_processor(pset_id_to_cpu_id(i));
		for (int j = 0; j < dual_die.num_psets; j++) {
			/* Add extra load to the bound cluster, so we're definitely not just idle short-circuiting */
			set_pset_load_avg(j, TH_BUCKET_SHARE_DF, high_load);
			ret = choose_pset_for_thread_expect(threads[j], j);
			T_QUIET; T_EXPECT_TRUE(ret, "Expecting the bound cluster");
			set_pset_load_avg(j, TH_BUCKET_SHARE_DF, low_load);
		}
	}
	SCHED_POLICY_PASS("Cluster bound chooses bound cluster");
	/* Derecommend the bound cluster */
	for (int i = 0; i < dual_die.num_psets; i++) {
		set_pset_derecommended(i);
		int replacement_pset = -1;
		for (int j = 0; j < dual_die.num_psets; j++) {
			/* Find the first homogeneous cluster and mark it as idle so we choose it */
			if ((i != j) && (dual_die.psets[i].cpu_type == dual_die.psets[j].cpu_type)) {
				replacement_pset = j;
				set_pset_load_avg(replacement_pset, TH_BUCKET_SHARE_DF, idle_load);
				break;
			}
		}
		ret = choose_pset_for_thread_expect(threads[i], replacement_pset);
		T_QUIET; T_EXPECT_TRUE(ret, "Expecting the idle pset when the bound cluster is derecommended");
		/* Restore pset conditions */
		set_pset_recommended(i);
		set_pset_load_avg(replacement_pset, TH_BUCKET_SHARE_DF, low_load);
	}
	SCHED_POLICY_PASS("Cluster binding is soft");
}

SCHED_POLICY_T_DECL(migration_should_yield,
    "Verify that we only yield if there's a \"good enough\" thread elsewhere "
    "to switch to")
{
	int ret;
	init_migration_harness(basic_amp);
	struct thread_group *tg = create_tg(0);
	test_thread_t background = create_thread(TH_BUCKET_SHARE_BG, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_BG]);
	test_thread_t yielder = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
	cpu_set_thread_current(0, yielder);
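	/* With an empty runqueue there is nothing worth yielding to */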
	ret = cpu_check_should_yield(0, false);
	T_QUIET; T_EXPECT_TRUE(ret, "No thread present to yield to");
	enqueue_thread(pset_target(0), background);
	ret = cpu_check_should_yield(0, true);
	T_QUIET; T_EXPECT_TRUE(ret, "Should yield to a low priority thread on the current runqueue");
	SCHED_POLICY_PASS("Basic yield behavior on single pset");

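	/* Move the background thread to a remote pset: yielding is still worthwhile because the thread can be stolen */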
	ret = dequeue_thread_expect(pset_target(0), background);
	T_QUIET; T_EXPECT_TRUE(ret, "Only background thread in runqueue");
	cpu_set_thread_current(0, yielder); /* Reset current thread */
	enqueue_thread(pset_target(1), background);
	ret = cpu_check_should_yield(0, true);
	T_QUIET; T_EXPECT_TRUE(ret, "Should yield in order to steal thread");
	ret = dequeue_thread_expect(pset_target(1), background);
	T_QUIET; T_EXPECT_TRUE(ret, "Only background thread in runqueue");
	cpu_set_thread_current(pset_id_to_cpu_id(1), background);
	ret = cpu_check_should_yield(pset_id_to_cpu_id(1), false);
	T_QUIET; T_EXPECT_TRUE(ret, "Should not yield in order to rebalance (presumed) native thread");
	SCHED_POLICY_PASS("Thread yields in order to steal from other psets");
}

SCHED_POLICY_T_DECL(migration_stir_the_pot_basic,
    "Verify that stir-the-pot rotates threads across P- and E-cores after "
    "their respective quanta have expired")
{
	int ret;
	init_migration_harness(basic_amp);
	struct thread_group *tg = create_tg(0);
	test_thread_t starts_p = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
	test_thread_t starts_e = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
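	/*
	 * basic_amp layout as used below: CPUs 0 and 1 sit on the P pset (pset 0),
	 * CPUs 2 and 3 on the E pset (pset 1). The P pset is loaded up so it has no idle cores.
	 */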
	int p_cpu = 0;
	int e_cpu = 2;
	int other_e_cpu = 3;
	int other_p_cpu = 1;
	cpu_set_thread_current(p_cpu, starts_p);
	cpu_set_thread_current(e_cpu, starts_e);
	int p_pset = 0;
	set_pset_load_avg(p_pset, TH_BUCKET_SHARE_DF, 10000000);
	int e_pset = 1;

	/* Thread on low core type "pays its dues" */
	cpu_expire_quantum(e_cpu);

	/* Thread on high core type should locate swap candidate */
	cpu_expire_quantum(p_cpu);
	ret = ipi_expect(e_cpu, TEST_IPI_IMMEDIATE);
	T_QUIET; T_EXPECT_TRUE(ret, "Should have found stir-the-pot candidate with expired quantum");

	/* Thread on low core type should respond to IPI by preempting... */
	ret = thread_avoid_processor_expect(starts_e, e_cpu, false, true);
	T_QUIET; T_EXPECT_TRUE(ret, "Thread should preempt to get on P-core");

	/* (Simulate as if we are switching to another quantum-expired thread) */
	test_thread_t other_expired_thread = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
	cpu_set_thread_current(other_e_cpu, other_expired_thread);
	cpu_expire_quantum(other_e_cpu);
	cpu_clear_thread_current(other_e_cpu);
	cpu_set_thread_current(e_cpu, other_expired_thread);

	/* ...and choosing the corresponding P-core for swap */
	ret = choose_pset_for_thread_expect(starts_e, p_pset);
	T_QUIET; T_EXPECT_TRUE(ret, "Should choose P-cores despite no idle cores there");

	/* Upon arrival, thread swapping in should preempt its predecessor */
	enqueue_thread(pset_target(p_pset), starts_e);
	ret = cpu_check_preempt_current(p_cpu, true);
	T_QUIET; T_EXPECT_TRUE(ret, "P-core should preempt quantum expired thread");

	/* ...and preempted thread on P-core should spill down to E, completing the swap */
	ret = dequeue_thread_expect(pset_target(p_pset), starts_e);
	T_QUIET; T_ASSERT_TRUE(ret, "starts_e was enqueued on P");
	cpu_set_thread_current(p_cpu, starts_e);
	ret = choose_pset_for_thread_expect(starts_p, e_pset);
	T_QUIET; T_EXPECT_TRUE(ret, "starts_p spilled to E, completing swap");

	/*
	 * And a second swap should be initiated for the other E-expired thread
	 * that switched on-core afterwards.
	 */
	test_thread_t other_p_thread = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
	cpu_set_thread_current(other_p_cpu, other_p_thread);
	cpu_expire_quantum(other_p_cpu);
	ret = ipi_expect(e_cpu, TEST_IPI_IMMEDIATE);
	T_QUIET; T_EXPECT_TRUE(ret, "Should have found stir-the-pot candidate with expired quantum");

	SCHED_POLICY_PASS("Stir-the-pot successfully initiated by P-core and completed");

	/* Clean-up and reset to initial conditions */
	cpu_set_thread_current(p_cpu, starts_p);
	cpu_set_thread_current(e_cpu, starts_e);
	cpu_set_thread_current(other_p_cpu, other_p_thread);
	cpu_set_thread_current(other_e_cpu, other_expired_thread);

	/* Now P-core expires quantum first */
	cpu_expire_quantum(p_cpu);

	/* Thread on E-core "pays its dues" and responds to self-message by preempting */
	cpu_expire_quantum(e_cpu);
	ret = thread_avoid_processor_expect(starts_e, e_cpu, false, true);
	T_QUIET; T_EXPECT_TRUE(ret, "Thread should preempt to get on P-core");

	/* ...and choosing the corresponding P-core for swap */
	cpu_clear_thread_current(e_cpu);
	ret = choose_pset_for_thread_expect(starts_e, p_pset);
	T_QUIET; T_EXPECT_TRUE(ret, "Should choose P-cores despite no idle cores there");

	/* Upon arrival, thread swapping in should preempt its predecessor */
	enqueue_thread(pset_target(p_pset), starts_e);
	ret = cpu_check_preempt_current(p_cpu, true);
	T_QUIET; T_EXPECT_TRUE(ret, "P-core should preempt quantum expired thread");

	/* ...and preempted thread on P-core should spill down to E, completing the swap */
	ret = dequeue_thread_expect(pset_target(p_pset), starts_e);
	T_QUIET; T_ASSERT_TRUE(ret, "starts_e was enqueued on P");
	cpu_set_thread_current(p_cpu, starts_e);
	ret = choose_pset_for_thread_expect(starts_p, e_pset);
	T_QUIET; T_EXPECT_TRUE(ret, "starts_p spilled to E, completing swap");

	SCHED_POLICY_PASS("Stir-the-pot successfully initiated by E-core and completed");
}

SCHED_POLICY_T_DECL(migration_ipi_policy,
    "Verify we send the right type of IPI in different cross-core preemption scenarios")
{
	int ret;
	init_migration_harness(dual_die);
	struct thread_group *tg = create_tg(0);
	thread_t thread = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
	int dst_pcore = 3;
	int src_pcore = 0;
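	/* A preemption IPI aimed at an idle core should be sent as an idle IPI */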

	set_current_processor(src_pcore);
	cpu_send_ipi_for_thread(dst_pcore, thread, TEST_IPI_EVENT_PREEMPT);
	ret = ipi_expect(dst_pcore, TEST_IPI_IDLE);
	T_QUIET; T_EXPECT_TRUE(ret, "Idle CPU");

	thread_t core_busy = create_thread(TH_BUCKET_SHARE_DF, tg, root_bucket_to_highest_pri[TH_BUCKET_SHARE_DF]);
	cpu_set_thread_current(dst_pcore, core_busy);
	set_current_processor(src_pcore);
	cpu_send_ipi_for_thread(dst_pcore, thread, TEST_IPI_EVENT_PREEMPT);
	ret = ipi_expect(dst_pcore, TEST_IPI_IMMEDIATE);
	T_QUIET; T_EXPECT_TRUE(ret, "Should send an immediate IPI to preempt on P-core");
	SCHED_POLICY_PASS("Immediate IPIs to preempt P-cores");

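	/* Repeat the preempt-IPI checks with an E-core destination, after making its cluster the thread group's preferred pset */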
	int dst_ecore = 13;
	int ecluster_id = 5;
	set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_SHARE_DF, ecluster_id);
	set_current_processor(src_pcore);
	cpu_send_ipi_for_thread(dst_ecore, thread, TEST_IPI_EVENT_PREEMPT);
	ret = ipi_expect(dst_ecore, TEST_IPI_IDLE);
	T_QUIET; T_EXPECT_TRUE(ret, "Idle CPU");

	cpu_set_thread_current(dst_ecore, core_busy);
	set_current_processor(src_pcore);
	cpu_send_ipi_for_thread(dst_ecore, thread, TEST_IPI_EVENT_PREEMPT);
	ret = ipi_expect(dst_ecore, TEST_IPI_IMMEDIATE);
	T_QUIET; T_EXPECT_TRUE(ret, "Should send an immediate IPI to preempt for E->E");
	SCHED_POLICY_PASS("Immediate IPIs to cluster homogeneous with preferred");
}

SCHED_POLICY_T_DECL(migration_max_parallelism,
    "Verify we report expected values for recommended width of parallel workloads")
{
	int ret;
	init_migration_harness(dual_die);
	uint32_t num_pclusters = 4;
	uint32_t num_pcores = 4 * num_pclusters;
	uint32_t num_eclusters = 2;
	uint32_t num_ecores = 2 * num_eclusters;
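	/* Sweep every combination of QoS, shared-resource, and realtime options and check the recommended width */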
	for (thread_qos_t qos = THREAD_QOS_UNSPECIFIED; qos < THREAD_QOS_LAST; qos++) {
		for (int shared_rsrc = 0; shared_rsrc < 2; shared_rsrc++) {
			for (int rt = 0; rt < 2; rt++) {
				uint64_t options = 0;
				uint32_t expected_width = 0;
				if (shared_rsrc) {
					options |= QOS_PARALLELISM_CLUSTER_SHARED_RESOURCE;
				}
				if (rt) {
					options |= QOS_PARALLELISM_REALTIME;
					/* Recommend P-width */
					expected_width = shared_rsrc ? num_pclusters : num_pcores;
				} else if (qos == THREAD_QOS_BACKGROUND || qos == THREAD_QOS_MAINTENANCE) {
					/* Recommend E-width */
					expected_width = shared_rsrc ? num_eclusters : num_ecores;
				} else {
					/* Recommend full width */
					expected_width = shared_rsrc ? (num_eclusters + num_pclusters) : (num_pcores + num_ecores);
				}
				ret = max_parallelism_expect(qos, options, expected_width);
				T_QUIET; T_EXPECT_TRUE(ret, "Unexpected width for QoS %d shared_rsrc %d RT %d",
				    qos, shared_rsrc, rt);
			}
		}
	}
	SCHED_POLICY_PASS("Correct recommended parallel width for all configurations");
}

SCHED_POLICY_T_DECL(migration_rebalance_basic, "Verify that basic rebalance steal and "
    "running rebalance mechanisms kick in")
{
	int ret;
	test_hw_topology_t topo = SCHED_POLICY_DEFAULT_TOPO;
	init_migration_harness(topo);
	int sched_bucket = TH_BUCKET_SHARE_DF;
	struct thread_group *tg = create_tg(0);
	thread_t thread = create_thread(sched_bucket, tg, root_bucket_to_highest_pri[sched_bucket]);

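	/* For every (preferred pset, thread-location pset) pair, check running-rebalance decisions from each CPU, then rebalance-steal decisions with the thread enqueued instead of running */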
	for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
		set_tg_sched_bucket_preferred_pset(tg, sched_bucket, preferred_pset_id);
		sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
		for (int running_on_pset_id = 0; running_on_pset_id < topo.num_psets; running_on_pset_id++) {
			/* Running rebalance */
			int running_on_cpu = pset_id_to_cpu_id(running_on_pset_id);
			cpu_set_thread_current(running_on_cpu, thread);
			sched_policy_push_metadata("running_on_pset_id", running_on_pset_id);
			for (int c = 0; c < topo.total_cpus; c++) {
				sched_policy_push_metadata("evaluate_cpu", c);
				int evaluate_pset = cpu_id_to_pset_id(c);
				bool want_rebalance = cpu_processor_balance(c);
				if (evaluate_pset == running_on_pset_id) {
					T_QUIET; T_EXPECT_FALSE(want_rebalance, "should be no thread available for rebalance %s",
					    sched_policy_dump_metadata());
					sched_policy_pop_metadata();
					continue;
				}
				bool should_rebalance = (topo.psets[evaluate_pset].cpu_type == topo.psets[preferred_pset_id].cpu_type) &&
				    (topo.psets[running_on_pset_id].cpu_type != topo.psets[preferred_pset_id].cpu_type);
				T_QUIET; T_EXPECT_EQ(want_rebalance, should_rebalance, "should rebalance to move thread to preferred type "
				    "if not there already %s", sched_policy_dump_metadata());
				if (should_rebalance) {
					ret = thread_avoid_processor_expect(thread, running_on_cpu, false, true);
					T_QUIET; T_EXPECT_TRUE(ret, "thread will preempt in response to running rebalance IPI %s",
					    sched_policy_dump_metadata());
					/* Try loading all other cores of the preferred type, forcing this decision to find the idle one */
					for (int p = 0; p < topo.num_psets; p++) {
						if ((topo.psets[p].cpu_type == topo.psets[preferred_pset_id].cpu_type) &&
						    (p != evaluate_pset)) {
							set_pset_load_avg(p, sched_bucket, 10000000);
						}
					}
					ret = thread_avoid_processor_expect(thread, running_on_cpu, false, true);
					T_QUIET; T_EXPECT_TRUE(ret, "...even if all other cores (except rebalancer) are full %s",
					    sched_policy_dump_metadata());
					/* Unload cores for clean-up */
					for (int p = 0; p < topo.num_psets; p++) {
						if ((topo.psets[p].cpu_type == topo.psets[preferred_pset_id].cpu_type) &&
						    (p != evaluate_pset)) {
							set_pset_load_avg(p, sched_bucket, 0);
						}
					}
				}
				sched_policy_pop_metadata();
			}
			cpu_clear_thread_current(running_on_cpu);
			sched_policy_pop_metadata();

			/* Rebalance steal */
			int enqueued_pset = running_on_pset_id;
			enqueue_thread(pset_target(enqueued_pset), thread);
			sched_policy_push_metadata("enqueued_pset", enqueued_pset);
			for (int c = 0; c < topo.total_cpus; c++) {
				sched_policy_push_metadata("evaluate_cpu", c);
				int evaluate_pset = cpu_id_to_pset_id(c);
				if ((topo.psets[evaluate_pset].cpu_type != topo.psets[enqueued_pset].cpu_type) &&
				    ((topo.psets[enqueued_pset].cpu_type != TEST_CPU_TYPE_PERFORMANCE) ||
				    (topo.psets[preferred_pset_id].cpu_type != TEST_CPU_TYPE_PERFORMANCE))) {
					/* Only evaluate steal between mismatching cluster types and where spill is not allowed */
					thread_t stolen_thread = cpu_steal_thread(c);
					bool should_rebalance_steal = (topo.psets[evaluate_pset].cpu_type == topo.psets[preferred_pset_id].cpu_type) &&
					    (topo.psets[enqueued_pset].cpu_type != topo.psets[preferred_pset_id].cpu_type);
					bool did_rebalance_steal = (stolen_thread == thread);
					if (stolen_thread != NULL) {
						T_QUIET; T_EXPECT_EQ(stolen_thread, thread, "should only be one thread available to steal");
					}
					T_QUIET; T_EXPECT_EQ(did_rebalance_steal, should_rebalance_steal, "should rebalance steal to move "
					    "thread to preferred type if not already there %s", sched_policy_dump_metadata());
					if (did_rebalance_steal) {
						/* Put back stolen thread */
						enqueue_thread(pset_target(enqueued_pset), thread);
					}
				}
				sched_policy_pop_metadata();
			}

			ret = dequeue_thread_expect(pset_target(enqueued_pset), thread);
			T_QUIET; T_EXPECT_TRUE(ret, "thread is still where we left it");
			sched_policy_pop_metadata();
		}
		sched_policy_pop_metadata();
	}
	SCHED_POLICY_PASS("Rebalance mechanisms kicking in!");
}

SCHED_POLICY_T_DECL(migration_harmonious_chosen_pset,
    "Verify that different migration mechanisms agree about where a thread "
    "should be, given current system conditions")
{
	int ret;
	test_hw_topology_t topo = SCHED_POLICY_DEFAULT_TOPO;
	init_migration_harness(topo);
	int sched_bucket = TH_BUCKET_SHARE_DF;
	struct thread_group *tg = create_tg(0);
	thread_t thread = create_thread(sched_bucket, tg, root_bucket_to_highest_pri[sched_bucket]);
	int max_load_threads = 20;
	test_thread_t load_threads[max_load_threads];
	for (int i = 0; i < max_load_threads; i++) {
		load_threads[i] = create_thread(sched_bucket, tg, root_bucket_to_highest_pri[sched_bucket]);
	}

	/* Iterate conditions with different preferred psets and pset loads */
	for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
		set_tg_sched_bucket_preferred_pset(tg, sched_bucket, preferred_pset_id);
		sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
		for (int loaded_pset_id = 0; loaded_pset_id < topo.num_psets; loaded_pset_id++) {
			// TODO: Test properly updated load average
			enqueue_threads_arr(pset_target(loaded_pset_id), max_load_threads, load_threads);
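			/* All of the load threads sit on loaded_pset_id, so the preferred pset is idle whenever the two differ */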
			bool preferred_is_idle = preferred_pset_id != loaded_pset_id;
			sched_policy_push_metadata("loaded_pset_id", loaded_pset_id);

			/* Where the thread proactively wants to go */
			int chosen_pset = choose_pset_for_thread(thread);
			bool chose_the_preferred_pset = chosen_pset == preferred_pset_id;
			if (preferred_is_idle) {
				T_QUIET; T_EXPECT_TRUE(chose_the_preferred_pset, "Should always choose the preferred pset if idle %s",
				    sched_policy_dump_metadata());
			}

			/* Thread generally should not avoid a processor in its chosen pset */
			for (int c = 0; c < topo.psets[chosen_pset].num_cpus; c++) {
				int avoid_cpu_id = pset_id_to_cpu_id(chosen_pset) + c;
				sched_policy_push_metadata("avoid_cpu_id", avoid_cpu_id);
				ret = thread_avoid_processor_expect(thread, avoid_cpu_id, false, false);
				T_QUIET; T_EXPECT_TRUE(ret, "Thread should not want to leave processor in just chosen pset %s",
				    sched_policy_dump_metadata());
				sched_policy_pop_metadata();
			}

			/* Extra assertions we can make based on the preferred pset being idle */
			if (preferred_is_idle) {
				/* Thread should avoid processor in non-preferred pset to get to the idle preferred pset */
				for (int c = 0; c < topo.total_cpus; c++) {
					if (cpu_id_to_pset_id(c) != preferred_pset_id) {
						sched_policy_push_metadata("avoid_non_preferred_cpu_id", c);
						ret = thread_avoid_processor_expect(thread, c, false, true);
						T_QUIET; T_EXPECT_TRUE(ret, "Thread should avoid processor in non-preferred pset to get to idle "
						    "preferred pset %s", sched_policy_dump_metadata());
						sched_policy_pop_metadata();
					}
				}
			}

			/* Other cores should not want to rebalance the running thread away from its chosen pset */
			int chosen_cpu = pset_id_to_cpu_id(chosen_pset);
			cpu_set_thread_current(chosen_cpu, thread);
			for (int c = 0; c < topo.total_cpus; c++) {
				if ((cpu_id_to_pset_id(c) != chosen_pset) && (cpu_id_to_pset_id(c) != loaded_pset_id)) {
					sched_policy_push_metadata("stealing_cpu_id", c);
					thread_t stolen_thread = cpu_steal_thread(c);
					if (stolen_thread != NULL) {
						T_QUIET; T_EXPECT_NE(stolen_thread, thread, "Should not steal back thread from its chosen_pset %s",
						    sched_policy_dump_metadata());
						if (stolen_thread != thread) {
							/* Put back the stolen load thread */
							enqueue_thread(pset_target(loaded_pset_id), stolen_thread);
						}
					}
					bool want_rebalance = cpu_processor_balance(c);
					T_QUIET; T_EXPECT_FALSE(want_rebalance, "Should not rebalance thread away from its chosen_pset %s",
					    sched_policy_dump_metadata());
					sched_policy_pop_metadata();
				}
			}

			(void)dequeue_threads_expect_ordered_arr(pset_target(loaded_pset_id), max_load_threads, load_threads);
			for (int pset = 0; pset < topo.num_psets; pset++) {
				T_QUIET; T_EXPECT_TRUE(runqueue_empty(pset_target(pset)), "pset %d wasn't cleared at the end of test "
				    "scenario %s", pset, sched_policy_dump_metadata());
			}
			sched_policy_pop_metadata();
		}
		sched_policy_pop_metadata();
	}
	SCHED_POLICY_PASS("Policy is harmonious on the subject of a thread's chosen pset");
}

SCHED_POLICY_T_DECL(migration_search_order,
    "Verify that we iterate psets for spill and steal in the expected order")
{
	int ret;
	init_migration_harness(dual_die);
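	/*
	 * Expected traversal order starting from each source pset (row index),
	 * per the policy checked below: migration weight, then locality, then pset id.
	 */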
	int expected_orders[6][6] = {
		{0, 3, 1, 2, 4, 5},
		{1, 2, 4, 5, 0, 3},
		{2, 1, 4, 5, 0, 3},
		{3, 0, 4, 5, 1, 2},
		{4, 5, 1, 2, 3, 0},
		{5, 4, 1, 2, 3, 0},
	};
	for (int src_pset_id = 0; src_pset_id < dual_die.num_psets; src_pset_id++) {
		ret = iterate_pset_search_order_expect(src_pset_id, UINT64_MAX, 0, expected_orders[src_pset_id], dual_die.num_psets);
		T_QUIET; T_EXPECT_EQ(ret, -1, "Mismatched search order at ind %d for src_pset_id %d",
		    ret, src_pset_id);
	}
	SCHED_POLICY_PASS("Search order sorts on migration weight, then locality, then pset id");
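	/* Repeat with candidate masks: p_mask limits the search to psets 1, 2, 4, and 5; excluded psets show up as -1 */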
	uint64_t p_mask = 0b110110;
	int expected_p_orders[6][6] = {
		{1, 2, 4, 5, -1, -1},
		{1, 2, 4, 5, -1, -1},
		{2, 1, 4, 5, -1, -1},
		{4, 5, 1, 2, -1, -1},
		{4, 5, 1, 2, -1, -1},
		{5, 4, 1, 2, -1, -1},
	};
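	/* e_mask limits the search to psets 0 and 3 */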
	uint64_t e_mask = 0b001001;
	int expected_e_orders[6][6] = {
		{0, 3, -1, -1, -1, -1},
		{0, 3, -1, -1, -1, -1},
		{0, 3, -1, -1, -1, -1},
		{3, 0, -1, -1, -1, -1},
		{3, 0, -1, -1, -1, -1},
		{3, 0, -1, -1, -1, -1},
	};
	for (int i = 0; i < 2; i++) {
		for (int src_pset_id = 0; src_pset_id < dual_die.num_psets; src_pset_id++) {
			uint64_t mask = (i == 0) ? p_mask : e_mask;
			int *expected_order_masked = (i == 0) ? expected_p_orders[src_pset_id] : expected_e_orders[src_pset_id];
			ret = iterate_pset_search_order_expect(src_pset_id, mask, 0, expected_order_masked, dual_die.num_psets);
			T_QUIET; T_EXPECT_EQ(ret, -1, "Mismatched masked search order at ind %d for src_pset_id %d",
			    ret, src_pset_id);
		}
	}
	SCHED_POLICY_PASS("Search order traversal respects candidate mask");
}
503