// Copyright (c) 2024 Apple Inc. All rights reserved.

#include <stdint.h>
#include <stdbool.h>

/* Edge shares some of its implementation with the Clutch scheduler */
#include "sched_clutch_harness_impl.c"

/* Machine-layer mocking */

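/*
 * current_processor() and the ml_* functions below mock machine-layer
 * interfaces, answering CPU and topology queries from the harness's
 * mocked state (cpus, curr_hw_topo, and the per-type counters) instead
 * of real hardware.
 */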
processor_t
current_processor(void)
{
	return cpus[_curr_cpu];
}

unsigned int
ml_get_die_id(unsigned int cluster_id)
{
	return curr_hw_topo.psets[cluster_id].die_id;
}

uint64_t
ml_cpu_signal_deferred_get_timer(void)
{
	/* Matching deferred_ipi_timer_ns */
	return 64 * NSEC_PER_USEC;
}

static test_cpu_type_t
cluster_type_to_test_cpu_type(cluster_type_t cluster_type)
{
	switch (cluster_type) {
	case CLUSTER_TYPE_E:
		return TEST_CPU_TYPE_EFFICIENCY;
	case CLUSTER_TYPE_P:
		return TEST_CPU_TYPE_PERFORMANCE;
	default:
		assert(false);
	}
}

static unsigned int cpu_count_for_type[TEST_CPU_TYPE_MAX] = { 0 };
static unsigned int recommended_cpu_count_for_type[TEST_CPU_TYPE_MAX] = { 0 };

unsigned int
ml_get_cpu_number_type(cluster_type_t cluster_type, bool logical, bool available)
{
	(void)logical;
	if (available) {
		return recommended_cpu_count_for_type[cluster_type_to_test_cpu_type(cluster_type)];
	} else {
		return cpu_count_for_type[cluster_type_to_test_cpu_type(cluster_type)];
	}
}

static unsigned int cluster_count_for_type[TEST_CPU_TYPE_MAX] = { 0 };

unsigned int
ml_get_cluster_number_type(cluster_type_t cluster_type)
{
	return cluster_count_for_type[cluster_type_to_test_cpu_type(cluster_type)];
}

int sched_amp_spill_deferred_ipi = 1;
int sched_amp_pcores_preempt_immediate_ipi = 1;

/* Implementation of sched_runqueue_harness.h interface */

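/*
 * Canned test topologies. basic_amp models a single die with a 2-CPU
 * P-cluster and a 4-CPU E-cluster (6 CPUs total). dual_die models two
 * dies, each with a 2-CPU E-cluster and two 4-CPU P-clusters (20 CPUs
 * total).
 */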
static test_pset_t basic_amp_psets[2] = {
	{
		.cpu_type = TEST_CPU_TYPE_PERFORMANCE,
		.num_cpus = 2,
		.cluster_id = 0,
		.die_id = 0,
	},
	{
		.cpu_type = TEST_CPU_TYPE_EFFICIENCY,
		.num_cpus = 4,
		.cluster_id = 1,
		.die_id = 0,
	},
};
test_hw_topology_t basic_amp = {
	.psets = &basic_amp_psets[0],
	.num_psets = 2,
	.total_cpus = 6,
};

static test_pset_t dual_die_psets[6] = {
	{
		.cpu_type = TEST_CPU_TYPE_EFFICIENCY,
		.num_cpus = 2,
		.cluster_id = 0,
		.die_id = 0,
	},
	{
		.cpu_type = TEST_CPU_TYPE_PERFORMANCE,
		.num_cpus = 4,
		.cluster_id = 1,
		.die_id = 0,
	},
	{
		.cpu_type = TEST_CPU_TYPE_PERFORMANCE,
		.num_cpus = 4,
		.cluster_id = 2,
		.die_id = 0,
	},
	{
		.cpu_type = TEST_CPU_TYPE_EFFICIENCY,
		.num_cpus = 2,
		.cluster_id = 3,
		.die_id = 1,
	},
	{
		.cpu_type = TEST_CPU_TYPE_PERFORMANCE,
		.num_cpus = 4,
		.cluster_id = 4,
		.die_id = 1,
	},
	{
		.cpu_type = TEST_CPU_TYPE_PERFORMANCE,
		.num_cpus = 4,
		.cluster_id = 5,
		.die_id = 1,
	},
};
test_hw_topology_t dual_die = {
	.psets = &dual_die_psets[0],
	.num_psets = 6,
	.total_cpus = 20,
};

#define MAX_NODES 2

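/*
 * Attach a pset to the AMP node for its CPU type: pset_nodes[0] collects
 * the E-clusters and pset_nodes[1] the P-clusters, matching the two-node
 * (MAX_NODES) AMP layout this harness models.
 */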
static void
edge_impl_set_cluster_type(processor_set_t pset, test_cpu_type_t type)
{
	switch (type) {
	case TEST_CPU_TYPE_EFFICIENCY:
		pset->pset_cluster_type = PSET_AMP_E;
		pset->node = &pset_nodes[0];
		bitmap_set(&pset_nodes[0].pset_map, pset->pset_cluster_id);
		break;
	case TEST_CPU_TYPE_PERFORMANCE:
		pset->pset_cluster_type = PSET_AMP_P;
		pset->node = &pset_nodes[1];
		bitmap_set(&pset_nodes[1].pset_map, pset->pset_cluster_id);
		break;
	default:
		assert(false);
		break;
	}
}

struct mock_topology_info_struct mock_topology_info;

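/*
 * Bring up the Edge runqueues for curr_hw_topo: initialize the shared
 * Clutch topology state, run sched_edge_init(), wire each pset into its
 * AMP node, then initialize every pset and CPU before signaling
 * completion to the Edge and realtime layers.
 */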
static void
edge_impl_init_runqueues(void)
{
	assert(curr_hw_topo.num_psets != 0);
	clutch_impl_init_topology(curr_hw_topo);
	mock_topology_info.num_cpus = curr_hw_topo.total_cpus;
	sched_edge_init();
	bzero(pset_nodes, sizeof(pset_nodes));
	pset_nodes[0].pset_cluster_type = PSET_AMP_E;
	pset_nodes[1].pset_cluster_type = PSET_AMP_P;
	for (int i = 0; i < MAX_NODES; i++) {
		os_atomic_store(&pset_nodes[i].pset_recommended_map, 0, relaxed);
	}
	for (int i = 0; i < curr_hw_topo.num_psets; i++) {
		pset_array[i] = psets[i];
		edge_impl_set_cluster_type(psets[i], curr_hw_topo.psets[i].cpu_type);
		sched_edge_pset_init(psets[i]);
		bzero(&psets[i]->pset_load_average, sizeof(psets[i]->pset_load_average));
		bzero(&psets[i]->pset_execution_time, sizeof(psets[i]->pset_execution_time));
		assert(psets[i]->cpu_bitmask != 0);
		psets[i]->foreign_psets[0] = 0;
		psets[i]->native_psets[0] = 0;
		psets[i]->local_psets[0] = 0;
		psets[i]->remote_psets[0] = 0;
		cluster_count_for_type[curr_hw_topo.psets[i].cpu_type]++;
		cpu_count_for_type[curr_hw_topo.psets[i].cpu_type] += curr_hw_topo.psets[i].num_cpus;
		/* impl_set_pset_recommended() adds the pset's CPUs to the recommended counts */
		impl_set_pset_recommended(i);
		psets[i]->cpu_running_foreign = 0;
		for (uint state = 0; state < PROCESSOR_STATE_LEN; state++) {
			psets[i]->cpu_state_map[state] = 0;
		}
		/* Initialize realtime queues */
		pset_rt_init(psets[i]);
	}
	for (unsigned int j = 0; j < processor_avail_count; j++) {
		processor_array[j] = cpus[j];
		sched_clutch_processor_init(cpus[j]);
		os_atomic_store(&cpus[j]->stir_the_pot_inbox_cpu, -1, relaxed);
	}
	sched_edge_cpu_init_completed();
	sched_rt_init_completed();
	increment_mock_time(100);
	clutch_impl_init_params();
	clutch_impl_init_tracepoints();
}

void
impl_init_runqueue(void)
{
	assert(curr_hw_topo.num_psets == 0);
	curr_hw_topo = single_core;
	edge_impl_init_runqueues();
}

void
impl_init_migration_harness(test_hw_topology_t hw_topology)
{
	assert(curr_hw_topo.num_psets == 0);
	curr_hw_topo = hw_topology;
	edge_impl_init_runqueues();
}

struct thread_group *
impl_create_tg(int interactivity_score)
{
	return clutch_impl_create_tg(interactivity_score);
}

test_thread_t
impl_create_thread(int root_bucket, struct thread_group *tg, int pri)
{
	return clutch_impl_create_thread(root_bucket, tg, pri);
}

void
impl_set_thread_processor_bound(test_thread_t thread, int cpu_id)
{
	_curr_cpu = cpu_id;
	clutch_impl_set_thread_processor_bound(thread, cpu_id);
}

void
impl_set_thread_cluster_bound(test_thread_t thread, int cluster_id)
{
	/* The thread should not already be enqueued */
	assert(thread_get_runq_locked((thread_t)thread) == NULL);
	((thread_t)thread)->th_bound_cluster_id = cluster_id;
}

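/*
 * A CPU is "running foreign" when a non-realtime, unbound thread is
 * on-core in a cluster whose type differs from the cluster type the
 * Edge scheduler prefers for that thread.
 */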
static void
processor_state_update_running_foreign(processor_t processor, thread_t thread)
{
	cluster_type_t current_processor_type = pset_type_for_id(processor->processor_set->pset_cluster_id);
	cluster_type_t thread_type = pset_type_for_id(sched_edge_thread_preferred_cluster(thread));

	boolean_t non_rt_thr = (processor->current_pri < BASEPRI_RTQUEUES);
	boolean_t non_bound_thr = (thread->bound_processor == PROCESSOR_NULL);
	if (non_rt_thr && non_bound_thr && (current_processor_type != thread_type)) {
		bit_set(processor->processor_set->cpu_running_foreign, processor->cpu_id);
	} else {
		bit_clear(processor->processor_set->cpu_running_foreign, processor->cpu_id);
	}
}

void
impl_cpu_set_thread_current(int cpu_id, test_thread_t thread)
{
	_curr_cpu = cpu_id;
	processor_set_t pset = cpus[cpu_id]->processor_set;
	clutch_impl_cpu_set_thread_current(cpu_id, thread);
	processor_state_update_running_foreign(cpus[cpu_id], (thread_t)thread);
	pset_update_processor_state(pset, cpus[cpu_id], PROCESSOR_RUNNING);
	sched_bucket_t bucket = ((((thread_t)thread)->state & TH_IDLE) ||
	    (((thread_t)thread)->bound_processor != PROCESSOR_NULL)) ?
	    TH_BUCKET_SCHED_MAX : ((thread_t)thread)->th_sched_bucket;
	os_atomic_store(&cpus[cpu_id]->processor_set->cpu_running_buckets[cpu_id], bucket, relaxed);
	sched_edge_stir_the_pot_update_registry_state((thread_t)thread);

	/* Send followup IPIs for realtime, as needed */
	bit_clear(pset->rt_pending_spill_cpu_mask, cpu_id);
	processor_t next_rt_processor = PROCESSOR_NULL;
	sched_ipi_type_t next_rt_ipi_type = SCHED_IPI_NONE;
	if (rt_pset_has_stealable_threads(pset)) {
		rt_choose_next_processor_for_spill_IPI(pset, cpus[cpu_id], &next_rt_processor, &next_rt_ipi_type);
	} else if (rt_pset_needs_a_followup_IPI(pset)) {
		rt_choose_next_processor_for_followup_IPI(pset, cpus[cpu_id], &next_rt_processor, &next_rt_ipi_type);
	}
	if (next_rt_processor != PROCESSOR_NULL) {
		sched_ipi_perform(next_rt_processor, next_rt_ipi_type);
	}
}

test_thread_t
impl_cpu_clear_thread_current(int cpu_id)
{
	_curr_cpu = cpu_id;
	test_thread_t thread = clutch_impl_cpu_clear_thread_current(cpu_id);
	pset_update_processor_state(cpus[cpu_id]->processor_set, cpus[cpu_id], PROCESSOR_IDLE);
	os_atomic_store(&cpus[cpu_id]->processor_set->cpu_running_buckets[cpu_id], TH_BUCKET_SCHED_MAX, relaxed);
	sched_edge_stir_the_pot_clear_registry_entry();
	return thread;
}

void
impl_cpu_enqueue_thread(int cpu_id, test_thread_t thread)
{
	_curr_cpu = cpu_id;
	if (((thread_t)thread)->sched_pri >= BASEPRI_RTQUEUES) {
		rt_runq_insert(cpus[cpu_id], cpus[cpu_id]->processor_set, (thread_t)thread);
	} else {
		sched_clutch_processor_enqueue(cpus[cpu_id], (thread_t)thread, SCHED_TAILQ);
	}
}

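/*
 * Dequeue for a CPU: realtime threads take priority; fall back to the
 * Clutch hierarchy only when the realtime queues have nothing runnable.
 */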
test_thread_t
impl_cpu_dequeue_thread(int cpu_id)
{
	_curr_cpu = cpu_id;
	test_thread_t chosen_thread = sched_rt_choose_thread(cpus[cpu_id]);
	if (chosen_thread != THREAD_NULL) {
		return chosen_thread;
	}
	/* No realtime threads. */
	return sched_clutch_choose_thread(cpus[cpu_id], MINPRI, NULL, 0);
}

test_thread_t
impl_cpu_dequeue_thread_compare_current(int cpu_id)
{
	_curr_cpu = cpu_id;
	assert(cpus[cpu_id]->active_thread != NULL);
	processor_set_t pset = cpus[cpu_id]->processor_set;
	if (rt_runq_count(pset) > 0) {
		return impl_dequeue_realtime_thread(pset);
	} else {
		return sched_clutch_choose_thread(cpus[cpu_id], MINPRI, cpus[cpu_id]->active_thread, 0);
	}
}

bool
impl_processor_csw_check(int cpu_id)
{
	_curr_cpu = cpu_id;
	assert(cpus[cpu_id]->active_thread != NULL);
	ast_t preempt_ast = sched_clutch_processor_csw_check(cpus[cpu_id]);
	return preempt_ast & AST_PREEMPT;
}

void
impl_pop_tracepoint(uint64_t *clutch_trace_code, uint64_t *arg1, uint64_t *arg2,
    uint64_t *arg3, uint64_t *arg4)
{
	clutch_impl_pop_tracepoint(clutch_trace_code, arg1, arg2, arg3, arg4);
}

int
impl_choose_pset_for_thread(test_thread_t thread)
{
	/* Begin the search from the current pset */
	sched_options_t options = SCHED_NONE;
	processor_t chosen_processor = sched_edge_choose_processor(
	    current_processor()->processor_set, current_processor(), (thread_t)thread, &options);
	return chosen_processor->processor_set->pset_id;
}

bool
impl_thread_avoid_processor(test_thread_t thread, int cpu_id, bool quantum_expired)
{
	_curr_cpu = cpu_id;
	return sched_edge_thread_avoid_processor(cpus[cpu_id], (thread_t)thread, quantum_expired ? AST_QUANTUM : AST_NONE);
}

void
impl_cpu_expire_quantum(int cpu_id)
{
	_curr_cpu = cpu_id;
	sched_edge_quantum_expire(cpus[cpu_id]->active_thread);
	cpus[cpu_id]->first_timeslice = FALSE;
}

test_thread_t
impl_steal_thread(int cpu_id)
{
	_curr_cpu = cpu_id;
	return sched_edge_processor_idle(psets[cpu_id_to_pset_id(cpu_id)]);
}

bool
impl_processor_balance(int cpu_id)
{
	_curr_cpu = cpu_id;
	return sched_edge_balance(cpus[cpu_id], psets[cpu_id_to_pset_id(cpu_id)]);
}

void
impl_set_current_processor(int cpu_id)
{
	_curr_cpu = cpu_id;
}

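/*
 * Route one scheduling bucket of a thread group to a preferred cluster:
 * only the requested bucket is marked in the modify bitmap, so the
 * other buckets' preferences are left untouched by
 * sched_edge_update_preferred_cluster().
 */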
void
impl_set_tg_sched_bucket_preferred_pset(struct thread_group *tg, int sched_bucket, int cluster_id)
{
	assert(sched_bucket < TH_BUCKET_SCHED_MAX);
	sched_clutch_t clutch = sched_clutch_for_thread_group(tg);
	bitmap_t modify_bitmap[BITMAP_LEN(TH_BUCKET_SCHED_MAX)] = {0};
	bitmap_set(modify_bitmap, sched_bucket);
	uint32_t tg_bucket_preferred_cluster[TH_BUCKET_SCHED_MAX] = {0};
	tg_bucket_preferred_cluster[sched_bucket] = cluster_id;
	sched_edge_update_preferred_cluster(clutch, modify_bitmap, tg_bucket_preferred_cluster);
}

void
impl_set_pset_load_avg(int cluster_id, int QoS, uint64_t load_avg)
{
	assert(QoS > 0 && QoS < TH_BUCKET_SCHED_MAX);
	pset_array[cluster_id]->pset_load_average[QoS] = load_avg;
}

void
edge_set_thread_shared_rsrc(test_thread_t thread, bool native_first)
{
	int shared_rsrc_type = native_first ? CLUSTER_SHARED_RSRC_TYPE_NATIVE_FIRST :
	    CLUSTER_SHARED_RSRC_TYPE_RR;
	((thread_t)thread)->th_shared_rsrc_heavy_user[shared_rsrc_type] = true;
}

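/*
 * The recommendation toggles below keep recommended_cpu_count_for_type
 * in sync with each pset's recommended_bitmask, so that
 * ml_get_cpu_number_type(..., available == true) reflects
 * derecommended clusters.
 */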
void
impl_set_pset_derecommended(int cluster_id)
{
	processor_set_t pset = pset_array[cluster_id];
	pset->recommended_bitmask = 0;
	atomic_bit_clear(&pset->node->pset_recommended_map, cluster_id, memory_order_relaxed);
	recommended_cpu_count_for_type[cluster_type_to_test_cpu_type(pset->pset_type)] -=
	    bit_count(pset->cpu_bitmask);
}

void
impl_set_pset_recommended(int cluster_id)
{
	processor_set_t pset = pset_array[cluster_id];
	pset->recommended_bitmask = pset->cpu_bitmask;
	atomic_bit_set(&pset->node->pset_recommended_map, cluster_id, memory_order_relaxed);
	recommended_cpu_count_for_type[cluster_type_to_test_cpu_type(pset->pset_type)] +=
	    bit_count(pset->cpu_bitmask);
}

void
impl_pop_ipi(int *cpu_id, test_ipi_type_t *ipi_type)
{
	assert(expect_ipi_ind < curr_ipi_ind);
	*cpu_id = logged_ipis[expect_ipi_ind].cpu_id;
	*ipi_type = (test_ipi_type_t)logged_ipis[expect_ipi_ind].ipi_type;
	expect_ipi_ind++;
}

bool
impl_thread_should_yield(int cpu_id)
{
	_curr_cpu = cpu_id;
	assert(cpus[cpu_id]->active_thread != NULL);
	return sched_edge_thread_should_yield(cpus[cpu_id], cpus[cpu_id]->active_thread);
}

void
impl_send_ipi(int cpu_id, test_thread_t thread, test_ipi_event_t event)
{
	sched_ipi_type_t triggered_ipi = sched_ipi_action(cpus[cpu_id],
	    (thread_t)thread, (sched_ipi_event_t)event);
	sched_ipi_perform(cpus[cpu_id], triggered_ipi);
}

int
rt_pset_spill_search_order_at_offset(int src_pset_id, int offset)
{
	return psets[src_pset_id]->sched_rt_spill_search_order.spso_search_order[offset];
}

void
rt_pset_recompute_spill_order(int src_pset_id)
{
	sched_rt_config_pset_push(psets[src_pset_id]);
}

uint32_t
impl_qos_max_parallelism(int qos, uint64_t options)
{
	return sched_edge_qos_max_parallelism(qos, options);
}

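/*
 * Walk the spill search order for a source pset and sched bucket,
 * recording visited pset ids in order. The returned array has
 * curr_hw_topo.num_psets entries, is padded with -1 past the last
 * visited pset, and is heap-allocated (the caller is expected to free
 * it). Hypothetical usage from a test:
 *
 *	int *order = impl_iterate_pset_search_order(0, candidate_map, TH_BUCKET_SHARE_DF);
 *	for (int i = 0; i < curr_hw_topo.num_psets && order[i] != -1; i++) {
 *		printf("visit pset %d\n", order[i]);
 *	}
 *	free(order);
 */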
int *
impl_iterate_pset_search_order(int src_pset_id, uint64_t candidate_map, int sched_bucket)
{
	/* Named distinctly from the global psets array to avoid shadowing it */
	int *search_order = (int *)malloc(sizeof(int) * curr_hw_topo.num_psets);
	for (int i = 0; i < curr_hw_topo.num_psets; i++) {
		search_order[i] = -1;
	}
	sched_pset_iterate_state_t istate = SCHED_PSET_ITERATE_STATE_INIT;
	int ind = 0;
	processor_set_t starting_pset = pset_array[src_pset_id];
	while (sched_iterate_psets_ordered(starting_pset,
	    &starting_pset->spill_search_order[sched_bucket], candidate_map, &istate)) {
		search_order[ind++] = istate.spis_pset_id;
	}
	return search_order;
}

test_thread_t
impl_rt_choose_thread(int cpu_id)
{
	return sched_rt_choose_thread(cpus[cpu_id]);
}

void
sched_rt_spill_policy_set(unsigned policy)
{
	impl_sched_rt_spill_policy_set(policy);
}

void
sched_rt_steal_policy_set(unsigned policy)
{
	impl_sched_rt_steal_policy_set(policy);
}