// Copyright (c) 2024 Apple Inc. All rights reserved.

#include "sched_test_harness/sched_policy_darwintest.h"
#include "sched_test_harness/sched_edge_harness.h"

T_GLOBAL_META(T_META_NAMESPACE("xnu.scheduler"),
    T_META_RADAR_COMPONENT_NAME("xnu"),
    T_META_RADAR_COMPONENT_VERSION("scheduler"),
    T_META_RUN_CONCURRENTLY(true),
    T_META_OWNER("m_zinn"));

static mach_timebase_info_data_t timebase_info;

/* Convert nanoseconds to mach absolute time units (abs = nanos * denom / numer). */
uint64_t
nanos_to_abs(uint64_t nanos)
{
	if (timebase_info.numer == 0 || timebase_info.denom == 0) {
		kern_return_t kr = mach_timebase_info(&timebase_info);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
	}
	return nanos * timebase_info.denom / timebase_info.numer;
}

SCHED_POLICY_T_DECL(rt_migration_cluster_bound,
    "Verify that cluster-bound realtime threads always choose the bound "
    "cluster except when it is derecommended")
{
	int ret;
	init_migration_harness(dual_die);
	struct thread_group *tg = create_tg(0);
	test_thread_t threads[dual_die.num_psets];
	for (int i = 0; i < dual_die.num_psets; i++) {
		threads[i] = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
		set_thread_cluster_bound(threads[i], i);
	}
	for (int i = 0; i < dual_die.num_psets; i++) {
		set_current_processor(pset_id_to_cpu_id(i));
		for (int j = 0; j < dual_die.num_psets; j++) {
			ret = choose_pset_for_thread_expect(threads[j], j);
			T_QUIET; T_EXPECT_TRUE(ret, "Expecting the bound cluster");
		}
	}
	SCHED_POLICY_PASS("Cluster bound chooses bound cluster");

	/* Derecommend the bound cluster */
	for (int i = 0; i < dual_die.num_psets; i++) {
		set_pset_derecommended(i);
		int replacement_pset = -1;
		for (int j = 0; j < dual_die.num_psets; j++) {
			/* Find the first other cluster of the same CPU type; since it is idle, it should be chosen instead. */
			if ((i != j) && (dual_die.psets[i].cpu_type == dual_die.psets[j].cpu_type)) {
				replacement_pset = j;
				break;
			}
		}
		ret = choose_pset_for_thread_expect(threads[i], replacement_pset);
		T_QUIET; T_EXPECT_TRUE(ret, "Expecting the idle pset when the bound cluster is derecommended");
		/* Restore pset conditions */
		set_pset_recommended(i);
	}
	SCHED_POLICY_PASS("Cluster binding is soft");
}
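/*
 * The migration tests below all run on the harness's dual_die test topology.
 * As assumed by the spill and steal checks in this file, that topology
 * provides multiple clusters of each performance type, spread across two
 * dies, so a same-type spill target always exists.
 */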
SCHED_POLICY_T_DECL(rt_choose_processor,
    "Verify the realtime spill policy")
{
	int ret;
	test_hw_topology_t topo = dual_die;
	init_migration_harness(topo);

	uint64_t start = mach_absolute_time();
	const uint64_t period = 0;
	const uint64_t computation = nanos_to_abs(5000000ULL);  /* 5ms */
	const uint64_t constraint = nanos_to_abs(10000000ULL);  /* 10ms */
	const bool preemptible = false;
	const uint8_t priority_offset = 0;

	struct thread_group *tg = create_tg(0);
	thread_t thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
	set_thread_sched_mode(thread, TH_MODE_REALTIME);
	const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(10000000ULL /* 10ms */));
	set_thread_realtime(thread, period, computation, constraint, preemptible, priority_offset, deadline);

	test_thread_t earlier_threads[topo.total_cpus];
	for (int i = 0; i < topo.total_cpus; i++) {
		earlier_threads[i] = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
		set_thread_sched_mode(earlier_threads[i], TH_MODE_REALTIME);
		const uint64_t early_deadline = rt_deadline_add(start, nanos_to_abs(5000000ULL /* 5ms */));
		set_thread_realtime(earlier_threads[i], period, computation, constraint, preemptible, priority_offset, early_deadline);
	}

	test_thread_t later_thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
	set_thread_sched_mode(later_thread, TH_MODE_REALTIME);
	const uint64_t late_deadline = rt_deadline_add(start, nanos_to_abs(20000000ULL /* 20ms */));
	set_thread_realtime(later_thread, period, computation, constraint, preemptible, priority_offset, late_deadline);

	for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
		set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_FIXPRI, preferred_pset_id);
		sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);

		/* Unloaded system. Expect to choose the preferred pset. */
		ret = choose_pset_for_thread_expect(thread, preferred_pset_id);
		T_QUIET; T_EXPECT_TRUE(ret, "chose the preferred pset on an unloaded system");

		/*
		 * Load the preferred pset with earlier-deadline threads. Should cause
		 * the thread to spill (since the die has multiple clusters of each
		 * performance type).
		 */
		for (int i = 0; i < topo.psets[preferred_pset_id].num_cpus; i++) {
			int cpu_id = pset_id_to_cpu_id(preferred_pset_id) + i;
			cpu_set_thread_current(cpu_id, earlier_threads[i]);
		}
		int chosen = choose_pset_for_thread(thread);
		T_QUIET; T_EXPECT_GE(chosen, 0, "chose a valid cluster");
		T_QUIET; T_EXPECT_NE(chosen, preferred_pset_id, "chose an unloaded cluster");
		T_QUIET; T_EXPECT_EQ(topo.psets[chosen].cpu_type, topo.psets[preferred_pset_id].cpu_type,
		    "chose a pset of the same performance type");

		/* Replace the first earlier-deadline thread with a later-deadline thread. Should cause the thread to preempt. */
		cpu_set_thread_current(pset_id_to_cpu_id(preferred_pset_id), later_thread);
		chosen = choose_pset_for_thread(thread);
		T_QUIET; T_EXPECT_EQ(chosen, preferred_pset_id, "preempting later-deadline thread");

		/* Load all psets of the same performance type with early-deadline threads. Expect the preferred pset to be chosen. */
		for (int i = 0; i < topo.num_psets; i++) {
			if (topo.psets[i].cpu_type != topo.psets[preferred_pset_id].cpu_type) {
				continue;
			}
			for (int j = 0; j < topo.psets[i].num_cpus; j++) {
				int cpu_id = pset_id_to_cpu_id(i) + j;
				cpu_set_thread_current(cpu_id, earlier_threads[cpu_id]);
			}
		}
		ret = choose_pset_for_thread_expect(thread, preferred_pset_id);
		T_QUIET; T_EXPECT_TRUE(ret, "chose the preferred pset when all same-type psets are loaded");

		/* Clean up */
		for (int i = 0; i < topo.total_cpus; i++) {
			cpu_clear_thread_current(i);
		}
		sched_policy_pop_metadata(/* preferred_pset_id */);
	}
	SCHED_POLICY_PASS("sched_rt_choose_processor selects the right pset");
}
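/*
 * The search orders checked below follow increasing sce_migration_weight,
 * with weight ties broken by die locality (the deliberately tied weights
 * configured in the test exercise that tie-break).
 */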
SCHED_POLICY_T_DECL(rt_spill_order,
    "Verify computed realtime spill orders.")
{
	init_migration_harness(dual_die);

	/* Test setup: reset all edges. */
	for (uint src_id = 0; src_id < dual_die.num_psets; src_id++) {
		for (uint dst_id = 0; dst_id < dual_die.num_psets; dst_id++) {
			sched_rt_config_set(src_id, dst_id, (sched_clutch_edge) {});
		}
	}

	/* First test: create edges from pset 5 to psets 0-3. */
	for (unsigned i = 0; i < 4; i++) {
		sched_rt_config_set(5, i, (sched_clutch_edge) {
			.sce_migration_allowed = 1,
			.sce_steal_allowed = 0,
			.sce_migration_weight = i % 3 /* create ties to test die-locality */
		});
	}
	/* Disallow spill from 5 to 4, despite being the same perf level. */
	sched_rt_config_set(5, 4, (sched_clutch_edge) {
		.sce_migration_allowed = 0,
		.sce_steal_allowed = 0,
		.sce_migration_weight = 0
	});
	rt_pset_recompute_spill_order(5);
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 0), 3, "spso_search_order[0] == 3");
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 1), 0, "spso_search_order[1] == 0");
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 2), 1, "spso_search_order[2] == 1");
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 3), 2, "spso_search_order[3] == 2");
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 4), PSET_ID_INVALID, "spso_search_order[4] == PSET_ID_INVALID");

	/* Second test: create edges from pset 0 to psets 1, 2, 4, and 5. */
	sched_rt_config_set(0, 1, (sched_clutch_edge) {
		.sce_migration_allowed = 1,
		.sce_steal_allowed = 0,
		.sce_migration_weight = 2
	});
	sched_rt_config_set(0, 2, (sched_clutch_edge) {
		.sce_migration_allowed = 1,
		.sce_steal_allowed = 0,
		.sce_migration_weight = 1
	});
	sched_rt_config_set(0, 4, (sched_clutch_edge) {
		.sce_migration_allowed = 1,
		.sce_steal_allowed = 0,
		.sce_migration_weight = 0
	});
	sched_rt_config_set(0, 5, (sched_clutch_edge) {
		.sce_migration_allowed = 1,
		.sce_steal_allowed = 0,
		.sce_migration_weight = 1
	});
	rt_pset_recompute_spill_order(0);
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 0), 4, "spso_search_order[0] == 4");
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 1), 2, "spso_search_order[1] == 2");
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 2), 5, "spso_search_order[2] == 5");
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 3), 1, "spso_search_order[3] == 1");
	T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 4), PSET_ID_INVALID, "spso_search_order[4] == PSET_ID_INVALID");

	SCHED_POLICY_PASS("Realtime spill orders are computed correctly.");
}

SCHED_POLICY_T_DECL(rt_thread_avoid_processor,
    "Verify that thread_avoid_processor is correct for realtime threads")
{
	int ret;
	test_hw_topology_t topo = dual_die;
	init_migration_harness(topo);
	struct thread_group *tg = create_tg(0);
	thread_t thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);

	/* Iterate conditions with different preferred psets and pset loads */
	for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
		set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_FIXPRI, preferred_pset_id);
		sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);

		/* Where the thread proactively wants to go */
		int chosen_pset = choose_pset_for_thread(thread);
		T_QUIET; T_EXPECT_EQ(preferred_pset_id, chosen_pset,
		    "Thread should choose unloaded preferred pset %s", sched_policy_dump_metadata());

		/* Thread generally should not avoid a processor in its chosen pset */
		for (int c = 0; c < topo.psets[chosen_pset].num_cpus; c++) {
			int avoid_cpu_id = pset_id_to_cpu_id(chosen_pset) + c;
			sched_policy_push_metadata("avoid_cpu_id", avoid_cpu_id);
			ret = thread_avoid_processor_expect(thread, avoid_cpu_id, false, false);
			T_QUIET; T_EXPECT_TRUE(ret,
			    "Thread should not want to leave a processor in its just-chosen pset %s", sched_policy_dump_metadata());
			sched_policy_pop_metadata();
		}

		/* Thread should avoid a processor if it is not allowed to run on that pset */
		for (int c = 0; c < topo.total_cpus; c++) {
			sched_clutch_edge edge = sched_rt_config_get(preferred_pset_id, cpu_id_to_pset_id(c));
			if (cpu_id_to_pset_id(c) != preferred_pset_id &&
			    !(edge.sce_migration_allowed || edge.sce_steal_allowed)) {
				sched_policy_push_metadata("avoid_non_preferred_cpu_id", c);
				ret = thread_avoid_processor_expect(thread, c, false, true);
				T_QUIET; T_EXPECT_TRUE(ret,
				    "Thread should avoid processor in non-preferred pset to get to idle "
				    "preferred pset %s", sched_policy_dump_metadata());
				sched_policy_pop_metadata();
			}
		}
		sched_policy_pop_metadata();
	}
	SCHED_POLICY_PASS("thread_avoid_processor works for realtime threads");
}
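/*
 * Helpers for the thread-selection and followup-IPI tests below: create a
 * realtime thread with a fixed 5ms computation / 10ms constraint and the
 * given absolute deadline, and optionally park one such thread on every CPU.
 */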
static thread_t
create_realtime_thread_with_deadline(uint64_t deadline)
{
	test_thread_t thread = create_thread(
		TH_BUCKET_FIXPRI,
		create_tg(0) /* realtime policies don't consider thread groups */,
		BASEPRI_RTQUEUES);
	set_thread_sched_mode(thread, TH_MODE_REALTIME);
	/* Callers pass an absolute-time deadline, so it is not converted again here. */
	set_thread_realtime(
		thread,
		0,                                               /* period */
		(uint32_t) nanos_to_abs(5000000ULL /* 5ms */),   /* computation */
		(uint32_t) nanos_to_abs(10000000ULL /* 10ms */), /* constraint */
		false,                                           /* preemptible */
		0,                                               /* priority offset */
		deadline);
	return thread;
}

static void
fill_all_cpus_with_realtime_threads(uint64_t deadline)
{
	for (int i = 0; i < get_hw_topology().total_cpus; i++) {
		cpu_set_thread_current(i, create_realtime_thread_with_deadline(deadline));
	}
}

SCHED_POLICY_T_DECL(rt_choose_thread,
    "Verify realtime thread selection policy and mechanism")
{
	int ret;
	test_hw_topology_t topo = dual_die;
	init_migration_harness(topo);

	const uint64_t start = mach_absolute_time();
	const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(5000000));       /* start + 5ms */
	const uint64_t later_deadline = rt_deadline_add(start, nanos_to_abs(6000000)); /* start + 6ms */
	fill_all_cpus_with_realtime_threads(later_deadline);

	/* These threads will be placed on the stealing pset's runqueue: */
	test_thread_t later_deadline_thread = create_realtime_thread_with_deadline(later_deadline);
	test_thread_t earlier_deadline_thread = create_realtime_thread_with_deadline(deadline);
	/* And this thread will be on another runqueue: */
	test_thread_t stealable_thread = create_realtime_thread_with_deadline(deadline);

	/*
	 * Check that sched_rt_choose_thread obeys the steal policies configured by
	 * the realtime matrix. A pset should only steal if the thread's deadline
	 * is earlier than that of any thread on the pset's runqueue.
	 */
	for (uint stealing_pset_id = 0; stealing_pset_id < topo.num_psets; stealing_pset_id++) {
		sched_policy_push_metadata("stealing_pset", stealing_pset_id);
		for (uint off = 1; off < topo.num_psets; off++) {
			uint other_pset_id = (stealing_pset_id + off) % topo.num_psets;
			sched_policy_push_metadata("other_pset", other_pset_id);

			enqueue_thread(pset_target(other_pset_id), stealable_thread);
			enqueue_thread(pset_target(stealing_pset_id), earlier_deadline_thread);
			ret = dequeue_thread_expect(pset_target(stealing_pset_id), earlier_deadline_thread);
			T_QUIET; T_ASSERT_TRUE(ret, "when deadlines are equal, prefer thread from local runqueue %s",
			    sched_policy_dump_metadata());

			enqueue_thread(pset_target(stealing_pset_id), later_deadline_thread);
			if (topo.psets[other_pset_id].cpu_type == topo.psets[stealing_pset_id].cpu_type) {
				T_QUIET; T_ASSERT_TRUE(sched_rt_config_get(other_pset_id, stealing_pset_id).sce_steal_allowed,
				    "steal allowed between psets of the same type %s", sched_policy_dump_metadata());
				ret = dequeue_thread_expect(pset_target(stealing_pset_id), stealable_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "steal because the other pset has an earlier-deadline thread %s",
				    sched_policy_dump_metadata());
				ret = dequeue_thread_expect(pset_target(stealing_pset_id), later_deadline_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "take thread from local runqueue because no earlier-deadline threads on other psets %s",
				    sched_policy_dump_metadata());
			} else {
				T_QUIET; T_ASSERT_FALSE(sched_rt_config_get(other_pset_id, stealing_pset_id).sce_steal_allowed,
				    "steal disallowed between psets of different types %s", sched_policy_dump_metadata());
				ret = dequeue_thread_expect(pset_target(stealing_pset_id), later_deadline_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "take later-deadline thread because policy disallows steal %s",
				    sched_policy_dump_metadata());
				ret = dequeue_thread_expect(pset_target(other_pset_id), stealable_thread);
				T_QUIET; T_ASSERT_TRUE(ret, "removed stealable thread %s", sched_policy_dump_metadata());
			}
			sched_policy_pop_metadata(/* other_pset */);
		}
		sched_policy_pop_metadata(/* stealing_pset */);
	}
	SCHED_POLICY_PASS("Verified realtime thread selection");
}

/*
 * A deferred followup IPI is only expected below when the idle CPU's pset has
 * the same performance type as the loaded CPU's pset, consistent with the
 * steal policy asserted in rt_choose_thread above.
 */
SCHED_POLICY_T_DECL(rt_followup_ipi,
    "Verify that followup IPIs are sent when there are stealable realtime "
    "threads and idle processors")
{
	int ret;
	test_hw_topology_t topo = dual_die;
	init_migration_harness(topo);

	const uint64_t start = mach_absolute_time();
	const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(5000000)); /* start + 5ms */
	fill_all_cpus_with_realtime_threads(deadline);

	/* This thread is used to load a runqueue. */
	test_thread_t thread = create_realtime_thread_with_deadline(deadline);

	for (int target_cpu = 0; target_cpu < topo.total_cpus; target_cpu++) {
		sched_policy_push_metadata("target_cpu", target_cpu);
		for (int idle_cpu = 0; idle_cpu < topo.total_cpus; idle_cpu++) {
			if (target_cpu == idle_cpu) {
				continue;
			}
			sched_policy_push_metadata("idle_cpu", idle_cpu);

			enqueue_thread(cpu_target(target_cpu), thread);
			test_thread_t saved_idle_thread = cpu_clear_thread_current(idle_cpu);
			/* idle_cpu is now "idle"; simulate thread_select() on target_cpu: */
			cpu_set_thread_current(target_cpu, cpu_clear_thread_current(target_cpu));
			/* That should result in a deferred followup IPI, if spill is allowed between target_cpu and idle_cpu. */
			if (topo.psets[cpu_id_to_pset_id(idle_cpu)].cpu_type == topo.psets[cpu_id_to_pset_id(target_cpu)].cpu_type) {
				ret = ipi_expect(idle_cpu, TEST_IPI_DEFERRED);
				T_QUIET; T_ASSERT_TRUE(ret, "should send a followup IPI %s", sched_policy_dump_metadata());
			}

			/* Clean up for the next iteration. */
			ret = dequeue_thread_expect(cpu_target(target_cpu), thread);
			T_QUIET; T_ASSERT_TRUE(ret, "cleaning up %s", sched_policy_dump_metadata());
			cpu_set_thread_current(idle_cpu, saved_idle_thread);
			sched_policy_pop_metadata(/* idle_cpu */);
		}
		sched_policy_pop_metadata(/* target_cpu */);
	}
	SCHED_POLICY_PASS("Realtime followup IPIs work");
}