/*
 * xnu-11417.140.69/tests/sched/thread_group_fairness.c
 * (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
 */
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <pthread.h>
#include <spawn.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <TargetConditionals.h>
#include <sys/work_interval.h>
#include <sys/stat.h>
#include <sys/sysctl.h>

#include <darwintest.h>
#include <darwintest_utils.h>
#include <perfdata/perfdata.h>
#include "sched_test_utils.h"

#include "thread_group_fairness_workload_config.h"

T_GLOBAL_META(T_META_NAMESPACE("xnu.scheduler"),
    T_META_RADAR_COMPONENT_NAME("xnu"),
    T_META_RADAR_COMPONENT_VERSION("scheduler"),
    T_META_TAG_PERF,
    T_META_TAG_VM_NOT_ELIGIBLE);

static const size_t MAX_PDJ_PATH_LEN = 256;
static unsigned int num_cores;

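/*
 * Push the embedded test workload plist into the kernel via the
 * kern.workload_config sysctl. Skips the test if the sysctl is absent.
 */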
static void
workload_config_load(void)
{
	int ret;
	size_t len = 0;
	ret = sysctlbyname("kern.workload_config", NULL, &len,
	    sched_thread_group_fairness_workload_config_plist,
	    sched_thread_group_fairness_workload_config_plist_len);
	if (ret == -1 && errno == ENOENT) {
		T_SKIP("kern.workload_config not supported on this kernel");
	}
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.workload_config");
}

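/* Clear any workload config previously loaded into the kernel */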
static void
workload_config_cleanup(void)
{
	size_t len = 0;
	sysctlbyname("kern.workload_config", NULL, &len, "", 1);
}

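/*
 * Prepare the machine so both thread groups contend for the same CPUs,
 * then load the workload config that defines the DISCRETIONARY workload.
 */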
static void
environment_init(void)
{
	num_cores = (unsigned int) dt_ncpu();

	if (platform_is_amp()) {
		/*
		 * Derecommend all clusters except the E cores, to ensure that thread groups
		 * compete over the same cores irrespective of CLPC's cluster recommendations.
		 */
		char *clpcctrl_args[] = {"-C", "e", NULL};
		execute_clpcctrl(clpcctrl_args, false);
	}

	/*
	 * Load a test workload plist containing a Workload ID with
	 * WorkloadClass == DISCRETIONARY, in order to mark the thread group
	 * for that workload as THREAD_GROUP_FLAGS_EFFICIENT.
	 */
	T_ATEND(workload_config_cleanup);
	workload_config_load();
}

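/*
 * Adopt a workload ID for the work interval. The hardcoded name is expected
 * to match the DISCRETIONARY Workload ID entry in the embedded plist, which
 * is what marks the associated thread group as efficient (lower priority).
 */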
static void
set_work_interval_id(work_interval_t *handle, uint32_t work_interval_flags)
{
	int ret;
	mach_port_t port = MACH_PORT_NULL;

	ret = work_interval_copy_port(*handle, &port);
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "work_interval_copy_port");

	struct work_interval_workload_id_params wlid_params = {
		.wlidp_flags = WORK_INTERVAL_WORKLOAD_ID_HAS_ID,
		.wlidp_wicreate_flags = work_interval_flags,
		.wlidp_name = (uintptr_t)"com.test.myapp.discretionary",
	};

	ret = __work_interval_ctl(WORK_INTERVAL_OPERATION_SET_WORKLOAD_ID, port, &wlid_params, sizeof(wlid_params));
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "WORK_INTERVAL_OPERATION_SET_WORKLOAD_ID");
}

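/*
 * Create a joinable, group-owning work interval; optionally tag it with a
 * workload ID when WORK_INTERVAL_FLAG_HAS_WORKLOAD_ID is requested.
 */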
static uint32_t
make_work_interval(work_interval_t *handle, uint32_t work_type_flags)
{
	int ret;
	uint32_t work_interval_flags = WORK_INTERVAL_FLAG_JOINABLE | WORK_INTERVAL_FLAG_GROUP | work_type_flags;
	ret = work_interval_create(handle, work_interval_flags);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "work_interval_create");

	if (work_type_flags & WORK_INTERVAL_FLAG_HAS_WORKLOAD_ID) {
		set_work_interval_id(handle, work_interval_flags);
	}
	return work_interval_flags;
}

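/* Per-thread arguments handed to each spinner thread */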
struct thread_data {
	work_interval_t *handle;
	uint32_t work_interval_flags;
};

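/* Thread body: join the work interval's thread group, then spin forever */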
static void *
spin_thread_fn(void *arg)
{
	struct thread_data *info = (struct thread_data *)arg;
	int ret;

	/* Join the thread group associated with the work interval handle */
	ret = work_interval_join(*(info->handle));
	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "work_interval_join");

	/* Spin indefinitely */
	volatile uint64_t spin_count = 0;
	while (mach_absolute_time() < UINT64_MAX) {
		spin_count++;
	}
	return NULL;
}

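/* Spawn one spinner thread per CPU, all joining the given work interval */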
static void
start_threads(pthread_t *threads, struct thread_data *thread_datas, work_interval_t *handle, uint32_t work_interval_flags)
{
	int ret;
	for (unsigned int i = 0; i < num_cores; i++) {
		thread_datas[i].handle = handle;
		thread_datas[i].work_interval_flags = work_interval_flags;
		ret = pthread_create(&threads[i], NULL, spin_thread_fn, &thread_datas[i]);
		T_QUIET; T_ASSERT_POSIX_ZERO(ret, "pthread_create");
	}
}

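/*
 * Sum the user-mode CPU time, in microseconds, consumed so far by all
 * spinner threads in the given array.
 */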
static uint64_t
snapshot_user_time_usec(pthread_t *threads)
{
	kern_return_t kr;
	uint64_t cumulative_user_time_usec = 0;
	for (unsigned int i = 0; i < num_cores; i++) {
		mach_port_t thread_port = pthread_mach_thread_np(threads[i]);
		thread_basic_info_data_t info;
		/* Reset count each iteration, since thread_info() overwrites it */
		mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
		kr = thread_info(thread_port, THREAD_BASIC_INFO, (thread_info_t)&info, &count);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_info");
		uint64_t thread_usr_usec = (uint64_t) (info.user_time.seconds) * USEC_PER_SEC + (uint64_t) info.user_time.microseconds;
		cumulative_user_time_usec += thread_usr_usec;
	}
	return cumulative_user_time_usec;
}

T_DECL(thread_group_fairness,
    "Ensure that thread groups tagged as higher priority do not starve out "
    "thread groups tagged as lower priority when both behave as CPU spinners",
    T_META_ASROOT(YES))
{
	T_SETUPBEGIN;

	wait_for_quiescence_default(argc, argv);
	environment_init();

	/*
	 * Create two work intervals with corresponding thread groups that would
	 * be associated with differing priorities. The work interval created with
	 * WORK_INTERVAL_FLAG_HAS_WORKLOAD_ID adopts the DISCRETIONARY workload
	 * class loaded above, so its thread group is the lower priority one.
	 */
	work_interval_t lower_pri_handle, higher_pri_handle;
	uint32_t lower_pri_flags = make_work_interval(&lower_pri_handle, WORK_INTERVAL_TYPE_DEFAULT | WORK_INTERVAL_FLAG_HAS_WORKLOAD_ID);
	uint32_t higher_pri_flags = make_work_interval(&higher_pri_handle, WORK_INTERVAL_TYPE_DEFAULT);

	/* Start threads to join the lower priority thread group */
	pthread_t lower_threads[num_cores];
	struct thread_data lower_thread_datas[num_cores];
	start_threads(lower_threads, lower_thread_datas, &lower_pri_handle, lower_pri_flags);

	/* Start threads to join the higher priority thread group */
	pthread_t higher_threads[num_cores];
	struct thread_data higher_thread_datas[num_cores];
	start_threads(higher_threads, higher_thread_datas, &higher_pri_handle, higher_pri_flags);

	T_SETUPEND;

	/* Snapshot initial thread runtimes */
	uint64_t start_lower_priority_runtime_usec = snapshot_user_time_usec(lower_threads);
	uint64_t start_higher_priority_runtime_usec = snapshot_user_time_usec(higher_threads);

	/* Allow the thread groups time to compete for CPU */
	sleep(3);

	/*
	 * Snapshot runtimes again and compare the usage ratio between the lower and
	 * higher priority thread groups, to determine whether the lower priority group
	 * has been starved.
	 */
	uint64_t finish_lower_priority_runtime_usec = snapshot_user_time_usec(lower_threads);
	uint64_t finish_higher_priority_runtime_usec = snapshot_user_time_usec(higher_threads);

	uint64_t lower_priority_runtime = finish_lower_priority_runtime_usec - start_lower_priority_runtime_usec;
	uint64_t higher_priority_runtime = finish_higher_priority_runtime_usec - start_higher_priority_runtime_usec;

	T_QUIET; T_ASSERT_GT(lower_priority_runtime, 10000LL, "lower priority thread group got at least 10ms of CPU time");
	T_QUIET; T_ASSERT_GT(higher_priority_runtime, 10000LL, "higher priority thread group got at least 10ms of CPU time");

	/* Record the observed runtime ratio */
	char pdj_path[MAX_PDJ_PATH_LEN];
	pdwriter_t writer = pdwriter_open_tmp("xnu", "scheduler.thread_group_fairness", 0, 0, pdj_path, MAX_PDJ_PATH_LEN);
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(writer, "pdwriter_open_tmp");
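	/*
	 * Fairness metric: the smaller group's share of the combined runtime.
	 * An even split between the two groups yields 0.5; a starved group
	 * drives the ratio toward 0, so larger is better.
	 */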
	double runtime_ratio_value;
	double total_runtime = (double)(lower_priority_runtime + higher_priority_runtime);
	if (lower_priority_runtime <= higher_priority_runtime) {
		runtime_ratio_value = (double)(lower_priority_runtime) / total_runtime;
	} else {
		runtime_ratio_value = (double)(higher_priority_runtime) / total_runtime;
	}
	T_LOG("Observed timeshare ratio: %f", runtime_ratio_value);

	pdwriter_new_value(writer, "Thread Group Runtime Ratio", PDUNIT_CUSTOM(runtime_ratio), runtime_ratio_value);
	pdwriter_record_larger_better(writer);
	pdwriter_close(writer);
	/* Ensure that the perfdata file can be copied by BATS */
	T_QUIET; T_ASSERT_POSIX_ZERO(chmod(pdj_path, 0644), "chmod");

	T_END;
}