xref: /xnu-8796.101.5/tests/sched_thread_group_fairness.c (revision aca3beaa3dfbd42498b42c5e5ce20a938e6554e5)
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <pthread.h>
#include <spawn.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <TargetConditionals.h>
#include <sys/work_interval.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/wait.h>

#include <darwintest.h>
#include <perfdata/perfdata.h>
14 
15 extern unsigned char sched_thread_group_fairness_workload_config_plist[];
16 extern unsigned int sched_thread_group_fairness_workload_config_plist_len;
17 
18 #include "sched_thread_group_fairness_workload_config.h"
19 
20 T_GLOBAL_META(T_META_NAMESPACE("xnu.scheduler"),
21     T_META_RADAR_COMPONENT_NAME("xnu"),
22     T_META_RADAR_COMPONENT_VERSION("scheduler"),
23     T_META_TAG_PERF);
24 
25 static const size_t MAX_PDJ_PATH_LEN = 256;
26 static unsigned int num_cores;
27 static unsigned int num_perf_levels;
28 
29 static bool
platform_is_amp(void)30 platform_is_amp(void)
31 {
32 	if (num_perf_levels != 0) {
33 		return num_perf_levels > 1;
34 	}
35 	int ret;
36 	num_perf_levels = 0;
37 	ret = sysctlbyname("hw.nperflevels", &num_perf_levels, &(size_t){ sizeof(num_perf_levels) }, NULL, 0);
38 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "hw.nperflevels");
39 	return num_perf_levels > 1;
40 }
41 
42 static unsigned int
get_ncpu(void)43 get_ncpu(void)
44 {
45 	int ret;
46 	int ncpu;
47 	char cpu_sysctl_name[32];
48 	if (platform_is_amp()) {
49 		sprintf(cpu_sysctl_name, "hw.perflevel%u.logicalcpu", num_perf_levels - 1);
50 	} else {
51 		sprintf(cpu_sysctl_name, "hw.ncpu");
52 	}
53 	ret = sysctlbyname(cpu_sysctl_name, &ncpu, &(size_t){ sizeof(ncpu) }, NULL, 0);
54 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "%s", cpu_sysctl_name);
55 	return (unsigned int) ncpu;
56 }
57 
58 extern char **environ;
59 
60 static void
execute_clpcctrl(char * const clpcctrl_args[])61 execute_clpcctrl(char *const clpcctrl_args[])
62 {
63 	int ret, pid;
64 	ret = posix_spawn(&pid, clpcctrl_args[0], NULL, NULL, clpcctrl_args, environ);
65 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "posix_spawn");
66 	waitpid(pid, &ret, 0);
67 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "waitpid");
68 }
69 
/*
 * T_ATEND handler: re-recommend all CPU clusters and hand cluster
 * recommendations back to CLPC's dynamic control.
 */
static void
clpcctrl_cleanup(void)
{
	char *const recommend_all[] = {"/usr/local/bin/clpcctrl", "-C", "all", NULL};
	char *const restore_dynamic[] = {"/usr/local/bin/clpcctrl", "-d", NULL};
	execute_clpcctrl(recommend_all);
	execute_clpcctrl(restore_dynamic);
}
78 
79 static void
workload_config_load(void)80 workload_config_load(void)
81 {
82 	int ret;
83 	size_t len = 0;
84 	ret = sysctlbyname("kern.workload_config", NULL, &len,
85 	    sched_thread_group_fairness_workload_config_plist,
86 	    sched_thread_group_fairness_workload_config_plist_len);
87 	if (ret == -1 && errno == ENOENT) {
88 		T_SKIP("kern.workload_config failed");
89 	}
90 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.workload_config");
91 }
92 
/*
 * T_ATEND handler: clear any workload config installed by the test.
 * Best-effort — a failure here is deliberately ignored during teardown.
 */
static void
workload_config_cleanup(void)
{
	size_t len = 0;
	(void)sysctlbyname("kern.workload_config", NULL, &len, "", 1);
}
99 
100 static void
environment_init(void)101 environment_init(void)
102 {
103 	num_cores = get_ncpu();
104 
105 	if (platform_is_amp()) {
106 		/*
107 		 * Derecommend all clusters except the E cores, to ensure that thread groups
108 		 * compete over the same cores irrespective of CLPC's cluster recommendations
109 		 */
110 		T_ATEND(clpcctrl_cleanup);
111 		char *const clpcctrl_args[] = {"/usr/local/bin/clpcctrl", "-C", "e", NULL};
112 		execute_clpcctrl(clpcctrl_args);
113 	}
114 
115 	/*
116 	 * Load a test workload plist containing a Workload ID with
117 	 * WorkloadClass == DISCRETIONARY, in order to mark the thread group
118 	 * for that workload as THREAD_GROUP_FLAGS_EFFICIENT
119 	 */
120 	T_ATEND(workload_config_cleanup);
121 	workload_config_load();
122 }
123 
124 static void
set_work_interval_id(work_interval_t * handle,uint32_t work_interval_flags)125 set_work_interval_id(work_interval_t *handle, uint32_t work_interval_flags)
126 {
127 	int ret;
128 	mach_port_t port = MACH_PORT_NULL;
129 
130 	ret = work_interval_copy_port(*handle, &port);
131 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "work_interval_copy_port");
132 
133 	struct work_interval_workload_id_params wlid_params = {
134 		.wlidp_flags = WORK_INTERVAL_WORKLOAD_ID_HAS_ID,
135 		.wlidp_wicreate_flags = work_interval_flags,
136 		.wlidp_name = (uintptr_t)"com.test.myapp.discretionary",
137 	};
138 
139 	ret = __work_interval_ctl(WORK_INTERVAL_OPERATION_SET_WORKLOAD_ID, port, &wlid_params, sizeof(wlid_params));
140 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "WORK_INTERVAL_OPERATION_SET_WORKLOAD_ID");
141 }
142 
143 static uint32_t
make_work_interval(work_interval_t * handle,uint32_t work_type_flags)144 make_work_interval(work_interval_t *handle, uint32_t work_type_flags)
145 {
146 	int ret;
147 	uint32_t work_interval_flags = WORK_INTERVAL_FLAG_JOINABLE | WORK_INTERVAL_FLAG_GROUP | work_type_flags;
148 	ret = work_interval_create(handle, work_interval_flags);
149 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "work_interval_create");
150 
151 	if (work_type_flags & WORK_INTERVAL_FLAG_HAS_WORKLOAD_ID) {
152 		set_work_interval_id(handle, work_interval_flags);
153 	}
154 	return work_interval_flags;
155 }
156 
/* Per-thread argument block handed to spin_thread_fn() via pthread_create */
struct thread_data {
	work_interval_t *handle;      /* work interval whose thread group the thread joins */
	uint32_t work_interval_flags; /* flags the interval was created with (not read by the spinner) */
};
161 
162 static void *
spin_thread_fn(void * arg)163 spin_thread_fn(void *arg)
164 {
165 	struct thread_data *info = (struct thread_data *)arg;
166 	int ret;
167 
168 	/* Join the thread group associated with the work interval handle */
169 	ret = work_interval_join(*(info->handle));
170 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "work_interval_join");
171 
172 	/* Spin indefinitely */
173 	volatile uint64_t spin_count = 0;
174 	while (mach_absolute_time() < UINT64_MAX) {
175 		spin_count++;
176 	}
177 	return NULL;
178 }
179 
180 static void
start_threads(pthread_t * threads,struct thread_data * thread_datas,work_interval_t * handle,uint32_t work_interval_flags)181 start_threads(pthread_t *threads, struct thread_data *thread_datas, work_interval_t *handle, uint32_t work_interval_flags)
182 {
183 	int ret;
184 	for (unsigned int i = 0; i < num_cores; i++) {
185 		thread_datas[i].handle = handle;
186 		thread_datas[i].work_interval_flags = work_interval_flags;
187 		ret = pthread_create(&threads[i], NULL, spin_thread_fn, &thread_datas[i]);
188 		T_QUIET; T_ASSERT_POSIX_ZERO(ret, "pthread_create");
189 	}
190 }
191 
192 static uint64_t
snapshot_user_time_usec(pthread_t * threads)193 snapshot_user_time_usec(pthread_t *threads)
194 {
195 	kern_return_t kr;
196 	uint64_t cumulative_user_time_usec = 0;
197 	mach_msg_type_number_t count = THREAD_BASIC_INFO_COUNT;
198 	for (unsigned int i = 0; i < num_cores; i++) {
199 		mach_port_t thread_port = pthread_mach_thread_np(threads[i]);
200 		thread_basic_info_data_t info;
201 		kr = thread_info(thread_port, THREAD_BASIC_INFO, (thread_info_t)&info, &count);
202 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_info");
203 		uint64_t thread_usr_usec = (uint64_t) (info.user_time.seconds) * USEC_PER_SEC + (uint64_t) info.user_time.microseconds;
204 		cumulative_user_time_usec += thread_usr_usec;
205 	}
206 	return cumulative_user_time_usec;
207 }
208 
T_DECL(thread_group_fairness,
    "Ensure that thread groups tagged as higher priority do not starve out "
    "thread groups tagged as lower priority when both behave as CPU spinners",
    T_META_ASROOT(YES))
{
	T_SETUPBEGIN;

	environment_init();

	/*
	 * Create two work intervals with corresponding thread groups that would
	 * be associated with differing priorities.
	 *
	 * The "lower" interval carries a workload ID that the loaded plist maps
	 * to WorkloadClass DISCRETIONARY; the "higher" one is a plain default
	 * interval with no workload ID.
	 */
	work_interval_t lower_pri_handle, higher_pri_handle;
	uint32_t lower_pri_flags = make_work_interval(&lower_pri_handle, WORK_INTERVAL_TYPE_DEFAULT | WORK_INTERVAL_FLAG_HAS_WORKLOAD_ID);
	uint32_t higher_pri_flags = make_work_interval(&higher_pri_handle, WORK_INTERVAL_TYPE_DEFAULT);

	/* Start threads to join the lower priority thread group */
	pthread_t lower_threads[num_cores];
	struct thread_data lower_thread_datas[num_cores];
	start_threads(lower_threads, lower_thread_datas, &lower_pri_handle, lower_pri_flags);

	/* Start threads to join the higher priority thread group  */
	pthread_t higher_threads[num_cores];
	struct thread_data higher_thread_datas[num_cores];
	start_threads(higher_threads, higher_thread_datas, &higher_pri_handle, higher_pri_flags);

	T_SETUPEND;

	/* Snapshot thread runtimes */
	uint64_t start_lower_priority_runtime_usec = snapshot_user_time_usec(lower_threads);
	uint64_t start_higher_priority_runtime_usec = snapshot_user_time_usec(higher_threads);

	/* Allow thread groups time to compete */
	sleep(3);

	/*
	 * Snapshot runtimes again and compare the usage ratio between the lower and
	 * higher priority thread groups, to determine whether the lower priority group
	 * has been starved
	 */
	uint64_t finish_lower_priority_runtime_usec = snapshot_user_time_usec(lower_threads);
	uint64_t finish_higher_priority_runtime_usec = snapshot_user_time_usec(higher_threads);

	uint64_t lower_priority_runtime = finish_lower_priority_runtime_usec - start_lower_priority_runtime_usec;
	uint64_t higher_priority_runtime = finish_higher_priority_runtime_usec - start_higher_priority_runtime_usec;

	/* 10000 usec == 10ms: both groups must make nontrivial progress */
	T_QUIET; T_ASSERT_GT(lower_priority_runtime, 10000LL, "lower priority thread group got at least 10ms of CPU time");
	T_QUIET; T_ASSERT_GT(higher_priority_runtime, 10000LL, "higher priority thread group got at least 10ms of CPU time");

	/* Record the observed runtime ratio */
	char pdj_path[MAX_PDJ_PATH_LEN];
	pdwriter_t writer = pdwriter_open_tmp("xnu", "scheduler.thread_group_fairness", 0, 0, pdj_path, MAX_PDJ_PATH_LEN);
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(writer, "pdwriter_open_tmp");

	/*
	 * Report min(runtime) / total — the share of CPU time that went to the
	 * losing group.  0.5 is perfect fairness; values near 0 indicate
	 * starvation.  Larger is better.
	 */
	double runtime_ratio_value;
	double total_runtime = (double)(lower_priority_runtime + higher_priority_runtime);
	if (lower_priority_runtime <= higher_priority_runtime) {
		runtime_ratio_value = (double)(lower_priority_runtime) / total_runtime;
	} else {
		runtime_ratio_value = (double)(higher_priority_runtime) / total_runtime;
	}

	pdwriter_new_value(writer, "Thread Group Runtime Ratio", PDUNIT_CUSTOM(runtime_ratio), runtime_ratio_value);
	pdwriter_record_larger_better(writer);
	pdwriter_close(writer);
	/* Ensure that the perfdata file can be copied by BATS */
	T_QUIET; T_ASSERT_POSIX_ZERO(chmod(pdj_path, 0644), "chmod");

	T_END;
}
280