xref: /xnu-11215.1.10/tests/sched/sched_test_utils.c (revision 8d741a5de7ff4191bf97d57b9f54c2f6d4a15585)
1 #include <mach/mach.h>
2 #include <stdlib.h>
3 #include <signal.h>
4 #include <mach/mach_time.h>
5 
6 #include <darwintest.h>
7 #include <darwintest_utils.h>
8 #include "sched_test_utils.h"
9 
10 static bool verbosity_enabled = true;
11 
12 void
disable_verbose_sched_utils(void)13 disable_verbose_sched_utils(void)
14 {
15 	T_QUIET; T_ASSERT_TRUE(verbosity_enabled, "verbosity was enabled");
16 	verbosity_enabled = false;
17 }
18 
19 void
reenable_verbose_sched_utils(void)20 reenable_verbose_sched_utils(void)
21 {
22 	T_QUIET; T_ASSERT_EQ(verbosity_enabled, false, "verbosity was disabled");
23 	verbosity_enabled = true;
24 }
25 
26 static mach_timebase_info_data_t timebase_info;
27 static bool initialized_timebase = false;
28 
29 uint64_t
nanos_to_abs(uint64_t nanos)30 nanos_to_abs(uint64_t nanos)
31 {
32 	kern_return_t kr;
33 	if (!initialized_timebase) {
34 		kr = mach_timebase_info(&timebase_info);
35 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
36 		initialized_timebase = true;
37 	}
38 	return nanos * timebase_info.denom / timebase_info.numer;
39 }
40 
41 uint64_t
abs_to_nanos(uint64_t abs)42 abs_to_nanos(uint64_t abs)
43 {
44 	kern_return_t kr;
45 	if (!initialized_timebase) {
46 		kr = mach_timebase_info(&timebase_info);
47 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
48 		initialized_timebase = true;
49 	}
50 	return abs * timebase_info.numer / timebase_info.denom;
51 }
52 
53 static int num_perf_levels = 0;
54 bool
platform_is_amp(void)55 platform_is_amp(void)
56 {
57 	if (num_perf_levels == 0) {
58 		int ret;
59 		ret = sysctlbyname("hw.nperflevels", &num_perf_levels, &(size_t){ sizeof(num_perf_levels) }, NULL, 0);
60 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "hw.nperflevels");
61 	}
62 	bool is_amp = num_perf_levels > 1;
63 	if (verbosity_enabled) {
64 		T_LOG("��️ Platform has %d perflevels (%s)", num_perf_levels, is_amp ? "AMP" : "SMP");
65 	}
66 	return is_amp;
67 }
68 
69 bool
platform_is_virtual_machine(void)70 platform_is_virtual_machine(void)
71 {
72 	int ret;
73 	int vmm_present = 0;
74 	ret = sysctlbyname("kern.hv_vmm_present", &vmm_present, &(size_t){ sizeof(vmm_present) }, NULL, 0);
75 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.hv_vmm_present");
76 	if (vmm_present && verbosity_enabled) {
77 		T_LOG("��️ Platform is a virtual machine!");
78 	}
79 	return (bool)vmm_present;
80 }
81 
82 static char sched_policy_name[64];
83 char *
platform_sched_policy(void)84 platform_sched_policy(void)
85 {
86 	int ret;
87 	ret = sysctlbyname("kern.sched", sched_policy_name, &(size_t){ sizeof(sched_policy_name) }, NULL, 0);
88 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.sched");
89 	if (verbosity_enabled) {
90 		T_LOG("��️ Platform is running the \"%s\" scheduler policy", sched_policy_name);
91 	}
92 	return sched_policy_name;
93 }
94 
95 void
spin_for_duration(uint32_t seconds)96 spin_for_duration(uint32_t seconds)
97 {
98 	uint64_t duration       = nanos_to_abs((uint64_t)seconds * NSEC_PER_SEC);
99 	uint64_t current_time   = mach_absolute_time();
100 	uint64_t timeout        = duration + current_time;
101 
102 	uint64_t spin_count = 0;
103 
104 	while (mach_absolute_time() < timeout) {
105 		spin_count++;
106 	}
107 }
108 
109 static const double default_idle_threshold = 0.9;
110 static const int default_timeout_sec = 3;
111 
112 bool
wait_for_quiescence_default(void)113 wait_for_quiescence_default(void)
114 {
115 	return wait_for_quiescence(default_idle_threshold, default_timeout_sec);
116 }
117 
118 /* Logic taken from __wait_for_quiescence in qos_tests.c */
119 bool
wait_for_quiescence(double idle_threshold,int timeout_seconds)120 wait_for_quiescence(double idle_threshold, int timeout_seconds)
121 {
122 	kern_return_t kr;
123 
124 	bool quiesced = false;
125 	double idle_ratio = 0.0;
126 	if (verbosity_enabled) {
127 		T_LOG("�� Waiting up to %d second(s) for the system to quiesce above %.2f%% idle...",
128 		    timeout_seconds, idle_threshold * 100.0);
129 	}
130 
131 	host_cpu_load_info_data_t host_load;
132 	mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT;
133 	int waited_seconds = 0;
134 	int ind = 0;
135 	double user_ticks[2];
136 	double system_ticks[2];
137 	double idle_ticks[2];
138 
139 	while (waited_seconds < timeout_seconds) {
140 		kr = host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, (host_info_t)&host_load, &count);
141 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "host_statistics HOST_CPU_LOAD_INFO");
142 
143 		user_ticks[ind] = (double)host_load.cpu_ticks[CPU_STATE_USER];
144 		system_ticks[ind] = (double)host_load.cpu_ticks[CPU_STATE_SYSTEM];
145 		idle_ticks[ind] = (double)host_load.cpu_ticks[CPU_STATE_IDLE];
146 
147 		if (waited_seconds >= 1) {
148 			int old_ind = (ind + 1) % 2;
149 			double idle_delta = idle_ticks[ind] - idle_ticks[old_ind];
150 			double total_delta = idle_delta + (user_ticks[ind] - user_ticks[old_ind]) + (system_ticks[ind] - system_ticks[old_ind]);
151 			if (total_delta > 0.0) {
152 				idle_ratio = idle_delta / total_delta;
153 				if (idle_ratio >= idle_threshold) {
154 					quiesced = true;
155 					break;
156 				}
157 				if (verbosity_enabled) {
158 					T_LOG("�� Not yet quiesced (%.2f%% idle)", idle_ratio * 100.0);
159 				}
160 			}
161 		}
162 
163 		sleep(1);
164 		ind = (ind + 1) % 2;
165 		waited_seconds++;
166 	}
167 
168 	if (verbosity_enabled) {
169 		if (quiesced) {
170 			T_LOG("�� System quiesced to %.2f%% idle within %d second(s)", idle_ratio * 100.0, waited_seconds);
171 		} else {
172 			T_LOG("�� Failed to quiesce within %.2f%% idle after %d second(s)", idle_threshold * 100.0, waited_seconds);
173 		}
174 	}
175 	return quiesced;
176 }
177 
178 static bool atend_handler_registered = false;
179 
180 static void
sched_utils_sigint_handler(int sig)181 sched_utils_sigint_handler(int sig)
182 {
183 	T_QUIET; T_EXPECT_EQ(sig, SIGINT, "unexpected signal received");
184 	T_FAIL("SIGINT received. Failing test to induce ATEND handlers for cleanup...");
185 }
186 
187 static void
register_atend_handler(void)188 register_atend_handler(void)
189 {
190 	if (!atend_handler_registered) {
191 		signal(SIGINT, sched_utils_sigint_handler);
192 		atend_handler_registered = true;
193 	}
194 }
195 
196 static char *clpcctrl_bin = "/usr/local/bin/clpcctrl";
197 static bool running_clpcctrl_atend_handler = false;
198 
199 static void
clpcctrl_cleanup(void)200 clpcctrl_cleanup(void)
201 {
202 	T_LOG("��️ Restoring CLPC state...");
203 	running_clpcctrl_atend_handler = true;
204 
205 	char *recommend_all_cores_args[] = {"-C", "all", NULL};
206 	execute_clpcctrl(recommend_all_cores_args, false);
207 
208 	char *restore_dynamic_control_args[] = {"-d", NULL};
209 	execute_clpcctrl(restore_dynamic_control_args, false);
210 }
211 
212 uint64_t
execute_clpcctrl(char * clpcctrl_args[],bool read_value)213 execute_clpcctrl(char *clpcctrl_args[], bool read_value)
214 {
215 	int ret;
216 
217 	/* Avoid recursion during teardown */
218 	if (!running_clpcctrl_atend_handler) {
219 		register_atend_handler();
220 		T_ATEND(clpcctrl_cleanup);
221 	}
222 
223 	/* Populate arg array with clpcctrl location */
224 	char *full_clpcctrl_args[100];
225 	full_clpcctrl_args[0] = clpcctrl_bin;
226 	int arg_ind = 0;
227 	while (clpcctrl_args[arg_ind] != NULL) {
228 		T_QUIET; T_ASSERT_LT(arg_ind + 1, 100, "too many clpcctrl args");
229 		full_clpcctrl_args[arg_ind + 1] = clpcctrl_args[arg_ind];
230 		arg_ind++;
231 	}
232 	full_clpcctrl_args[arg_ind + 1] = NULL;
233 
234 	__block uint64_t value = 0;
235 	pid_t pid = dt_launch_tool_pipe(full_clpcctrl_args, false, NULL,
236 	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
237 		T_LOG("��️ [clpcctrl] %s", data);
238 		if (read_value) {
239 		        char *token = strtok(data, " ");
240 		        token = strtok(NULL, " ");
241 		        value = strtoull(token, NULL, 10);
242 		}
243 		return true;
244 	},
245 	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
246 		T_LOG("��️ [clpcctrl] Error msg: %s", data);
247 		return true;
248 	},
249 	    BUFFER_PATTERN_LINE, NULL);
250 
251 	ret = dt_waitpid(pid, NULL, NULL, 0);
252 	T_QUIET; T_EXPECT_TRUE(ret, "dt_waitpid for clpcctrl");
253 
254 	return value;
255 }
256 
257 bool
check_recommended_core_mask(uint64_t * core_mask)258 check_recommended_core_mask(uint64_t *core_mask)
259 {
260 	int ret;
261 	uint64_t recommended_cores = 0;
262 	size_t recommended_cores_size = sizeof(recommended_cores);
263 	ret = sysctlbyname("kern.sched_recommended_cores", &recommended_cores, &recommended_cores_size, NULL, 0);
264 	T_QUIET; T_EXPECT_POSIX_SUCCESS(ret, "sysctlbyname(kern.sched_recommended_cores)");
265 
266 	if (verbosity_enabled) {
267 		uint64_t expected_recommended_mask = ~0ULL >> (64 - dt_ncpu());
268 		T_LOG("�� kern.sched_recommended_cores: %016llx, expecting %016llx if all are recommended",
269 		    recommended_cores, expected_recommended_mask);
270 	}
271 
272 	if (core_mask != NULL) {
273 		*core_mask = recommended_cores;
274 	}
275 	return __builtin_popcountll(recommended_cores) == dt_ncpu();
276 }
277 
278 /* Trace Management */
279 
280 enum trace_status {
281 	STARTED = 1,
282 	ENDED = 2,
283 	SAVED = 3,
284 	DISCARDED = 4,
285 };
286 
287 struct trace_handle {
288 	char *short_name;
289 	char *trace_filename;
290 	char *abs_filename;
291 	pid_t trace_pid;
292 	enum trace_status status;
293 	pid_t wait_on_start_pid;
294 	pid_t wait_on_end_pid;
295 };
296 
297 #define MAX_TRACES 1024
298 static struct trace_handle handles[MAX_TRACES];
299 static int handle_ind = 0;
300 
301 static void
atend_trace_cleanup(void)302 atend_trace_cleanup(void)
303 {
304 	int ret;
305 	for (int i = 0; i < handle_ind; i++) {
306 		struct trace_handle *handle = &handles[i];
307 		if (handle->status == STARTED) {
308 			end_collect_trace(handle);
309 		}
310 		T_QUIET; T_EXPECT_EQ(handle->status, ENDED, "ended trace");
311 		if (handle->status == ENDED && T_FAILCOUNT > 0) {
312 			/* Save the trace as an artifact for debugging the failure(s) */
313 			save_collected_trace(handle);
314 		}
315 		/* Make sure to free up the tmp dir space we used */
316 		discard_collected_trace(handle);
317 		/* Kill trace just in case */
318 		ret = kill(handle->trace_pid, SIGKILL);
319 		T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_SUCCESS(ret, "kill SIGKILL");
320 	}
321 }
322 
323 static bool
sched_utils_tracing_supported(void)324 sched_utils_tracing_supported(void)
325 {
326 #if TARGET_OS_BRIDGE
327 	/*
328 	 * Don't support the tracing on BridgeOS due to limited disk space
329 	 * and CLPC compatibility issues.
330 	 */
331 	return false;
332 #else /* !TARGET_OS_BRIDGE */
333 	disable_verbose_sched_utils();
334 	/* Virtual machines do not support trace */
335 	bool supported = (platform_is_virtual_machine() == false);
336 	reenable_verbose_sched_utils();
337 	return supported;
338 #endif /* !TARGET_OS_BRIDGE */
339 }
340 
341 trace_handle_t
begin_collect_trace(char * filename)342 begin_collect_trace(char *filename)
343 {
344 	return begin_collect_trace_fmt(filename);
345 }
346 static bool first_trace = true;
347 
348 static char *trace_bin = "/usr/local/bin/trace";
349 static char *notifyutil_bin = "/usr/bin/notifyutil";
350 static char *tar_bin = "/usr/bin/tar";
351 
352 static char *begin_notification = "��️_trace_begun...";
353 static char *end_notification = "��️_trace_ended...";
354 static char *trigger_end_notification = "��️_stopping_trace...";
355 
356 static const int waiting_timeout_sec = 60 * 2; /* 2 minutes, allows trace post-processing to finish */
357 
358 trace_handle_t
begin_collect_trace_fmt(char * fmt,...)359 begin_collect_trace_fmt(char *fmt, ...)
360 {
361 	/* Check trace requirements */
362 	if (sched_utils_tracing_supported() == false) {
363 		return NULL;
364 	}
365 	T_QUIET; T_ASSERT_EQ(geteuid(), 0, "��️ Tracing requires the test to be run as root user");
366 
367 	int ret;
368 	struct trace_handle *handle = &handles[handle_ind++];
369 	T_QUIET; T_ASSERT_LE(handle_ind, MAX_TRACES, "Ran out of trace handles");
370 
371 	/* Generate the trace filename from the formatted string and args */
372 	char *name = (char *)malloc(sizeof(char) * MAXPATHLEN);
373 	va_list args;
374 	va_start(args, fmt);
375 	vsnprintf(name, MAXPATHLEN, fmt, args);
376 	va_end(args);
377 	handle->short_name = name;
378 	char *full_filename = (char *)malloc(sizeof(char) * MAXPATHLEN);
379 	memset(full_filename, 0, MAXPATHLEN);
380 	snprintf(full_filename, MAXPATHLEN, "%s/%s.atrc", dt_tmpdir(), handle->short_name);
381 	handle->abs_filename = full_filename;
382 	char *filename = (char *)malloc(sizeof(char) * MAXPATHLEN);
383 	memset(filename, 0, MAXPATHLEN);
384 	snprintf(filename, MAXPATHLEN, "%s.atrc", handle->short_name);
385 	handle->trace_filename = filename;
386 
387 	/* If filename already exists, delete old trace */
388 	ret = remove(handle->abs_filename);
389 	T_QUIET; T_WITH_ERRNO; T_EXPECT_TRUE(ret == 0 || errno == ENOENT, "remove trace file");
390 
391 	if (first_trace) {
392 		/* Run tracing cleanup a single time */
393 		register_atend_handler();
394 		T_ATEND(atend_trace_cleanup);
395 		first_trace = false;
396 	}
397 
398 	/* Launch procs to monitor trace start/stop */
399 	char *wait_on_start_args[] = {"/usr/bin/notifyutil", "-1", begin_notification, NULL};
400 	ret = dt_launch_tool(&handle->wait_on_start_pid, wait_on_start_args, false, NULL, NULL);
401 	T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ(ret, 0, "dt_launch_tool");
402 	char *wait_on_end_args[] = {"/usr/bin/notifyutil", "-1", end_notification, NULL};
403 	ret = dt_launch_tool(&handle->wait_on_end_pid, wait_on_end_args, false, NULL, NULL);
404 	T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ(ret, 0, "dt_launch_tool");
405 
406 	/* Launch trace record */
407 	char *trace_args[] = {trace_bin, "record", handle->abs_filename, "--plan", "default", "--unsafe",
408 		              "--kdebug-filter-include", "C0x01", "--notify-after-start", begin_notification,
409 		              "--notify-after-end", end_notification, "--end-on-notification", trigger_end_notification, "&", NULL};
410 	pid_t trace_pid = dt_launch_tool_pipe(trace_args, false, NULL,
411 	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
412 		T_LOG("��️ [trace] %s", data);
413 		return true;
414 	},
415 	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
416 		T_LOG("��️ [trace] Error msg: %s", data);
417 		return true;
418 	},
419 	    BUFFER_PATTERN_NONE, NULL);
420 
421 	T_LOG("��️ Starting trace collection for \"%s\" trace[%u]", handle->trace_filename, trace_pid);
422 
423 	/* Wait for tracing to start */
424 	int signal_num;
425 	ret = dt_waitpid(handle->wait_on_start_pid, NULL, &signal_num, waiting_timeout_sec);
426 	T_QUIET; T_EXPECT_TRUE(ret, "dt_waitpid for trace start signal_num %d", signal_num);
427 
428 	handle->trace_pid = trace_pid;
429 	handle->status = STARTED;
430 
431 	return (trace_handle_t)handle;
432 }
433 
434 void
end_collect_trace(trace_handle_t handle)435 end_collect_trace(trace_handle_t handle)
436 {
437 	if (sched_utils_tracing_supported() == false) {
438 		return;
439 	}
440 
441 	int ret;
442 	struct trace_handle *trace_state = (struct trace_handle *)handle;
443 	T_QUIET; T_EXPECT_EQ(trace_state->status, STARTED, "trace was started");
444 
445 	/* Notify trace to stop tracing */
446 	char *wait_on_start_args[] = {notifyutil_bin, "-p", trigger_end_notification, NULL};
447 	pid_t trigger_end_pid = 0;
448 	ret = dt_launch_tool(&trigger_end_pid, wait_on_start_args, false, NULL, NULL);
449 	T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ(ret, 0, "dt_launch_tool for notify end trace");
450 
451 	/* Wait for tracing to actually stop */
452 	T_LOG("��️ Now waiting on trace to finish up...");
453 	int signal_num;
454 	ret = dt_waitpid(trace_state->wait_on_end_pid, NULL, &signal_num, waiting_timeout_sec);
455 	T_QUIET; T_EXPECT_TRUE(ret, "dt_waitpid for trace stop signal_num %d", signal_num);
456 
457 	trace_state->status = ENDED;
458 }
459 
460 void
save_collected_trace(trace_handle_t handle)461 save_collected_trace(trace_handle_t handle)
462 {
463 	if (sched_utils_tracing_supported() == false) {
464 		return;
465 	}
466 
467 	int ret;
468 	struct trace_handle *trace_state = (struct trace_handle *)handle;
469 	T_QUIET; T_EXPECT_EQ(trace_state->status, ENDED, "trace was ended");
470 
471 	/* Generate compressed filepath and mark for upload */
472 	char compressed_path[MAXPATHLEN];
473 	snprintf(compressed_path, MAXPATHLEN, "%s.tar.gz", trace_state->short_name);
474 	ret = dt_resultfile(compressed_path, MAXPATHLEN);
475 	T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(ret, "dt_resultfile marking \"%s\" for collection", compressed_path);
476 	T_LOG("��️ \"%s\" marked for upload", compressed_path);
477 
478 	char *tar_args[] = {tar_bin, "-czvf", compressed_path, "-C", (char *)dt_tmpdir(), trace_state->trace_filename, NULL};
479 	pid_t tar_pid = dt_launch_tool_pipe(tar_args, false, NULL,
480 	    ^bool (__unused char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
481 		T_LOG("��️ [tar] %s", data);
482 		return true;
483 	},
484 	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
485 		T_LOG("��️ [tar] Error msg: %s", data);
486 		return true;
487 	},
488 	    BUFFER_PATTERN_LINE, NULL);
489 
490 	T_QUIET; T_EXPECT_TRUE(tar_pid, "��️ [tar] pid %d", tar_pid);
491 	ret = dt_waitpid(tar_pid, NULL, NULL, 0);
492 	T_QUIET; T_EXPECT_POSIX_SUCCESS(ret, "dt_waitpid for tar");
493 
494 	/* Lax permissions in case a user wants to open the compressed file without sudo */
495 	ret = chmod(compressed_path, 0666);
496 	T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(ret, "chmod");
497 
498 	T_LOG("��️ Finished saving trace (%s), which is available compressed at \"%s\"",
499 	    trace_state->short_name, compressed_path);
500 
501 	trace_state->status = SAVED;
502 }
503 
504 void
discard_collected_trace(trace_handle_t handle)505 discard_collected_trace(trace_handle_t handle)
506 {
507 	if (sched_utils_tracing_supported() == false) {
508 		return;
509 	}
510 
511 	int ret;
512 	struct trace_handle *trace_state = (struct trace_handle *)handle;
513 	T_QUIET; T_EXPECT_TRUE(trace_state->status == ENDED || trace_state->status == SAVED,
514 	    "trace was ended or saved");
515 
516 	/* Delete trace file in order to reclaim disk space on the test device */
517 	ret = remove(trace_state->abs_filename);
518 	T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_SUCCESS(ret, "remove trace file");
519 
520 	if (trace_state->status == ENDED) {
521 		T_LOG("��️ Deleted recorded trace file at \"%s\"", trace_state->abs_filename);
522 	}
523 	trace_state->status = DISCARDED;
524 }
525