xref: /xnu-11417.140.69/tests/sched/sched_test_utils.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 #include <mach/mach.h>
2 #include <stdlib.h>
3 #include <signal.h>
4 #include <mach/mach_time.h>
5 #include <os/atomic_private.h>
6 #include <sys/commpage.h>
7 #include <machine/cpu_capabilities.h>
8 
9 #include <darwintest.h>
10 #include <darwintest_utils.h>
11 #include "sched_test_utils.h"
12 
/*
 * When true (the default), the utilities in this file emit informational
 * T_LOG messages. Tests can temporarily silence them with
 * disable_verbose_sched_utils() and restore them with
 * reenable_verbose_sched_utils(); the calls must be balanced.
 */
static bool verbosity_enabled = true;

/*
 * Turn off informational logging from these utilities.
 * Asserts that logging was enabled, catching unbalanced disable calls.
 */
void
disable_verbose_sched_utils(void)
{
	T_QUIET; T_ASSERT_TRUE(verbosity_enabled, "verbosity was enabled");
	verbosity_enabled = false;
}
21 
/*
 * Turn informational logging back on after disable_verbose_sched_utils().
 * Asserts that logging was disabled, catching unbalanced reenable calls.
 */
void
reenable_verbose_sched_utils(void)
{
	T_QUIET; T_ASSERT_EQ(verbosity_enabled, false, "verbosity was disabled");
	verbosity_enabled = true;
}
28 
29 static mach_timebase_info_data_t timebase_info;
30 static bool initialized_timebase = false;
31 
32 uint64_t
nanos_to_abs(uint64_t nanos)33 nanos_to_abs(uint64_t nanos)
34 {
35 	kern_return_t kr;
36 	if (!initialized_timebase) {
37 		kr = mach_timebase_info(&timebase_info);
38 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
39 		initialized_timebase = true;
40 	}
41 	return nanos * timebase_info.denom / timebase_info.numer;
42 }
43 
44 uint64_t
abs_to_nanos(uint64_t abs)45 abs_to_nanos(uint64_t abs)
46 {
47 	kern_return_t kr;
48 	if (!initialized_timebase) {
49 		kr = mach_timebase_info(&timebase_info);
50 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
51 		initialized_timebase = true;
52 	}
53 	return abs * timebase_info.numer / timebase_info.denom;
54 }
55 
56 static int num_perf_levels = 0;
57 unsigned int
platform_nperflevels(void)58 platform_nperflevels(void)
59 {
60 	if (num_perf_levels == 0) {
61 		int ret;
62 		ret = sysctlbyname("hw.nperflevels", &num_perf_levels, &(size_t){ sizeof(num_perf_levels) }, NULL, 0);
63 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "hw.nperflevels");
64 	}
65 	return (unsigned int)num_perf_levels;
66 }
67 
68 static char perflevel_names[64][16];
69 const char *
platform_perflevel_name(unsigned int perflevel)70 platform_perflevel_name(unsigned int perflevel)
71 {
72 	if (perflevel_names[perflevel][0] == 0) {
73 		int ret;
74 		char sysctl_name[64] = { 0 };
75 		snprintf(sysctl_name, sizeof(sysctl_name), "hw.perflevel%d.name", perflevel);
76 		ret = sysctlbyname(sysctl_name, &perflevel_names[perflevel], &(size_t){ sizeof(perflevel_names[perflevel]) }, NULL, 0);
77 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, sysctl_name);
78 	}
79 	return (const char *)perflevel_names[perflevel];
80 }
81 
82 static bool reported_is_amp = false;
83 bool
platform_is_amp(void)84 platform_is_amp(void)
85 {
86 	bool is_amp = platform_nperflevels() > 1;
87 	if (verbosity_enabled && !reported_is_amp) {
88 		T_LOG("��️ Platform has %d perflevels (%s)", platform_nperflevels(), is_amp ? "AMP" : "SMP");
89 		reported_is_amp = true;
90 	}
91 	return is_amp;
92 }
93 
94 bool
platform_is_virtual_machine(void)95 platform_is_virtual_machine(void)
96 {
97 	int ret;
98 	int vmm_present = 0;
99 	ret = sysctlbyname("kern.hv_vmm_present", &vmm_present, &(size_t){ sizeof(vmm_present) }, NULL, 0);
100 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.hv_vmm_present");
101 	if (vmm_present && verbosity_enabled) {
102 		T_LOG("��️ Platform is a virtual machine!");
103 	}
104 	return (bool)vmm_present;
105 }
106 
107 static char sched_policy_name[64];
108 char *
platform_sched_policy(void)109 platform_sched_policy(void)
110 {
111 	int ret;
112 	ret = sysctlbyname("kern.sched", sched_policy_name, &(size_t){ sizeof(sched_policy_name) }, NULL, 0);
113 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.sched");
114 	if (verbosity_enabled) {
115 		T_LOG("��️ Platform is running the \"%s\" scheduler policy", sched_policy_name);
116 	}
117 	return sched_policy_name;
118 }
119 
120 static uint8_t num_clusters = 0;
121 
122 unsigned int
platform_num_clusters(void)123 platform_num_clusters(void)
124 {
125 	if (num_clusters == 0) {
126 		num_clusters = COMM_PAGE_READ(uint8_t, CPU_CLUSTERS);
127 		if (verbosity_enabled) {
128 			T_LOG("��️ Platform has %u CPU clusters", num_clusters);
129 		}
130 	}
131 	return num_clusters;
132 }
133 
134 char
bind_to_cluster_of_type(char type)135 bind_to_cluster_of_type(char type)
136 {
137 	int ret;
138 	char old_type;
139 	ret = sysctlbyname("kern.sched_thread_bind_cluster_type",
140 	    &old_type, &(size_t){ sizeof(old_type) }, &type, sizeof(type));
141 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.sched_thread_bind_cluster_type");
142 	return old_type;
143 }
144 
145 int
bind_to_cluster_id(int cluster_id)146 bind_to_cluster_id(int cluster_id)
147 {
148 	int ret;
149 	int old_cluster = 0;
150 	ret = sysctlbyname("kern.sched_thread_bind_cluster_id", &old_cluster,
151 	    &(size_t){ sizeof(old_cluster) }, &cluster_id, sizeof(cluster_id));
152 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.sched_thread_bind_cluster_id");
153 	return old_cluster;
154 }
155 
/* Set to 1 to ask every thread in spin_for_duration() to exit its loop. */
static volatile _Atomic int stop_spinning_flag = 0;

/*
 * Request that all threads currently spinning in spin_for_duration() stop.
 * The release store pairs with the acquire load in the spin loop.
 */
void
stop_spinning_threads(void)
{
	os_atomic_store(&stop_spinning_flag, 1, release);
}
163 
164 void
spin_for_duration(uint32_t seconds)165 spin_for_duration(uint32_t seconds)
166 {
167 	uint64_t duration       = nanos_to_abs((uint64_t)seconds * NSEC_PER_SEC);
168 	uint64_t current_time   = mach_absolute_time();
169 	uint64_t timeout        = duration + current_time;
170 
171 	uint64_t spin_count = 0;
172 
173 	while ((mach_absolute_time() < timeout) &&
174 	    (os_atomic_load(&stop_spinning_flag, acquire) == 0)) {
175 		spin_count++;
176 	}
177 }
178 
179 pthread_attr_t *
create_pthread_attr(int priority,detach_state_t detach_state,qos_class_t qos_class,sched_policy_t sched_policy,size_t stack_size)180 create_pthread_attr(int priority,
181     detach_state_t detach_state, qos_class_t qos_class,
182     sched_policy_t sched_policy, size_t stack_size)
183 {
184 	int ret;
185 	pthread_attr_t *attr = (pthread_attr_t *)malloc(sizeof(pthread_attr_t));
186 	ret = pthread_attr_init(attr);
187 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_init");
188 
189 	struct sched_param param = { .sched_priority = priority };
190 	ret = pthread_attr_setschedparam(attr, &param);
191 	T_QUIET; T_ASSERT_POSIX_ZERO(ret, "pthread_attr_setschedparam");
192 
193 	if (detach_state == eDetached) {
194 		ret = pthread_attr_setdetachstate(attr, PTHREAD_CREATE_DETACHED);
195 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_setdetachstate");
196 	}
197 
198 	if (qos_class != QOS_CLASS_UNSPECIFIED) {
199 		ret = pthread_attr_set_qos_class_np(attr, qos_class, 0);
200 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_set_qos_class_np");
201 	}
202 
203 	if (sched_policy != eSchedDefault) {
204 		int sched_policy_val = 0;
205 		switch (sched_policy) {
206 		case eSchedFIFO:
207 			sched_policy_val = SCHED_FIFO;
208 			break;
209 		case eSchedRR:
210 			sched_policy_val = SCHED_RR;
211 			break;
212 		case eSchedOther:
213 			sched_policy_val = SCHED_OTHER;
214 			break;
215 		case eSchedDefault:
216 			T_QUIET; T_ASSERT_FAIL("unexpected sched_policy");
217 			break;
218 		}
219 		ret = pthread_attr_setschedpolicy(attr, (int)sched_policy);
220 		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_setschedpolicy");
221 	}
222 
223 	if (stack_size != DEFAULT_STACK_SIZE) {
224 		ret = pthread_attr_setstacksize(attr, stack_size);
225 		T_QUIET; T_ASSERT_POSIX_ZERO(ret, "pthread_attr_setstacksize");
226 	}
227 	return attr;
228 }
229 
230 void
create_thread(pthread_t * thread_handle,pthread_attr_t * attr,void * (* func)(void *),void * arg)231 create_thread(pthread_t *thread_handle, pthread_attr_t *attr,
232     void *(*func)(void *), void *arg)
233 {
234 	int ret;
235 	ret = pthread_create(thread_handle, attr, func, arg);
236 	T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_create");
237 }
238 
239 void
create_thread_pri(pthread_t * thread_handle,int priority,void * (* func)(void *),void * arg)240 create_thread_pri(pthread_t *thread_handle, int priority,
241     void *(*func)(void *), void *arg)
242 {
243 	pthread_attr_t *attr = create_pthread_attr(priority, eJoinable,
244 	    QOS_CLASS_UNSPECIFIED, eSchedDefault, DEFAULT_STACK_SIZE);
245 	create_thread(thread_handle, attr, func, arg);
246 }
247 
248 pthread_t *
create_threads(int num_threads,int priority,detach_state_t detach_state,qos_class_t qos_class,sched_policy_t sched_policy,size_t stack_size,void * (* func)(void *),void * arg_array[])249 create_threads(int num_threads, int priority,
250     detach_state_t detach_state, qos_class_t qos_class,
251     sched_policy_t sched_policy, size_t stack_size,
252     void *(*func)(void *), void *arg_array[])
253 {
254 	pthread_attr_t *attr = create_pthread_attr(priority, detach_state,
255 	    qos_class, sched_policy, stack_size);
256 
257 	pthread_t *thread_handles = (pthread_t *)malloc(sizeof(pthread_t) * (size_t)num_threads);
258 	for (int i = 0; i < num_threads; i++) {
259 		create_thread(&thread_handles[i], attr, func, arg_array == NULL ? NULL : arg_array[i]);
260 	}
261 	return thread_handles;
262 }
263 
/* Defaults used by wait_for_quiescence_default(). */
static const double default_idle_threshold = 0.9;
static const int default_timeout_sec = 3;

/*
 * Wait for the system to quiesce using the default idle threshold (90%)
 * and timeout (3 seconds). See wait_for_quiescence() for semantics and
 * the "--no-quiesce" escape hatch.
 */
bool
wait_for_quiescence_default(int argc, char *const argv[])
{
	return wait_for_quiescence(argc, argv, default_idle_threshold, default_timeout_sec);
}
272 
273 /* Logic taken from __wait_for_quiescence in qos_tests.c */
274 bool
wait_for_quiescence(int argc,char * const argv[],double idle_threshold,int timeout_seconds)275 wait_for_quiescence(int argc, char *const argv[], double idle_threshold, int timeout_seconds)
276 {
277 	kern_return_t kr;
278 
279 	for (int i = 0; i < argc; i++) {
280 		if (strcmp(argv[i], "--no-quiesce") == 0) {
281 			T_LOG("�� Skipping quiescence due to \"--no-quiesce\"");
282 			return true;
283 		}
284 	}
285 
286 	bool quiesced = false;
287 	double idle_ratio = 0.0;
288 	if (verbosity_enabled) {
289 		T_LOG("�� Waiting up to %d second(s) for the system to quiesce above %.2f%% idle...",
290 		    timeout_seconds, idle_threshold * 100.0);
291 	}
292 
293 	host_cpu_load_info_data_t host_load;
294 	mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT;
295 	int waited_seconds = 0;
296 	int ind = 0;
297 	double user_ticks[2];
298 	double system_ticks[2];
299 	double idle_ticks[2];
300 
301 	while (waited_seconds < timeout_seconds) {
302 		kr = host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, (host_info_t)&host_load, &count);
303 		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "host_statistics HOST_CPU_LOAD_INFO");
304 
305 		user_ticks[ind] = (double)host_load.cpu_ticks[CPU_STATE_USER];
306 		system_ticks[ind] = (double)host_load.cpu_ticks[CPU_STATE_SYSTEM];
307 		idle_ticks[ind] = (double)host_load.cpu_ticks[CPU_STATE_IDLE];
308 
309 		if (waited_seconds >= 1) {
310 			int old_ind = (ind + 1) % 2;
311 			double idle_delta = idle_ticks[ind] - idle_ticks[old_ind];
312 			double total_delta = idle_delta + (user_ticks[ind] - user_ticks[old_ind]) + (system_ticks[ind] - system_ticks[old_ind]);
313 			if (total_delta > 0.0) {
314 				idle_ratio = idle_delta / total_delta;
315 				if (idle_ratio >= idle_threshold) {
316 					quiesced = true;
317 					break;
318 				}
319 				if (verbosity_enabled) {
320 					T_LOG("�� Not yet quiesced (%.2f%% idle)", idle_ratio * 100.0);
321 				}
322 			}
323 		}
324 
325 		sleep(1);
326 		ind = (ind + 1) % 2;
327 		waited_seconds++;
328 	}
329 
330 	if (verbosity_enabled) {
331 		if (quiesced) {
332 			T_LOG("�� System quiesced to %.2f%% idle within %d second(s)", idle_ratio * 100.0, waited_seconds);
333 		} else {
334 			T_LOG("�� Failed to quiesce within %.2f%% idle after %d second(s)", idle_threshold * 100.0, waited_seconds);
335 		}
336 	}
337 	return quiesced;
338 }
339 
/* True once the SIGINT handler below has been installed. */
static bool atend_handler_registered = false;

/*
 * SIGINT handler: deliberately fail the test so darwintest runs the
 * registered T_ATEND cleanup handlers (restoring CLPC state, stopping
 * traces) even when the run is interrupted from the terminal.
 * NOTE(review): T_EXPECT/T_ASSERT are not documented as async-signal-safe;
 * this relies on darwintest tolerating use from a signal handler — confirm.
 */
static void
sched_utils_sigint_handler(int sig)
{
	T_QUIET; T_EXPECT_EQ(sig, SIGINT, "unexpected signal received");
	T_ASSERT_TRUE(false, "SIGINT received. Failing test to induce ATEND handlers for cleanup...");
}
348 
349 static void
register_atend_handler(void)350 register_atend_handler(void)
351 {
352 	if (!atend_handler_registered) {
353 		signal(SIGINT, sched_utils_sigint_handler);
354 		atend_handler_registered = true;
355 	}
356 }
357 
358 static char *clpcctrl_bin = "/usr/local/bin/clpcctrl";
359 static bool setup_clpcctrl_atend = false;
360 
361 static void
clpcctrl_cleanup(void)362 clpcctrl_cleanup(void)
363 {
364 	T_LOG("��️ Restoring CLPC state...");
365 
366 	char *recommend_all_cores_args[] = {"-C", "all", NULL};
367 	execute_clpcctrl(recommend_all_cores_args, false);
368 
369 	char *restore_dynamic_control_args[] = {"-d", NULL};
370 	execute_clpcctrl(restore_dynamic_control_args, false);
371 }
372 
373 uint64_t
execute_clpcctrl(char * clpcctrl_args[],bool read_value)374 execute_clpcctrl(char *clpcctrl_args[], bool read_value)
375 {
376 	int ret;
377 
378 	/* Avoid recursion during teardown */
379 	if (!setup_clpcctrl_atend) {
380 		register_atend_handler();
381 		T_ATEND(clpcctrl_cleanup);
382 		setup_clpcctrl_atend = true;
383 	}
384 
385 	/* Populate arg array with clpcctrl location */
386 	char *full_clpcctrl_args[100];
387 	full_clpcctrl_args[0] = clpcctrl_bin;
388 	int arg_ind = 0;
389 	while (clpcctrl_args[arg_ind] != NULL) {
390 		T_QUIET; T_ASSERT_LT(arg_ind + 1, 100, "too many clpcctrl args");
391 		full_clpcctrl_args[arg_ind + 1] = clpcctrl_args[arg_ind];
392 		arg_ind++;
393 	}
394 	full_clpcctrl_args[arg_ind + 1] = NULL;
395 
396 	__block uint64_t value = 0;
397 	pid_t pid = dt_launch_tool_pipe(full_clpcctrl_args, false, NULL,
398 	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
399 		T_LOG("��️ [clpcctrl] %s", data);
400 		if (read_value) {
401 		        char *token = strtok(data, " ");
402 		        token = strtok(NULL, " ");
403 		        value = strtoull(token, NULL, 10);
404 		}
405 		return false;
406 	},
407 	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
408 		T_LOG("��️ [clpcctrl] Error msg: %s", data);
409 		return false;
410 	},
411 	    BUFFER_PATTERN_LINE, NULL);
412 
413 	ret = dt_waitpid(pid, NULL, NULL, 0);
414 	T_QUIET; T_EXPECT_TRUE(ret, "dt_waitpid for clpcctrl");
415 
416 	return value;
417 }
418 
419 bool
check_recommended_core_mask(uint64_t * core_mask)420 check_recommended_core_mask(uint64_t *core_mask)
421 {
422 	int ret;
423 	uint64_t recommended_cores = 0;
424 	size_t recommended_cores_size = sizeof(recommended_cores);
425 	ret = sysctlbyname("kern.sched_recommended_cores", &recommended_cores, &recommended_cores_size, NULL, 0);
426 	T_QUIET; T_EXPECT_POSIX_SUCCESS(ret, "sysctlbyname(kern.sched_recommended_cores)");
427 
428 	if (verbosity_enabled) {
429 		uint64_t expected_recommended_mask = ~0ULL >> (64 - dt_ncpu());
430 		T_LOG("�� kern.sched_recommended_cores: %016llx, expecting %016llx if all are recommended",
431 		    recommended_cores, expected_recommended_mask);
432 	}
433 
434 	if (core_mask != NULL) {
435 		*core_mask = recommended_cores;
436 	}
437 	return __builtin_popcountll(recommended_cores) == dt_ncpu();
438 }
439 
440 /* Trace Management */
441 
/* Lifecycle states for one recorded trace. */
enum trace_status {
	STARTED = 1,   /* trace record launched; begin notification observed */
	ENDED = 2,     /* trace stopped; the .atrc file exists on disk */
	SAVED = 3,     /* compressed and marked as a test result artifact */
	DISCARDED = 4, /* on-disk trace file deleted */
};

/* Bookkeeping for one trace recording session. */
struct trace_handle {
	char *short_name;        /* formatted name, no directory or extension */
	char *trace_filename;    /* "<short_name>.atrc" (relative) */
	char *abs_filename;      /* full path under dt_tmpdir() */
	pid_t trace_pid;         /* pid of the `trace record` process */
	enum trace_status status;
	pid_t wait_on_start_pid; /* notifyutil blocking on the begin notification */
	pid_t wait_on_end_pid;   /* notifyutil blocking on the end notification */
};

#define MAX_TRACES 1024
/* Pool of trace handles; handle_ind is the index of the next unused slot. */
static struct trace_handle handles[MAX_TRACES];
static int handle_ind = 0;

/* Default setting is to record a trace but only save it if the test failed */
static bool tracing_enabled = true;
static bool trace_save_requested = false;
466 
467 static bool
trace_requested(int argc,char * const argv[])468 trace_requested(int argc, char *const argv[])
469 {
470 	for (int i = 0; i < argc; i++) {
471 		if (strcmp(argv[i], "--save-trace") == 0) {
472 			trace_save_requested = true;
473 		}
474 		if (strcmp(argv[i], "--no-trace") == 0) {
475 			tracing_enabled = false;
476 		}
477 	}
478 	T_QUIET; T_ASSERT_TRUE(tracing_enabled || !trace_save_requested, "Can't use both --save-trace and --no-trace");
479 	return tracing_enabled;
480 }
481 
/*
 * T_ATEND handler: for every trace begun during the test, ensure it is
 * stopped, save it as an artifact when the test failed (or --save-trace
 * was passed), then delete the on-disk file and SIGKILL the trace process
 * in case it is still running.
 */
static void
atend_trace_cleanup(void)
{
	int ret;
	for (int i = 0; i < handle_ind; i++) {
		struct trace_handle *handle = &handles[i];
		/* Stop any trace the test left running. */
		if (handle->status == STARTED) {
			end_collect_trace(handle);
		}
		T_QUIET; T_EXPECT_EQ(handle->status, ENDED, "ended trace");
		if (handle->status == ENDED && ((T_FAILCOUNT > 0) || trace_save_requested)) {
			/* Save the trace as an artifact for debugging the failure(s) */
			save_collected_trace(handle);
		}
		/* Make sure to free up the tmp dir space we used */
		discard_collected_trace(handle);
		/* Kill trace just in case */
		ret = kill(handle->trace_pid, SIGKILL);
		T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_SUCCESS(ret, "kill SIGKILL");
	}
}
503 
/*
 * Return whether trace collection is supported in this environment:
 * disabled on BridgeOS at compile time and on virtual machines at runtime.
 * Temporarily silences verbose logging so the VM check does not spam
 * every trace call.
 */
static bool
sched_utils_tracing_supported(void)
{
#if TARGET_OS_BRIDGE
	/*
	 * Don't support the tracing on BridgeOS due to limited disk space
	 * and CLPC compatibility issues.
	 */
	return false;
#else /* !TARGET_OS_BRIDGE */
	disable_verbose_sched_utils();
	/* Virtual machines do not support trace */
	bool supported = (platform_is_virtual_machine() == false);
	reenable_verbose_sched_utils();
	return supported;
#endif /* !TARGET_OS_BRIDGE */
}
521 
522 trace_handle_t
begin_collect_trace(int argc,char * const argv[],char * filename)523 begin_collect_trace(int argc, char *const argv[], char *filename)
524 {
525 	return begin_collect_trace_fmt(argc, argv, filename);
526 }
/* True until the first trace registers the ATEND cleanup handler. */
static bool first_trace = true;

/* Helper binaries used to record, signal, and compress traces. */
static char *trace_bin = "/usr/local/bin/trace";
static char *notifyutil_bin = "/usr/bin/notifyutil";
#if TARGET_OS_OSX
static char *aa_bin = "/usr/bin/aa";
#else
static char *aa_bin = "/usr/local/bin/aa";
#endif

/* Darwin notification names used to rendezvous with the trace tool. */
static char *begin_notification = "��️_trace_begun...";
static char *end_notification = "��️_trace_ended...";
static char *trigger_end_notification = "��️_stopping_trace...";

static const int waiting_timeout_sec = 60 * 2; /* 2 minutes, allows trace post-processing to finish */
542 
/*
 * Begin recording a trace whose name is built from printf-style fmt/args.
 *
 * Requires root and a platform where tracing is supported; returns NULL
 * (without recording) when tracing is unsupported or disabled via
 * "--no-trace". Launches `trace record` plus two notifyutil processes that
 * block on the begin/end Darwin notifications, waits for the begin
 * notification, and returns an opaque handle for end_collect_trace() /
 * save_collected_trace() / discard_collected_trace().
 *
 * The name/filename allocations live for the life of the handle and are
 * intentionally never freed.
 */
trace_handle_t
begin_collect_trace_fmt(int argc, char *const argv[], char *fmt, ...)
{
	/* Check trace requirements */
	if (!sched_utils_tracing_supported() || !trace_requested(argc, argv)) {
		return NULL;
	}
	T_QUIET; T_ASSERT_EQ(geteuid(), 0, "��️ Tracing requires the test to be run as root user");

	int ret;
	struct trace_handle *handle = &handles[handle_ind++];
	T_QUIET; T_ASSERT_LE(handle_ind, MAX_TRACES, "Ran out of trace handles");

	/* Generate the trace filename from the formatted string and args */
	char *name = (char *)malloc(sizeof(char) * MAXPATHLEN);
	va_list args;
	va_start(args, fmt);
	vsnprintf(name, MAXPATHLEN, fmt, args);
	va_end(args);
	handle->short_name = name;
	/* Absolute path under the test tmpdir: <tmpdir>/<name>.atrc */
	char *full_filename = (char *)malloc(sizeof(char) * MAXPATHLEN);
	memset(full_filename, 0, MAXPATHLEN);
	snprintf(full_filename, MAXPATHLEN, "%s/%s.atrc", dt_tmpdir(), handle->short_name);
	handle->abs_filename = full_filename;
	/* Relative filename: <name>.atrc (used when compressing). */
	char *filename = (char *)malloc(sizeof(char) * MAXPATHLEN);
	memset(filename, 0, MAXPATHLEN);
	snprintf(filename, MAXPATHLEN, "%s.atrc", handle->short_name);
	handle->trace_filename = filename;

	/* If filename already exists, delete old trace */
	ret = remove(handle->abs_filename);
	T_QUIET; T_WITH_ERRNO; T_EXPECT_TRUE(ret == 0 || errno == ENOENT, "remove trace file");

	if (first_trace) {
		/* Run tracing cleanup a single time */
		register_atend_handler();
		T_ATEND(atend_trace_cleanup);
		first_trace = false;
	}

	/*
	 * Launch procs to monitor trace start/stop.
	 * NOTE(review): these argv arrays hardcode "/usr/bin/notifyutil"
	 * rather than using the notifyutil_bin variable above — confirm the
	 * two are meant to stay in sync.
	 */
	char *wait_on_start_args[] = {"/usr/bin/notifyutil", "-1", begin_notification, NULL};
	ret = dt_launch_tool(&handle->wait_on_start_pid, wait_on_start_args, false, NULL, NULL);
	T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ(ret, 0, "dt_launch_tool");
	char *wait_on_end_args[] = {"/usr/bin/notifyutil", "-1", end_notification, NULL};
	ret = dt_launch_tool(&handle->wait_on_end_pid, wait_on_end_args, false, NULL, NULL);
	T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ(ret, 0, "dt_launch_tool");

	/*
	 * Launch trace record.
	 * NOTE(review): the trailing "&" is passed to the tool as a literal
	 * argument — exec-style launches do not interpret shell syntax.
	 * Presumably harmless/ignored by `trace`, but verify.
	 */
	char *trace_args[] = {trace_bin, "record", handle->abs_filename, "--plan", "default", "--unsafe",
		              "--kdebug-filter-include", "C0x01", "--omit", "Logging", "--kdebug-buffer-size", "1gb",
		              "--notify-after-start", begin_notification, "--notify-after-end", end_notification,
		              "--end-on-notification", trigger_end_notification, "&", NULL};
	pid_t trace_pid = dt_launch_tool_pipe(trace_args, false, NULL,
	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
		T_LOG("��️ [trace] %s", data);
		return false;
	},
	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
		T_LOG("��️ [trace] Error msg: %s", data);
		return false;
	},
	    BUFFER_PATTERN_LINE, NULL);

	T_LOG("��️ Starting trace collection for \"%s\" trace[%u]", handle->trace_filename, trace_pid);

	/* Wait for tracing to start */
	int signal_num;
	ret = dt_waitpid(handle->wait_on_start_pid, NULL, &signal_num, waiting_timeout_sec);
	T_QUIET; T_EXPECT_TRUE(ret, "dt_waitpid for trace start signal_num %d", signal_num);

	handle->trace_pid = trace_pid;
	handle->status = STARTED;

	return (trace_handle_t)handle;
}
619 
620 void
end_collect_trace(trace_handle_t handle)621 end_collect_trace(trace_handle_t handle)
622 {
623 	if (!sched_utils_tracing_supported() || !tracing_enabled) {
624 		return;
625 	}
626 
627 	int ret;
628 	struct trace_handle *trace_state = (struct trace_handle *)handle;
629 	T_QUIET; T_EXPECT_EQ(trace_state->status, STARTED, "trace was started");
630 
631 	/* Notify trace to stop tracing */
632 	char *wait_on_start_args[] = {notifyutil_bin, "-p", trigger_end_notification, NULL};
633 	pid_t trigger_end_pid = 0;
634 	ret = dt_launch_tool(&trigger_end_pid, wait_on_start_args, false, NULL, NULL);
635 	T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ(ret, 0, "dt_launch_tool for notify end trace");
636 
637 	/* Wait for tracing to actually stop */
638 	T_LOG("��️ Now waiting on trace to finish up...");
639 	int signal_num;
640 	int exit_status;
641 	ret = dt_waitpid(trace_state->wait_on_end_pid, &exit_status, &signal_num, waiting_timeout_sec);
642 	T_QUIET; T_EXPECT_TRUE(ret, "dt_waitpid for trace stop, exit status %d signal_num %d", exit_status, signal_num);
643 
644 	trace_state->status = ENDED;
645 }
646 
647 void
save_collected_trace(trace_handle_t handle)648 save_collected_trace(trace_handle_t handle)
649 {
650 	if (!sched_utils_tracing_supported() || !tracing_enabled) {
651 		return;
652 	}
653 
654 	int ret;
655 	struct trace_handle *trace_state = (struct trace_handle *)handle;
656 	T_QUIET; T_EXPECT_EQ(trace_state->status, ENDED, "trace was ended");
657 
658 	/* Generate compressed filepath and mark for upload */
659 	char compressed_path[MAXPATHLEN];
660 	snprintf(compressed_path, MAXPATHLEN, "%s.aar", trace_state->short_name);
661 	ret = dt_resultfile(compressed_path, MAXPATHLEN);
662 	T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(ret, "dt_resultfile marking \"%s\" for collection", compressed_path);
663 	T_LOG("��️ \"%s\" marked for upload", compressed_path);
664 
665 	char *compress_args[] = {aa_bin, "archive", "-i", trace_state->trace_filename, "-d", (char *)dt_tmpdir(), "-o", compressed_path, NULL};
666 	pid_t aa_pid = dt_launch_tool_pipe(compress_args, false, NULL,
667 	    ^bool (__unused char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
668 		T_LOG("��️ [aa] %s", data);
669 		return false;
670 	},
671 	    ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
672 		T_LOG("��️ [aa] Error/debug msg: %s", data);
673 		return false;
674 	},
675 	    BUFFER_PATTERN_LINE, NULL);
676 	T_QUIET; T_EXPECT_TRUE(aa_pid, "��️ [aa] pid %d", aa_pid);
677 	int exit_status = 0;
678 	int signal_num = SIGPIPE;
679 	ret = dt_waitpid(aa_pid, &exit_status, &signal_num, 0);
680 	T_QUIET; T_WITH_ERRNO; T_EXPECT_TRUE(ret, "dt_waitpid for aa, exit status %d signal num %d", exit_status, signal_num);
681 
682 	/* Lax permissions in case a user wants to open the compressed file without sudo */
683 	ret = chmod(compressed_path, 0666);
684 	T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(ret, "chmod");
685 
686 	T_LOG("��️ Finished saving trace (%s), which is available compressed at \"%s\"",
687 	    trace_state->short_name, compressed_path);
688 
689 	trace_state->status = SAVED;
690 }
691 
692 void
discard_collected_trace(trace_handle_t handle)693 discard_collected_trace(trace_handle_t handle)
694 {
695 	if (!sched_utils_tracing_supported() || !tracing_enabled || trace_save_requested) {
696 		return;
697 	}
698 
699 	int ret;
700 	struct trace_handle *trace_state = (struct trace_handle *)handle;
701 	T_QUIET; T_EXPECT_TRUE(trace_state->status == ENDED || trace_state->status == SAVED,
702 	    "trace was ended or saved");
703 
704 	/* Delete trace file in order to reclaim disk space on the test device */
705 	ret = remove(trace_state->abs_filename);
706 	T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_SUCCESS(ret, "remove trace file");
707 
708 	if (trace_state->status == ENDED) {
709 		T_LOG("��️ Deleted recorded trace file at \"%s\"", trace_state->abs_filename);
710 	}
711 	trace_state->status = DISCARDED;
712 }
713