1 #include <mach/mach_time.h>
2 #include <mach/mach.h>
3 #include <machine/cpu_capabilities.h>
4 #include <os/atomic_private.h>
5 #include <signal.h>
6 #include <stdlib.h>
7 #include <sys/commpage.h>
8 #include <sys/kdebug.h>
9
10 #include <darwintest.h>
11 #include <darwintest_utils.h>
12 #include "sched_test_utils.h"
13
14 static bool verbosity_enabled = true;
15
16 void
disable_verbose_sched_utils(void)17 disable_verbose_sched_utils(void)
18 {
19 T_QUIET; T_ASSERT_TRUE(verbosity_enabled, "verbosity was enabled");
20 verbosity_enabled = false;
21 }
22
23 void
reenable_verbose_sched_utils(void)24 reenable_verbose_sched_utils(void)
25 {
26 T_QUIET; T_ASSERT_EQ(verbosity_enabled, false, "verbosity was disabled");
27 verbosity_enabled = true;
28 }
29
30 static mach_timebase_info_data_t timebase_info;
31
32 uint64_t
nanos_to_abs(uint64_t nanos)33 nanos_to_abs(uint64_t nanos)
34 {
35 mach_timebase_info_data_t timebase = timebase_info;
36
37 if (timebase.numer == 0 || timebase.denom == 0) {
38 kern_return_t kr;
39
40 kr = mach_timebase_info(&timebase_info);
41 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
42
43 timebase = timebase_info;
44 }
45 return nanos * timebase.denom / timebase.numer;
46 }
47
48 uint64_t
abs_to_nanos(uint64_t abs)49 abs_to_nanos(uint64_t abs)
50 {
51 mach_timebase_info_data_t timebase = timebase_info;
52
53 if (timebase.numer == 0 || timebase.denom == 0) {
54 kern_return_t kr;
55 kr = mach_timebase_info(&timebase_info);
56 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
57 timebase = timebase_info;
58 }
59 return abs * timebase.numer / timebase.denom;
60 }
61
62 static int num_perf_levels = 0;
63 unsigned int
platform_nperflevels(void)64 platform_nperflevels(void)
65 {
66 if (num_perf_levels == 0) {
67 int ret;
68 ret = sysctlbyname("hw.nperflevels", &num_perf_levels, &(size_t){ sizeof(num_perf_levels) }, NULL, 0);
69 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "hw.nperflevels");
70 }
71 return (unsigned int)num_perf_levels;
72 }
73
74 static char perflevel_names[64][16];
75 const char *
platform_perflevel_name(unsigned int perflevel)76 platform_perflevel_name(unsigned int perflevel)
77 {
78 if (perflevel_names[perflevel][0] == 0) {
79 int ret;
80 char sysctl_name[64] = { 0 };
81 snprintf(sysctl_name, sizeof(sysctl_name), "hw.perflevel%d.name", perflevel);
82 ret = sysctlbyname(sysctl_name, &perflevel_names[perflevel], &(size_t){ sizeof(perflevel_names[perflevel]) }, NULL, 0);
83 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "%s", sysctl_name);
84 }
85 return (const char *)perflevel_names[perflevel];
86 }
87
88 static unsigned int perflevel_ncpus[64] = {0};
89 unsigned int
platform_perflevel_ncpus(unsigned int perflevel)90 platform_perflevel_ncpus(unsigned int perflevel)
91 {
92 if (perflevel_ncpus[perflevel] == 0) {
93 int ret;
94 char sysctl_name[64] = { 0 };
95 snprintf(sysctl_name, sizeof(sysctl_name), "hw.perflevel%d.logicalcpu", perflevel);
96 ret = sysctlbyname(sysctl_name, &perflevel_ncpus[perflevel], &(size_t){ sizeof(perflevel_ncpus[perflevel]) }, NULL, 0);
97 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "%s", sysctl_name);
98 }
99 return perflevel_ncpus[perflevel];
100 }
101
102 static bool reported_is_amp = false;
103 bool
platform_is_amp(void)104 platform_is_amp(void)
105 {
106 bool is_amp = platform_nperflevels() > 1;
107 if (verbosity_enabled && !reported_is_amp) {
108 T_LOG("️ Platform has %d perflevels (%s)", platform_nperflevels(), is_amp ? "AMP" : "SMP");
109 reported_is_amp = true;
110 }
111 return is_amp;
112 }
113
114 bool
platform_is_virtual_machine(void)115 platform_is_virtual_machine(void)
116 {
117 int ret;
118 int vmm_present = 0;
119 ret = sysctlbyname("kern.hv_vmm_present", &vmm_present, &(size_t){ sizeof(vmm_present) }, NULL, 0);
120 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.hv_vmm_present");
121 if (vmm_present) {
122 if (verbosity_enabled) {
123 T_LOG("️ Platform is a virtual machine!");
124 }
125 return true;
126 }
127
128 return false;
129 }
130
131 static char sched_policy_name[64];
132 char *
platform_sched_policy(void)133 platform_sched_policy(void)
134 {
135 int ret;
136 ret = sysctlbyname("kern.sched", sched_policy_name, &(size_t){ sizeof(sched_policy_name) }, NULL, 0);
137 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.sched");
138 if (verbosity_enabled) {
139 T_LOG("️ Platform is running the \"%s\" scheduler policy", sched_policy_name);
140 }
141 return sched_policy_name;
142 }
143
144 static uint8_t num_clusters = 0;
145
146 unsigned int
platform_num_clusters(void)147 platform_num_clusters(void)
148 {
149 if (num_clusters == 0) {
150 num_clusters = COMM_PAGE_READ(uint8_t, CPU_CLUSTERS);
151 if (verbosity_enabled) {
152 T_LOG("️ Platform has %u CPU clusters", num_clusters);
153 }
154 }
155 return num_clusters;
156 }
157
158 char
bind_to_cluster_of_type(char type)159 bind_to_cluster_of_type(char type)
160 {
161 int ret;
162 char old_type;
163 ret = sysctlbyname("kern.sched_thread_bind_cluster_type",
164 &old_type, &(size_t){ sizeof(old_type) }, &type, sizeof(type));
165 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.sched_thread_bind_cluster_type");
166 return old_type;
167 }
168
169 int
bind_to_cluster_id(int cluster_id)170 bind_to_cluster_id(int cluster_id)
171 {
172 int ret;
173 int old_cluster = 0;
174 ret = sysctlbyname("kern.sched_thread_bind_cluster_id", &old_cluster,
175 &(size_t){ sizeof(old_cluster) }, &cluster_id, sizeof(cluster_id));
176 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.sched_thread_bind_cluster_id");
177 return old_cluster;
178 }
179
180 static volatile _Atomic int stop_spinning_flag = 0;
181
182 void
stop_spinning_threads(void)183 stop_spinning_threads(void)
184 {
185 os_atomic_store(&stop_spinning_flag, 1, release);
186 }
187
188 void
spin_for_duration(uint32_t seconds)189 spin_for_duration(uint32_t seconds)
190 {
191 uint64_t duration = nanos_to_abs((uint64_t)seconds * NSEC_PER_SEC);
192 uint64_t current_time = mach_absolute_time();
193 uint64_t timeout = duration + current_time;
194
195 uint64_t spin_count = 0;
196
197 while ((mach_absolute_time() < timeout) &&
198 (os_atomic_load(&stop_spinning_flag, acquire) == 0)) {
199 spin_count++;
200 }
201 }
202
203 pthread_attr_t *
create_pthread_attr(int priority,detach_state_t detach_state,qos_class_t qos_class,sched_policy_t sched_policy,size_t stack_size)204 create_pthread_attr(int priority,
205 detach_state_t detach_state, qos_class_t qos_class,
206 sched_policy_t sched_policy, size_t stack_size)
207 {
208 int ret;
209 pthread_attr_t *attr = (pthread_attr_t *)malloc(sizeof(pthread_attr_t));
210 ret = pthread_attr_init(attr);
211 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_init");
212
213 struct sched_param param = { .sched_priority = priority };
214 ret = pthread_attr_setschedparam(attr, ¶m);
215 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "pthread_attr_setschedparam");
216
217 if (detach_state == eDetached) {
218 ret = pthread_attr_setdetachstate(attr, PTHREAD_CREATE_DETACHED);
219 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_setdetachstate");
220 }
221
222 if (qos_class != QOS_CLASS_UNSPECIFIED) {
223 ret = pthread_attr_set_qos_class_np(attr, qos_class, 0);
224 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_set_qos_class_np");
225 }
226
227 if (sched_policy != eSchedDefault) {
228 int sched_policy_val = 0;
229 switch (sched_policy) {
230 case eSchedFIFO:
231 sched_policy_val = SCHED_FIFO;
232 break;
233 case eSchedRR:
234 sched_policy_val = SCHED_RR;
235 break;
236 case eSchedOther:
237 sched_policy_val = SCHED_OTHER;
238 break;
239 case eSchedDefault:
240 T_QUIET; T_ASSERT_FAIL("unexpected sched_policy");
241 break;
242 }
243 ret = pthread_attr_setschedpolicy(attr, (int)sched_policy);
244 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_attr_setschedpolicy");
245 }
246
247 if (stack_size != DEFAULT_STACK_SIZE) {
248 ret = pthread_attr_setstacksize(attr, stack_size);
249 T_QUIET; T_ASSERT_POSIX_ZERO(ret, "pthread_attr_setstacksize");
250 }
251 return attr;
252 }
253
254 void
create_thread(pthread_t * thread_handle,pthread_attr_t * attr,void * (* func)(void *),void * arg)255 create_thread(pthread_t *thread_handle, pthread_attr_t *attr,
256 void *(*func)(void *), void *arg)
257 {
258 int ret;
259 ret = pthread_create(thread_handle, attr, func, arg);
260 T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "pthread_create");
261 }
262
263 void
create_thread_pri(pthread_t * thread_handle,int priority,void * (* func)(void *),void * arg)264 create_thread_pri(pthread_t *thread_handle, int priority,
265 void *(*func)(void *), void *arg)
266 {
267 pthread_attr_t *attr = create_pthread_attr(priority, eJoinable,
268 QOS_CLASS_UNSPECIFIED, eSchedDefault, DEFAULT_STACK_SIZE);
269 create_thread(thread_handle, attr, func, arg);
270 }
271
272 pthread_t *
create_threads(int num_threads,int priority,detach_state_t detach_state,qos_class_t qos_class,sched_policy_t sched_policy,size_t stack_size,void * (* func)(void *),void * arg_array[])273 create_threads(int num_threads, int priority,
274 detach_state_t detach_state, qos_class_t qos_class,
275 sched_policy_t sched_policy, size_t stack_size,
276 void *(*func)(void *), void *arg_array[])
277 {
278 pthread_attr_t *attr = create_pthread_attr(priority, detach_state,
279 qos_class, sched_policy, stack_size);
280
281 pthread_t *thread_handles = (pthread_t *)malloc(sizeof(pthread_t) * (size_t)num_threads);
282 for (int i = 0; i < num_threads; i++) {
283 create_thread(&thread_handles[i], attr, func, arg_array == NULL ? NULL : arg_array[i]);
284 }
285 return thread_handles;
286 }
287
288 const char *
platform_train_descriptor(void)289 platform_train_descriptor(void)
290 {
291 #if TARGET_OS_XR
292 return "visionOS";
293 #elif TARGET_OS_TV
294 return "tvOS";
295 #elif TARGET_OS_WATCH
296 return "watchOS";
297 #elif TARGET_OS_BRIDGE
298 return "bridgeOS";
299 #elif TARGET_OS_OSX
300 return "macOS";
301 #elif TARGET_OS_IOS
302 return "iOS";
303 #else
304 return "unknown";
305 #endif
306 }
307
308 static const double default_idle_threshold = 0.9;
309 static const int default_timeout_sec = 3;
310
311 bool
wait_for_quiescence_default(int argc,char * const argv[])312 wait_for_quiescence_default(int argc, char *const argv[])
313 {
314 return wait_for_quiescence(argc, argv, default_idle_threshold, default_timeout_sec);
315 }
316
317 /* Logic taken from __wait_for_quiescence in qos_tests.c */
318 bool
wait_for_quiescence(int argc,char * const argv[],double idle_threshold,int timeout_seconds)319 wait_for_quiescence(int argc, char *const argv[], double idle_threshold, int timeout_seconds)
320 {
321 kern_return_t kr;
322
323 for (int i = 0; i < argc; i++) {
324 if (strcmp(argv[i], "--no-quiesce") == 0) {
325 T_LOG(" Skipping quiescence due to \"--no-quiesce\"");
326 return true;
327 }
328 }
329
330 bool quiesced = false;
331 double idle_ratio = 0.0;
332 if (verbosity_enabled) {
333 T_LOG(" Waiting up to %d second(s) for the system to quiesce above %.2f%% idle...",
334 timeout_seconds, idle_threshold * 100.0);
335 }
336
337 host_cpu_load_info_data_t host_load;
338 mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT;
339 int waited_seconds = 0;
340 int ind = 0;
341 double user_ticks[2];
342 double system_ticks[2];
343 double idle_ticks[2];
344
345 while (waited_seconds < timeout_seconds) {
346 kr = host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, (host_info_t)&host_load, &count);
347 T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "host_statistics HOST_CPU_LOAD_INFO");
348
349 user_ticks[ind] = (double)host_load.cpu_ticks[CPU_STATE_USER];
350 system_ticks[ind] = (double)host_load.cpu_ticks[CPU_STATE_SYSTEM];
351 idle_ticks[ind] = (double)host_load.cpu_ticks[CPU_STATE_IDLE];
352
353 if (waited_seconds >= 1) {
354 int old_ind = (ind + 1) % 2;
355 double idle_delta = idle_ticks[ind] - idle_ticks[old_ind];
356 double total_delta = idle_delta + (user_ticks[ind] - user_ticks[old_ind]) + (system_ticks[ind] - system_ticks[old_ind]);
357 if (total_delta > 0.0) {
358 idle_ratio = idle_delta / total_delta;
359 if (idle_ratio >= idle_threshold) {
360 quiesced = true;
361 break;
362 }
363 if (verbosity_enabled) {
364 T_LOG(" Not yet quiesced (%.2f%% idle)", idle_ratio * 100.0);
365 }
366 }
367 }
368
369 sleep(1);
370 ind = (ind + 1) % 2;
371 waited_seconds++;
372 }
373
374 if (verbosity_enabled) {
375 if (quiesced) {
376 T_LOG(" System quiesced to %.2f%% idle within %d second(s)", idle_ratio * 100.0, waited_seconds);
377 } else {
378 T_LOG(" Failed to quiesce within %.2f%% idle after %d second(s)", idle_threshold * 100.0, waited_seconds);
379 }
380 }
381 return quiesced;
382 }
383
384 static bool atend_handler_registered = false;
385
386 static void
sched_utils_sigint_handler(int sig)387 sched_utils_sigint_handler(int sig)
388 {
389 T_QUIET; T_EXPECT_EQ(sig, SIGINT, "unexpected signal received");
390 T_ASSERT_TRUE(false, "SIGINT received. Failing test to induce ATEND handlers for cleanup...");
391 }
392
393 static void
register_atend_handler(void)394 register_atend_handler(void)
395 {
396 if (!atend_handler_registered) {
397 signal(SIGINT, sched_utils_sigint_handler);
398 atend_handler_registered = true;
399 }
400 }
401
402 static char *clpcctrl_bin = "/usr/local/bin/clpcctrl";
403 static bool setup_clpcctrl_atend = false;
404
405 static void
clpcctrl_cleanup(void)406 clpcctrl_cleanup(void)
407 {
408 T_LOG("️ Restoring CLPC state...");
409
410 char *recommend_all_cores_args[] = {"-C", "all", NULL};
411 execute_clpcctrl(recommend_all_cores_args, false);
412
413 char *restore_dynamic_control_args[] = {"-d", NULL};
414 execute_clpcctrl(restore_dynamic_control_args, false);
415 }
416
417 uint64_t
execute_clpcctrl(char * clpcctrl_args[],bool read_value)418 execute_clpcctrl(char *clpcctrl_args[], bool read_value)
419 {
420 int ret;
421
422 /* Avoid recursion during teardown */
423 if (!setup_clpcctrl_atend) {
424 register_atend_handler();
425 T_ATEND(clpcctrl_cleanup);
426 setup_clpcctrl_atend = true;
427 }
428
429 /* Populate arg array with clpcctrl location */
430 char *full_clpcctrl_args[100];
431 full_clpcctrl_args[0] = clpcctrl_bin;
432 int arg_ind = 0;
433 while (clpcctrl_args[arg_ind] != NULL) {
434 T_QUIET; T_ASSERT_LT(arg_ind + 1, 100, "too many clpcctrl args");
435 full_clpcctrl_args[arg_ind + 1] = clpcctrl_args[arg_ind];
436 arg_ind++;
437 }
438 full_clpcctrl_args[arg_ind + 1] = NULL;
439
440 __block uint64_t value = 0;
441 pid_t pid = dt_launch_tool_pipe(full_clpcctrl_args, false, NULL,
442 ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
443 T_LOG("️ [clpcctrl] %s", data);
444 if (read_value) {
445 char *token = strtok(data, " ");
446 token = strtok(NULL, " ");
447 value = strtoull(token, NULL, 10);
448 }
449 return false;
450 },
451 ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
452 T_LOG("️ [clpcctrl] Error msg: %s", data);
453 return false;
454 },
455 BUFFER_PATTERN_LINE, NULL);
456
457 ret = dt_waitpid(pid, NULL, NULL, 0);
458 T_QUIET; T_EXPECT_TRUE(ret, "dt_waitpid for clpcctrl");
459
460 return value;
461 }
462
463 bool
check_recommended_core_mask(uint64_t * core_mask)464 check_recommended_core_mask(uint64_t *core_mask)
465 {
466 int ret;
467 uint64_t recommended_cores = 0;
468 size_t recommended_cores_size = sizeof(recommended_cores);
469 ret = sysctlbyname("kern.sched_recommended_cores", &recommended_cores, &recommended_cores_size, NULL, 0);
470 T_QUIET; T_EXPECT_POSIX_SUCCESS(ret, "sysctlbyname(kern.sched_recommended_cores)");
471
472 if (verbosity_enabled) {
473 uint64_t expected_recommended_mask = ~0ULL >> (64 - dt_ncpu());
474 T_LOG(" kern.sched_recommended_cores: %016llx, expecting %016llx if all are recommended",
475 recommended_cores, expected_recommended_mask);
476 }
477
478 if (core_mask != NULL) {
479 *core_mask = recommended_cores;
480 }
481 return __builtin_popcountll(recommended_cores) == dt_ncpu();
482 }
483
484 /* Trace Management */
485
486 enum trace_status {
487 STARTED = 1,
488 ENDED = 2,
489 SAVED = 3,
490 DISCARDED = 4,
491 };
492
493 struct trace_handle {
494 char *short_name;
495 char *trace_filename;
496 char *abs_filename;
497 pid_t trace_pid;
498 enum trace_status status;
499 pid_t wait_on_start_pid;
500 pid_t wait_on_end_pid;
501 };
502
503 #define MAX_TRACES 1024
504 static struct trace_handle handles[MAX_TRACES];
505 static int handle_ind = 0;
506
507 /* Default setting is to record a trace but only save it if the test failed */
508 static bool tracing_enabled = true;
509 static bool trace_save_requested = false;
510
511 static bool
trace_requested(int argc,char * const argv[])512 trace_requested(int argc, char *const argv[])
513 {
514 for (int i = 0; i < argc; i++) {
515 if (strcmp(argv[i], "--save-trace") == 0) {
516 trace_save_requested = true;
517 }
518 if (strcmp(argv[i], "--no-trace") == 0) {
519 tracing_enabled = false;
520 }
521 }
522 T_QUIET; T_ASSERT_TRUE(tracing_enabled || !trace_save_requested, "Can't use both --save-trace and --no-trace");
523 return tracing_enabled;
524 }
525
526 static void
atend_trace_cleanup(void)527 atend_trace_cleanup(void)
528 {
529 int ret;
530 for (int i = 0; i < handle_ind; i++) {
531 struct trace_handle *handle = &handles[i];
532 if (handle->status == STARTED) {
533 end_collect_trace(handle);
534 }
535 T_QUIET; T_EXPECT_EQ(handle->status, ENDED, "ended trace");
536 if (handle->status == ENDED && ((T_FAILCOUNT > 0) || trace_save_requested)) {
537 /* Save the trace as an artifact for debugging the failure(s) */
538 save_collected_trace(handle);
539 }
540 /* Make sure to free up the tmp dir space we used */
541 discard_collected_trace(handle);
542 /* Kill trace just in case */
543 ret = kill(handle->trace_pid, SIGKILL);
544 T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_SUCCESS(ret, "kill SIGKILL");
545 }
546 }
547
548 static bool
sched_utils_tracing_supported(void)549 sched_utils_tracing_supported(void)
550 {
551 #if TARGET_OS_BRIDGE
552 /*
553 * Don't support the tracing on BridgeOS due to limited disk space
554 * and CLPC compatibility issues.
555 */
556 return false;
557 #else /* !TARGET_OS_BRIDGE */
558 disable_verbose_sched_utils();
559 /* Virtual machines do not support trace */
560 bool supported = (platform_is_virtual_machine() == false);
561 reenable_verbose_sched_utils();
562 return supported;
563 #endif /* !TARGET_OS_BRIDGE */
564 }
565
566 trace_handle_t
begin_collect_trace(int argc,char * const argv[],char * filename)567 begin_collect_trace(int argc, char *const argv[], char *filename)
568 {
569 return begin_collect_trace_fmt(COLLECT_TRACE_FLAG_NONE, argc, argv, filename);
570 }
571 static bool first_trace = true;
572
573 static char *trace_bin = "/usr/local/bin/trace";
574 static char *notifyutil_bin = "/usr/bin/notifyutil";
575 #if TARGET_OS_OSX
576 static char *aa_bin = "/usr/bin/aa";
577 #else
578 static char *aa_bin = "/usr/local/bin/aa";
579 #endif
580
581 static char *begin_notification = "️_trace_begun...";
582 static char *end_notification = "️_trace_ended...";
583 static char *trigger_end_notification = "️_stopping_trace...";
584
585 #if !(TARGET_OS_WATCH || TARGET_OS_TV)
586 static const int waiting_timeout_sec = 60 * 2; /* 2 minutes, allows trace post-processing to finish */
587 #else /* !(TARGET_OS_WATCH || TARGET_OS_TV) */
588 static const int waiting_timeout_sec = 60 * 3 + 30; /* 3 minutes and 30 seconds for slower targets */
589 #endif /* !(TARGET_OS_WATCH || TARGET_OS_TV) */
590
591 trace_handle_t
begin_collect_trace_fmt(collect_trace_flags_t flags,int argc,char * const argv[],char * fmt,...)592 begin_collect_trace_fmt(collect_trace_flags_t flags, int argc, char *const argv[], char *fmt, ...)
593 {
594 /* Check trace requirements */
595 if (!sched_utils_tracing_supported() || !trace_requested(argc, argv)) {
596 return NULL;
597 }
598 T_QUIET; T_ASSERT_EQ(geteuid(), 0, "️ Tracing requires the test to be run as root user");
599
600 int ret;
601 struct trace_handle *handle = &handles[handle_ind++];
602 T_QUIET; T_ASSERT_LE(handle_ind, MAX_TRACES, "Ran out of trace handles");
603
604 /* Generate the trace filename from the formatted string and args */
605 char *name = (char *)malloc(sizeof(char) * MAXPATHLEN);
606 va_list args;
607 va_start(args, fmt);
608 vsnprintf(name, MAXPATHLEN, fmt, args);
609 va_end(args);
610 handle->short_name = name;
611 char *full_filename = (char *)malloc(sizeof(char) * MAXPATHLEN);
612 memset(full_filename, 0, MAXPATHLEN);
613 snprintf(full_filename, MAXPATHLEN, "%s/%s.atrc", dt_tmpdir(), handle->short_name);
614 handle->abs_filename = full_filename;
615 char *filename = (char *)malloc(sizeof(char) * MAXPATHLEN);
616 memset(filename, 0, MAXPATHLEN);
617 snprintf(filename, MAXPATHLEN, "%s.atrc", handle->short_name);
618 handle->trace_filename = filename;
619
620 /* If filename already exists, delete old trace */
621 ret = remove(handle->abs_filename);
622 T_QUIET; T_WITH_ERRNO; T_EXPECT_TRUE(ret == 0 || errno == ENOENT, "remove trace file");
623
624 if (first_trace) {
625 /* Run tracing cleanup a single time */
626 register_atend_handler();
627 T_ATEND(atend_trace_cleanup);
628 first_trace = false;
629 }
630
631 /* Launch procs to monitor trace start/stop */
632 char *wait_on_start_args[] = {"/usr/bin/notifyutil", "-1", begin_notification, NULL};
633 ret = dt_launch_tool(&handle->wait_on_start_pid, wait_on_start_args, false, NULL, NULL);
634 T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ(ret, 0, "dt_launch_tool");
635 char *wait_on_end_args[] = {"/usr/bin/notifyutil", "-1", end_notification, NULL};
636 ret = dt_launch_tool(&handle->wait_on_end_pid, wait_on_end_args, false, NULL, NULL);
637 T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ(ret, 0, "dt_launch_tool");
638
639 /* Launch trace record */
640 char *trace_args_base[18] = {trace_bin, "record", handle->abs_filename, "--plan", "default", "--unsafe",
641 "--kdebug-filter-include", "C0x01",
642 "--omit", "Logging", "--kdebug-buffer-size", "1gb",
643 "--notify-after-start", begin_notification, "--notify-after-end", end_notification,
644 "--end-on-notification", trigger_end_notification};
645 const unsigned trace_args_cap = 32; /* INCREASE THIS if there are too many trace args */
646 char* trace_args[trace_args_cap];
647 unsigned trace_args_len = 0;
648 for (unsigned i = 0; i < sizeof(trace_args_base) / sizeof(char *); ++i) {
649 trace_args[trace_args_len++] = trace_args_base[i];
650 T_QUIET; T_ASSERT_LT(trace_args_len, trace_args_cap, "too many trace args");
651 }
652 if (flags & COLLECT_TRACE_FLAG_DISABLE_SYSCALLS) {
653 trace_args[trace_args_len++] = "--omit=syscalls,syscall-sampling";
654 T_QUIET; T_ASSERT_LT(trace_args_len, trace_args_cap, "too many trace args");
655 trace_args[trace_args_len++] = "--kdebug-filter-exclude=S0x0103,S0x040c";
656 T_QUIET; T_ASSERT_LT(trace_args_len, trace_args_cap, "too many trace args");
657 }
658 if (flags & COLLECT_TRACE_FLAG_DISABLE_CLUTCH) {
659 trace_args[trace_args_len++] = "--kdebug-filter-exclude=S0x01A9";
660 T_QUIET; T_ASSERT_LT(trace_args_len, trace_args_cap, "too many trace args");
661 }
662 trace_args[trace_args_len++] = NULL;
663 T_QUIET; T_ASSERT_LT(trace_args_len, trace_args_cap, "too many trace args");
664
665 pid_t trace_pid = dt_launch_tool_pipe(trace_args, false, NULL,
666 ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
667 T_LOG("️ [trace] %s", data);
668 return false;
669 },
670 ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
671 T_LOG("️ [trace] Error msg: %s", data);
672 return false;
673 },
674 BUFFER_PATTERN_LINE, NULL);
675
676 T_LOG("️ Starting trace collection for \"%s\" trace[%u]", handle->trace_filename, trace_pid);
677
678 /* Wait for tracing to start */
679 int signal_num = 0;
680 ret = dt_waitpid(handle->wait_on_start_pid, NULL, &signal_num, waiting_timeout_sec);
681 T_QUIET; T_EXPECT_TRUE(ret, "dt_waitpid for trace start signal_num %d", signal_num);
682
683 handle->trace_pid = trace_pid;
684 handle->status = STARTED;
685
686 return (trace_handle_t)handle;
687 }
688
689 void
end_collect_trace(trace_handle_t handle)690 end_collect_trace(trace_handle_t handle)
691 {
692 if (!sched_utils_tracing_supported() || !tracing_enabled) {
693 return;
694 }
695
696 int ret;
697 struct trace_handle *trace_state = (struct trace_handle *)handle;
698 T_QUIET; T_EXPECT_EQ(trace_state->status, STARTED, "trace was started");
699
700 /* Notify trace to stop tracing */
701 char *wait_on_start_args[] = {notifyutil_bin, "-p", trigger_end_notification, NULL};
702 pid_t trigger_end_pid = 0;
703 ret = dt_launch_tool(&trigger_end_pid, wait_on_start_args, false, NULL, NULL);
704 T_QUIET; T_WITH_ERRNO; T_EXPECT_EQ(ret, 0, "dt_launch_tool for notify end trace");
705
706 /* Wait for tracing to actually stop */
707 T_LOG("️ Now waiting on trace to finish up...");
708 int signal_num = 0;
709 int exit_status = 0;
710 ret = dt_waitpid(trace_state->wait_on_end_pid, &exit_status, &signal_num, waiting_timeout_sec);
711 T_QUIET; T_EXPECT_TRUE(ret, "dt_waitpid for trace stop, exit status %d signal_num %d", exit_status, signal_num);
712
713 trace_state->status = ENDED;
714 }
715
716 void
save_collected_trace(trace_handle_t handle)717 save_collected_trace(trace_handle_t handle)
718 {
719 if (!sched_utils_tracing_supported() || !tracing_enabled) {
720 return;
721 }
722
723 int ret;
724 struct trace_handle *trace_state = (struct trace_handle *)handle;
725 T_QUIET; T_EXPECT_EQ(trace_state->status, ENDED, "trace was ended");
726
727 /* Generate compressed filepath and mark for upload */
728 char compressed_path[MAXPATHLEN];
729 snprintf(compressed_path, MAXPATHLEN, "%s.aar", trace_state->short_name);
730 ret = dt_resultfile(compressed_path, MAXPATHLEN);
731 T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(ret, "dt_resultfile marking \"%s\" for collection", compressed_path);
732 T_LOG("️ \"%s\" marked for upload", compressed_path);
733
734 char *compress_args[] = {aa_bin, "archive", "-i", trace_state->trace_filename, "-d", (char *)dt_tmpdir(), "-o", compressed_path, NULL};
735 pid_t aa_pid = dt_launch_tool_pipe(compress_args, false, NULL,
736 ^bool (__unused char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
737 T_LOG("️ [aa] %s", data);
738 return false;
739 },
740 ^bool (char *data, __unused size_t data_size, __unused dt_pipe_data_handler_context_t *context) {
741 T_LOG("️ [aa] Error/debug msg: %s", data);
742 return false;
743 },
744 BUFFER_PATTERN_LINE, NULL);
745 T_QUIET; T_EXPECT_TRUE(aa_pid, "️ [aa] pid %d", aa_pid);
746 int exit_status = 0;
747 int signal_num = SIGPIPE;
748 ret = dt_waitpid(aa_pid, &exit_status, &signal_num, 0);
749 T_QUIET; T_WITH_ERRNO; T_EXPECT_TRUE(ret, "dt_waitpid for aa, exit status %d signal num %d", exit_status, signal_num);
750
751 /* Lax permissions in case a user wants to open the compressed file without sudo */
752 ret = chmod(compressed_path, 0666);
753 T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(ret, "chmod");
754
755 T_LOG("️ Finished saving trace (%s), which is available compressed at \"%s\"",
756 trace_state->short_name, compressed_path);
757
758 trace_state->status = SAVED;
759 }
760
761 void
discard_collected_trace(trace_handle_t handle)762 discard_collected_trace(trace_handle_t handle)
763 {
764 if (!sched_utils_tracing_supported() || !tracing_enabled || trace_save_requested) {
765 return;
766 }
767
768 int ret;
769 struct trace_handle *trace_state = (struct trace_handle *)handle;
770 T_QUIET; T_EXPECT_TRUE(trace_state->status == ENDED || trace_state->status == SAVED,
771 "trace was ended or saved");
772
773 /* Delete trace file in order to reclaim disk space on the test device */
774 ret = remove(trace_state->abs_filename);
775 T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_SUCCESS(ret, "remove trace file");
776
777 if (trace_state->status == ENDED) {
778 T_LOG("️ Deleted recorded trace file at \"%s\"", trace_state->abs_filename);
779 }
780 trace_state->status = DISCARDED;
781 }
782
783 void
sched_kdebug_test_fail(uint64_t arg0,uint64_t arg1,uint64_t arg2,uint64_t arg3)784 sched_kdebug_test_fail(uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3)
785 {
786 kdebug_trace(ARIADNEDBG_CODE(0, 0), arg0, arg1, arg2, arg3);
787 }
788