xref: /xnu-8020.140.41/tools/tests/zero-to-n/zero-to-n.c (revision 27b03b360a988dfd3dfdf34262bb0042026747cc)
1 /*
2  * Copyright (c) 2009 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 #include <unistd.h>
29 #include <stdio.h>
30 #include <math.h>
31 #include <sys/kdebug.h>
32 #include <stdlib.h>
33 #include <pthread.h>
34 #include <errno.h>
35 #include <err.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <sysexits.h>
39 #include <sys/sysctl.h>
40 #include <getopt.h>
41 #include <libproc.h>
42 
43 #include <spawn.h>
44 #include <spawn_private.h>
45 #include <sys/spawn_internal.h>
46 #include <mach-o/dyld.h>
47 
48 #include <mach/mach_time.h>
49 #include <mach/mach.h>
50 #include <mach/task.h>
51 #include <mach/semaphore.h>
52 
53 #include <pthread/qos_private.h>
54 
55 #include <sys/resource.h>
56 
57 #include <stdatomic.h>
58 
59 #include <os/tsd.h>
60 #include <os/lock.h>
61 #include <TargetConditionals.h>
62 
/* Which wakeup pattern the leader thread uses each iteration; see parse_wakeup_pattern() */
typedef enum wake_type { WAKE_BROADCAST_ONESEM, WAKE_BROADCAST_PERTHREAD, WAKE_CHAIN, WAKE_HOP } wake_type_t;
/* Scheduling policy the worker threads run under; see parse_thread_policy() */
typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY_TIMESHARE_NO_SMT, MY_POLICY_FIXEDPRI } my_policy_type_t;

/* Print a [FAIL] diagnostic and abort when a Mach/errno-style call returns nonzero */
#define mach_assert_zero(error)        do { if ((error) != 0) { fprintf(stderr, "[FAIL] error %d (%s) ", (error), mach_error_string(error)); assert(error == 0); } } while (0)
#define mach_assert_zero_t(tid, error) do { if ((error) != 0) { fprintf(stderr, "[FAIL] Thread %d error %d (%s) ", (tid), (error), mach_error_string(error)); assert(error == 0); } } while (0)
#define assert_zero_t(tid, error)      do { if ((error) != 0) { fprintf(stderr, "[FAIL] Thread %d error %d ", (tid), (error)); assert(error == 0); } } while (0)

/* Realtime policy windows and latency thresholds, all in nanoseconds */
#define CONSTRAINT_NANOS        (20000000ll)    /* 20 ms */
#define COMPUTATION_NANOS       (10000000ll)    /* 10 ms */
#define LL_CONSTRAINT_NANOS     ( 2000000ll)    /*  2 ms */
#define LL_COMPUTATION_NANOS    ( 1000000ll)    /*  1 ms */
#define RT_CHURN_COMP_NANOS     ( 1000000ll)    /*  1 ms */
#define TRACEWORTHY_NANOS       (10000000ll)    /* 10 ms */
#define TRACEWORTHY_NANOS_TEST  ( 1000000ll)    /*  1 ms */
#define TRACEWORTHY_NANOS_LL    (  500000ll)    /*500 us */

#if DEBUG
#define debug_log(args ...) printf(args)
#else
#define debug_log(args ...) do { } while(0)
#endif

/* Declarations */
static void*                    worker_thread(void *arg);
static void                     usage();
static int                      thread_setup(uint32_t my_id);
static my_policy_type_t         parse_thread_policy(const char *str);
static void                     selfexec_with_apptype(int argc, char *argv[]);
static void                     parse_args(int argc, char *argv[]);

/* Cacheline-aligned so these hot atomics don't false-share with each other */
static __attribute__((aligned(128))) _Atomic uint32_t   g_done_threads;
static __attribute__((aligned(128))) _Atomic boolean_t  g_churn_stop = FALSE;
static __attribute__((aligned(128))) _Atomic uint64_t   g_churn_stopped_at = 0;

/* Global variables (general) */
static uint32_t                 g_maxcpus;       /* hw.ncpu */
static uint32_t                 g_numcpus;       /* hw.perflevel0.logicalcpu (falls back to hw.ncpu) */
static uint32_t                 g_nphysicalcpu;  /* hw.perflevel0.physicalcpu (falls back to hw.physicalcpu) */
static uint32_t                 g_nlogicalcpu;   /* hw.perflevel0.logicalcpu (falls back to hw.logicalcpu) */
static uint32_t                 g_numthreads;
static wake_type_t              g_waketype;
static policy_t                 g_policy;
static uint32_t                 g_iterations;
static struct mach_timebase_info g_mti;          /* cached timebase for abs<->nanos conversion */
static semaphore_t              g_main_sem;
static uint64_t                *g_thread_endtimes_abs;  /* per-thread wake timestamp for the current iteration */
static boolean_t                g_verbose       = FALSE;
static boolean_t                g_do_affinity   = FALSE;
static uint64_t                 g_starttime_abs;
static uint32_t                 g_iteration_sleeptime_us = 0;
static uint32_t                 g_priority = 0;
static uint32_t                 g_churn_pri = 0;
static uint32_t                 g_churn_count = 0;
static boolean_t                g_churn_random = FALSE; /* churn threads randomly sleep and wake */
static uint32_t                 g_rt_churn_count = 0;
static uint32_t                 g_traceworthy_count = 0;

/*
 * If the number of threads on the command line is 0, meaning ncpus,
 * this signed number is added to the number of threads, making it
 * possible to specify ncpus-3 threads, or ncpus+1 etc.
 */
static int32_t                  g_extra_thread_count = 0;

static pthread_t*               g_churn_threads = NULL;
static pthread_t*               g_rt_churn_threads = NULL;

/* should we skip test if run on non-intel */
static boolean_t                g_run_on_intel_only = FALSE;

/* Threshold for dropping a 'bad run' tracepoint */
static uint64_t                 g_traceworthy_latency_ns = TRACEWORTHY_NANOS;

/* Have we re-execed to set apptype? */
static boolean_t                g_seen_apptype = FALSE;

/* usleep in between iterations */
static boolean_t                g_do_sleep      = TRUE;

/* Every thread spins until all threads have checked in */
static boolean_t                g_do_all_spin = FALSE;

/* Every thread backgrounds temporarily before parking */
static boolean_t                g_drop_priority = FALSE;

/* Use low-latency (sub 4ms deadline) realtime threads */
static boolean_t                g_rt_ll = FALSE;

/* Test whether realtime threads are scheduled on the separate CPUs */
static boolean_t                g_test_rt = FALSE;

static boolean_t                g_rt_churn = FALSE;

/* On SMT machines, test whether realtime threads are scheduled on the correct CPUs */
static boolean_t                g_test_rt_smt = FALSE;

/* Test whether realtime threads are successfully avoiding CPU 0 on Intel */
static boolean_t                g_test_rt_avoid0 = FALSE;

/* Fail the test if any iteration fails */
static boolean_t                g_test_strict_fail = FALSE;

/* Print a histogram showing how many threads ran on each CPU */
static boolean_t                g_histogram = FALSE;

/* One randomly chosen thread holds up the train for a certain duration. */
static boolean_t                g_do_one_long_spin = FALSE;
static uint32_t                 g_one_long_spin_id = 0;
static uint64_t                 g_one_long_spin_length_abs = 0;
static uint64_t                 g_one_long_spin_length_ns = 0;

/* Each thread spins for a certain duration after waking up before blocking again. */
static boolean_t                g_do_each_spin = FALSE;
static uint64_t                 g_each_spin_duration_abs = 0;
static uint64_t                 g_each_spin_duration_ns = 0;

/* Global variables (broadcast) */
static semaphore_t              g_broadcastsem;
static semaphore_t              g_leadersem;
static semaphore_t              g_readysem;
static semaphore_t              g_donesem;
static semaphore_t              g_rt_churn_sem;
static semaphore_t              g_rt_churn_start_sem;

/* Global variables (chain) */
static semaphore_t             *g_semarr;

/* Per-CPU record: 'current' marks a worker ran there this iteration, 'accum' totals all iterations */
typedef struct {
	__attribute__((aligned(128))) uint32_t current;
	uint32_t accum;
} histogram_t;

static histogram_t             *g_cpu_histogram;
static _Atomic uint64_t        *g_cpu_map;   /* one bitmap per iteration, one bit per CPU used */
197 
198 static uint64_t
abs_to_nanos(uint64_t abstime)199 abs_to_nanos(uint64_t abstime)
200 {
201 	return (uint64_t)(abstime * (((double)g_mti.numer) / ((double)g_mti.denom)));
202 }
203 
204 static uint64_t
nanos_to_abs(uint64_t ns)205 nanos_to_abs(uint64_t ns)
206 {
207 	return (uint64_t)(ns * (((double)g_mti.denom) / ((double)g_mti.numer)));
208 }
209 
/*
 * Spin-wait hint: issue the architecture's pause/yield instruction so a
 * busy-waiting hardware thread yields pipeline resources (and, on SMT
 * parts, favors its sibling) instead of hammering the core.
 */
inline static void
yield(void)
{
#if defined(__arm__) || defined(__arm64__)
	asm volatile ("yield");
#elif defined(__x86_64__) || defined(__i386__)
	asm volatile ("pause");
#else
#error Unrecognized architecture
#endif
}
221 
222 static void *
churn_thread(__unused void * arg)223 churn_thread(__unused void *arg)
224 {
225 	uint64_t spin_count = 0;
226 
227 	/*
228 	 * As a safety measure to avoid wedging, we will bail on the spin if
229 	 * it's been more than 1s after the most recent run start
230 	 */
231 
232 	uint64_t sleep_us = 1000;
233 	uint64_t ctime = mach_absolute_time();
234 	uint64_t sleep_at_time = ctime + nanos_to_abs(arc4random_uniform(sleep_us * NSEC_PER_USEC) + 1);
235 	while ((g_churn_stop == FALSE) && (ctime < (g_starttime_abs + NSEC_PER_SEC))) {
236 		spin_count++;
237 		yield();
238 		ctime = mach_absolute_time();
239 		if (g_churn_random && (ctime > sleep_at_time)) {
240 			usleep(arc4random_uniform(sleep_us) + 1);
241 			ctime = mach_absolute_time();
242 			sleep_at_time = ctime + nanos_to_abs(arc4random_uniform(sleep_us * NSEC_PER_USEC) + 1);
243 		}
244 	}
245 
246 	/* This is totally racy, but only here to detect if anyone stops early */
247 	atomic_fetch_add_explicit(&g_churn_stopped_at, spin_count, memory_order_relaxed);
248 
249 	return NULL;
250 }
251 
252 static void
create_churn_threads()253 create_churn_threads()
254 {
255 	if (g_churn_count == 0) {
256 		g_churn_count = g_test_rt_smt ? g_numcpus : g_numcpus - 1;
257 	}
258 
259 	errno_t err;
260 
261 	struct sched_param param = { .sched_priority = (int)g_churn_pri };
262 	pthread_attr_t attr;
263 
264 	/* Array for churn threads */
265 	g_churn_threads = (pthread_t*) valloc(sizeof(pthread_t) * g_churn_count);
266 	assert(g_churn_threads);
267 
268 	if ((err = pthread_attr_init(&attr))) {
269 		errc(EX_OSERR, err, "pthread_attr_init");
270 	}
271 
272 	if ((err = pthread_attr_setschedparam(&attr, &param))) {
273 		errc(EX_OSERR, err, "pthread_attr_setschedparam");
274 	}
275 
276 	if ((err = pthread_attr_setschedpolicy(&attr, SCHED_RR))) {
277 		errc(EX_OSERR, err, "pthread_attr_setschedpolicy");
278 	}
279 
280 	for (uint32_t i = 0; i < g_churn_count; i++) {
281 		pthread_t new_thread;
282 
283 		if ((err = pthread_create(&new_thread, &attr, churn_thread, NULL))) {
284 			errc(EX_OSERR, err, "pthread_create");
285 		}
286 		g_churn_threads[i] = new_thread;
287 	}
288 
289 	if ((err = pthread_attr_destroy(&attr))) {
290 		errc(EX_OSERR, err, "pthread_attr_destroy");
291 	}
292 }
293 
294 static void
join_churn_threads(void)295 join_churn_threads(void)
296 {
297 	if (atomic_load_explicit(&g_churn_stopped_at, memory_order_seq_cst) != 0) {
298 		printf("Warning: Some of the churn threads may have stopped early: %lld\n",
299 		    g_churn_stopped_at);
300 	}
301 
302 	atomic_store_explicit(&g_churn_stop, TRUE, memory_order_seq_cst);
303 
304 	/* Rejoin churn threads */
305 	for (uint32_t i = 0; i < g_churn_count; i++) {
306 		errno_t err = pthread_join(g_churn_threads[i], NULL);
307 		if (err) {
308 			errc(EX_OSERR, err, "pthread_join %d", i);
309 		}
310 	}
311 }
312 
/*
 * Set policy
 *
 * Promote the calling thread to Mach time-constraint (realtime)
 * scheduling, using windows twice as large as the RT churn computation
 * burst so the kernel allows the full spin.  Returns 0; asserts on any
 * Mach error.
 */
static int
rt_churn_thread_setup(void)
{
	kern_return_t kr;
	thread_time_constraint_policy_data_t pol;

	/* Hard-coded realtime parameters (similar to what Digi uses) */
	pol.period      = 100000;
	pol.constraint  = (uint32_t) nanos_to_abs(CONSTRAINT_NANOS * 2);
	pol.computation = (uint32_t) nanos_to_abs(RT_CHURN_COMP_NANOS * 2);
	pol.preemptible = 0;         /* Ignored by OS */

	kr = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY,
	    (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
	mach_assert_zero_t(0, kr);

	return 0;
}
334 
/*
 * Body of an rt churn thread: promote to realtime, then each iteration
 * atomically check in on g_rt_churn_sem while waiting for the start
 * signal, and spin doing FP math for RT_CHURN_COMP_NANOS.  Signals
 * g_rt_churn_sem one final time on exit so the waiter can drain.
 */
static void *
rt_churn_thread(__unused void *arg)
{
	rt_churn_thread_setup();

	for (uint32_t i = 0; i < g_iterations; i++) {
		/* Atomically signal "done/ready" on g_rt_churn_sem and wait for the start signal */
		kern_return_t kr = semaphore_wait_signal(g_rt_churn_start_sem, g_rt_churn_sem);
		mach_assert_zero_t(0, kr);

		/* volatile so the spin's FP work can't be optimized away */
		volatile double x = 0.0;
		volatile double y = 0.0;

		uint64_t endspin = mach_absolute_time() + nanos_to_abs(RT_CHURN_COMP_NANOS);
		while (mach_absolute_time() < endspin) {
			y = y + 1.5 + x;
			x = sqrt(y);
		}
	}

	kern_return_t kr = semaphore_signal(g_rt_churn_sem);
	mach_assert_zero_t(0, kr);

	return NULL;
}
359 
360 static void
wait_for_rt_churn_threads(void)361 wait_for_rt_churn_threads(void)
362 {
363 	for (uint32_t i = 0; i < g_rt_churn_count; i++) {
364 		kern_return_t kr = semaphore_wait(g_rt_churn_sem);
365 		mach_assert_zero_t(0, kr);
366 	}
367 }
368 
369 static void
start_rt_churn_threads(void)370 start_rt_churn_threads(void)
371 {
372 	for (uint32_t i = 0; i < g_rt_churn_count; i++) {
373 		kern_return_t kr = semaphore_signal(g_rt_churn_start_sem);
374 		mach_assert_zero_t(0, kr);
375 	}
376 }
377 
/*
 * Spawn g_rt_churn_count realtime churn threads (default: ncpus-1) and
 * block until all of them have promoted themselves and checked in.
 */
static void
create_rt_churn_threads(void)
{
	if (g_rt_churn_count == 0) {
		/* Leave 1 CPU to ensure that the main thread can make progress */
		g_rt_churn_count = g_numcpus - 1;
	}

	errno_t err;

	/* Threads start at churn priority; they promote to realtime themselves */
	struct sched_param param = { .sched_priority = (int)g_churn_pri };
	pthread_attr_t attr;

	/* Array for churn threads */
	g_rt_churn_threads = (pthread_t*) valloc(sizeof(pthread_t) * g_rt_churn_count);
	assert(g_rt_churn_threads);

	if ((err = pthread_attr_init(&attr))) {
		errc(EX_OSERR, err, "pthread_attr_init");
	}

	if ((err = pthread_attr_setschedparam(&attr, &param))) {
		errc(EX_OSERR, err, "pthread_attr_setschedparam");
	}

	if ((err = pthread_attr_setschedpolicy(&attr, SCHED_RR))) {
		errc(EX_OSERR, err, "pthread_attr_setschedpolicy");
	}

	for (uint32_t i = 0; i < g_rt_churn_count; i++) {
		pthread_t new_thread;

		if ((err = pthread_create(&new_thread, &attr, rt_churn_thread, NULL))) {
			errc(EX_OSERR, err, "pthread_create");
		}
		g_rt_churn_threads[i] = new_thread;
	}

	if ((err = pthread_attr_destroy(&attr))) {
		errc(EX_OSERR, err, "pthread_attr_destroy");
	}

	/* Wait until all threads have checked in */
	wait_for_rt_churn_threads();
}
423 
424 static void
join_rt_churn_threads(void)425 join_rt_churn_threads(void)
426 {
427 	/* Rejoin rt churn threads */
428 	for (uint32_t i = 0; i < g_rt_churn_count; i++) {
429 		errno_t err = pthread_join(g_rt_churn_threads[i], NULL);
430 		if (err) {
431 			errc(EX_OSERR, err, "pthread_join %d", i);
432 		}
433 	}
434 }
435 
436 /*
437  * Figure out what thread policy to use
438  */
439 static my_policy_type_t
parse_thread_policy(const char * str)440 parse_thread_policy(const char *str)
441 {
442 	if (strcmp(str, "timeshare") == 0) {
443 		return MY_POLICY_TIMESHARE;
444 	} else if (strcmp(str, "timeshare_no_smt") == 0) {
445 		return MY_POLICY_TIMESHARE_NO_SMT;
446 	} else if (strcmp(str, "realtime") == 0) {
447 		return MY_POLICY_REALTIME;
448 	} else if (strcmp(str, "fixed") == 0) {
449 		return MY_POLICY_FIXEDPRI;
450 	} else {
451 		errx(EX_USAGE, "Invalid thread policy \"%s\"", str);
452 	}
453 }
454 
455 /*
456  * Figure out what wakeup pattern to use
457  */
458 static wake_type_t
parse_wakeup_pattern(const char * str)459 parse_wakeup_pattern(const char *str)
460 {
461 	if (strcmp(str, "chain") == 0) {
462 		return WAKE_CHAIN;
463 	} else if (strcmp(str, "hop") == 0) {
464 		return WAKE_HOP;
465 	} else if (strcmp(str, "broadcast-single-sem") == 0) {
466 		return WAKE_BROADCAST_ONESEM;
467 	} else if (strcmp(str, "broadcast-per-thread") == 0) {
468 		return WAKE_BROADCAST_PERTHREAD;
469 	} else {
470 		errx(EX_USAGE, "Invalid wakeup pattern \"%s\"", str);
471 	}
472 }
473 
/*
 * Set policy
 *
 * Apply the scheduling configuration selected on the command line to the
 * calling thread: optional base priority (g_priority), then the policy
 * chosen by g_policy (timeshare, no-SMT timeshare, Mach time-constraint
 * realtime, or fixed priority), then an optional alternating affinity
 * tag.  Returns 0; aborts via errc/errx/assert on any failure.
 */
static int
thread_setup(uint32_t my_id)
{
	kern_return_t kr;
	errno_t ret;
	thread_time_constraint_policy_data_t pol;

	if (g_priority) {
		/* Fixed-priority threads take SCHED_RR; everything else stays SCHED_OTHER */
		int policy = SCHED_OTHER;
		if (g_policy == MY_POLICY_FIXEDPRI) {
			policy = SCHED_RR;
		}

		struct sched_param param = {.sched_priority = (int)g_priority};
		if ((ret = pthread_setschedparam(pthread_self(), policy, &param))) {
			errc(EX_OSERR, ret, "pthread_setschedparam: %d", my_id);
		}
	}

	switch (g_policy) {
	case MY_POLICY_TIMESHARE:
		break;
	case MY_POLICY_TIMESHARE_NO_SMT:
		proc_setthread_no_smt();
		break;
	case MY_POLICY_REALTIME:
		/* Hard-coded realtime parameters (similar to what Digi uses) */
		pol.period      = 100000;
		if (g_rt_ll) {
			/* Low-latency variant: tighter constraint/computation windows */
			pol.constraint  = (uint32_t) nanos_to_abs(LL_CONSTRAINT_NANOS);
			pol.computation = (uint32_t) nanos_to_abs(LL_COMPUTATION_NANOS);
		} else {
			pol.constraint  = (uint32_t) nanos_to_abs(CONSTRAINT_NANOS);
			pol.computation = (uint32_t) nanos_to_abs(COMPUTATION_NANOS);
		}
		pol.preemptible = 0;         /* Ignored by OS */

		kr = thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY,
		    (thread_policy_t) &pol, THREAD_TIME_CONSTRAINT_POLICY_COUNT);
		mach_assert_zero_t(my_id, kr);
		break;
	case MY_POLICY_FIXEDPRI:
		ret = pthread_set_fixedpriority_self();
		if (ret) {
			errc(EX_OSERR, ret, "pthread_set_fixedpriority_self");
		}
		break;
	default:
		errx(EX_USAGE, "invalid policy type %d", g_policy);
	}

	if (g_do_affinity) {
		/* Alternate affinity tags so even/odd thread ids land in different sets */
		thread_affinity_policy_data_t affinity;

		affinity.affinity_tag = my_id % 2;

		kr = thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY,
		    (thread_policy_t)&affinity, THREAD_AFFINITY_POLICY_COUNT);
		mach_assert_zero_t(my_id, kr);
	}

	return 0;
}
540 
541 time_value_t
get_thread_runtime(void)542 get_thread_runtime(void)
543 {
544 	thread_basic_info_data_t info;
545 	mach_msg_type_number_t info_count = THREAD_BASIC_INFO_COUNT;
546 	thread_info(pthread_mach_thread_np(pthread_self()), THREAD_BASIC_INFO, (thread_info_t)&info, &info_count);
547 
548 	time_value_add(&info.user_time, &info.system_time);
549 
550 	return info.user_time;
551 }
552 
/* Sum of user+system runtime across all worker threads; updated under runtime_lock in worker_thread() */
time_value_t worker_threads_total_runtime = {};
554 
/*
 * Wait for a wakeup, potentially wake up another of the "0-N" threads,
 * and notify the main thread when done.
 *
 * Thread 0 is the "leader": each iteration it waits for all other
 * workers to check in on g_readysem, folds the previous iteration's
 * per-CPU usage into g_cpu_map, rendezvouses with the main thread via
 * g_leadersem/g_main_sem, and then wakes the others according to
 * g_waketype.  Every thread records its wake timestamp in
 * g_thread_endtimes_abs[my_id] and marks the CPU it woke on in
 * g_cpu_histogram.
 */
static void*
worker_thread(void *arg)
{
	/* Serializes additions to worker_threads_total_runtime at thread exit */
	static os_unfair_lock runtime_lock = OS_UNFAIR_LOCK_INIT;

	uint32_t my_id = (uint32_t)(uintptr_t)arg;
	kern_return_t kr;

	/* volatile scratch for the FP spin loops so they aren't optimized away */
	volatile double x = 0.0;
	volatile double y = 0.0;

	/* Set policy and so forth */
	thread_setup(my_id);

	for (uint32_t i = 0; i < g_iterations; i++) {
		if (my_id == 0) {
			/*
			 * Leader thread either wakes everyone up or starts the chain going.
			 */

			/* Give the worker threads undisturbed time to finish before waiting on them */
			if (g_do_sleep) {
				usleep(g_iteration_sleeptime_us);
			}

			debug_log("%d Leader thread wait for ready\n", i);

			/*
			 * Wait for everyone else to declare ready
			 * Is there a better way to do this that won't interfere with the rest of the chain?
			 * TODO: Invent 'semaphore wait for N signals'
			 */

			for (uint32_t j = 0; j < g_numthreads - 1; j++) {
				kr = semaphore_wait(g_readysem);
				mach_assert_zero_t(my_id, kr);
			}

			debug_log("%d Leader thread wait\n", i);

			/* Record which CPUs the previous iteration used, then clear for this one */
			if (i > 0) {
				for (int cpuid = 0; cpuid < g_maxcpus; cpuid++) {
					if (g_cpu_histogram[cpuid].current == 1) {
						atomic_fetch_or_explicit(&g_cpu_map[i - 1], (1UL << cpuid), memory_order_relaxed);
						g_cpu_histogram[cpuid].current = 0;
					}
				}
			}

			/* Signal main thread and wait for start of iteration */

			kr = semaphore_wait_signal(g_leadersem, g_main_sem);
			mach_assert_zero_t(my_id, kr);

			g_thread_endtimes_abs[my_id] = mach_absolute_time();

			debug_log("%d Leader thread go\n", i);

			/* Main thread zeroes g_done_threads before releasing the leader */
			assert_zero_t(my_id, atomic_load_explicit(&g_done_threads, memory_order_relaxed));

			switch (g_waketype) {
			case WAKE_BROADCAST_ONESEM:
				kr = semaphore_signal_all(g_broadcastsem);
				mach_assert_zero_t(my_id, kr);
				break;
			case WAKE_BROADCAST_PERTHREAD:
				for (uint32_t j = 1; j < g_numthreads; j++) {
					kr = semaphore_signal(g_semarr[j]);
					mach_assert_zero_t(my_id, kr);
				}
				break;
			case WAKE_CHAIN:
				kr = semaphore_signal(g_semarr[my_id + 1]);
				mach_assert_zero_t(my_id, kr);
				break;
			case WAKE_HOP:
				kr = semaphore_wait_signal(g_donesem, g_semarr[my_id + 1]);
				mach_assert_zero_t(my_id, kr);
				break;
			}
		} else {
			/*
			 * Everyone else waits to be woken up,
			 * records when she wakes up, and possibly
			 * wakes up a friend.
			 */
			switch (g_waketype) {
			case WAKE_BROADCAST_ONESEM:
				kr = semaphore_wait_signal(g_broadcastsem, g_readysem);
				mach_assert_zero_t(my_id, kr);

				g_thread_endtimes_abs[my_id] = mach_absolute_time();
				break;

			case WAKE_BROADCAST_PERTHREAD:
				kr = semaphore_wait_signal(g_semarr[my_id], g_readysem);
				mach_assert_zero_t(my_id, kr);

				g_thread_endtimes_abs[my_id] = mach_absolute_time();
				break;

			case WAKE_CHAIN:
				kr = semaphore_wait_signal(g_semarr[my_id], g_readysem);
				mach_assert_zero_t(my_id, kr);

				/* Signal the next thread *after* recording wake time */

				g_thread_endtimes_abs[my_id] = mach_absolute_time();

				if (my_id < (g_numthreads - 1)) {
					kr = semaphore_signal(g_semarr[my_id + 1]);
					mach_assert_zero_t(my_id, kr);
				}

				break;

			case WAKE_HOP:
				kr = semaphore_wait_signal(g_semarr[my_id], g_readysem);
				mach_assert_zero_t(my_id, kr);

				/* Signal the next thread *after* recording wake time */

				g_thread_endtimes_abs[my_id] = mach_absolute_time();

				if (my_id < (g_numthreads - 1)) {
					kr = semaphore_wait_signal(g_donesem, g_semarr[my_id + 1]);
					mach_assert_zero_t(my_id, kr);
				} else {
					/* Last thread in the hop chain releases everyone blocked on g_donesem */
					kr = semaphore_signal_all(g_donesem);
					mach_assert_zero_t(my_id, kr);
				}

				break;
			}
		}

		/* Mark the CPU we woke on for this iteration's map and the cumulative histogram */
		unsigned int cpuid =  _os_cpu_number();
		assert(cpuid < g_maxcpus);
		debug_log("Thread %p woke up on CPU %d for iteration %d.\n", pthread_self(), cpuid, i);
		g_cpu_histogram[cpuid].current = 1;
		g_cpu_histogram[cpuid].accum++;

		if (g_do_one_long_spin && g_one_long_spin_id == my_id) {
			/* One randomly chosen thread holds up the train for a while. */

			uint64_t endspin = g_starttime_abs + g_one_long_spin_length_abs;
			while (mach_absolute_time() < endspin) {
				y = y + 1.5 + x;
				x = sqrt(y);
			}
		}

		if (g_do_each_spin) {
			/* Each thread spins for a certain duration after waking up before blocking again. */

			uint64_t endspin = mach_absolute_time() + g_each_spin_duration_abs;
			while (mach_absolute_time() < endspin) {
				y = y + 1.5 + x;
				x = sqrt(y);
			}
		}

		uint32_t done_threads;
		done_threads = atomic_fetch_add_explicit(&g_done_threads, 1, memory_order_relaxed) + 1;

		debug_log("Thread %p new value is %d, iteration %d\n", pthread_self(), done_threads, i);

		if (g_drop_priority) {
			/* Drop priority to BG momentarily */
			errno_t ret = setpriority(PRIO_DARWIN_THREAD, 0, PRIO_DARWIN_BG);
			if (ret) {
				errc(EX_OSERR, ret, "setpriority PRIO_DARWIN_BG");
			}
		}

		if (g_do_all_spin) {
			/* Everyone spins until the last thread checks in. */

			while (atomic_load_explicit(&g_done_threads, memory_order_relaxed) < g_numthreads) {
				y = y + 1.5 + x;
				x = sqrt(y);
			}
		}

		if (g_drop_priority) {
			/* Restore normal priority */
			errno_t ret = setpriority(PRIO_DARWIN_THREAD, 0, 0);
			if (ret) {
				errc(EX_OSERR, ret, "setpriority 0");
			}
		}

		debug_log("Thread %p done spinning, iteration %d\n", pthread_self(), i);
	}

	if (my_id == 0) {
		/* Give the worker threads undisturbed time to finish before waiting on them */
		if (g_do_sleep) {
			usleep(g_iteration_sleeptime_us);
		}

		/* Wait for the worker threads to finish */
		for (uint32_t i = 0; i < g_numthreads - 1; i++) {
			kr = semaphore_wait(g_readysem);
			mach_assert_zero_t(my_id, kr);
		}

		/* Tell everyone and the main thread that the last iteration is done */
		debug_log("%d Leader thread done\n", g_iterations - 1);

		/* Flush the final iteration's CPU usage into g_cpu_map */
		for (int cpuid = 0; cpuid < g_maxcpus; cpuid++) {
			if (g_cpu_histogram[cpuid].current == 1) {
				atomic_fetch_or_explicit(&g_cpu_map[g_iterations - 1], (1UL << cpuid), memory_order_relaxed);
				g_cpu_histogram[cpuid].current = 0;
			}
		}

		kr = semaphore_signal_all(g_main_sem);
		mach_assert_zero_t(my_id, kr);
	} else {
		/* Hold up thread teardown so it doesn't affect the last iteration */
		kr = semaphore_wait_signal(g_main_sem, g_readysem);
		mach_assert_zero_t(my_id, kr);
	}

	/* Fold this thread's CPU time into the global total under the lock */
	time_value_t runtime = get_thread_runtime();
	os_unfair_lock_lock(&runtime_lock);
	time_value_add(&worker_threads_total_runtime, &runtime);
	os_unfair_lock_unlock(&runtime_lock);

	return 0;
}
791 
792 /*
793  * Given an array of uint64_t values, compute average, max, min, and standard deviation
794  */
795 static void
compute_stats(uint64_t * values,uint64_t count,float * averagep,uint64_t * maxp,uint64_t * minp,float * stddevp)796 compute_stats(uint64_t *values, uint64_t count, float *averagep, uint64_t *maxp, uint64_t *minp, float *stddevp)
797 {
798 	uint32_t i;
799 	uint64_t _sum = 0;
800 	uint64_t _max = 0;
801 	uint64_t _min = UINT64_MAX;
802 	float    _avg = 0;
803 	float    _dev = 0;
804 
805 	for (i = 0; i < count; i++) {
806 		_sum += values[i];
807 		_max = values[i] > _max ? values[i] : _max;
808 		_min = values[i] < _min ? values[i] : _min;
809 	}
810 
811 	_avg = ((float)_sum) / ((float)count);
812 
813 	_dev = 0;
814 	for (i = 0; i < count; i++) {
815 		_dev += powf((((float)values[i]) - _avg), 2);
816 	}
817 
818 	_dev /= count;
819 	_dev = sqrtf(_dev);
820 
821 	*averagep = _avg;
822 	*maxp = _max;
823 	*minp = _min;
824 	*stddevp = _dev;
825 }
826 
/* Host-wide CPU tick totals captured by record_cpu_time() */
typedef struct {
	natural_t sys;   /* ticks in system (kernel) state */
	natural_t user;  /* ticks in user state, including nice */
	natural_t idle;  /* idle ticks */
} cpu_time_t;
832 
833 void
record_cpu_time(cpu_time_t * cpu_time)834 record_cpu_time(cpu_time_t *cpu_time)
835 {
836 	host_cpu_load_info_data_t load;
837 	mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT;
838 	kern_return_t kr = host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, (int *)&load, &count);
839 	mach_assert_zero_t(0, kr);
840 
841 	natural_t total_system_time = load.cpu_ticks[CPU_STATE_SYSTEM];
842 	natural_t total_user_time = load.cpu_ticks[CPU_STATE_USER] + load.cpu_ticks[CPU_STATE_NICE];
843 	natural_t total_idle_time = load.cpu_ticks[CPU_STATE_IDLE];
844 
845 	cpu_time->sys = total_system_time;
846 	cpu_time->user = total_user_time;
847 	cpu_time->idle = total_idle_time;
848 }
849 
/*
 * Ask the scheduler to bind this task to a CPU cluster type via the
 * kern.sched_task_set_cluster_type sysctl ('p' for performance, etc.).
 * Returns the sysctl result (0 on success); prints a diagnostic on failure.
 */
static int
set_recommended_cluster(char cluster_char)
{
	/* One-character, NUL-terminated payload; only the first byte is passed */
	char buff[4] = { cluster_char, '\0' };

	int ret = sysctlbyname("kern.sched_task_set_cluster_type", NULL, NULL, buff, 1);
	if (ret != 0) {
		perror("kern.sched_task_set_cluster_type");
	}

	return ret;
}
865 
866 int
main(int argc,char ** argv)867 main(int argc, char **argv)
868 {
869 	errno_t ret;
870 	kern_return_t kr;
871 
872 	pthread_t       *threads;
873 	uint64_t        *worst_latencies_ns;
874 	uint64_t        *worst_latencies_from_first_ns;
875 	uint64_t        *worst_latencies_from_previous_ns;
876 	uint64_t        max, min;
877 	float           avg, stddev;
878 
879 	bool test_fail = false;
880 	bool test_warn = false;
881 
882 	for (int i = 0; i < argc; i++) {
883 		if (strcmp(argv[i], "--switched_apptype") == 0) {
884 			g_seen_apptype = TRUE;
885 		}
886 	}
887 
888 	if (!g_seen_apptype) {
889 		selfexec_with_apptype(argc, argv);
890 	}
891 
892 	parse_args(argc, argv);
893 
894 	srand((unsigned int)time(NULL));
895 
896 	mach_timebase_info(&g_mti);
897 
898 #if TARGET_OS_OSX
899 	/* SKIP test if running on arm platform */
900 	if (g_run_on_intel_only) {
901 		int is_arm = 0;
902 		size_t is_arm_size = sizeof(is_arm);
903 		ret = sysctlbyname("hw.optional.arm64", &is_arm, &is_arm_size, NULL, 0);
904 		if (ret == 0 && is_arm) {
905 			printf("Unsupported platform. Skipping test.\n");
906 			printf("TEST SKIPPED\n");
907 			exit(0);
908 		}
909 	}
910 #endif /* TARGET_OS_OSX */
911 
912 	size_t maxcpu_size = sizeof(g_maxcpus);
913 	ret = sysctlbyname("hw.ncpu", &g_maxcpus, &maxcpu_size, NULL, 0);
914 	if (ret) {
915 		err(EX_OSERR, "Failed sysctlbyname(hw.ncpu)");
916 	}
917 	assert(g_maxcpus <= 64); /* g_cpu_map needs to be extended for > 64 cpus */
918 
919 	size_t numcpu_size = sizeof(g_numcpus);
920 	ret = sysctlbyname("hw.perflevel0.logicalcpu", &g_numcpus, &numcpu_size, NULL, 0);
921 	if (ret) {
922 		/* hw.perflevel0.logicalcpu failed so falling back to hw.ncpu */
923 		g_numcpus = g_maxcpus;
924 	} else {
925 		/* Test for multiple perf levels */
926 		uint32_t result = 0;
927 		size_t result_size = sizeof(result);
928 		ret = sysctlbyname("hw.perflevel1.logicalcpu", &result, &result_size, NULL, 0);
929 		if ((ret == 0) && (result > 0)) {
930 			/* Multiple perf levels detected, so bind this task to the highest perf node */
931 			ret = set_recommended_cluster('p');
932 			if (ret && g_test_rt) {
933 				printf("set_recommended_cluster('p') failed.  Skipping test\n");
934 				printf("TEST SKIPPED\n");
935 				exit(0);
936 			}
937 		}
938 	}
939 
940 	size_t physicalcpu_size = sizeof(g_nphysicalcpu);
941 	ret = sysctlbyname("hw.perflevel0.physicalcpu", &g_nphysicalcpu, &physicalcpu_size, NULL, 0);
942 	if (ret) {
943 		/* hw.perflevel0.physicalcpu failed so falling back to hw.physicalcpu */
944 		ret = sysctlbyname("hw.physicalcpu", &g_nphysicalcpu, &physicalcpu_size, NULL, 0);
945 		if (ret) {
946 			err(EX_OSERR, "Failed sysctlbyname(hw.physicalcpu)");
947 		}
948 	}
949 
950 	size_t logicalcpu_size = sizeof(g_nlogicalcpu);
951 	ret = sysctlbyname("hw.perflevel0.logicalcpu", &g_nlogicalcpu, &logicalcpu_size, NULL, 0);
952 	if (ret) {
953 		/* hw.perflevel0.logicalcpu failed so falling back to hw.logicalcpu */
954 		ret = sysctlbyname("hw.logicalcpu", &g_nlogicalcpu, &logicalcpu_size, NULL, 0);
955 		if (ret) {
956 			err(EX_OSERR, "Failed sysctlbyname(hw.logicalcpu)");
957 		}
958 	}
959 
960 	if (g_test_rt) {
961 		if (g_numthreads == 0) {
962 			g_numthreads = g_numcpus + g_extra_thread_count;
963 			if ((int32_t)g_numthreads < 1) {
964 				g_numthreads = 1;
965 			}
966 			if ((g_numthreads == 1) && ((g_waketype == WAKE_CHAIN) || (g_waketype == WAKE_HOP))) {
967 				g_numthreads = 2;
968 			}
969 		}
970 		g_policy = MY_POLICY_REALTIME;
971 		g_histogram = true;
972 		/* Don't change g_traceworthy_latency_ns if it's explicity been set to something other than the default */
973 		if (g_traceworthy_latency_ns == TRACEWORTHY_NANOS) {
974 			g_traceworthy_latency_ns = g_rt_ll ? TRACEWORTHY_NANOS_LL : TRACEWORTHY_NANOS_TEST;
975 		}
976 	} else if (g_test_rt_smt) {
977 		if (g_nlogicalcpu != 2 * g_nphysicalcpu) {
978 			/* Not SMT */
979 			printf("Attempt to run --test-rt-smt on a non-SMT device\n");
980 			printf("TEST SKIPPED\n");
981 			exit(0);
982 		}
983 
984 		if (g_numthreads == 0) {
985 			g_numthreads = g_nphysicalcpu + g_extra_thread_count;
986 		}
987 		if ((int32_t)g_numthreads < 1) {
988 			g_numthreads = 1;
989 		}
990 		if ((g_numthreads == 1) && ((g_waketype == WAKE_CHAIN) || (g_waketype == WAKE_HOP))) {
991 			g_numthreads = 2;
992 		}
993 		g_policy = MY_POLICY_REALTIME;
994 		g_histogram = true;
995 	} else if (g_test_rt_avoid0) {
996 #if defined(__x86_64__) || defined(__i386__)
997 		if (g_nphysicalcpu == 1) {
998 			printf("Attempt to run --test-rt-avoid0 on a uniprocessor\n");
999 			printf("TEST SKIPPED\n");
1000 			exit(0);
1001 		}
1002 		if (g_numthreads == 0) {
1003 			g_numthreads = g_nphysicalcpu - 1 + g_extra_thread_count;
1004 		}
1005 		if ((int32_t)g_numthreads < 1) {
1006 			g_numthreads = 1;
1007 		}
1008 		if ((g_numthreads == 1) && ((g_waketype == WAKE_CHAIN) || (g_waketype == WAKE_HOP))) {
1009 			g_numthreads = 2;
1010 		}
1011 		g_policy = MY_POLICY_REALTIME;
1012 		g_histogram = true;
1013 #else
1014 		printf("Attempt to run --test-rt-avoid0 on a non-Intel device\n");
1015 		printf("TEST SKIPPED\n");
1016 		exit(0);
1017 #endif
1018 	} else if (g_numthreads == 0) {
1019 		g_numthreads = g_numcpus + g_extra_thread_count;
1020 		if ((int32_t)g_numthreads < 1) {
1021 			g_numthreads = 1;
1022 		}
1023 		if ((g_numthreads == 1) && ((g_waketype == WAKE_CHAIN) || (g_waketype == WAKE_HOP))) {
1024 			g_numthreads = 2;
1025 		}
1026 	}
1027 
1028 	if (g_do_each_spin) {
1029 		g_each_spin_duration_abs = nanos_to_abs(g_each_spin_duration_ns);
1030 	}
1031 
1032 	/* Configure the long-spin thread to take up half of its computation */
1033 	if (g_do_one_long_spin) {
1034 		g_one_long_spin_length_ns = COMPUTATION_NANOS / 2;
1035 		g_one_long_spin_length_abs = nanos_to_abs(g_one_long_spin_length_ns);
1036 	}
1037 
1038 	/* Estimate the amount of time the cleanup phase needs to back off */
1039 	g_iteration_sleeptime_us = g_numthreads * 20;
1040 
1041 	uint32_t threads_per_core = (g_numthreads / g_numcpus) + 1;
1042 	if (g_do_each_spin) {
1043 		g_iteration_sleeptime_us += threads_per_core * (g_each_spin_duration_ns / NSEC_PER_USEC);
1044 	}
1045 	if (g_do_one_long_spin) {
1046 		g_iteration_sleeptime_us += g_one_long_spin_length_ns / NSEC_PER_USEC;
1047 	}
1048 
1049 	/* Arrays for threads and their wakeup times */
1050 	threads = (pthread_t*) valloc(sizeof(pthread_t) * g_numthreads);
1051 	assert(threads);
1052 
1053 	size_t endtimes_size = sizeof(uint64_t) * g_numthreads;
1054 
1055 	g_thread_endtimes_abs = (uint64_t*) valloc(endtimes_size);
1056 	assert(g_thread_endtimes_abs);
1057 
1058 	/* Ensure the allocation is pre-faulted */
1059 	ret = memset_s(g_thread_endtimes_abs, endtimes_size, 0, endtimes_size);
1060 	if (ret) {
1061 		errc(EX_OSERR, ret, "memset_s endtimes");
1062 	}
1063 
1064 	size_t latencies_size = sizeof(uint64_t) * g_iterations;
1065 
1066 	worst_latencies_ns = (uint64_t*) valloc(latencies_size);
1067 	assert(worst_latencies_ns);
1068 
1069 	/* Ensure the allocation is pre-faulted */
1070 	ret = memset_s(worst_latencies_ns, latencies_size, 0, latencies_size);
1071 	if (ret) {
1072 		errc(EX_OSERR, ret, "memset_s latencies");
1073 	}
1074 
1075 	worst_latencies_from_first_ns = (uint64_t*) valloc(latencies_size);
1076 	assert(worst_latencies_from_first_ns);
1077 
1078 	/* Ensure the allocation is pre-faulted */
1079 	ret = memset_s(worst_latencies_from_first_ns, latencies_size, 0, latencies_size);
1080 	if (ret) {
1081 		errc(EX_OSERR, ret, "memset_s latencies_from_first");
1082 	}
1083 
1084 	worst_latencies_from_previous_ns = (uint64_t*) valloc(latencies_size);
1085 	assert(worst_latencies_from_previous_ns);
1086 
1087 	/* Ensure the allocation is pre-faulted */
1088 	ret = memset_s(worst_latencies_from_previous_ns, latencies_size, 0, latencies_size);
1089 	if (ret) {
1090 		errc(EX_OSERR, ret, "memset_s latencies_from_previous");
1091 	}
1092 
1093 	size_t histogram_size = sizeof(histogram_t) * g_maxcpus;
1094 	g_cpu_histogram = (histogram_t *)valloc(histogram_size);
1095 	assert(g_cpu_histogram);
1096 	/* Ensure the allocation is pre-faulted */
1097 	ret = memset_s(g_cpu_histogram, histogram_size, 0, histogram_size);
1098 	if (ret) {
1099 		errc(EX_OSERR, ret, "memset_s g_cpu_histogram");
1100 	}
1101 
1102 	size_t map_size = sizeof(uint64_t) * g_iterations;
1103 	g_cpu_map = (_Atomic uint64_t *)valloc(map_size);
1104 	assert(g_cpu_map);
1105 	/* Ensure the allocation is pre-faulted */
1106 	ret = memset_s(g_cpu_map, map_size, 0, map_size);
1107 	if (ret) {
1108 		errc(EX_OSERR, ret, "memset_s g_cpu_map");
1109 	}
1110 
1111 	kr = semaphore_create(mach_task_self(), &g_main_sem, SYNC_POLICY_FIFO, 0);
1112 	mach_assert_zero(kr);
1113 
1114 	/* Either one big semaphore or one per thread */
1115 	if (g_waketype == WAKE_CHAIN ||
1116 	    g_waketype == WAKE_BROADCAST_PERTHREAD ||
1117 	    g_waketype == WAKE_HOP) {
1118 		g_semarr = valloc(sizeof(semaphore_t) * g_numthreads);
1119 		assert(g_semarr);
1120 
1121 		for (uint32_t i = 0; i < g_numthreads; i++) {
1122 			kr = semaphore_create(mach_task_self(), &g_semarr[i], SYNC_POLICY_FIFO, 0);
1123 			mach_assert_zero(kr);
1124 		}
1125 
1126 		g_leadersem = g_semarr[0];
1127 	} else {
1128 		kr = semaphore_create(mach_task_self(), &g_broadcastsem, SYNC_POLICY_FIFO, 0);
1129 		mach_assert_zero(kr);
1130 		kr = semaphore_create(mach_task_self(), &g_leadersem, SYNC_POLICY_FIFO, 0);
1131 		mach_assert_zero(kr);
1132 	}
1133 
1134 	if (g_waketype == WAKE_HOP) {
1135 		kr = semaphore_create(mach_task_self(), &g_donesem, SYNC_POLICY_FIFO, 0);
1136 		mach_assert_zero(kr);
1137 	}
1138 
1139 	kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0);
1140 	mach_assert_zero(kr);
1141 
1142 	kr = semaphore_create(mach_task_self(), &g_rt_churn_sem, SYNC_POLICY_FIFO, 0);
1143 	mach_assert_zero(kr);
1144 
1145 	kr = semaphore_create(mach_task_self(), &g_rt_churn_start_sem, SYNC_POLICY_FIFO, 0);
1146 	mach_assert_zero(kr);
1147 
1148 	atomic_store_explicit(&g_done_threads, 0, memory_order_relaxed);
1149 
1150 	/* Create the threads */
1151 	for (uint32_t i = 0; i < g_numthreads; i++) {
1152 		ret = pthread_create(&threads[i], NULL, worker_thread, (void*)(uintptr_t)i);
1153 		if (ret) {
1154 			errc(EX_OSERR, ret, "pthread_create %d", i);
1155 		}
1156 	}
1157 
1158 	ret = setpriority(PRIO_DARWIN_ROLE, 0, PRIO_DARWIN_ROLE_UI_FOCAL);
1159 	if (ret) {
1160 		errc(EX_OSERR, ret, "setpriority");
1161 	}
1162 
1163 	thread_setup(0);
1164 
1165 	g_starttime_abs = mach_absolute_time();
1166 
1167 	if (g_churn_pri) {
1168 		create_churn_threads();
1169 	}
1170 	if (g_rt_churn) {
1171 		create_rt_churn_threads();
1172 	}
1173 
1174 	/* Let everyone get settled */
1175 	kr = semaphore_wait(g_main_sem);
1176 	mach_assert_zero(kr);
1177 
1178 	/* Give the system a bit more time to settle */
1179 	if (g_do_sleep) {
1180 		usleep(g_iteration_sleeptime_us);
1181 	}
1182 
1183 	cpu_time_t start_time;
1184 	cpu_time_t finish_time;
1185 
1186 	record_cpu_time(&start_time);
1187 
1188 	/* Go! */
1189 	for (uint32_t i = 0; i < g_iterations; i++) {
1190 		uint32_t j;
1191 		uint64_t worst_abs = 0, best_abs = UINT64_MAX;
1192 
1193 		if (g_do_one_long_spin) {
1194 			g_one_long_spin_id = (uint32_t)rand() % g_numthreads;
1195 		}
1196 
1197 		if (g_rt_churn) {
1198 			start_rt_churn_threads();
1199 			usleep(100);
1200 		}
1201 
1202 		debug_log("%d Main thread reset\n", i);
1203 
1204 		atomic_store_explicit(&g_done_threads, 0, memory_order_seq_cst);
1205 
1206 		g_starttime_abs = mach_absolute_time();
1207 
1208 		/* Fire them off and wait for worker threads to finish */
1209 		kr = semaphore_wait_signal(g_main_sem, g_leadersem);
1210 		mach_assert_zero(kr);
1211 
1212 		debug_log("%d Main thread return\n", i);
1213 
1214 		assert(atomic_load_explicit(&g_done_threads, memory_order_relaxed) == g_numthreads);
1215 
1216 		if (g_rt_churn) {
1217 			wait_for_rt_churn_threads();
1218 		}
1219 
1220 		/*
1221 		 * We report the worst latencies relative to start time
1222 		 * and relative to the lead worker thread
1223 		 * and (where relevant) relative to the previous thread
1224 		 */
1225 		for (j = 0; j < g_numthreads; j++) {
1226 			uint64_t latency_abs;
1227 
1228 			latency_abs = g_thread_endtimes_abs[j] - g_starttime_abs;
1229 			worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
1230 		}
1231 
1232 		worst_latencies_ns[i] = abs_to_nanos(worst_abs);
1233 
1234 		worst_abs = 0;
1235 		for (j = 1; j < g_numthreads; j++) {
1236 			uint64_t latency_abs;
1237 
1238 			latency_abs = g_thread_endtimes_abs[j] - g_thread_endtimes_abs[0];
1239 			worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
1240 			best_abs = best_abs > latency_abs ? latency_abs : best_abs;
1241 		}
1242 
1243 		worst_latencies_from_first_ns[i] = abs_to_nanos(worst_abs);
1244 
1245 		if ((g_waketype == WAKE_CHAIN) || (g_waketype == WAKE_HOP)) {
1246 			worst_abs = 0;
1247 			for (j = 1; j < g_numthreads; j++) {
1248 				uint64_t latency_abs;
1249 
1250 				latency_abs = g_thread_endtimes_abs[j] - g_thread_endtimes_abs[j - 1];
1251 				worst_abs = worst_abs < latency_abs ? latency_abs : worst_abs;
1252 				best_abs = best_abs > latency_abs ? latency_abs : best_abs;
1253 			}
1254 
1255 			worst_latencies_from_previous_ns[i] = abs_to_nanos(worst_abs);
1256 		}
1257 
1258 		/*
1259 		 * In the event of a bad run, cut a trace point.
1260 		 */
1261 		uint64_t worst_latency_ns = ((g_waketype == WAKE_CHAIN) || (g_waketype == WAKE_HOP)) ? worst_latencies_from_previous_ns[i] : worst_latencies_ns[i];
1262 		if (worst_latency_ns > g_traceworthy_latency_ns) {
1263 			g_traceworthy_count++;
1264 			/* Ariadne's ad-hoc test signpost */
1265 			kdebug_trace(ARIADNEDBG_CODE(0, 0), worst_latency_ns, g_traceworthy_latency_ns, 0, 0);
1266 
1267 			if (g_verbose) {
1268 				printf("Worst on this round was %.2f us.\n", ((float)worst_latency_ns) / 1000.0);
1269 			}
1270 		}
1271 
1272 		/* Give the system a bit more time to settle */
1273 		if (g_do_sleep) {
1274 			usleep(g_iteration_sleeptime_us);
1275 		}
1276 	}
1277 
1278 	record_cpu_time(&finish_time);
1279 
1280 	/* Rejoin threads */
1281 	for (uint32_t i = 0; i < g_numthreads; i++) {
1282 		ret = pthread_join(threads[i], NULL);
1283 		if (ret) {
1284 			errc(EX_OSERR, ret, "pthread_join %d", i);
1285 		}
1286 	}
1287 
1288 	if (g_rt_churn) {
1289 		join_rt_churn_threads();
1290 	}
1291 
1292 	if (g_churn_pri) {
1293 		join_churn_threads();
1294 	}
1295 
1296 	uint32_t cpu_idle_time = (finish_time.idle - start_time.idle) * 10;
1297 	uint32_t worker_threads_runtime = worker_threads_total_runtime.seconds * 1000 + worker_threads_total_runtime.microseconds / 1000;
1298 
1299 	compute_stats(worst_latencies_ns, g_iterations, &avg, &max, &min, &stddev);
1300 	printf("Results (from a stop):\n");
1301 	printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
1302 	printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0);
1303 	printf("Avg:\t\t%.2f us\n", avg / 1000.0);
1304 	printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
1305 
1306 	putchar('\n');
1307 
1308 	compute_stats(worst_latencies_from_first_ns, g_iterations, &avg, &max, &min, &stddev);
1309 	printf("Results (relative to first thread):\n");
1310 	printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
1311 	printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0);
1312 	printf("Avg:\t\t%.2f us\n", avg / 1000.0);
1313 	printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
1314 
1315 	if ((g_waketype == WAKE_CHAIN) || (g_waketype == WAKE_HOP)) {
1316 		putchar('\n');
1317 
1318 		compute_stats(worst_latencies_from_previous_ns, g_iterations, &avg, &max, &min, &stddev);
1319 		printf("Results (relative to previous thread):\n");
1320 		printf("Max:\t\t%.2f us\n", ((float)max) / 1000.0);
1321 		printf("Min:\t\t%.2f us\n", ((float)min) / 1000.0);
1322 		printf("Avg:\t\t%.2f us\n", avg / 1000.0);
1323 		printf("Stddev:\t\t%.2f us\n", stddev / 1000.0);
1324 	}
1325 
1326 	if (g_test_rt) {
1327 		putchar('\n');
1328 		printf("Count of trace-worthy latencies (>%.2f us): %d\n", ((float)g_traceworthy_latency_ns) / 1000.0, g_traceworthy_count);
1329 	}
1330 
1331 #if 0
1332 	for (uint32_t i = 0; i < g_iterations; i++) {
1333 		printf("Iteration %d: %.2f us\n", i, worst_latencies_ns[i] / 1000.0);
1334 	}
1335 #endif
1336 
1337 	if (g_histogram) {
1338 		putchar('\n');
1339 
1340 		for (uint32_t i = 0; i < g_maxcpus; i++) {
1341 			printf("%d\t%d\n", i, g_cpu_histogram[i].accum);
1342 		}
1343 	}
1344 
1345 	if (g_test_rt || g_test_rt_smt || g_test_rt_avoid0) {
1346 #define PRIMARY   0x5555555555555555ULL
1347 #define SECONDARY 0xaaaaaaaaaaaaaaaaULL
1348 
1349 		int fail_count = 0;
1350 		uint64_t *sched_latencies_ns = ((g_waketype == WAKE_CHAIN) || (g_waketype == WAKE_HOP)) ? worst_latencies_from_previous_ns : worst_latencies_ns;
1351 
1352 		for (uint32_t i = 0; i < g_iterations; i++) {
1353 			bool secondary = false;
1354 			bool fail = false;
1355 			bool warn = false;
1356 			uint64_t map = g_cpu_map[i];
1357 			if (g_test_rt_smt) {
1358 				/* Test for one or more threads running on secondary cores unexpectedly (WARNING) */
1359 				secondary = (map & SECONDARY);
1360 				/* Test for threads running on both primary and secondary cpus of the same core (FAIL) */
1361 				fail = ((map & PRIMARY) & ((map & SECONDARY) >> 1));
1362 			} else if (g_test_rt) {
1363 				/* Test that each thread runs on its own core (WARNING for now) */
1364 				warn = (__builtin_popcountll(map) != g_numthreads);
1365 				/* Test for latency probems (FAIL) */
1366 				fail = (sched_latencies_ns[i] > g_traceworthy_latency_ns);
1367 			} else if (g_test_rt_avoid0) {
1368 				fail = ((map & 0x1) == 0x1);
1369 			}
1370 			if (warn || secondary || fail) {
1371 				printf("Iteration %d: 0x%llx worst latency %.2fus%s%s%s\n", i, map,
1372 				    sched_latencies_ns[i] / 1000.0,
1373 				    warn ? " WARNING" : "",
1374 				    secondary ? " SECONDARY" : "",
1375 				    fail ? " FAIL" : "");
1376 			}
1377 			test_warn |= (warn || secondary || fail);
1378 			test_fail |= fail;
1379 			fail_count += fail;
1380 		}
1381 
1382 		if (test_fail && !g_test_strict_fail && (g_iterations >= 100) && (fail_count <= g_iterations / 100)) {
1383 			printf("99%% or better success rate\n");
1384 			test_fail = 0;
1385 		}
1386 	}
1387 
1388 	if (g_test_rt_smt && (g_each_spin_duration_ns >= 200000) && !test_warn) {
1389 		printf("cpu_idle_time=%dms worker_threads_runtime=%dms\n", cpu_idle_time, worker_threads_runtime);
1390 		if (cpu_idle_time < worker_threads_runtime / 4) {
1391 			printf("FAIL cpu_idle_time unexpectedly small\n");
1392 			test_fail = 1;
1393 		} else if (cpu_idle_time > worker_threads_runtime * 2) {
1394 			printf("FAIL cpu_idle_time unexpectedly large\n");
1395 			test_fail = 1;
1396 		}
1397 	}
1398 
1399 	if (test_fail) {
1400 		printf("TEST FAILED\n");
1401 	} else {
1402 		printf("TEST PASSED\n");
1403 	}
1404 
1405 	free(threads);
1406 	free(g_thread_endtimes_abs);
1407 	free(worst_latencies_ns);
1408 	free(worst_latencies_from_first_ns);
1409 	free(worst_latencies_from_previous_ns);
1410 	free(g_cpu_histogram);
1411 	free(g_cpu_map);
1412 
1413 	return test_fail;
1414 }
1415 
1416 /*
1417  * WARNING: This is SPI specifically intended for use by launchd to start UI
1418  * apps. We use it here for a test tool only to opt into QoS using the same
1419  * policies. Do not use this outside xnu or libxpc/launchd.
1420  */
1421 static void
selfexec_with_apptype(int argc,char * argv[])1422 selfexec_with_apptype(int argc, char *argv[])
1423 {
1424 	int ret;
1425 	posix_spawnattr_t attr;
1426 	extern char **environ;
1427 	char *new_argv[argc + 1 + 1 /* NULL */];
1428 	int i;
1429 	char prog[PATH_MAX];
1430 	uint32_t prog_size = PATH_MAX;
1431 
1432 	ret = _NSGetExecutablePath(prog, &prog_size);
1433 	if (ret) {
1434 		err(EX_OSERR, "_NSGetExecutablePath");
1435 	}
1436 
1437 	for (i = 0; i < argc; i++) {
1438 		new_argv[i] = argv[i];
1439 	}
1440 
1441 	new_argv[i]   = "--switched_apptype";
1442 	new_argv[i + 1] = NULL;
1443 
1444 	ret = posix_spawnattr_init(&attr);
1445 	if (ret) {
1446 		errc(EX_OSERR, ret, "posix_spawnattr_init");
1447 	}
1448 
1449 	ret = posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETEXEC);
1450 	if (ret) {
1451 		errc(EX_OSERR, ret, "posix_spawnattr_setflags");
1452 	}
1453 
1454 	ret = posix_spawnattr_setprocesstype_np(&attr, POSIX_SPAWN_PROC_TYPE_APP_DEFAULT);
1455 	if (ret) {
1456 		errc(EX_OSERR, ret, "posix_spawnattr_setprocesstype_np");
1457 	}
1458 
1459 	ret = posix_spawn(NULL, prog, NULL, &attr, new_argv, environ);
1460 	if (ret) {
1461 		errc(EX_OSERR, ret, "posix_spawn");
1462 	}
1463 }
1464 
1465 /*
1466  * Admittedly not very attractive.
1467  */
/* Print the command-line synopsis to stderr and exit with EX_USAGE. */
static void __attribute__((noreturn))
usage(void)
{
	errx(EX_USAGE, "Usage: %s <threads> <chain | hop | broadcast-single-sem | broadcast-per-thread> "
	    "<realtime | timeshare | timeshare_no_smt | fixed> <iterations>\n\t\t"
	    "[--trace <traceworthy latency in ns>] "
	    "[--verbose] [--spin-one] [--spin-all] [--spin-time <nanos>] [--affinity]\n\t\t"
	    "[--no-sleep] [--drop-priority] [--churn-pri <pri>] [--churn-count <n>] [--churn-random]\n\t\t"
	    "[--extra-thread-count <signed int>]\n\t\t"
	    "[--rt-churn] [--rt-churn-count <n>] [--rt-ll]\n\t\t"
	    "[--test-rt] [--test-rt-smt] [--test-rt-avoid0] [--test-strict-fail]",
	    getprogname());
}
1481 
/* getopt_long() bookkeeping shared with the read_*_dec_arg() helpers. */
static struct option* g_longopts;       /* points at parse_args()'s longopts table */
static int option_index;                /* index of the option currently being parsed */
1484 
1485 static uint32_t
read_dec_arg()1486 read_dec_arg()
1487 {
1488 	char *cp;
1489 	/* char* optarg is a magic global */
1490 
1491 	uint32_t arg_val = (uint32_t)strtoull(optarg, &cp, 10);
1492 
1493 	if (cp == optarg || *cp) {
1494 		errx(EX_USAGE, "arg --%s requires a decimal number, found \"%s\"",
1495 		    g_longopts[option_index].name, optarg);
1496 	}
1497 
1498 	return arg_val;
1499 }
1500 
1501 static int32_t
read_signed_dec_arg()1502 read_signed_dec_arg()
1503 {
1504 	char *cp;
1505 	/* char* optarg is a magic global */
1506 
1507 	int32_t arg_val = (int32_t)strtoull(optarg, &cp, 10);
1508 
1509 	if (cp == optarg || *cp) {
1510 		errx(EX_USAGE, "arg --%s requires a decimal number, found \"%s\"",
1511 		    g_longopts[option_index].name, optarg);
1512 	}
1513 
1514 	return arg_val;
1515 }
1516 
1517 static void
parse_args(int argc,char * argv[])1518 parse_args(int argc, char *argv[])
1519 {
1520 	enum {
1521 		OPT_GETOPT = 0,
1522 		OPT_SPIN_TIME,
1523 		OPT_TRACE,
1524 		OPT_PRIORITY,
1525 		OPT_CHURN_PRI,
1526 		OPT_CHURN_COUNT,
1527 		OPT_RT_CHURN_COUNT,
1528 		OPT_EXTRA_THREAD_COUNT,
1529 	};
1530 
1531 	static struct option longopts[] = {
1532 		/* BEGIN IGNORE CODESTYLE */
1533 		{ "spin-time",          required_argument,      NULL,                           OPT_SPIN_TIME },
1534 		{ "trace",              required_argument,      NULL,                           OPT_TRACE     },
1535 		{ "priority",           required_argument,      NULL,                           OPT_PRIORITY  },
1536 		{ "churn-pri",          required_argument,      NULL,                           OPT_CHURN_PRI },
1537 		{ "churn-count",        required_argument,      NULL,                           OPT_CHURN_COUNT },
1538 		{ "rt-churn-count",     required_argument,      NULL,                           OPT_RT_CHURN_COUNT },
1539 		{ "extra-thread-count", required_argument,      NULL,                           OPT_EXTRA_THREAD_COUNT },
1540 		{ "churn-random",       no_argument,            (int*)&g_churn_random,          TRUE },
1541 		{ "switched_apptype",   no_argument,            (int*)&g_seen_apptype,          TRUE },
1542 		{ "spin-one",           no_argument,            (int*)&g_do_one_long_spin,      TRUE },
1543 		{ "intel-only",         no_argument,            (int*)&g_run_on_intel_only,     TRUE },
1544 		{ "spin-all",           no_argument,            (int*)&g_do_all_spin,           TRUE },
1545 		{ "affinity",           no_argument,            (int*)&g_do_affinity,           TRUE },
1546 		{ "no-sleep",           no_argument,            (int*)&g_do_sleep,              FALSE },
1547 		{ "drop-priority",      no_argument,            (int*)&g_drop_priority,         TRUE },
1548 		{ "test-rt",            no_argument,            (int*)&g_test_rt,               TRUE },
1549 		{ "test-rt-smt",        no_argument,            (int*)&g_test_rt_smt,           TRUE },
1550 		{ "test-rt-avoid0",     no_argument,            (int*)&g_test_rt_avoid0,        TRUE },
1551 		{ "test-strict-fail",   no_argument,            (int*)&g_test_strict_fail,      TRUE },
1552 		{ "rt-churn",           no_argument,            (int*)&g_rt_churn,              TRUE },
1553 		{ "rt-ll",              no_argument,            (int*)&g_rt_ll,                 TRUE },
1554 		{ "histogram",          no_argument,            (int*)&g_histogram,             TRUE },
1555 		{ "verbose",            no_argument,            (int*)&g_verbose,               TRUE },
1556 		{ "help",               no_argument,            NULL,                           'h' },
1557 		{ NULL,                 0,                      NULL,                           0 }
1558 		/* END IGNORE CODESTYLE */
1559 	};
1560 
1561 	g_longopts = longopts;
1562 	int ch = 0;
1563 
1564 	while ((ch = getopt_long(argc, argv, "h", longopts, &option_index)) != -1) {
1565 		switch (ch) {
1566 		case OPT_GETOPT:
1567 			/* getopt_long set a variable */
1568 			break;
1569 		case OPT_SPIN_TIME:
1570 			g_do_each_spin = TRUE;
1571 			g_each_spin_duration_ns = read_dec_arg();
1572 			break;
1573 		case OPT_TRACE:
1574 			g_traceworthy_latency_ns = read_dec_arg();
1575 			break;
1576 		case OPT_PRIORITY:
1577 			g_priority = read_dec_arg();
1578 			break;
1579 		case OPT_CHURN_PRI:
1580 			g_churn_pri = read_dec_arg();
1581 			break;
1582 		case OPT_CHURN_COUNT:
1583 			g_churn_count = read_dec_arg();
1584 			break;
1585 		case OPT_RT_CHURN_COUNT:
1586 			g_rt_churn_count = read_dec_arg();
1587 			break;
1588 		case OPT_EXTRA_THREAD_COUNT:
1589 			g_extra_thread_count = read_signed_dec_arg();
1590 			break;
1591 		case '?':
1592 		case 'h':
1593 		default:
1594 			usage();
1595 			/* NORETURN */
1596 		}
1597 	}
1598 
1599 	/*
1600 	 * getopt_long reorders all the options to the beginning of the argv array.
1601 	 * Jump past them to the non-option arguments.
1602 	 */
1603 
1604 	argc -= optind;
1605 	argv += optind;
1606 
1607 	if (argc > 4) {
1608 		warnx("Too many non-option arguments passed");
1609 		usage();
1610 	}
1611 
1612 	if (argc != 4) {
1613 		warnx("Missing required <threads> <waketype> <policy> <iterations> arguments");
1614 		usage();
1615 	}
1616 
1617 	char *cp;
1618 
1619 	/* How many threads? */
1620 	g_numthreads = (uint32_t)strtoull(argv[0], &cp, 10);
1621 
1622 	if (cp == argv[0] || *cp) {
1623 		errx(EX_USAGE, "numthreads requires a decimal number, found \"%s\"", argv[0]);
1624 	}
1625 
1626 	/* What wakeup pattern? */
1627 	g_waketype = parse_wakeup_pattern(argv[1]);
1628 
1629 	/* Policy */
1630 	g_policy = parse_thread_policy(argv[2]);
1631 
1632 	/* Iterations */
1633 	g_iterations = (uint32_t)strtoull(argv[3], &cp, 10);
1634 
1635 	if (cp == argv[3] || *cp) {
1636 		errx(EX_USAGE, "numthreads requires a decimal number, found \"%s\"", argv[3]);
1637 	}
1638 
1639 	if (g_iterations < 1) {
1640 		errx(EX_USAGE, "Must have at least one iteration");
1641 	}
1642 
1643 	if (g_numthreads == 1 && g_waketype == WAKE_CHAIN) {
1644 		errx(EX_USAGE, "chain mode requires more than one thread");
1645 	}
1646 
1647 	if (g_numthreads == 1 && g_waketype == WAKE_HOP) {
1648 		errx(EX_USAGE, "hop mode requires more than one thread");
1649 	}
1650 }
1651