xref: /xnu-12377.1.9/tests/select_stress.c (revision f6217f891ac0bb64f3d375211650a4c1ff8ca1ea)
1 #include <darwintest.h>
2 
3 #include <errno.h>
4 #include <pthread.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <unistd.h>
8 
9 #include <mach/mach.h>
10 #include <mach/mach_time.h>
11 #include <mach/semaphore.h>
12 #include <sys/select.h>
13 
14 T_GLOBAL_META(
15 	T_META_RADAR_COMPONENT_NAME("xnu"),
16 	T_META_RADAR_COMPONENT_VERSION("file descriptors"));
17 
18 #define BACKOFF_SLEEP_SECONDS   3
19 
20 /* Select parameters */
21 #define TIMEOUT_CHANCE          17      /* one in this many times, timeout */
22 #define TIMEOUT_POLLCHANCE      11      /* one in this many is a poll */
23 #define TIMEOUT_SCALE           5       /* microseconds multiplier */
24 
25 static semaphore_t g_thread_sem;
26 static semaphore_t g_sync_sem;
27 
28 struct endpoint {
29 	int       fd[4];
30 	pthread_t pth;
31 };
32 
33 typedef void * (*thread_func)(struct endpoint *ep);
34 typedef void   (*setup_func)(struct endpoint *ep);
35 
36 struct thread_sync_arg {
37 	struct endpoint ep;
38 	setup_func  setup;
39 	thread_func work;
40 };
41 
42 static mach_timebase_info_data_t g_timebase;
43 
44 static int g_sleep_iterations = 150000;
45 static int g_sleep_usecs = 30;
46 static int g_stress_nthreads = 100;
47 static uint64_t g_stress_duration = 5;
48 
49 static inline uint64_t
ns_to_abs(uint64_t ns)50 ns_to_abs(uint64_t ns)
51 {
52 	return ns * g_timebase.denom / g_timebase.numer;
53 }
54 
55 static inline uint64_t
abs_to_ns(uint64_t abs)56 abs_to_ns(uint64_t abs)
57 {
58 	return abs * g_timebase.numer / g_timebase.denom;
59 }
60 
61 
62 
63 /*
64  * Synchronize the startup / initialization of a set of threads
65  */
66 static void *
thread_sync(void * ctx)67 thread_sync(void *ctx)
68 {
69 	struct thread_sync_arg *a = (struct thread_sync_arg *)ctx;
70 	T_QUIET;
71 	T_ASSERT_TRUE(((a != NULL) && (a->work != NULL)), "thread setup error");
72 
73 	if (a->setup) {
74 		(a->setup)(&a->ep);
75 	}
76 
77 	semaphore_wait_signal(g_thread_sem, g_sync_sem);
78 	return (a->work)(&a->ep);
79 }
80 
/*
 * Pairs an endpoint pointer with a thread count.
 * NOTE(review): nothing in the visible part of this file uses this type.
 */
struct select_stress_args {
	struct endpoint *ep;
	int nthreads;
};
85 
86 static void
setup_stress_event(struct endpoint * ep)87 setup_stress_event(struct endpoint *ep)
88 {
89 	T_QUIET;
90 	T_WITH_ERRNO;
91 	T_ASSERT_POSIX_SUCCESS(pipe(&ep->fd[0]), "pipe()");
92 
93 	T_LOG("th[0x%lx]: fd:{%d,%d}, ep@%p",
94 	    (uintptr_t)pthread_self(), ep->fd[0], ep->fd[1], (void *)ep);
95 }
96 
97 /*
98  * Cause file descriptors to be reused/replaced.  We expect that it will at
99  * least take the lowest fd as part of the descriptor list.  This may be
100  * optimistic, but it shows replacing an fd out from under a select() if it
101  * happens.
102  *
103  * We potentially delay the open for a random amount of time so that another
104  * thread can come in and wake up the fd_set with a bad (closed) fd in the set.
105  */
106 static void
recycle_fds(struct endpoint * ep)107 recycle_fds(struct endpoint *ep)
108 {
109 	/* close endpoint descriptors in random order */
110 	if (random() % 1) {
111 		close(ep->fd[0]);
112 		close(ep->fd[1]);
113 	} else {
114 		close(ep->fd[1]);
115 		close(ep->fd[0]);
116 	}
117 
118 	/* randomize a delay */
119 	if ((random() % ep->fd[0]) == 0) {
120 		usleep(((random() % ep->fd[1]) + 1) * ep->fd[1]);
121 	}
122 
123 	/* reopen the FDs, hopefully in the middle of select() */
124 	T_QUIET;
125 	T_WITH_ERRNO;
126 	T_ASSERT_POSIX_SUCCESS(pipe(&ep->fd[0]), "pipe");
127 }
128 
129 
130 /*
131  * Send a byte of data down the thread end of a pipe to wake up the select
132  * on the other end of it.  Select will wake up normally because of this,
133  * and read the byte out. Hopefully, another thread has closed/reopened its FDs.
134  */
135 static void
write_data(struct endpoint * ep)136 write_data(struct endpoint *ep)
137 {
138 	T_QUIET;
139 	T_WITH_ERRNO;
140 	T_ASSERT_POSIX_SUCCESS(write(ep->fd[1], "X", 1), "th[0x%lx] write_data(fd=%d)",
141 	    (uintptr_t)pthread_self(), ep->fd[1]);
142 }
143 
144 static void *
do_stress_events(struct endpoint * ep)145 do_stress_events(struct endpoint *ep)
146 {
147 	unsigned write_freq = (unsigned)(((uintptr_t)pthread_self() & 0xff0000) >> 16);
148 
149 	/* some default */
150 	if (write_freq == 0) {
151 		write_freq = 31;
152 	}
153 
154 	T_LOG("th[0x%lx] write_freq:%d", (uintptr_t)pthread_self(), write_freq);
155 
156 	for (;;) {
157 		/* randomized delay between events */
158 		usleep(((random() % ep->fd[1]) + 1) * ep->fd[1]);
159 
160 		if ((random() % write_freq) == 0) {
161 			write_data(ep);
162 		} else {
163 			recycle_fds(ep);
164 		}
165 	}
166 }
167 
/* State shared between test_select_stress() and the do_select() thread */
struct selarg {
	struct thread_sync_arg *th;     /* worker array whose read ends are watched */
	fd_set  def_readfds;            /* template read set, copied before each select() */
	int max_fd;                     /* highest descriptor placed in def_readfds */
	int nthreads;                   /* number of entries in th */
	int ret;                        /* 0, or EBADF when select() saw a stale descriptor */

	pthread_t pth;                  /* thread currently running do_select() */
};
177 
178 /*
179  * Put the actual call to select in its own thread so we can catch errors that
180  * occur only the first time a thread calls select.
181  */
182 static void *
do_select(void * arg)183 do_select(void *arg)
184 {
185 	struct selarg *sarg = (struct selarg *)arg;
186 	struct timeval timeout;
187 	struct timeval *tp = NULL;
188 	fd_set  readfds;
189 	int nfd;
190 
191 	sarg->ret = 0;
192 
193 	FD_COPY(&sarg->def_readfds, &readfds);
194 
195 	/* Add a timeout probablistically */
196 	if ((random() % TIMEOUT_CHANCE) == 0) {
197 		timeout.tv_sec = random() % 1;
198 		timeout.tv_usec = ((random() % TIMEOUT_POLLCHANCE) * TIMEOUT_SCALE);
199 		tp = &timeout;
200 	}
201 
202 	/* Do the select */
203 	nfd = select(sarg->max_fd + 1, &readfds, 0, 0, tp);
204 	if (nfd < 0) {
205 		/* EBADF: fd_set has changed */
206 		if (errno == EBADF) {
207 			sarg->ret = EBADF;
208 			return NULL;
209 		}
210 
211 		/* Other errors are fatal */
212 		T_QUIET;
213 		T_WITH_ERRNO;
214 		T_ASSERT_POSIX_SUCCESS(nfd, "select:stress");
215 	}
216 
217 	/* Fast: handle timeouts */
218 	if (nfd == 0) {
219 		return NULL;
220 	}
221 
222 	/* Slower: discard read input thrown at us from threads */
223 	for (int i = 0; i < sarg->nthreads; i++) {
224 		struct endpoint *ep = &sarg->th[i].ep;
225 
226 		if (FD_ISSET(ep->fd[0], &readfds)) {
227 			char c;
228 			(void)read(ep->fd[0], &c, 1);
229 		}
230 	}
231 
232 	return NULL;
233 }
234 
235 /*
236  * Work around rdar://87992172: pthread_join() doesn't wait for the thread to be
237  * fully cleaned up, so create/join loops may spuriously fail with too many
238  * threads.
239  */
240 static int
create_thread_backoff(pthread_t * pthread,const pthread_attr_t * attr,void * (* thread_func)(void *),void * arg)241 create_thread_backoff(pthread_t *pthread, const pthread_attr_t *attr, void *(*thread_func)(void *), void *arg)
242 {
243 	int ret, tries;
244 
245 	for (tries = 0; tries < 3; tries++) {
246 		ret = pthread_create(pthread, NULL, thread_func, arg);
247 		if (ret != EAGAIN) {
248 			break;
249 		}
250 
251 		T_LOG("warning: pthread_create failed with %d (%s), backing off for %d seconds...", ret, strerror(ret), BACKOFF_SLEEP_SECONDS);
252 		sleep(BACKOFF_SLEEP_SECONDS);
253 	}
254 
255 	T_QUIET;
256 	T_ASSERT_POSIX_ZERO(ret, "pthread_create (after %d retries)", tries);
257 	return tries;
258 }
259 
260 
261 static void
test_select_stress(int nthreads,uint64_t duration_seconds)262 test_select_stress(int nthreads, uint64_t duration_seconds)
263 {
264 	uint64_t deadline;
265 	uint64_t seconds_remain, last_print_time;
266 
267 	struct selarg sarg;
268 
269 	int started_threads = 0;
270 	struct thread_sync_arg *th;
271 
272 	if (nthreads < 2) {
273 		T_LOG("forcing a minimum of 2 threads");
274 		nthreads = 2;
275 	}
276 
277 	/*
278 	 * Allocate memory for endpoint data
279 	 */
280 	th = calloc(nthreads, sizeof(*th));
281 	T_QUIET;
282 	T_ASSERT_NOTNULL(th, "select_stress: No memory for thread endpoints");
283 
284 	T_LOG("Select stress test: %d threads, for %lld seconds", nthreads, duration_seconds);
285 
286 	/*
287 	 * Startup all the threads
288 	 */
289 	T_LOG("\tcreating threads...");
290 	for (int i = 0; i < nthreads; i++) {
291 		struct endpoint *e = &th[i].ep;
292 		th[i].setup = setup_stress_event;
293 		th[i].work = do_stress_events;
294 		create_thread_backoff(&e->pth, 0, thread_sync, &th[i]);
295 	}
296 
297 	/*
298 	 * Wait for all the threads to start up
299 	 */
300 	while (started_threads < nthreads) {
301 		if (semaphore_wait(g_sync_sem) == KERN_SUCCESS) {
302 			++started_threads;
303 		}
304 	}
305 
306 	/*
307 	 * Kick everyone off
308 	 */
309 	semaphore_signal_all(g_thread_sem);
310 
311 	/*
312 	 * Calculate a stop time
313 	 */
314 	deadline = mach_absolute_time() + ns_to_abs(duration_seconds * NSEC_PER_SEC);
315 	seconds_remain = duration_seconds;
316 	last_print_time = seconds_remain + 1;
317 
318 	/*
319 	 * Perform the select and read any data that comes from the
320 	 * constituent thread FDs.
321 	 */
322 
323 	T_LOG("\ttest running!");
324 handle_ebadf:
325 	/* (re) set up the select fd set */
326 	sarg.max_fd = 0;
327 	FD_ZERO(&sarg.def_readfds);
328 	for (int i = 0; i < nthreads; i++) {
329 		struct endpoint *ep = &th[i].ep;
330 
331 		FD_SET(ep->fd[0], &sarg.def_readfds);
332 		if (ep->fd[0] > sarg.max_fd) {
333 			sarg.max_fd = ep->fd[0];
334 		}
335 	}
336 
337 	sarg.th = th;
338 	sarg.nthreads = nthreads;
339 
340 	while (mach_absolute_time() < deadline) {
341 		void *thret = NULL;
342 
343 		seconds_remain = abs_to_ns(deadline - mach_absolute_time()) / NSEC_PER_SEC;
344 		if (last_print_time > seconds_remain) {
345 			T_LOG(" %6lld...", seconds_remain);
346 			last_print_time = seconds_remain;
347 		}
348 
349 		sarg.ret = 0;
350 		int retries = create_thread_backoff(&sarg.pth, 0, do_select, &sarg);
351 		if (retries > 0) {
352 			/* we had backoff for a few seconds, so extend our deadline */
353 			deadline += ns_to_abs(NSEC_PER_SEC * BACKOFF_SLEEP_SECONDS * retries);
354 		}
355 
356 		T_QUIET;
357 		T_WITH_ERRNO;
358 		T_ASSERT_POSIX_ZERO(pthread_cancel(sarg.pth), "pthread_cancel");
359 		T_QUIET;
360 		T_WITH_ERRNO;
361 		T_ASSERT_POSIX_ZERO(pthread_join(sarg.pth, &thret), "pthread_join");
362 
363 		if (sarg.ret == EBADF) {
364 			goto handle_ebadf;
365 		}
366 		T_QUIET;
367 		T_ASSERT_GE(sarg.ret, 0, "threaded do_select returned an \
368 		    error: %d!", sarg.ret);
369 	}
370 
371 	T_PASS("select stress test passed");
372 }
373 
374 
375 /*
376  * TEST: use select as sleep()
377  */
378 static void
test_select_sleep(uint32_t niterations,unsigned long usecs)379 test_select_sleep(uint32_t niterations, unsigned long usecs)
380 {
381 	int ret;
382 	struct timeval tv;
383 	tv.tv_sec = 0;
384 	tv.tv_usec = usecs;
385 
386 	if (!niterations) {
387 		T_FAIL("select sleep test skipped");
388 		return;
389 	}
390 
391 	T_LOG("Testing select as sleep (n=%d, us=%ld)...", niterations, usecs);
392 
393 	while (niterations--) {
394 		ret = select(0, NULL, NULL, NULL, &tv);
395 		if (ret < 0 && errno != EINTR) {
396 			T_QUIET;
397 			T_WITH_ERRNO;
398 			T_ASSERT_POSIX_SUCCESS(ret, "select:sleep");
399 		}
400 	}
401 
402 	T_PASS("select sleep test passed");
403 }
404 
/*
 * get_env_arg(NM, sval, val)
 *
 * Look up the environment variable named NM (a bare identifier, stringified
 * by the macro).  sval receives the raw getenv() result.  If the variable is
 * set, its value is parsed as a base-10 long, anything <= 0 (including
 * unparsable text) is replaced by 1, and the result is stored in val,
 * converted to val's own type.  If the variable is unset, val is untouched.
 *
 * Uses strtol() rather than atol() so out-of-range input is well defined,
 * __typeof__ so the macro also builds under strict ISO dialects, and a
 * prefixed temporary so it cannot capture a caller variable.
 */
#define get_env_arg(NM, sval, val) \
	do { \
		(sval) = getenv(#NM); \
		if ((sval) != NULL) { \
			long get_env_arg_v_ = strtol((sval), NULL, 10); \
			if (get_env_arg_v_ <= 0) { \
				get_env_arg_v_ = 1; \
			} \
			(val) = (__typeof__(val))get_env_arg_v_; \
		} \
	} while (0)
415 
416 T_DECL(select_sleep, "select sleep test for rdar://problem/20804876 Gala: select with no FDs leaks waitq table objects (causes asserts/panics)", T_META_TAG_VM_PREFERRED)
417 {
418 	char *env_sval = NULL;
419 
420 	get_env_arg(SELSLEEP_ITERATIONS, env_sval, g_sleep_iterations);
421 	get_env_arg(SELSLEEP_INTERVAL, env_sval, g_sleep_usecs);
422 
423 	test_select_sleep((uint32_t)g_sleep_iterations, (unsigned long)g_sleep_usecs);
424 }
425 
426 T_DECL(select_stress, "select stress test for rdar://problem/20804876 Gala: select with no FDs leaks waitq table objects (causes asserts/panics)", T_META_TAG_VM_PREFERRED)
427 {
428 	char *env_sval = NULL;
429 
430 	T_QUIET;
431 	T_ASSERT_MACH_SUCCESS(mach_timebase_info(&g_timebase),
432 	    "Can't get mach_timebase_info!");
433 
434 	get_env_arg(SELSTRESS_THREADS, env_sval, g_stress_nthreads);
435 	get_env_arg(SELSTRESS_DURATION, env_sval, g_stress_duration);
436 
437 	T_QUIET;
438 	T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &g_sync_sem, SYNC_POLICY_FIFO, 0),
439 	    "semaphore_create(g_sync_sem)");
440 	T_QUIET;
441 	T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &g_thread_sem, SYNC_POLICY_FIFO, 0),
442 	    "semaphore_create(g_thread_sem)");
443 
444 	test_select_stress(g_stress_nthreads, g_stress_duration);
445 }
446