1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #include <kern/ast.h>
58 #include <kern/backtrace.h>
59 #include <kern/kern_types.h>
60 #include <kern/mach_param.h>
61 #include <kern/percpu.h>
62 #include <kern/queue.h>
63 #include <kern/sched_prim.h>
64 #include <kern/simple_lock.h>
65 #include <kern/spl.h>
66 #include <kern/waitq.h>
67 #include <kern/zalloc.h>
68 #include <kern/policy_internal.h>
69 #include <kern/turnstile.h>
70
71 #include <os/hash.h>
72 #include <libkern/section_keywords.h>
73 #include <mach/sync_policy.h>
74 #include <vm/vm_kern_xnu.h>
75
76 #include <sys/kdebug.h>
77
78 /*!
79 * @const waitq_set_unlink_batch
80 *
81 * @brief
82 * How many links are unhooked under a single set lock hold.
83 *
84 * @discussion
85 * Holding a waitq set lock for too long can cause
86 * extreme contention (when a set is being torn down concurrently
87 * to messages being sent to ports who used to belong to that set).
88 *
89 * In order to fight this, large wait queue sets will drop
90 * and reacquire their lock for each unlinking batch.
91 */
92 static TUNABLE(uint32_t, waitq_set_unlink_batch, "waitq_set_unlink_batch", 64);
93
94 /*!
95 * @const WQL_PREPOST_MARKER
96 *
97 * @brief
98 * Marker set in the @c wql_wqs field of wait queue linkages to denote that
99 * this linkage has preposted to its wait queue set already.
100 *
101 * @discussion
102 * This bit is manipulated under both the wait queue and the wait queue set
103 * locks, and is used for two purposes:
104 *
105 * - for port set queues, it denotes in which circle queue the linkage
106 * is queued on (@c waitq_set::wqset_links or @c waitq_set::wqset_preposts)
107 *
108 * - as an optimization during pre-post to not walk sets this link already
109 * preposted to.
110 */
111 #define WQL_PREPOST_MARKER 1ul
112
113 #if __LP64__
114 /*!
115 * @struct waitq_link_hdr
116 *
117 * @brief
118 * Common "header" between all linkages, in order to find the waitq_set
119 * of this linkage.
120 *
121 * @discussion
122 * Due to unfortunate alignment constraints on @c queue_chain_t,
123 * this is wildly different for LP64 and ILP32.
124 *
125 * Do note that `wql
126 */
127 struct waitq_link_hdr {
128 uintptr_t wql_wqs;
129 };
130
131 /*!
132 * @struct waitq_sellink
133 *
134 * @brief
135 * Linkages used for select waitq queues to select wait queue sets.
136 *
137 * @discussion
138 * Select linkages are one way (queue to set) for two reasons:
139 *
140 * 1. select doesn't use the wait queue subsystem to discover which file
141 * descriptor woke up the set (it will instead scan all fds again),
142 *
143 * 2. all linkages are unhooked on each syscall return, so we minimize
144 * work to be done to be as quick as possible, using a fast invalidation
145 * scheme based on unique identifiers and sequestering
146 * (see @c select_set_nextid()).
147 */
148 struct waitq_sellink {
149 uintptr_t wql_wqs;
150 struct waitq_link_list_entry wql_next;
151 uint64_t wql_setid;
152 };
153
154 /*!
155 * @struct waitq_link
156 *
157 * @brief
158 * Linkages used for port wait queues and port-set wait queue sets.
159 *
160 * @discussion
161 * Those linkages go both ways so that receiving messages through a port-set
162 * can quickly find ports that preposted to the set.
163 *
164 * It also means that unhooking linkages cannot be lazy.
165 */
166 struct waitq_link {
167 uintptr_t wql_wqs; /**< wait queue set for this link */
168 queue_chain_t wql_qlink; /**< linkage through the waitq list */
169 queue_chain_t wql_slink; /**< linkage through the wqset list */
170 struct waitq *wql_wq; /**< wait queue for this link */
171 };
172 #else
173 struct waitq_link_hdr {
174 uint64_t __wql_padding;
175 uintptr_t wql_wqs;
176 };
177
178 struct waitq_sellink {
179 struct waitq_link_list_entry wql_next;
180 uintptr_t __wql_padding;
181 uintptr_t wql_wqs;
182 uint64_t wql_setid;
183 };
184
185 struct waitq_link {
186 queue_chain_t wql_qlink;
187 uintptr_t wql_wqs;
188 struct waitq *wql_wq;
189 queue_chain_t wql_slink;
190 };
191 #endif
192
193 static_assert(offsetof(struct waitq_link_hdr, wql_wqs) ==
194 offsetof(struct waitq_sellink, wql_wqs));
195 static_assert(offsetof(struct waitq_link_hdr, wql_wqs) ==
196 offsetof(struct waitq_link, wql_wqs));
197 static_assert(sizeof(struct waitq) <= WQ_OPAQUE_SIZE, "waitq structure size mismatch");
198 static_assert(__alignof(struct waitq) == WQ_OPAQUE_ALIGN, "waitq structure alignment mismatch");
199
200 static KALLOC_TYPE_DEFINE(waitq_sellink_zone, struct waitq_sellink, KT_PRIV_ACCT);
201 static KALLOC_TYPE_DEFINE(waitq_link_zone, struct waitq_link, KT_PRIV_ACCT);
202 ZONE_DEFINE_ID(ZONE_ID_SELECT_SET, "select_set", struct select_set,
203 ZC_SEQUESTER | ZC_NOPGZ | ZC_ZFREE_CLEARMEM);
204
205 static LCK_GRP_DECLARE(waitq_lck_grp, "waitq");
206
207 static uint64_t PERCPU_DATA(select_setid);
208 struct waitq select_conflict_queue;
209
210 #pragma mark waitq links
211
212 static inline bool
waitq_is_sellink(waitq_type_t type)213 waitq_is_sellink(waitq_type_t type)
214 {
215 return type == WQT_SELECT || type == WQT_SELECT_SET;
216 }
217
218 static inline bool
wql_sellink_valid(struct select_set * selset,struct waitq_sellink * link)219 wql_sellink_valid(struct select_set *selset, struct waitq_sellink *link)
220 {
221 return waitq_valid(selset) && selset->selset_id == link->wql_setid;
222 }
223
224 static waitq_t
wql_wqs(waitq_link_t link)225 wql_wqs(waitq_link_t link)
226 {
227 return (waitq_t){ (void *)(link.wqlh->wql_wqs & ~WQL_PREPOST_MARKER) };
228 }
229
230 static bool
wql_wqs_preposted(waitq_link_t link)231 wql_wqs_preposted(waitq_link_t link)
232 {
233 return link.wqlh->wql_wqs & WQL_PREPOST_MARKER;
234 }
235
236 static void
wql_wqs_mark_preposted(waitq_link_t link)237 wql_wqs_mark_preposted(waitq_link_t link)
238 {
239 assert(!wql_wqs_preposted(link));
240 link.wqlh->wql_wqs |= WQL_PREPOST_MARKER;
241 }
242
243 static void
wql_wqs_clear_preposted(waitq_link_t link)244 wql_wqs_clear_preposted(waitq_link_t link)
245 {
246 assert(wql_wqs_preposted(link));
247 link.wqlh->wql_wqs &= ~WQL_PREPOST_MARKER;
248 }
249
250 static circle_queue_t
wql_wqs_queue(struct waitq_set * wqs,struct waitq_link * link)251 wql_wqs_queue(struct waitq_set *wqs, struct waitq_link *link)
252 {
253 return wql_wqs_preposted(link) ? &wqs->wqset_preposts : &wqs->wqset_links;
254 }
255
256 static void
wql_list_push(waitq_link_list_t * list,waitq_link_t link)257 wql_list_push(waitq_link_list_t *list, waitq_link_t link)
258 {
259 link.wqls->wql_next.next = list->next;
260 list->next = &link.wqls->wql_next;
261 }
262
263 static inline struct waitq_sellink *
wql_list_elem(struct waitq_link_list_entry * e)264 wql_list_elem(struct waitq_link_list_entry *e)
265 {
266 return e ? __container_of(e, struct waitq_sellink, wql_next) : NULL;
267 }
268
269 /*!
270 * @function wql_list_next()
271 *
272 * @brief
273 * Helper function to implement wait queue link list enumeration.
274 *
275 * @param e in: pointer to the current element,
276 * out: pointer to the next element or NULL
277 * @param end which element to stop enumeration at (NULL for lists,
278 * or the first element enumerated for circle queues).
279 * @returns true (makes writing for(;;) based enumerators easier).
280 */
281 static inline bool
wql_list_next(struct waitq_link_list_entry ** e,struct waitq_link_list_entry * end)282 wql_list_next(struct waitq_link_list_entry **e, struct waitq_link_list_entry *end)
283 {
284 if (*e == NULL || (*e)->next == end) {
285 *e = NULL;
286 } else {
287 *e = (*e)->next;
288 }
289 return true;
290 }
291
292 #define __wql_list_foreach(it, head, end) \
293 for (struct waitq_link_list_entry *__it = (head)->next, *__end = end; \
294 ((it) = wql_list_elem(__it)); wql_list_next(&__it, __end))
295
296 #define wql_list_foreach(it, head) \
297 __wql_list_foreach(it, head, NULL)
298
299 #define wql_list_foreach_safe(it, head) \
300 for (struct waitq_link_list_entry *__it = (head)->next; \
301 ((it) = wql_list_elem(__it)) && wql_list_next(&__it, NULL); )
302
303 /*
304 * Gross hack: passing `__it` to `__wql_list_foreach` makes it stop whether
305 * we circle back to the first element or NULL (whichever comes first).
306 *
307 * This allows to have a single enumeration function oblivious to whether
308 * we enumerate a circle queue or a sellink list.
309 */
310 #define waitq_link_foreach(link, waitq) \
311 __wql_list_foreach((link).wqls, &(waitq).wq_q->waitq_sellinks, __it)
312
313 static_assert(offsetof(struct waitq, waitq_sellinks) ==
314 offsetof(struct waitq, waitq_links));
315 static_assert(offsetof(struct waitq_sellink, wql_next) ==
316 offsetof(struct waitq_link, wql_qlink.next));
317
318 static struct waitq_link *
wql_find(struct waitq * waitq,waitq_t wqset)319 wql_find(struct waitq *waitq, waitq_t wqset)
320 {
321 struct waitq_link *link;
322
323 cqe_foreach_element(link, &waitq->waitq_links, wql_qlink) {
324 if (waitq_same(wql_wqs(link), wqset)) {
325 return link;
326 }
327 }
328
329 return NULL;
330 }
331
332 waitq_link_t
waitq_link_alloc(waitq_type_t type)333 waitq_link_alloc(waitq_type_t type)
334 {
335 waitq_link_t link;
336
337 if (waitq_is_sellink(type)) {
338 link.wqls = zalloc_flags(waitq_sellink_zone, Z_WAITOK | Z_ZERO);
339 } else {
340 link.wqll = zalloc_flags(waitq_link_zone, Z_WAITOK | Z_ZERO);
341 }
342 return link;
343 }
344
345 void
waitq_link_free(waitq_type_t type,waitq_link_t link)346 waitq_link_free(waitq_type_t type, waitq_link_t link)
347 {
348 if (waitq_is_sellink(type)) {
349 return zfree(waitq_sellink_zone, link.wqls);
350 } else {
351 return zfree(waitq_link_zone, link.wqll);
352 }
353 }
354
355 void
waitq_link_free_list(waitq_type_t type,waitq_link_list_t * free_l)356 waitq_link_free_list(waitq_type_t type, waitq_link_list_t *free_l)
357 {
358 waitq_link_t link;
359
360 wql_list_foreach_safe(link.wqls, free_l) {
361 waitq_link_free(type, link);
362 }
363
364 free_l->next = NULL;
365 }
366
367
368 #pragma mark global wait queues
369
370 static __startup_data struct waitq g_boot_waitq;
371 static SECURITY_READ_ONLY_LATE(struct waitq *) global_waitqs = &g_boot_waitq;
372 static SECURITY_READ_ONLY_LATE(uint32_t) g_num_waitqs = 1;
373
374 /*
375 * Zero out the used MSBs of the event.
376 */
377 #define _CAST_TO_EVENT_MASK(event) \
378 ((waitq_flags_t)(uintptr_t)(event) & ((1ul << _EVENT_MASK_BITS) - 1ul))
379
380 /* return a global waitq pointer corresponding to the given event */
381 struct waitq *
_global_eventq(event64_t event)382 _global_eventq(event64_t event)
383 {
384 /*
385 * this doesn't use os_hash_kernel_pointer() because
386 * some clients use "numbers" here.
387 */
388 return &global_waitqs[os_hash_uint64(event) & (g_num_waitqs - 1)];
389 }
390
391 bool
waitq_is_valid(waitq_t waitq)392 waitq_is_valid(waitq_t waitq)
393 {
394 return waitq_valid(waitq);
395 }
396
397 static inline bool
waitq_is_global(waitq_t waitq)398 waitq_is_global(waitq_t waitq)
399 {
400 if (waitq_type(waitq) != WQT_QUEUE) {
401 return false;
402 }
403 return waitq.wq_q >= global_waitqs && waitq.wq_q < global_waitqs + g_num_waitqs;
404 }
405
406 static inline bool
waitq_empty(waitq_t wq)407 waitq_empty(waitq_t wq)
408 {
409 struct turnstile *ts;
410
411 switch (waitq_type(wq)) {
412 case WQT_TURNSTILE:
413 return priority_queue_empty(&wq.wq_q->waitq_prio_queue);
414 case WQT_PORT:
415 ts = wq.wq_q->waitq_ts;
416 return ts == TURNSTILE_NULL ||
417 priority_queue_empty(&ts->ts_waitq.waitq_prio_queue);
418 case WQT_QUEUE:
419 case WQT_SELECT:
420 case WQT_PORT_SET:
421 case WQT_SELECT_SET:
422 return circle_queue_empty(&wq.wq_q->waitq_queue);
423
424 default:
425 return true;
426 }
427 }
428
429 #if CONFIG_WAITQ_STATS
430 #define NWAITQ_BTFRAMES 5
431
432 struct wq_stats {
433 uint64_t waits;
434 uint64_t wakeups;
435 uint64_t clears;
436 uint64_t failed_wakeups;
437
438 uintptr_t last_wait[NWAITQ_BTFRAMES];
439 uintptr_t last_wakeup[NWAITQ_BTFRAMES];
440 uintptr_t last_failed_wakeup[NWAITQ_BTFRAMES];
441 };
442
443 /* this global is for lldb */
444 const uint32_t g_nwaitq_btframes = NWAITQ_BTFRAMES;
445 struct wq_stats g_boot_stats;
446 struct wq_stats *g_waitq_stats = &g_boot_stats;
447
448 static __inline__ void
waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES],unsigned skip)449 waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES], unsigned skip)
450 {
451 uintptr_t buf[NWAITQ_BTFRAMES + skip];
452
453 memset(buf, 0, (NWAITQ_BTFRAMES + skip) * sizeof(uintptr_t));
454 backtrace(buf, g_nwaitq_btframes + skip, NULL, NULL);
455 memcpy(&bt[0], &buf[skip], NWAITQ_BTFRAMES * sizeof(uintptr_t));
456 }
457
458 static __inline__ struct wq_stats *
waitq_global_stats(waitq_t waitq)459 waitq_global_stats(waitq_t waitq)
460 {
461 struct wq_stats *wqs;
462 uint32_t idx;
463
464 if (!waitq_is_global(waitq)) {
465 return NULL;
466 }
467
468 idx = (uint32_t)(waitq.wq_q - global_waitqs);
469 assert(idx < g_num_waitqs);
470 wqs = &g_waitq_stats[idx];
471 return wqs;
472 }
473
474 static __inline__ void
waitq_stats_count_wait(waitq_t waitq)475 waitq_stats_count_wait(waitq_t waitq)
476 {
477 struct wq_stats *wqs = waitq_global_stats(waitq);
478 if (wqs != NULL) {
479 wqs->waits++;
480 waitq_grab_backtrace(wqs->last_wait, 2);
481 }
482 }
483
484 static __inline__ void
waitq_stats_count_wakeup(waitq_t waitq,int n)485 waitq_stats_count_wakeup(waitq_t waitq, int n)
486 {
487 struct wq_stats *wqs = waitq_global_stats(waitq);
488 if (wqs != NULL) {
489 if (n > 0) {
490 wqs->wakeups += n;
491 waitq_grab_backtrace(wqs->last_wakeup, 2);
492 } else {
493 wqs->failed_wakeups++;
494 waitq_grab_backtrace(wqs->last_failed_wakeup, 2);
495 }
496 }
497 }
498
499 static __inline__ void
waitq_stats_count_clear_wakeup(waitq_t waitq)500 waitq_stats_count_clear_wakeup(waitq_t waitq)
501 {
502 struct wq_stats *wqs = waitq_global_stats(waitq);
503 if (wqs != NULL) {
504 wqs->wakeups++;
505 wqs->clears++;
506 waitq_grab_backtrace(wqs->last_wakeup, 2);
507 }
508 }
509 #else /* !CONFIG_WAITQ_STATS */
510 #define waitq_stats_count_wait(q) do { } while (0)
511 #define waitq_stats_count_wakeup(q, n) do { } while (0)
512 #define waitq_stats_count_clear_wakeup(q) do { } while (0)
513 #endif
514
515 static struct waitq *
waitq_get_safeq(waitq_t waitq)516 waitq_get_safeq(waitq_t waitq)
517 {
518 if (waitq_type(waitq) == WQT_PORT) {
519 struct turnstile *ts = waitq.wq_q->waitq_ts;
520 return ts ? &ts->ts_waitq : NULL;
521 }
522
523 uint32_t hash = os_hash_kernel_pointer(waitq.wq_q);
524 return &global_waitqs[hash & (g_num_waitqs - 1)];
525 }
526
527 /*
528 * Since the priority ordered waitq uses basepri as the
529 * ordering key assert that this value fits in a uint8_t.
530 */
531 static_assert(MAXPRI <= UINT8_MAX);
532
533 static inline void
waitq_thread_insert(struct waitq * safeq,thread_t thread,waitq_t wq,event64_t event)534 waitq_thread_insert(struct waitq *safeq, thread_t thread,
535 waitq_t wq, event64_t event)
536 {
537 if (waitq_type(safeq) == WQT_TURNSTILE) {
538 turnstile_stats_update(0, TSU_TURNSTILE_BLOCK_COUNT, NULL);
539 turnstile_waitq_add_thread_priority_queue(safeq, thread);
540 } else {
541 turnstile_stats_update(0, TSU_REGULAR_WAITQ_BLOCK_COUNT, NULL);
542 /*
543 * This is the extent to which we currently take scheduling
544 * attributes into account:
545 *
546 * - If the thread is vm privileged, we stick it at the front
547 * of the queue, later, these queues will honor the policy
548 * value set at waitq_init time.
549 *
550 * - Realtime threads get priority for wait queue placements.
551 * This allows wait_queue_wakeup_one to prefer a waiting
552 * realtime thread, similar in principle to performing
553 * a wait_queue_wakeup_all and allowing scheduler
554 * prioritization to run the realtime thread, but without
555 * causing the lock contention of that scenario.
556 */
557 if (thread->sched_pri >= BASEPRI_REALTIME ||
558 !safeq->waitq_fifo ||
559 (thread->options & TH_OPT_VMPRIV)) {
560 circle_enqueue_head(&safeq->waitq_queue, &thread->wait_links);
561 } else {
562 circle_enqueue_tail(&safeq->waitq_queue, &thread->wait_links);
563 }
564 }
565
566 /* mark the event and real waitq, even if enqueued on a global safeq */
567 thread->wait_event = event;
568 thread->waitq = wq;
569 }
570
571 /**
572 * clear the thread-related waitq state, moving the thread from
573 * TH_WAIT to TH_WAIT | TH_WAKING, where it is no longer on a waitq and
574 * can expect to be go'ed in the near future.
575 *
576 * Clearing the waitq prevents further propagation of a turnstile boost
577 * on the thread and stops a clear_wait from succeeding.
578 *
579 * Conditions:
580 * 'thread' is locked, thread is waiting
581 */
582 static inline void
thread_clear_waitq_state(thread_t thread)583 thread_clear_waitq_state(thread_t thread)
584 {
585 assert(thread->state & TH_WAIT);
586
587 thread->waitq.wq_q = NULL;
588 thread->wait_event = NO_EVENT64;
589 thread->at_safe_point = FALSE;
590 thread->block_hint = kThreadWaitNone;
591 thread->state |= TH_WAKING;
592 }
593
594 static inline void
waitq_thread_remove(waitq_t wq,thread_t thread)595 waitq_thread_remove(waitq_t wq, thread_t thread)
596 {
597 if (waitq_type(wq) == WQT_TURNSTILE) {
598 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
599 (TURNSTILE_CODE(TURNSTILE_HEAP_OPERATIONS,
600 (THREAD_REMOVED_FROM_TURNSTILE_WAITQ))) | DBG_FUNC_NONE,
601 VM_KERNEL_UNSLIDE_OR_PERM(waitq_to_turnstile(wq.wq_q)),
602 thread_tid(thread), 0, 0, 0);
603 priority_queue_remove(&wq.wq_q->waitq_prio_queue,
604 &thread->wait_prioq_links);
605 } else {
606 circle_dequeue(&wq.wq_q->waitq_queue, &thread->wait_links);
607 if (waitq_is_global(wq) && waitq_empty(wq)) {
608 wq.wq_q->waitq_eventmask = 0;
609 }
610 }
611
612 thread_clear_waitq_state(thread);
613 }
614
615 bool
waitq_wait_possible(thread_t thread)616 waitq_wait_possible(thread_t thread)
617 {
618 return waitq_is_null(thread->waitq) &&
619 ((thread->state & TH_WAKING) == 0);
620 }
621
622 __startup_func
623 static void
waitq_bootstrap(void)624 waitq_bootstrap(void)
625 {
626 const uint32_t qsz = sizeof(struct waitq);
627 vm_offset_t whsize;
628 int cpu = 0;
629
630 /*
631 * Determine the amount of memory we're willing to reserve for
632 * the waitqueue hash table
633 */
634 if (!PE_parse_boot_argn("wqsize", &whsize, sizeof(whsize))) {
635 whsize = round_page(thread_max * qsz / 5);
636 }
637
638 /*
639 * Determine the number of waitqueues we can fit.
640 * The hash algorithm requires that this be a power of 2.
641 */
642 g_num_waitqs = 0x80000000u >> __builtin_clzl(whsize / qsz);
643 assert(g_num_waitqs > 0);
644 whsize = round_page(g_num_waitqs * qsz);
645
646 kmem_alloc(kernel_map, (vm_offset_t *)&global_waitqs, whsize,
647 KMA_NOFAIL | KMA_KOBJECT | KMA_NOPAGEWAIT | KMA_PERMANENT,
648 VM_KERN_MEMORY_WAITQ);
649
650 #if CONFIG_WAITQ_STATS
651 whsize = round_page(g_num_waitqs * sizeof(struct wq_stats));
652 kmem_alloc(kernel_map, (vm_offset_t *)&g_waitq_stats, whsize,
653 KMA_NOFAIL | KMA_KOBJECT | KMA_NOPAGEWAIT | KMA_ZERO | KMA_PERMANENT,
654 VM_KERN_MEMORY_WAITQ);
655 #endif
656
657 for (uint32_t i = 0; i < g_num_waitqs; i++) {
658 waitq_init(&global_waitqs[i], WQT_QUEUE, SYNC_POLICY_FIFO);
659 }
660
661 waitq_init(&select_conflict_queue, WQT_SELECT, SYNC_POLICY_FIFO);
662
663 percpu_foreach(setid, select_setid) {
664 /* is not cpu_number() but CPUs haven't been numbered yet */
665 *setid = cpu++;
666 }
667 }
668 STARTUP(MACH_IPC, STARTUP_RANK_FIRST, waitq_bootstrap);
669
670
671 #pragma mark locking
672
673 static hw_spin_timeout_status_t
waitq_timeout_handler(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)674 waitq_timeout_handler(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
675 {
676 lck_spinlock_to_info_t lsti;
677 hw_lck_ticket_t tmp;
678 struct waitq *wq = _lock;
679
680 if (machine_timeout_suspended()) {
681 return HW_LOCK_TIMEOUT_CONTINUE;
682 }
683
684 lsti = lck_spinlock_timeout_hit(&wq->waitq_interlock, 0);
685 tmp.tcurnext = os_atomic_load(&wq->waitq_interlock.tcurnext, relaxed);
686
687 panic("waitq(%p) lock " HW_SPIN_TIMEOUT_FMT "; cpu=%d, "
688 "cticket: 0x%x, nticket: 0x%x, waiting for 0x%x, "
689 HW_SPIN_TIMEOUT_DETAILS_FMT,
690 wq, HW_SPIN_TIMEOUT_ARG(to, st), cpu_number(),
691 tmp.cticket, tmp.nticket, lsti->extra,
692 HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
693 }
694
695 static const struct hw_spin_policy waitq_spin_policy = {
696 .hwsp_name = "waitq",
697 #if defined(__i386__) || defined(__x86_64__)
698 .hwsp_timeout = &LockTimeOutTSC,
699 #else
700 .hwsp_timeout_atomic = &LockTimeOut,
701 #endif
702 /*
703 * Double the standard lock timeout, because wait queues tend
704 * to iterate over a number of threads - locking each. If there is
705 * a problem with a thread lock, it normally times out at the wait
706 * queue level first, hiding the real problem.
707 */
708 .hwsp_timeout_shift = 1,
709 .hwsp_lock_offset = offsetof(struct waitq, waitq_interlock),
710 .hwsp_op_timeout = waitq_timeout_handler,
711 };
712
713 void
waitq_invalidate(waitq_t waitq)714 waitq_invalidate(waitq_t waitq)
715 {
716 hw_lck_ticket_invalidate(&waitq.wq_q->waitq_interlock);
717 }
718
719 bool
waitq_held(waitq_t wq)720 waitq_held(waitq_t wq)
721 {
722 return hw_lck_ticket_held(&wq.wq_q->waitq_interlock);
723 }
724
725 void
waitq_lock(waitq_t wq)726 waitq_lock(waitq_t wq)
727 {
728 (void)hw_lck_ticket_lock_to(&wq.wq_q->waitq_interlock,
729 &waitq_spin_policy, &waitq_lck_grp);
730 #if defined(__x86_64__)
731 pltrace(FALSE);
732 #endif
733 }
734
735 bool
waitq_lock_try(waitq_t wq)736 waitq_lock_try(waitq_t wq)
737 {
738 bool rc = hw_lck_ticket_lock_try(&wq.wq_q->waitq_interlock, &waitq_lck_grp);
739
740 #if defined(__x86_64__)
741 if (rc) {
742 pltrace(FALSE);
743 }
744 #endif
745 return rc;
746 }
747
748 bool
waitq_lock_reserve(waitq_t wq,uint32_t * ticket)749 waitq_lock_reserve(waitq_t wq, uint32_t *ticket)
750 {
751 return hw_lck_ticket_reserve(&wq.wq_q->waitq_interlock, ticket, &waitq_lck_grp);
752 }
753
754 void
waitq_lock_wait(waitq_t wq,uint32_t ticket)755 waitq_lock_wait(waitq_t wq, uint32_t ticket)
756 {
757 (void)hw_lck_ticket_wait(&wq.wq_q->waitq_interlock, ticket,
758 &waitq_spin_policy, &waitq_lck_grp);
759 #if defined(__x86_64__)
760 pltrace(FALSE);
761 #endif
762 }
763
764 bool
waitq_lock_allow_invalid(waitq_t wq)765 waitq_lock_allow_invalid(waitq_t wq)
766 {
767 hw_lock_status_t rc;
768
769 rc = hw_lck_ticket_lock_allow_invalid(&wq.wq_q->waitq_interlock,
770 &waitq_spin_policy, &waitq_lck_grp);
771
772 #if defined(__x86_64__)
773 if (rc == HW_LOCK_ACQUIRED) {
774 pltrace(FALSE);
775 }
776 #endif
777 return rc == HW_LOCK_ACQUIRED;
778 }
779
780 void
waitq_unlock(waitq_t wq)781 waitq_unlock(waitq_t wq)
782 {
783 assert(waitq_held(wq));
784 #if defined(__x86_64__)
785 pltrace(TRUE);
786 #endif
787 hw_lck_ticket_unlock(&wq.wq_q->waitq_interlock);
788 }
789
790
791 #pragma mark assert_wait / wakeup
792
793 struct waitq_select_args {
794 /* input parameters */
795 event64_t event;
796 wait_result_t result;
797 waitq_wakeup_flags_t flags;
798 uint32_t max_threads;
799 bool is_identified;
800
801 /* output parameters */
802 /* counts all woken threads, may have more threads than on threadq */
803 uint32_t nthreads;
804 /* preemption is disabled while threadq is non-empty */
805 circle_queue_head_t threadq;
806 };
807
808 static inline void
maybe_adjust_thread_pri(thread_t thread,waitq_wakeup_flags_t flags,__kdebug_only waitq_t waitq)809 maybe_adjust_thread_pri(
810 thread_t thread,
811 waitq_wakeup_flags_t flags,
812 __kdebug_only waitq_t waitq)
813 {
814 /*
815 * If the caller is requesting the waitq subsystem to promote the
816 * priority of the awoken thread, then boost the thread's priority to
817 * the default WAITQ_BOOST_PRIORITY (if it's not already equal or
818 * higher priority). This boost must be removed via a call to
819 * waitq_clear_promotion_locked before the thread waits again.
820 */
821 if (flags & WAITQ_PROMOTE_PRIORITY) {
822 uintptr_t trace_waitq = 0;
823 if (__improbable(kdebug_enable)) {
824 trace_waitq = VM_KERNEL_UNSLIDE_OR_PERM(waitq.wq_q);
825 }
826
827 sched_thread_promote_reason(thread, TH_SFLAG_WAITQ_PROMOTED, trace_waitq);
828 }
829 }
830
831 static void
waitq_select_queue_add(waitq_t waitq,thread_t thread,struct waitq_select_args * args)832 waitq_select_queue_add(waitq_t waitq, thread_t thread, struct waitq_select_args *args)
833 {
834 spl_t s = splsched();
835
836 thread_lock(thread);
837 thread_clear_waitq_state(thread);
838
839 if (!args->is_identified && thread->state & TH_RUN) {
840 /*
841 * A thread that is currently on core may try to clear its own
842 * wait with clear wait or by waking its own event instead of
843 * calling thread_block as is normally expected. After doing
844 * this, it expects to be able to immediately wait again.
845 *
846 * If we are currently on a different CPU and waking that
847 * thread, as soon as we unlock the waitq and thread, that
848 * operation could complete, but we would still be holding the
849 * thread on our flush queue, leaving it in the waking state
850 * where it can't yet assert another wait.
851 *
852 * Since we know that we won't actually need to enqueue the
853 * thread on the runq due to it being on core, we can just
854 * immediately unblock it here so that the thread will be in a
855 * waitable state after we release its thread lock from this
856 * lock hold.
857 *
858 * Wakeups using *_identify can't be allowed to pass
859 * thread block until they're resumed, so they can't use
860 * this path. That means they are not allowed to skip calling
861 * thread_block.
862 */
863 maybe_adjust_thread_pri(thread, args->flags, waitq);
864 thread_go(thread, args->result, false);
865 } else {
866 if (circle_queue_empty(&args->threadq)) {
867 /*
868 * preemption is disabled while threads are
869 * on threadq - balanced in:
870 * waitq_resume_identified_thread
871 * waitq_select_queue_flush
872 */
873 disable_preemption();
874 }
875
876 circle_enqueue_tail(&args->threadq, &thread->wait_links);
877 }
878
879 thread_unlock(thread);
880
881 splx(s);
882 }
883
884
885 #if SCHED_HYGIENE_DEBUG
886
887 TUNABLE_DEV_WRITEABLE(uint32_t, waitq_flush_excess_threads, "waitq_flush_excess_threads", 20);
888 TUNABLE_DEV_WRITEABLE(uint32_t, waitq_flush_excess_time_mt, "waitq_flush_excess_time_mt", 7200); /* 300us */
889
890 #endif /* SCHED_HYGIENE_DEBUG */
891
892
893 static void
waitq_select_queue_flush(waitq_t waitq,struct waitq_select_args * args)894 waitq_select_queue_flush(waitq_t waitq, struct waitq_select_args *args)
895 {
896 thread_t thread = THREAD_NULL;
897
898 assert(!circle_queue_empty(&args->threadq));
899
900 int flushed_threads = 0;
901
902 #if SCHED_HYGIENE_DEBUG
903 uint64_t start_time = ml_get_sched_hygiene_timebase();
904 disable_preemption();
905 #endif /* SCHED_HYGIENE_DEBUG */
906
907 cqe_foreach_element_safe(thread, &args->threadq, wait_links) {
908 circle_dequeue(&args->threadq, &thread->wait_links);
909 assert_thread_magic(thread);
910
911 spl_t s = splsched();
912
913 thread_lock(thread);
914 maybe_adjust_thread_pri(thread, args->flags, waitq);
915 thread_go(thread, args->result, args->flags & WAITQ_HANDOFF);
916 thread_unlock(thread);
917
918 splx(s);
919
920 flushed_threads++;
921 }
922
923 #if SCHED_HYGIENE_DEBUG
924 uint64_t end_time = ml_get_sched_hygiene_timebase();
925
926 /*
927 * Check for a combination of excess threads and long time,
928 * so that a single thread wakeup that gets stuck is still caught
929 */
930 if (waitq_flush_excess_threads && waitq_flush_excess_time_mt &&
931 flushed_threads > waitq_flush_excess_threads &&
932 (end_time - start_time) > waitq_flush_excess_time_mt) {
933 /*
934 * Hack alert:
935 *
936 * If a wakeup-all is done with interrupts disabled, or if
937 * there are enough threads / lock contention to pass the
938 * preemption disable threshold, it can take Too Long to get
939 * through waking up all the threads, leading to
940 * the watchdog going off.
941 *
942 * While we are working on a change to break up this
943 * giant glob of work into smaller chunks, remove this
944 * time region from the watchdog's memory to avoid
945 * unit tests that wake up hundreds of threads on
946 * one semaphore from causing this to blow up.
947 *
948 * We only trigger this when seeing a combination of
949 * excess threads and long time, so that a single
950 * thread wakeup that gets stuck is still caught.
951 *
952 * This was improved with
953 * rdar://90325140
954 * to enable interrupts during most wakeup-all's
955 * and will be removed with
956 * rdar://101110793
957 */
958 if (ml_get_interrupts_enabled() == false) {
959 ml_spin_debug_reset(current_thread());
960 ml_irq_debug_abandon();
961 }
962 abandon_preemption_disable_measurement();
963
964 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_INT_MASKED_RESET), flushed_threads, end_time - start_time);
965 }
966
967 enable_preemption();
968
969 #endif /* SCHED_HYGIENE_DEBUG */
970
971 /*
972 * match the disable when making threadq nonempty from
973 * waitq_select_queue_add
974 */
975 enable_preemption();
976 }
977
978 /**
979 * Routine to iterate over the waitq for non-priority ordered waitqs
980 *
981 * Conditions:
982 * args->waitq (and the posted waitq) is locked
983 *
984 * Notes:
985 * If one or more threads are selected, this may disable preemption,
986 * which is balanced when the threadq is flushed in
987 * waitq_resume_identified_thread or waitq_select_queue_flush.
988 */
989 static waitq_flags_t
waitq_queue_iterate_locked(struct waitq * safeq,struct waitq * waitq,struct waitq_select_args * args)990 waitq_queue_iterate_locked(struct waitq *safeq, struct waitq *waitq,
991 struct waitq_select_args *args)
992 {
993 thread_t thread = THREAD_NULL;
994 waitq_flags_t eventmask = 0;
995
996 cqe_foreach_element_safe(thread, &safeq->waitq_queue, wait_links) {
997 assert_thread_magic(thread);
998
999 /*
1000 * For non-priority ordered waitqs, we allow multiple events to be
1001 * mux'ed into the same waitq. Also safeqs may contain threads from
1002 * multiple waitqs. Only pick threads that match the
1003 * requested wait event.
1004 */
1005 if (waitq_same(thread->waitq, waitq) && thread->wait_event == args->event) {
1006 /* We found a matching thread! Pull it from the queue. */
1007
1008 circle_dequeue(&safeq->waitq_queue, &thread->wait_links);
1009
1010 waitq_select_queue_add(waitq, thread, args);
1011
1012 if (++args->nthreads >= args->max_threads) {
1013 break;
1014 }
1015 } else {
1016 /* thread wasn't selected so track its event */
1017 eventmask |= waitq_same(thread->waitq, safeq)
1018 ? _CAST_TO_EVENT_MASK(thread->wait_event)
1019 : _CAST_TO_EVENT_MASK(thread->waitq.wq_q);
1020 }
1021 }
1022
1023 return eventmask;
1024 }
1025
1026 /**
1027 * Routine to iterate and remove threads from priority ordered waitqs
1028 *
1029 * Conditions:
1030 * args->waitq (and the posted waitq) is locked
1031 *
1032 * Notes:
1033 * The priority ordered waitqs only support maximum priority element removal.
1034 *
1035 * Also, the implementation makes sure that all threads in a priority ordered
1036 * waitq are waiting on the same wait event. This is not necessarily true for
1037 * non-priority ordered waitqs. If one or more threads are selected, this may
1038 * disable preemption.
1039 */
1040 static void
waitq_prioq_iterate_locked(struct waitq * ts_wq,struct waitq * waitq,struct waitq_select_args * args)1041 waitq_prioq_iterate_locked(
1042 struct waitq *ts_wq,
1043 struct waitq *waitq,
1044 struct waitq_select_args *args)
1045 {
1046 struct turnstile *ts = waitq_to_turnstile(ts_wq);
1047 bool update_inheritor = (args->flags & WAITQ_UPDATE_INHERITOR);
1048
1049 if (update_inheritor && args->max_threads == UINT32_MAX) {
1050 /*
1051 * If we are going to wake up all threads,
1052 * go ahead and set the inheritor to NULL.
1053 */
1054 turnstile_kernel_update_inheritor_on_wake_locked(ts,
1055 TURNSTILE_INHERITOR_NULL, TURNSTILE_INHERITOR_THREAD);
1056 update_inheritor = false;
1057 }
1058
1059 while (!priority_queue_empty(&ts_wq->waitq_prio_queue)) {
1060 thread_t thread;
1061
1062 thread = priority_queue_remove_max(&ts_wq->waitq_prio_queue,
1063 struct thread, wait_prioq_links);
1064
1065 assert_thread_magic(thread);
1066
1067 /*
1068 * Ensure the wait event matches since priority ordered waitqs do not
1069 * support multiple events in the same waitq.
1070 */
1071 assert(waitq_same(thread->waitq, waitq) && (thread->wait_event == args->event));
1072
1073 if (update_inheritor) {
1074 turnstile_inheritor_t inheritor = thread;
1075
1076 if (priority_queue_empty(&ts_wq->waitq_prio_queue)) {
1077 inheritor = TURNSTILE_INHERITOR_NULL;
1078 }
1079 turnstile_kernel_update_inheritor_on_wake_locked(ts,
1080 inheritor, TURNSTILE_INHERITOR_THREAD);
1081 update_inheritor = false;
1082 }
1083
1084 waitq_select_queue_add(waitq, thread, args);
1085
1086 if (++args->nthreads >= args->max_threads) {
1087 break;
1088 }
1089 }
1090 }
1091
1092 /**
1093 * @function do_waitq_select_n_locked_queue
1094 *
1095 * @brief
1096 * Selects threads waiting on a wait queue.
1097 *
1098 * @discussion
1099 * @c waitq is locked.
1100 * If @c waitq is a set, then the wait queue posting to it is locked too.
1101 *
1102 * If one or more threads are selected, this may disable preemption.
1103 */
1104 static void
do_waitq_select_n_locked_queue(waitq_t waitq,struct waitq_select_args * args)1105 do_waitq_select_n_locked_queue(waitq_t waitq, struct waitq_select_args *args)
1106 {
1107 spl_t s = 0;
1108
1109 struct waitq *safeq;
1110 waitq_flags_t eventmask, remaining_eventmask;
1111
1112 if (waitq_irq_safe(waitq)) {
1113 eventmask = _CAST_TO_EVENT_MASK(args->event);
1114 safeq = waitq.wq_q;
1115 } else {
1116 /* JMM - add flag to waitq to avoid global lookup if no waiters */
1117 eventmask = _CAST_TO_EVENT_MASK(waitq.wq_q);
1118 safeq = waitq_get_safeq(waitq);
1119 if (safeq == NULL) {
1120 return;
1121 }
1122
1123 s = splsched();
1124 waitq_lock(safeq);
1125 }
1126
1127 /*
1128 * If the safeq doesn't have an eventmask (not global) or the event
1129 * we're looking for IS set in its eventmask, then scan the threads
1130 * in that queue for ones that match the original <waitq,event> pair.
1131 */
1132 if (waitq_type(safeq) == WQT_TURNSTILE) {
1133 waitq_prioq_iterate_locked(safeq, waitq.wq_q, args);
1134 } else if (!waitq_is_global(safeq)) {
1135 waitq_queue_iterate_locked(safeq, waitq.wq_q, args);
1136 } else if ((safeq->waitq_eventmask & eventmask) == eventmask) {
1137 remaining_eventmask = waitq_queue_iterate_locked(safeq,
1138 waitq.wq_q, args);
1139
1140 /*
1141 * Update the eventmask of global queues we just scanned:
1142 * - If we selected all the threads in the queue,
1143 * we can clear its eventmask.
1144 *
1145 * - If we didn't find enough threads to fill our needs,
1146 * then we can assume we looked at every thread in the queue
1147 * and the mask we computed is complete - so reset it.
1148 */
1149 if (waitq_empty(safeq)) {
1150 safeq->waitq_eventmask = 0;
1151 } else if (args->nthreads < args->max_threads) {
1152 safeq->waitq_eventmask = remaining_eventmask;
1153 }
1154 }
1155
1156 /* unlock the safe queue if we locked one above */
1157 if (!waitq_same(waitq, safeq)) {
1158 waitq_unlock(safeq);
1159 splx(s);
1160 }
1161 }
1162
1163 /**
1164 * @function do_waitq_link_select_n_locked()
1165 *
1166 * @brief
1167 * Selects threads waiting on any set a wait queue belongs to,
1168 * or preposts the wait queue onto them.
1169 *
1170 * @discussion
1171 * @c waitq is locked.
1172 */
1173 __attribute__((noinline))
1174 static void
do_waitq_select_n_locked_sets(waitq_t waitq,struct waitq_select_args * args)1175 do_waitq_select_n_locked_sets(waitq_t waitq, struct waitq_select_args *args)
1176 {
1177 waitq_type_t wq_type = waitq_type(waitq);
1178 waitq_link_t link;
1179
1180 assert(args->event == NO_EVENT64);
1181 assert(waitq_preposts(waitq));
1182
1183 waitq_link_foreach(link, waitq) {
1184 waitq_t wqset = wql_wqs(link);
1185
1186 if (wql_wqs_preposted(link)) {
1187 /*
1188 * The wql_wqs_preposted() bit is cleared
1189 * under both the wq/wqset lock.
1190 *
1191 * If the wqset is still preposted,
1192 * we really won't find threads there.
1193 *
1194 * Just mark the waitq as preposted and move on.
1195 */
1196 if (wq_type == WQT_PORT) {
1197 waitq.wq_q->waitq_preposted = true;
1198 }
1199 continue;
1200 }
1201
1202 if (wq_type == WQT_SELECT) {
1203 /*
1204 * If PGZ picked this select set,
1205 * translate it to the real address
1206 *
1207 * If it is still a select set
1208 * (the slot could have been reused),
1209 * then keep using it for the rest of the logic.
1210 *
1211 * Even in the extremely unlikely case where
1212 * the slot was reused for another select_set,
1213 * the `wql_sellink_valid` check below will
1214 * take care of debouncing it. But we must
1215 * forget the original pointer we read
1216 * so that we unlock the proper object.
1217 */
1218 wqset.wqs_sel = pgz_decode_allow_invalid(wqset.wqs_sel,
1219 ZONE_ID_SELECT_SET);
1220 if (!wqset.wqs_sel) {
1221 continue;
1222 }
1223 if (!waitq_lock_allow_invalid(wqset)) {
1224 continue;
1225 }
1226 if (!wql_sellink_valid(wqset.wqs_sel, link.wqls)) {
1227 goto out_unlock;
1228 }
1229 } else {
1230 waitq_lock(wqset);
1231 if (!waitq_valid(wqset)) {
1232 goto out_unlock;
1233 }
1234 }
1235
1236 /*
1237 * Find any threads waiting on this wait queue set as a queue.
1238 */
1239 do_waitq_select_n_locked_queue(wqset, args);
1240
1241 if (args->nthreads == 0) {
1242 /* No thread selected: prepost 'waitq' to 'wqset' */
1243 wql_wqs_mark_preposted(link);
1244 if (wq_type == WQT_SELECT) {
1245 wqset.wqs_sel->selset_preposted = true;
1246 } else {
1247 waitq.wq_q->waitq_preposted = true;
1248 circle_dequeue(&wqset.wqs_set->wqset_links,
1249 &link.wqll->wql_slink);
1250 circle_enqueue_tail(&wqset.wqs_set->wqset_preposts,
1251 &link.wqll->wql_slink);
1252 ipc_pset_prepost(wqset.wqs_set, waitq.wq_q);
1253 }
1254 }
1255
1256 out_unlock:
1257 waitq_unlock(wqset);
1258
1259 if (args->nthreads >= args->max_threads) {
1260 break;
1261 }
1262 }
1263 }
1264
1265 /**
1266 * @function do_waitq_select_n_locked
1267 *
1268 * @brief
1269 * Selects threads waiting on a wait queue, or preposts it.
1270 *
1271 * @discussion
1272 * @c waitq is locked.
1273 *
1274 * Recurses into all sets this wait queue belongs to.
1275 */
1276 static void
do_waitq_select_n_locked(waitq_t waitq,struct waitq_select_args * args)1277 do_waitq_select_n_locked(waitq_t waitq, struct waitq_select_args *args)
1278 {
1279 do_waitq_select_n_locked_queue(waitq, args);
1280
1281 if (args->nthreads >= args->max_threads) {
1282 /* already enough threads found */
1283 return;
1284 }
1285
1286 if (args->event != NO_EVENT64 || !waitq_preposts(waitq)) {
1287 /* this wakeup should not recurse into sets */
1288 return;
1289 }
1290
1291 do_waitq_select_n_locked_sets(waitq, args);
1292 }
1293
1294 static inline bool
waitq_is_preposted_set(waitq_t waitq)1295 waitq_is_preposted_set(waitq_t waitq)
1296 {
1297 switch (waitq_type(waitq)) {
1298 case WQT_PORT_SET:
1299 return waitq_set_first_prepost(waitq.wqs_set, WQS_PREPOST_PEEK) != NULL;
1300
1301 case WQT_SELECT_SET:
1302 return waitq.wqs_sel->selset_preposted;
1303
1304 default:
1305 return false;
1306 }
1307 }
1308
1309 wait_result_t
waitq_assert_wait64_locked(waitq_t waitq,event64_t wait_event,wait_interrupt_t interruptible,wait_timeout_urgency_t urgency,uint64_t deadline,uint64_t leeway,thread_t thread)1310 waitq_assert_wait64_locked(waitq_t waitq,
1311 event64_t wait_event,
1312 wait_interrupt_t interruptible,
1313 wait_timeout_urgency_t urgency,
1314 uint64_t deadline,
1315 uint64_t leeway,
1316 thread_t thread)
1317 {
1318 wait_result_t wait_result;
1319 struct waitq *safeq;
1320 uintptr_t eventmask;
1321 spl_t s;
1322
1323 switch (waitq_type(waitq)) {
1324 case WQT_PORT:
1325 case WQT_SELECT:
1326 case WQT_PORT_SET:
1327 case WQT_SELECT_SET:
1328 assert(wait_event == NO_EVENT64);
1329 break;
1330 default:
1331 assert(wait_event != NO_EVENT64);
1332 break;
1333 }
1334
1335 /*
1336 * Warning: Do _not_ place debugging print statements here.
1337 * The waitq is locked!
1338 */
1339 assert(!thread->started || thread == current_thread());
1340
1341 if (!waitq_wait_possible(thread)) {
1342 panic("thread already waiting on %p", thread->waitq.wq_q);
1343 }
1344
1345 s = splsched();
1346
1347 /*
1348 * early-out if the thread is waiting on a wait queue set
1349 * that has already been pre-posted.
1350 *
1351 * Note: waitq_is_preposted_set() may unlock the waitq-set
1352 */
1353 if (waitq_is_preposted_set(waitq)) {
1354 thread_lock(thread);
1355 thread->wait_result = THREAD_AWAKENED;
1356 thread_unlock(thread);
1357 splx(s);
1358 return THREAD_AWAKENED;
1359 }
1360
1361 /*
1362 * If already dealing with an irq safe wait queue, we are all set.
1363 * Otherwise, determine a global queue to use and lock it.
1364 */
1365 if (waitq_irq_safe(waitq)) {
1366 safeq = waitq.wq_q;
1367 eventmask = _CAST_TO_EVENT_MASK(wait_event);
1368 } else {
1369 safeq = waitq_get_safeq(waitq);
1370 if (__improbable(safeq == NULL)) {
1371 panic("Trying to assert_wait on a turnstile proxy "
1372 "that hasn't been donated one (waitq: %p)", waitq.wq_q);
1373 }
1374 eventmask = _CAST_TO_EVENT_MASK(waitq.wq_q);
1375 waitq_lock(safeq);
1376 }
1377
1378 /* lock the thread now that we have the irq-safe waitq locked */
1379 thread_lock(thread);
1380
1381 wait_result = thread_mark_wait_locked(thread, interruptible);
1382 /* thread->wait_result has been set */
1383 if (wait_result == THREAD_WAITING) {
1384 waitq_thread_insert(safeq, thread, waitq, wait_event);
1385
1386 if (deadline != 0) {
1387 bool was_active;
1388
1389 was_active = timer_call_enter_with_leeway(thread->wait_timer,
1390 NULL,
1391 deadline, leeway,
1392 urgency, FALSE);
1393 if (!was_active) {
1394 thread->wait_timer_active++;
1395 }
1396 thread->wait_timer_armed = true;
1397 }
1398
1399 if (waitq_is_global(safeq)) {
1400 safeq->waitq_eventmask |= (waitq_flags_t)eventmask;
1401 }
1402
1403 waitq_stats_count_wait(waitq);
1404 }
1405
1406 /* unlock the thread */
1407 thread_unlock(thread);
1408
1409 /* update the inheritor's thread priority if the waitq is embedded in turnstile */
1410 if (waitq_type(safeq) == WQT_TURNSTILE && wait_result == THREAD_WAITING) {
1411 turnstile_recompute_priority_locked(waitq_to_turnstile(safeq));
1412 turnstile_update_inheritor_locked(waitq_to_turnstile(safeq));
1413 }
1414
1415 /* unlock the safeq if we locked it here */
1416 if (!waitq_same(waitq, safeq)) {
1417 waitq_unlock(safeq);
1418 }
1419
1420 splx(s);
1421
1422 return wait_result;
1423 }
1424
1425 bool
waitq_pull_thread_locked(waitq_t waitq,thread_t thread)1426 waitq_pull_thread_locked(waitq_t waitq, thread_t thread)
1427 {
1428 struct waitq *safeq;
1429 uint32_t ticket;
1430
1431 assert_thread_magic(thread);
1432
1433 /* Find the interrupts disabled queue thread is waiting on */
1434 if (waitq_irq_safe(waitq)) {
1435 safeq = waitq.wq_q;
1436 } else {
1437 safeq = waitq_get_safeq(waitq);
1438 if (__improbable(safeq == NULL)) {
1439 panic("Trying to clear_wait on a turnstile proxy "
1440 "that hasn't been donated one (waitq: %p)", waitq.wq_q);
1441 }
1442 }
1443
1444 /*
1445 * thread is already locked so have to try for the waitq lock.
1446 *
1447 * We can't wait for the waitq lock under the thread lock,
1448 * however we can reserve our slot in the lock queue,
1449 * and if that reservation requires waiting, we are guaranteed
1450 * that this waitq can't die until we got our turn!
1451 */
1452 if (!waitq_lock_reserve(safeq, &ticket)) {
1453 thread_unlock(thread);
1454 waitq_lock_wait(safeq, ticket);
1455 thread_lock(thread);
1456
1457 if (!waitq_same(waitq, thread->waitq)) {
1458 /*
1459 * While we were waiting for our reservation the thread
1460 * stopped waiting on this waitq, bail out.
1461 */
1462 waitq_unlock(safeq);
1463 return false;
1464 }
1465 }
1466
1467 waitq_thread_remove(safeq, thread);
1468 waitq_stats_count_clear_wakeup(waitq);
1469 waitq_unlock(safeq);
1470 return true;
1471 }
1472
1473
1474 void
waitq_clear_promotion_locked(waitq_t waitq,thread_t thread)1475 waitq_clear_promotion_locked(waitq_t waitq, thread_t thread)
1476 {
1477 spl_t s = 0;
1478
1479 assert(waitq_held(waitq));
1480 assert(thread != THREAD_NULL);
1481 assert(thread == current_thread());
1482
1483 /* This flag is only cleared by the thread itself, so safe to check outside lock */
1484 if ((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) != TH_SFLAG_WAITQ_PROMOTED) {
1485 return;
1486 }
1487
1488 if (!waitq_irq_safe(waitq)) {
1489 s = splsched();
1490 }
1491 thread_lock(thread);
1492
1493 sched_thread_unpromote_reason(thread, TH_SFLAG_WAITQ_PROMOTED, 0);
1494
1495 thread_unlock(thread);
1496 if (!waitq_irq_safe(waitq)) {
1497 splx(s);
1498 }
1499 }
1500
1501 static inline bool
waitq_should_unlock(waitq_wakeup_flags_t flags)1502 waitq_should_unlock(waitq_wakeup_flags_t flags)
1503 {
1504 return (flags & (WAITQ_UNLOCK | WAITQ_KEEP_LOCKED)) == WAITQ_UNLOCK;
1505 }
1506
1507 static inline bool
waitq_should_enable_interrupts(waitq_wakeup_flags_t flags)1508 waitq_should_enable_interrupts(waitq_wakeup_flags_t flags)
1509 {
1510 return (flags & (WAITQ_UNLOCK | WAITQ_KEEP_LOCKED | WAITQ_ENABLE_INTERRUPTS)) == (WAITQ_UNLOCK | WAITQ_ENABLE_INTERRUPTS);
1511 }
1512
1513 uint32_t
waitq_wakeup64_nthreads_locked(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags,uint32_t nthreads)1514 waitq_wakeup64_nthreads_locked(
1515 waitq_t waitq,
1516 event64_t wake_event,
1517 wait_result_t result,
1518 waitq_wakeup_flags_t flags,
1519 uint32_t nthreads)
1520 {
1521 struct waitq_select_args args = {
1522 .event = wake_event,
1523 .result = result,
1524 .flags = (nthreads == 1) ? flags : (flags & ~WAITQ_HANDOFF),
1525 .max_threads = nthreads,
1526 };
1527
1528 assert(waitq_held(waitq));
1529
1530 if (flags & WAITQ_ENABLE_INTERRUPTS) {
1531 assert(waitq_should_unlock(flags));
1532 assert(ml_get_interrupts_enabled() == false);
1533 }
1534
1535 do_waitq_select_n_locked(waitq, &args);
1536 waitq_stats_count_wakeup(waitq, args.nthreads);
1537
1538 if (waitq_should_unlock(flags)) {
1539 waitq_unlock(waitq);
1540 }
1541
1542 if (waitq_should_enable_interrupts(flags)) {
1543 ml_set_interrupts_enabled(true);
1544 }
1545
1546 if (!circle_queue_empty(&args.threadq)) {
1547 waitq_select_queue_flush(waitq, &args);
1548 }
1549
1550 return args.nthreads;
1551 }
1552
1553 kern_return_t
waitq_wakeup64_all_locked(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)1554 waitq_wakeup64_all_locked(
1555 waitq_t waitq,
1556 event64_t wake_event,
1557 wait_result_t result,
1558 waitq_wakeup_flags_t flags)
1559 {
1560 uint32_t count;
1561
1562 count = waitq_wakeup64_nthreads_locked(waitq, wake_event, result,
1563 flags, UINT32_MAX);
1564 return count ? KERN_SUCCESS : KERN_NOT_WAITING;
1565 }
1566
1567 kern_return_t
waitq_wakeup64_one_locked(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)1568 waitq_wakeup64_one_locked(
1569 waitq_t waitq,
1570 event64_t wake_event,
1571 wait_result_t result,
1572 waitq_wakeup_flags_t flags)
1573 {
1574 uint32_t count;
1575
1576 count = waitq_wakeup64_nthreads_locked(waitq, wake_event, result,
1577 flags, 1);
1578 return count ? KERN_SUCCESS : KERN_NOT_WAITING;
1579 }
1580
1581 thread_t
waitq_wakeup64_identify_locked(waitq_t waitq,event64_t wake_event,waitq_wakeup_flags_t flags)1582 waitq_wakeup64_identify_locked(
1583 waitq_t waitq,
1584 event64_t wake_event,
1585 waitq_wakeup_flags_t flags)
1586 {
1587 struct waitq_select_args args = {
1588 .event = wake_event,
1589 .result = THREAD_AWAKENED, /* this won't be used */
1590 .flags = flags,
1591 .max_threads = 1,
1592 .is_identified = true,
1593 };
1594
1595 assert(waitq_held(waitq));
1596
1597 do_waitq_select_n_locked(waitq, &args);
1598 waitq_stats_count_wakeup(waitq, args.nthreads);
1599
1600 if (waitq_should_unlock(flags)) {
1601 waitq_unlock(waitq);
1602 }
1603
1604 if (waitq_should_enable_interrupts(flags)) {
1605 ml_set_interrupts_enabled(true);
1606 }
1607
1608 if (args.nthreads > 0) {
1609 thread_t thread = cqe_dequeue_head(&args.threadq, struct thread, wait_links);
1610
1611 assert(args.nthreads == 1 && circle_queue_empty(&args.threadq));
1612
1613 /* Thread is off waitq, not unblocked yet */
1614
1615 return thread;
1616 }
1617
1618 return THREAD_NULL;
1619 }
1620
1621 void
waitq_resume_identified_thread(waitq_t waitq,thread_t thread,wait_result_t result,waitq_wakeup_flags_t flags)1622 waitq_resume_identified_thread(
1623 waitq_t waitq,
1624 thread_t thread,
1625 wait_result_t result,
1626 waitq_wakeup_flags_t flags)
1627 {
1628 spl_t spl = splsched();
1629
1630 thread_lock(thread);
1631
1632 assert((thread->state & (TH_WAIT | TH_WAKING)) == (TH_WAIT | TH_WAKING));
1633
1634 maybe_adjust_thread_pri(thread, flags, waitq);
1635 thread_go(thread, result, (flags & WAITQ_HANDOFF));
1636
1637 thread_unlock(thread);
1638 splx(spl);
1639
1640 enable_preemption(); // balance disable upon pulling thread
1641 }
1642
1643 void
waitq_resume_and_bind_identified_thread(waitq_t waitq,thread_t thread,processor_t processor,wait_result_t result,waitq_wakeup_flags_t flags)1644 waitq_resume_and_bind_identified_thread(
1645 waitq_t waitq,
1646 thread_t thread,
1647 processor_t processor,
1648 wait_result_t result,
1649 waitq_wakeup_flags_t flags)
1650 {
1651 spl_t spl = splsched();
1652
1653 thread_lock(thread);
1654
1655 assert((thread->state & (TH_WAIT | TH_WAKING)) == (TH_WAIT | TH_WAKING));
1656
1657 maybe_adjust_thread_pri(thread, flags, waitq);
1658 thread_bind_during_wakeup(thread, processor);
1659 thread_go(thread, result, (flags & WAITQ_HANDOFF));
1660
1661 thread_unlock(thread);
1662 splx(spl);
1663
1664 enable_preemption(); // balance disable upon pulling thread
1665 }
1666
1667 kern_return_t
waitq_wakeup64_thread_and_unlock(struct waitq * waitq,event64_t event,thread_t thread,wait_result_t result)1668 waitq_wakeup64_thread_and_unlock(
1669 struct waitq *waitq,
1670 event64_t event,
1671 thread_t thread,
1672 wait_result_t result)
1673 {
1674 kern_return_t ret = KERN_NOT_WAITING;
1675
1676 assert(waitq_irq_safe(waitq));
1677 assert(waitq_held(waitq));
1678 assert_thread_magic(thread);
1679
1680 /*
1681 * See if the thread was still waiting there. If so, it got
1682 * dequeued and returned locked.
1683 *
1684 * By holding the thread locked across the go, a thread on another CPU
1685 * can't see itself in 'waking' state, even if it uses clear_wait.
1686 */
1687 thread_lock(thread);
1688
1689 if (waitq_same(thread->waitq, waitq) && thread->wait_event == event) {
1690 waitq_thread_remove(waitq, thread);
1691 ret = KERN_SUCCESS;
1692 }
1693 waitq_stats_count_wakeup(waitq, ret == KERN_SUCCESS ? 1 : 0);
1694
1695 waitq_unlock(waitq);
1696
1697 if (ret == KERN_SUCCESS) {
1698 thread_go(thread, result, /* handoff */ false);
1699 }
1700
1701 thread_unlock(thread);
1702
1703 return ret;
1704 }
1705
1706
1707 #pragma mark waitq
1708
1709 __attribute__((always_inline))
1710 void
waitq_init(waitq_t waitq,waitq_type_t type,int policy)1711 waitq_init(waitq_t waitq, waitq_type_t type, int policy)
1712 {
1713 assert((policy & SYNC_POLICY_FIXED_PRIORITY) == 0);
1714
1715 *waitq.wq_q = (struct waitq){
1716 .waitq_type = type,
1717 .waitq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0),
1718 };
1719
1720 switch (type) {
1721 case WQT_INVALID:
1722 __builtin_trap();
1723
1724 case WQT_TURNSTILE:
1725 /* For turnstile, initialize it as a priority queue */
1726 priority_queue_init(&waitq.wq_q->waitq_prio_queue);
1727 assert(waitq.wq_q->waitq_fifo == 0);
1728 break;
1729
1730 case WQT_PORT:
1731 waitq.wq_q->waitq_ts = TURNSTILE_NULL;
1732 break;
1733
1734 case WQT_PORT_SET:
1735 circle_queue_init(&waitq.wqs_set->wqset_preposts);
1736 OS_FALLTHROUGH;
1737 case WQT_SELECT_SET:
1738 case WQT_QUEUE:
1739 case WQT_SELECT:
1740 circle_queue_init(&waitq.wq_q->waitq_queue);
1741 break;
1742 }
1743
1744 if (policy & SYNC_POLICY_INIT_LOCKED) {
1745 hw_lck_ticket_init_locked(&waitq.wq_q->waitq_interlock, &waitq_lck_grp);
1746 } else {
1747 hw_lck_ticket_init(&waitq.wq_q->waitq_interlock, &waitq_lck_grp);
1748 }
1749 }
1750
1751 void
waitq_deinit(waitq_t waitq)1752 waitq_deinit(waitq_t waitq)
1753 {
1754 waitq_type_t type = waitq_type(waitq);
1755
1756 switch (type) {
1757 case WQT_QUEUE:
1758 assert(circle_queue_empty(&waitq.wq_q->waitq_queue));
1759 waitq_invalidate(waitq);
1760 break;
1761
1762 case WQT_TURNSTILE:
1763 assert(priority_queue_empty(&waitq.wq_q->waitq_prio_queue));
1764 assert(waitq.wq_q->waitq_inheritor == TURNSTILE_INHERITOR_NULL);
1765 waitq_invalidate(waitq);
1766 break;
1767
1768 case WQT_PORT:
1769 assert(waitq.wq_q->waitq_ts == TURNSTILE_NULL);
1770 assert(circle_queue_empty(&waitq.wq_q->waitq_links));
1771 break;
1772
1773 case WQT_SELECT:
1774 assert(waitq.wq_q->waitq_sellinks.next == NULL);
1775 assert(circle_queue_empty(&waitq.wqs_set->wqset_queue));
1776 break;
1777
1778 case WQT_PORT_SET:
1779 assert(circle_queue_empty(&waitq.wqs_set->wqset_queue));
1780 assert(circle_queue_empty(&waitq.wqs_set->wqset_links));
1781 assert(circle_queue_empty(&waitq.wqs_set->wqset_preposts));
1782 break;
1783
1784 default:
1785 panic("invalid wait type: %p/%d", waitq.wq_q, type);
1786 }
1787
1788 /*
1789 * The waitq must have been invalidated, or hw_lck_ticket_destroy()
1790 * below won't wait for reservations from waitq_lock_reserve(),
1791 * or waitq_lock_allow_invalid().
1792 */
1793 assert(!waitq_valid(waitq.wqs_set));
1794 hw_lck_ticket_destroy(&waitq.wq_q->waitq_interlock, &waitq_lck_grp);
1795 }
1796
1797
1798 #pragma mark port-set sets
1799
1800 void
waitq_set_unlink_all_locked(struct waitq_set * wqset,waitq_link_list_t * free_l)1801 waitq_set_unlink_all_locked(struct waitq_set *wqset, waitq_link_list_t *free_l)
1802 {
1803 uint32_t batch = waitq_set_unlink_batch;
1804
1805 waitq_invalidate(wqset);
1806
1807 for (;;) {
1808 struct waitq_link *link;
1809 queue_entry_t elt;
1810 circle_queue_t q;
1811 struct waitq *wq;
1812 uint32_t ticket;
1813 bool stable = true;
1814
1815 if (!circle_queue_empty(&wqset->wqset_links)) {
1816 q = &wqset->wqset_links;
1817 } else if (!circle_queue_empty(&wqset->wqset_preposts)) {
1818 q = &wqset->wqset_preposts;
1819 } else {
1820 break;
1821 }
1822
1823 if (batch-- == 0) {
1824 waitq_unlock(wqset);
1825 waitq_lock(wqset);
1826 batch = waitq_set_unlink_batch;
1827 continue;
1828 }
1829
1830 elt = circle_queue_first(q);
1831 link = cqe_element(elt, struct waitq_link, wql_slink);
1832 wq = link->wql_wq;
1833
1834 if (__improbable(!waitq_lock_reserve(wq, &ticket))) {
1835 waitq_unlock(wqset);
1836 waitq_lock_wait(wq, ticket);
1837 waitq_lock(wqset);
1838 stable = (elt == circle_queue_first(q) && link->wql_wq == wq);
1839 }
1840
1841 if (stable) {
1842 circle_dequeue(q, &link->wql_slink);
1843 circle_dequeue(&wq->waitq_links, &link->wql_qlink);
1844 wql_list_push(free_l, link);
1845 }
1846
1847 waitq_unlock(wq);
1848 }
1849 }
1850
1851 void
waitq_clear_prepost_locked(struct waitq * waitq)1852 waitq_clear_prepost_locked(struct waitq *waitq)
1853 {
1854 assert(waitq_type(waitq) == WQT_PORT);
1855 waitq->waitq_preposted = false;
1856 }
1857
1858 void
1859 waitq_set_foreach_member_locked(struct waitq_set *wqs, void (^cb)(struct waitq *))
1860 {
1861 struct waitq_link *link;
1862
1863 cqe_foreach_element(link, &wqs->wqset_links, wql_slink) {
1864 cb(link->wql_wq);
1865 }
1866
1867 cqe_foreach_element(link, &wqs->wqset_preposts, wql_slink) {
1868 cb(link->wql_wq);
1869 }
1870 }
1871
1872 __abortlike
1873 static void
__waitq_link_arguments_panic(struct waitq * waitq,struct waitq_set * wqset)1874 __waitq_link_arguments_panic(struct waitq *waitq, struct waitq_set *wqset)
1875 {
1876 if (!waitq_valid(waitq)) {
1877 panic("Invalid waitq: %p", waitq);
1878 }
1879 if (waitq_type(waitq) != WQT_PORT) {
1880 panic("Invalid waitq type: %p:%d", waitq, waitq->waitq_type);
1881 }
1882 panic("Invalid waitq-set: %p", wqset);
1883 }
1884
1885 static inline void
__waitq_link_arguments_validate(struct waitq * waitq,struct waitq_set * wqset)1886 __waitq_link_arguments_validate(struct waitq *waitq, struct waitq_set *wqset)
1887 {
1888 if (!waitq_valid(waitq) ||
1889 waitq_type(waitq) != WQT_PORT ||
1890 waitq_type(wqset) != WQT_PORT_SET) {
1891 __waitq_link_arguments_panic(waitq, wqset);
1892 }
1893 }
1894
1895 __abortlike
1896 static void
__waitq_invalid_panic(waitq_t waitq)1897 __waitq_invalid_panic(waitq_t waitq)
1898 {
1899 panic("Invalid waitq: %p", waitq.wq_q);
1900 }
1901
1902 static void
__waitq_validate(waitq_t waitq)1903 __waitq_validate(waitq_t waitq)
1904 {
1905 if (!waitq_valid(waitq)) {
1906 __waitq_invalid_panic(waitq);
1907 }
1908 }
1909
1910 kern_return_t
waitq_link_locked(struct waitq * waitq,struct waitq_set * wqset,waitq_link_t * linkp)1911 waitq_link_locked(struct waitq *waitq, struct waitq_set *wqset,
1912 waitq_link_t *linkp)
1913 {
1914 assert(linkp->wqlh);
1915
1916 __waitq_link_arguments_validate(waitq, wqset);
1917
1918 if (wql_find(waitq, wqset)) {
1919 return KERN_ALREADY_IN_SET;
1920 }
1921
1922 linkp->wqll->wql_wq = waitq;
1923 linkp->wqll->wql_wqs = (uintptr_t)wqset;
1924
1925 if (waitq_valid(wqset)) {
1926 circle_enqueue_tail(&wqset->wqset_links, &linkp->wqll->wql_slink);
1927 circle_enqueue_tail(&waitq->waitq_links, &linkp->wqll->wql_qlink);
1928 *linkp = WQL_NULL;
1929 }
1930
1931 return KERN_SUCCESS;
1932 }
1933
1934 kern_return_t
waitq_link_prepost_locked(struct waitq * waitq,struct waitq_set * wqset)1935 waitq_link_prepost_locked(struct waitq *waitq, struct waitq_set *wqset)
1936 {
1937 struct waitq_link *link;
1938
1939 __waitq_link_arguments_validate(waitq, wqset);
1940
1941 link = wql_find(waitq, wqset);
1942 if (link == NULL) {
1943 return KERN_NOT_IN_SET;
1944 }
1945
1946 if (!wql_wqs_preposted(link)) {
1947 wql_wqs_mark_preposted(link);
1948 waitq->waitq_preposted = true;
1949 circle_dequeue(&wqset->wqset_links, &link->wql_slink);
1950 circle_enqueue_tail(&wqset->wqset_preposts, &link->wql_slink);
1951 ipc_pset_prepost(wqset, waitq);
1952 }
1953
1954 return KERN_SUCCESS;
1955 }
1956
1957 waitq_link_t
waitq_unlink_locked(struct waitq * waitq,struct waitq_set * wqset)1958 waitq_unlink_locked(struct waitq *waitq, struct waitq_set *wqset)
1959 {
1960 struct waitq_link *link;
1961
1962 __waitq_link_arguments_validate(waitq, wqset);
1963
1964 link = wql_find(waitq, wqset);
1965 if (link) {
1966 circle_dequeue(wql_wqs_queue(wqset, link), &link->wql_slink);
1967 circle_dequeue(&waitq->waitq_links, &link->wql_qlink);
1968 }
1969
1970 return (waitq_link_t){ .wqll = link };
1971 }
1972
1973 void
waitq_unlink_all_locked(struct waitq * waitq,struct waitq_set * except_wqset,waitq_link_list_t * free_l)1974 waitq_unlink_all_locked(struct waitq *waitq, struct waitq_set *except_wqset,
1975 waitq_link_list_t *free_l)
1976 {
1977 struct waitq_link *kept_link = NULL;
1978 struct waitq_link *link;
1979
1980 assert(waitq_type(waitq) == WQT_PORT);
1981
1982 cqe_foreach_element_safe(link, &waitq->waitq_links, wql_qlink) {
1983 waitq_t wqs = wql_wqs(link);
1984
1985 if (wqs.wqs_set == except_wqset) {
1986 kept_link = link;
1987 continue;
1988 }
1989
1990 waitq_lock(wqs);
1991 circle_dequeue(wql_wqs_queue(wqs.wqs_set, link),
1992 &link->wql_slink);
1993 wql_list_push(free_l, link);
1994 waitq_unlock(wqs);
1995 }
1996
1997 circle_queue_init(&waitq->waitq_links);
1998 if (kept_link) {
1999 circle_enqueue_tail(&waitq->waitq_links, &kept_link->wql_qlink);
2000 }
2001 }
2002
2003 struct waitq *
waitq_set_first_prepost(struct waitq_set * wqset,wqs_prepost_flags_t flags)2004 waitq_set_first_prepost(struct waitq_set *wqset, wqs_prepost_flags_t flags)
2005 {
2006 circle_queue_t q = &wqset->wqset_preposts;
2007 queue_entry_t elt;
2008 struct waitq_link *link;
2009 struct waitq *wq;
2010 uint32_t ticket;
2011
2012 if (__improbable(!waitq_valid(wqset))) {
2013 return NULL;
2014 }
2015
2016 while (!circle_queue_empty(q)) {
2017 elt = circle_queue_first(q);
2018 link = cqe_element(elt, struct waitq_link, wql_slink);
2019 wq = link->wql_wq;
2020
2021 if (__improbable(!waitq_lock_reserve(wq, &ticket))) {
2022 waitq_unlock(wqset);
2023 waitq_lock_wait(wq, ticket);
2024 waitq_lock(wqset);
2025 if (!waitq_valid(wqset)) {
2026 waitq_unlock(wq);
2027 return NULL;
2028 }
2029
2030 if (elt != circle_queue_first(q) || link->wql_wq != wq) {
2031 waitq_unlock(wq);
2032 continue;
2033 }
2034 }
2035
2036 if (wq->waitq_preposted) {
2037 if ((flags & WQS_PREPOST_PEEK) == 0) {
2038 circle_queue_rotate_head_forward(q);
2039 }
2040 if ((flags & WQS_PREPOST_LOCK) == 0) {
2041 waitq_unlock(wq);
2042 }
2043 return wq;
2044 }
2045
2046 /*
2047 * We found a link that is no longer preposted,
2048 * someone must have called waitq_clear_prepost_locked()
2049 * and this set just only noticed.
2050 */
2051 wql_wqs_clear_preposted(link);
2052 waitq_unlock(wq);
2053
2054 circle_dequeue(q, &link->wql_slink);
2055 circle_enqueue_tail(&wqset->wqset_links, &link->wql_slink);
2056 }
2057
2058 return NULL;
2059 }
2060
2061
2062 #pragma mark select sets
2063
2064 /**
2065 * @function select_set_nextid()
2066 *
2067 * @brief
2068 * Generate a unique ID for a select set "generation"
2069 *
2070 * @discussion
2071 * This mixes the CPU number with a monotonic clock
2072 * (in order to avoid contention on a global atomic).
2073 *
2074 * In order for select sets to be invalidated very quickly,
2075 * they do not have backward linkages to their member queues.
2076 *
2077 * Instead, each time a new @c select() "pass" is initiated,
2078 * a new ID is generated, which is copied onto the @c waitq_sellink
2079 * links at the time of link.
2080 *
2081 * The zone for select sets is sequestered, which allows for select
2082 * wait queues to speculatively lock their set during prepost
2083 * and use this ID to debounce wakeups and avoid spurious wakeups
2084 * (as an "optimization" because select recovers from spurious wakeups,
2085 * we just want those to be very rare).
2086 */
2087 __attribute__((always_inline))
2088 static inline uint64_t
select_set_nextid(bool preemption_enabled)2089 select_set_nextid(bool preemption_enabled)
2090 {
2091 /* waitq_bootstrap() set the low byte to a unique value per CPU */
2092 static_assert(MAX_CPUS <= 256);
2093 const uint64_t inc = 256;
2094 uint64_t id;
2095
2096 #ifdef __x86_64__
2097 /* uncontended atomics are slower than disabling preemption on Intel */
2098 if (preemption_enabled) {
2099 disable_preemption();
2100 }
2101 id = (*PERCPU_GET(select_setid) += inc);
2102 if (preemption_enabled) {
2103 enable_preemption();
2104 }
2105 #else
2106 /*
2107 * if preemption is enabled this might update another CPU's
2108 * setid, which will be rare but is acceptable, it still
2109 * produces a unique select ID.
2110 *
2111 * We chose this because the uncontended atomics on !intel
2112 * are faster than disabling/reenabling preemption.
2113 */
2114 (void)preemption_enabled;
2115 id = os_atomic_add(PERCPU_GET(select_setid), inc, relaxed);
2116 #endif
2117
2118 return id;
2119 }
2120
2121 struct select_set *
select_set_alloc(void)2122 select_set_alloc(void)
2123 {
2124 struct select_set *selset;
2125 selset = zalloc_id(ZONE_ID_SELECT_SET, Z_ZERO | Z_WAITOK | Z_NOFAIL);
2126
2127 waitq_init(selset, WQT_SELECT_SET, SYNC_POLICY_FIFO);
2128 selset->selset_id = select_set_nextid(true);
2129
2130 return selset;
2131 }
2132
2133 __abortlike
2134 static void
__select_set_link_arguments_panic(struct waitq * waitq,struct select_set * set)2135 __select_set_link_arguments_panic(struct waitq *waitq, struct select_set *set)
2136 {
2137 if (!waitq_valid(waitq)) {
2138 panic("Invalid waitq: %p", waitq);
2139 }
2140 if (waitq_type(waitq) != WQT_SELECT) {
2141 panic("Invalid waitq type: %p:%d", waitq, waitq->waitq_type);
2142 }
2143 panic("Invalid waitq-set: %p", set);
2144 }
2145
2146 static inline void
__select_set_link_arguments_validate(struct waitq * waitq,struct select_set * set)2147 __select_set_link_arguments_validate(struct waitq *waitq, struct select_set *set)
2148 {
2149 if (!waitq_valid(waitq) ||
2150 waitq_type(waitq) != WQT_SELECT ||
2151 waitq_type(set) != WQT_SELECT_SET) {
2152 __select_set_link_arguments_panic(waitq, set);
2153 }
2154 }
2155
2156 void
select_set_link(struct waitq * waitq,struct select_set * set,waitq_link_t * linkp)2157 select_set_link(struct waitq *waitq, struct select_set *set,
2158 waitq_link_t *linkp)
2159 {
2160 struct waitq_sellink *link;
2161
2162 __select_set_link_arguments_validate(waitq, set);
2163
2164 waitq_lock(waitq);
2165
2166 if (waitq == &select_conflict_queue) {
2167 waitq_lock(set);
2168 set->selset_conflict = true;
2169 waitq_unlock(set);
2170 }
2171
2172 wql_list_foreach(link, &waitq->waitq_sellinks) {
2173 if (waitq_same(wql_wqs(link), set)) {
2174 goto found;
2175 }
2176 }
2177
2178 link = linkp->wqls;
2179 *linkp = WQL_NULL;
2180 wql_list_push(&waitq->waitq_sellinks, link);
2181
2182 found:
2183 link->wql_wqs = (uintptr_t)set;
2184 link->wql_setid = set->selset_id;
2185 waitq_unlock(waitq);
2186 }
2187
2188 static void
select_set_unlink_conflict_queue(struct select_set * set)2189 select_set_unlink_conflict_queue(struct select_set *set)
2190 {
2191 struct waitq_link_list_entry **prev;
2192 struct waitq_sellink *link;
2193
2194 waitq_lock(&select_conflict_queue);
2195
2196 /*
2197 * We know the conflict queue is hooked,
2198 * so find the linkage and free it.
2199 */
2200 prev = &select_conflict_queue.waitq_sellinks.next;
2201 for (;;) {
2202 assert(*prev);
2203 link = wql_list_elem(*prev);
2204 if (waitq_same(wql_wqs(link), set)) {
2205 *prev = link->wql_next.next;
2206 break;
2207 }
2208 prev = &link->wql_next.next;
2209 }
2210
2211 waitq_unlock(&select_conflict_queue);
2212
2213 waitq_link_free(WQT_SELECT_SET, link);
2214 }
2215
2216 static void
__select_set_reset(struct select_set * set,bool invalidate)2217 __select_set_reset(struct select_set *set, bool invalidate)
2218 {
2219 if (set->selset_conflict) {
2220 select_set_unlink_conflict_queue(set);
2221 }
2222
2223 waitq_lock(set);
2224 if (invalidate) {
2225 waitq_invalidate(set);
2226 }
2227 set->selset_id = select_set_nextid(false);
2228 set->selset_preposted = 0;
2229 set->selset_conflict = 0;
2230 waitq_unlock(set);
2231 }
2232
2233 void
select_set_reset(struct select_set * set)2234 select_set_reset(struct select_set *set)
2235 {
2236 __select_set_reset(set, false);
2237 }
2238
2239 void
select_set_free(struct select_set * set)2240 select_set_free(struct select_set *set)
2241 {
2242 __select_set_reset(set, true);
2243 hw_lck_ticket_destroy(&set->selset_interlock, &waitq_lck_grp);
2244 zfree_id(ZONE_ID_SELECT_SET, set);
2245 }
2246
2247 void
select_waitq_wakeup_and_deinit(struct waitq * waitq,event64_t wake_event,wait_result_t result)2248 select_waitq_wakeup_and_deinit(
2249 struct waitq *waitq,
2250 event64_t wake_event,
2251 wait_result_t result)
2252 {
2253 waitq_link_list_t free_l = { };
2254
2255 if (waitq_is_valid(waitq)) {
2256 assert(waitq_type(waitq) == WQT_SELECT);
2257
2258 waitq_lock(waitq);
2259
2260 waitq_wakeup64_all_locked(waitq, wake_event, result,
2261 WAITQ_KEEP_LOCKED);
2262
2263 waitq_invalidate(waitq);
2264 free_l = waitq->waitq_sellinks;
2265 waitq->waitq_sellinks.next = NULL;
2266
2267 waitq_unlock(waitq);
2268
2269 waitq_link_free_list(WQT_SELECT, &free_l);
2270
2271 waitq_deinit(waitq);
2272 }
2273 }
2274
2275 #pragma mark assert_wait / wakeup (high level)
2276
2277 wait_result_t
waitq_assert_wait64(struct waitq * waitq,event64_t wait_event,wait_interrupt_t interruptible,uint64_t deadline)2278 waitq_assert_wait64(struct waitq *waitq,
2279 event64_t wait_event,
2280 wait_interrupt_t interruptible,
2281 uint64_t deadline)
2282 {
2283 thread_t thread = current_thread();
2284 wait_result_t ret;
2285 spl_t s = 0;
2286
2287 __waitq_validate(waitq);
2288
2289 if (waitq_irq_safe(waitq)) {
2290 s = splsched();
2291 }
2292 waitq_lock(waitq);
2293
2294 ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible,
2295 TIMEOUT_URGENCY_SYS_NORMAL, deadline, TIMEOUT_NO_LEEWAY, thread);
2296
2297 waitq_unlock(waitq);
2298 if (waitq_irq_safe(waitq)) {
2299 splx(s);
2300 }
2301
2302 return ret;
2303 }
2304
2305 wait_result_t
waitq_assert_wait64_leeway(struct waitq * waitq,event64_t wait_event,wait_interrupt_t interruptible,wait_timeout_urgency_t urgency,uint64_t deadline,uint64_t leeway)2306 waitq_assert_wait64_leeway(struct waitq *waitq,
2307 event64_t wait_event,
2308 wait_interrupt_t interruptible,
2309 wait_timeout_urgency_t urgency,
2310 uint64_t deadline,
2311 uint64_t leeway)
2312 {
2313 wait_result_t ret;
2314 thread_t thread = current_thread();
2315 spl_t s = 0;
2316
2317 __waitq_validate(waitq);
2318
2319 if (waitq_irq_safe(waitq)) {
2320 s = splsched();
2321 }
2322 waitq_lock(waitq);
2323
2324 ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible,
2325 urgency, deadline, leeway, thread);
2326
2327 waitq_unlock(waitq);
2328 if (waitq_irq_safe(waitq)) {
2329 splx(s);
2330 }
2331
2332 return ret;
2333 }
2334
2335 uint32_t
waitq_wakeup64_nthreads(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags,uint32_t nthreads)2336 waitq_wakeup64_nthreads(
2337 waitq_t waitq,
2338 event64_t wake_event,
2339 wait_result_t result,
2340 waitq_wakeup_flags_t flags,
2341 uint32_t nthreads)
2342 {
2343 __waitq_validate(waitq);
2344
2345 spl_t spl = 0;
2346
2347 if (waitq_irq_safe(waitq)) {
2348 spl = splsched();
2349 }
2350
2351 waitq_lock(waitq);
2352
2353 /* waitq is unlocked upon return, splx is handled */
2354 return waitq_wakeup64_nthreads_locked(waitq, wake_event, result,
2355 flags | waitq_flags_splx(spl) | WAITQ_UNLOCK, nthreads);
2356 }
2357
2358 kern_return_t
waitq_wakeup64_all(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)2359 waitq_wakeup64_all(
2360 waitq_t waitq,
2361 event64_t wake_event,
2362 wait_result_t result,
2363 waitq_wakeup_flags_t flags)
2364 {
2365 uint32_t count;
2366
2367 count = waitq_wakeup64_nthreads(waitq, wake_event, result,
2368 flags, UINT32_MAX);
2369 return count ? KERN_SUCCESS : KERN_NOT_WAITING;
2370 }
2371
2372 kern_return_t
waitq_wakeup64_one(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)2373 waitq_wakeup64_one(
2374 waitq_t waitq,
2375 event64_t wake_event,
2376 wait_result_t result,
2377 waitq_wakeup_flags_t flags)
2378 {
2379 uint32_t count;
2380
2381 count = waitq_wakeup64_nthreads(waitq, wake_event, result, flags, 1);
2382 return count ? KERN_SUCCESS : KERN_NOT_WAITING;
2383 }
2384
2385 kern_return_t
waitq_wakeup64_thread(struct waitq * waitq,event64_t event,thread_t thread,wait_result_t result)2386 waitq_wakeup64_thread(
2387 struct waitq *waitq,
2388 event64_t event,
2389 thread_t thread,
2390 wait_result_t result)
2391 {
2392 spl_t s = splsched();
2393 kern_return_t ret;
2394
2395 __waitq_validate(waitq);
2396 assert(waitq_irq_safe(waitq));
2397 waitq_lock(waitq);
2398
2399 ret = waitq_wakeup64_thread_and_unlock(waitq, event, thread, result);
2400
2401 splx(s);
2402
2403 return ret;
2404 }
2405
2406 thread_t
waitq_wakeup64_identify(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)2407 waitq_wakeup64_identify(
2408 waitq_t waitq,
2409 event64_t wake_event,
2410 wait_result_t result,
2411 waitq_wakeup_flags_t flags)
2412 {
2413 __waitq_validate(waitq);
2414
2415 spl_t spl = 0;
2416
2417 if (waitq_irq_safe(waitq)) {
2418 spl = splsched();
2419 }
2420
2421 waitq_lock(waitq);
2422
2423 thread_t thread = waitq_wakeup64_identify_locked(waitq, wake_event,
2424 flags | waitq_flags_splx(spl) | WAITQ_UNLOCK);
2425 /* waitq is unlocked, thread is not go-ed yet */
2426 /* preemption disabled if thread non-null */
2427 /* splx is handled */
2428
2429 if (thread != THREAD_NULL) {
2430 thread_reference(thread);
2431 waitq_resume_identified_thread(waitq, thread, result, flags);
2432 /* preemption enabled, thread go-ed */
2433 /* returns +1 ref to running thread */
2434 return thread;
2435 }
2436
2437 return THREAD_NULL;
2438 }
2439
2440
2441 #pragma mark tests
2442 #if DEBUG || DEVELOPMENT
2443
2444 #include <ipc/ipc_pset.h>
2445 #include <sys/errno.h>
2446
2447 #define MAX_GLOBAL_TEST_QUEUES 64
2448 static struct waitq wqt_waitq_array[MAX_GLOBAL_TEST_QUEUES];
2449 static bool wqt_running;
2450 static bool wqt_init;
2451
2452 static bool
wqt_start(const char * test,int64_t * out)2453 wqt_start(const char *test, int64_t *out)
2454 {
2455 if (os_atomic_xchg(&wqt_running, true, acquire)) {
2456 *out = 0;
2457 return false;
2458 }
2459
2460 if (!wqt_init) {
2461 wqt_init = true;
2462 for (int i = 0; i < MAX_GLOBAL_TEST_QUEUES; i++) {
2463 waitq_init(&wqt_waitq_array[i], WQT_PORT, SYNC_POLICY_FIFO);
2464 }
2465 }
2466
2467 printf("[WQ] starting %s\n", test);
2468 return true;
2469 }
2470
2471 static int
wqt_end(const char * test,int64_t * out)2472 wqt_end(const char *test, int64_t *out)
2473 {
2474 os_atomic_store(&wqt_running, false, release);
2475 printf("[WQ] done %s\n", test);
2476 *out = 1;
2477 return 0;
2478 }
2479
2480 static struct waitq *
wqt_wq(uint32_t index)2481 wqt_wq(uint32_t index)
2482 {
2483 return &wqt_waitq_array[index];
2484 }
2485
2486 static uint32_t
wqt_idx(struct waitq * waitq)2487 wqt_idx(struct waitq *waitq)
2488 {
2489 assert(waitq >= wqt_waitq_array &&
2490 waitq < wqt_waitq_array + MAX_GLOBAL_TEST_QUEUES);
2491 return (uint32_t)(waitq - wqt_waitq_array);
2492 }
2493
2494 __attribute__((overloadable))
2495 static uint64_t
wqt_bit(uint32_t index)2496 wqt_bit(uint32_t index)
2497 {
2498 return 1ull << index;
2499 }
2500
2501 __attribute__((overloadable))
2502 static uint64_t
wqt_bit(struct waitq * waitq)2503 wqt_bit(struct waitq *waitq)
2504 {
2505 return wqt_bit(wqt_idx(waitq));
2506 }
2507
2508 static struct waitq_set *
wqt_wqset_create(void)2509 wqt_wqset_create(void)
2510 {
2511 struct waitq_set *wqset;
2512
2513 wqset = &ipc_pset_alloc_special(ipc_space_kernel)->ips_wqset;
2514 printf("[WQ]: created waitq set %p\n", wqset);
2515 return wqset;
2516 }
2517
2518 static void
wqt_wqset_free(struct waitq_set * wqset)2519 wqt_wqset_free(struct waitq_set *wqset)
2520 {
2521 printf("[WQ]: destroying waitq set %p\n", wqset);
2522 waitq_lock(wqset);
2523 ipc_pset_destroy(ipc_space_kernel,
2524 __container_of(wqset, struct ipc_pset, ips_wqset));
2525 }
2526
2527 static void
wqt_link(uint32_t index,struct waitq_set * wqset,kern_return_t want)2528 wqt_link(uint32_t index, struct waitq_set *wqset, kern_return_t want)
2529 {
2530 struct waitq *waitq = wqt_wq(index);
2531 waitq_link_t link = waitq_link_alloc(WQT_PORT_SET);
2532 kern_return_t kr;
2533
2534 printf("[WQ]: linking waitq [%d] to global wqset (%p)\n", index, wqset);
2535
2536 waitq_lock(waitq);
2537 waitq_lock(wqset);
2538 kr = waitq_link_locked(waitq, wqset, &link);
2539 waitq_unlock(wqset);
2540 waitq_unlock(waitq);
2541
2542 if (link.wqlh) {
2543 waitq_link_free(WQT_PORT_SET, link);
2544 }
2545
2546 printf("[WQ]:\tkr=%d\texpected=%d\n", kr, want);
2547 assert(kr == want);
2548 }
2549
2550 static void
wqt_unlink(uint32_t index,struct waitq_set * wqset,__assert_only kern_return_t want)2551 wqt_unlink(uint32_t index, struct waitq_set *wqset, __assert_only kern_return_t want)
2552 {
2553 struct waitq *waitq = wqt_wq(index);
2554 waitq_link_t link;
2555 kern_return_t kr;
2556
2557 printf("[WQ]: unlinking waitq [%d] from global wqset (%p)\n",
2558 index, wqset);
2559
2560 waitq_lock(waitq);
2561 waitq_lock(wqset);
2562 link = waitq_unlink_locked(waitq, wqset);
2563 waitq_unlock(wqset);
2564 waitq_unlock(waitq);
2565
2566 if (link.wqlh) {
2567 waitq_link_free(WQT_PORT_SET, link);
2568 kr = KERN_SUCCESS;
2569 } else {
2570 kr = KERN_NOT_IN_SET;
2571 }
2572
2573 printf("[WQ]: \tkr=%d\n", kr);
2574 assert(kr == want);
2575 }
2576
2577 static void
wqt_wakeup_one(uint32_t index,event64_t event64,__assert_only kern_return_t want)2578 wqt_wakeup_one(uint32_t index, event64_t event64, __assert_only kern_return_t want)
2579 {
2580 kern_return_t kr;
2581
2582 printf("[WQ]: Waking one thread on waitq [%d] event:0x%llx\n",
2583 index, event64);
2584 kr = waitq_wakeup64_one(wqt_wq(index), event64,
2585 THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
2586 printf("[WQ]: \tkr=%d\n", kr);
2587 assert(kr == want);
2588 }
2589
2590 static void
wqt_clear_preposts(uint32_t idx)2591 wqt_clear_preposts(uint32_t idx)
2592 {
2593 waitq_lock(wqt_wq(idx));
2594 (void)waitq_clear_prepost_locked(wqt_wq(idx));
2595 waitq_unlock(wqt_wq(idx));
2596 }
2597
2598 static void
wqt_preposts_gc_locked(struct waitq_set * wqset)2599 wqt_preposts_gc_locked(struct waitq_set *wqset)
2600 {
2601 circle_queue_t q = &wqset->wqset_preposts;
2602 struct waitq_link *link;
2603 uint32_t ticket;
2604
2605 again:
2606 cqe_foreach_element_safe(link, q, wql_slink) {
2607 struct waitq *wq = link->wql_wq;
2608
2609 if (!waitq_lock_reserve(wq, &ticket)) {
2610 waitq_unlock(wqset);
2611 waitq_lock_wait(wq, ticket);
2612 waitq_lock(wqset);
2613 waitq_unlock(wq);
2614 /* the list was possibly mutated, restart */
2615 goto again;
2616 }
2617
2618 if (!wq->waitq_preposted) {
2619 wql_wqs_clear_preposted(link);
2620 circle_dequeue(q, &link->wql_slink);
2621 circle_enqueue_tail(&wqset->wqset_links, &link->wql_slink);
2622 }
2623
2624 waitq_unlock(wq);
2625 }
2626 }
2627
2628 static void
wqt_expect_preposts(struct waitq_set * wqset,__assert_only uint64_t preposts)2629 wqt_expect_preposts(struct waitq_set *wqset, __assert_only uint64_t preposts)
2630 {
2631 struct waitq_link *link;
2632 uint64_t found = 0;
2633
2634 waitq_lock(wqset);
2635
2636 wqt_preposts_gc_locked(wqset);
2637
2638 cqe_foreach_element(link, &wqset->wqset_preposts, wql_slink) {
2639 struct waitq *waitq = link->wql_wq;
2640
2641 printf("[WQ]: found prepost %d\n", wqt_idx(waitq));
2642 assertf((found & wqt_bit(waitq)) == 0,
2643 "found waitq %d twice", wqt_idx(waitq));
2644 found |= wqt_bit(waitq);
2645 }
2646
2647 waitq_unlock(wqset);
2648
2649 assertf(found == preposts, "preposts expected 0x%llx, but got 0x%llx",
2650 preposts, found);
2651 }
2652
2653 static int
waitq_basic_test(__unused int64_t in,int64_t * out)2654 waitq_basic_test(__unused int64_t in, int64_t *out)
2655 {
2656 struct waitq_set *wqset;
2657
2658 if (!wqt_start(__func__, out)) {
2659 return EBUSY;
2660 }
2661
2662 wqset = wqt_wqset_create();
2663 wqt_link(10, wqset, KERN_SUCCESS);
2664 wqt_link(10, wqset, KERN_ALREADY_IN_SET);
2665 wqt_link(11, wqset, KERN_SUCCESS);
2666 wqt_link(11, wqset, KERN_ALREADY_IN_SET);
2667 wqt_link(12, wqset, KERN_SUCCESS);
2668 wqt_link(12, wqset, KERN_ALREADY_IN_SET);
2669
2670 wqt_wakeup_one(10, NO_EVENT64, KERN_NOT_WAITING);
2671 wqt_wakeup_one(12, NO_EVENT64, KERN_NOT_WAITING);
2672
2673 wqt_expect_preposts(wqset, wqt_bit(10) | wqt_bit(12));
2674 wqt_clear_preposts(10);
2675
2676 wqt_expect_preposts(wqset, wqt_bit(12));
2677 wqt_clear_preposts(12);
2678
2679 wqt_expect_preposts(wqset, 0);
2680
2681 wqt_unlink(12, wqset, KERN_SUCCESS);
2682 wqt_unlink(12, wqset, KERN_NOT_IN_SET);
2683 wqt_unlink(11, wqset, KERN_SUCCESS);
2684 wqt_unlink(10, wqset, KERN_SUCCESS);
2685 wqt_wqset_free(wqset);
2686
2687 return wqt_end(__func__, out);
2688 }
2689 SYSCTL_TEST_REGISTER(waitq_basic, waitq_basic_test);
2690 #endif /* DEBUG || DEVELOPMENT */
2691