1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 #include <kern/ast.h>
58 #include <kern/backtrace.h>
59 #include <kern/kern_types.h>
60 #include <kern/mach_param.h>
61 #include <kern/percpu.h>
62 #include <kern/queue.h>
63 #include <kern/sched_prim.h>
64 #include <kern/simple_lock.h>
65 #include <kern/spl.h>
66 #include <kern/waitq.h>
67 #include <kern/zalloc.h>
68 #include <kern/policy_internal.h>
69 #include <kern/turnstile.h>
70
71 #include <os/hash.h>
72 #include <libkern/section_keywords.h>
73 #include <mach/sync_policy.h>
74 #include <vm/vm_kern_xnu.h>
75
76 #include <sys/kdebug.h>
77
78 /*!
79 * @const waitq_set_unlink_batch
80 *
81 * @brief
82 * How many links are unhooked under a single set lock hold.
83 *
84 * @discussion
85 * Holding a waitq set lock for too long can cause
86 * extreme contention (when a set is being torn down concurrently
87 * to messages being sent to ports who used to belong to that set).
88 *
89 * In order to fight this, large wait queue sets will drop
90 * and reacquire their lock for each unlinking batch.
91 */
92 static TUNABLE(uint32_t, waitq_set_unlink_batch, "waitq_set_unlink_batch", 64);
93
94 /*!
95 * @const WQL_PREPOST_MARKER
96 *
97 * @brief
98 * Marker set in the @c wql_wqs field of wait queue linkages to denote that
99 * this linkage has preposted to its wait queue set already.
100 *
101 * @discussion
102 * This bit is manipulated under both the wait queue and the wait queue set
103 * locks, and is used for two purposes:
104 *
105 * - for port set queues, it denotes in which circle queue the linkage
106 * is queued on (@c waitq_set::wqset_links or @c waitq_set::wqset_preposts)
107 *
108 * - as an optimization during pre-post to not walk sets this link already
109 * preposted to.
110 */
111 #define WQL_PREPOST_MARKER 1ul
112
113 #if __LP64__
114 /*!
115 * @struct waitq_link_hdr
116 *
117 * @brief
118 * Common "header" between all linkages, in order to find the waitq_set
119 * of this linkage.
120 *
121 * @discussion
122 * Due to unfortunate alignment constraints on @c queue_chain_t,
123 * this is wildly different for LP64 and ILP32.
124 *
125 * Do note that `wql
126 */
127 struct waitq_link_hdr {
128 uintptr_t wql_wqs;
129 };
130
131 /*!
132 * @struct waitq_sellink
133 *
134 * @brief
135 * Linkages used for select waitq queues to select wait queue sets.
136 *
137 * @discussion
138 * Select linkages are one way (queue to set) for two reasons:
139 *
140 * 1. select doesn't use the wait queue subsystem to discover which file
141 * descriptor woke up the set (it will instead scan all fds again),
142 *
143 * 2. all linkages are unhooked on each syscall return, so we minimize
144 * work to be done to be as quick as possible, using a fast invalidation
145 * scheme based on unique identifiers and sequestering
146 * (see @c select_set_nextid()).
147 */
148 struct waitq_sellink {
149 uintptr_t wql_wqs;
150 struct waitq_link_list_entry wql_next;
151 uint64_t wql_setid;
152 };
153
154 /*!
155 * @struct waitq_link
156 *
157 * @brief
158 * Linkages used for port wait queues and port-set wait queue sets.
159 *
160 * @discussion
161 * Those linkages go both ways so that receiving messages through a port-set
162 * can quickly find ports that preposted to the set.
163 *
164 * It also means that unhooking linkages cannot be lazy.
165 */
166 struct waitq_link {
167 uintptr_t wql_wqs; /**< wait queue set for this link */
168 queue_chain_t wql_qlink; /**< linkage through the waitq list */
169 queue_chain_t wql_slink; /**< linkage through the wqset list */
170 struct waitq *wql_wq; /**< wait queue for this link */
171 };
172 #else
173 struct waitq_link_hdr {
174 uint64_t __wql_padding;
175 uintptr_t wql_wqs;
176 };
177
178 struct waitq_sellink {
179 struct waitq_link_list_entry wql_next;
180 uintptr_t __wql_padding;
181 uintptr_t wql_wqs;
182 uint64_t wql_setid;
183 };
184
185 struct waitq_link {
186 queue_chain_t wql_qlink;
187 uintptr_t wql_wqs;
188 struct waitq *wql_wq;
189 queue_chain_t wql_slink;
190 };
191 #endif
192
193 static_assert(offsetof(struct waitq_link_hdr, wql_wqs) ==
194 offsetof(struct waitq_sellink, wql_wqs));
195 static_assert(offsetof(struct waitq_link_hdr, wql_wqs) ==
196 offsetof(struct waitq_link, wql_wqs));
197 static_assert(sizeof(struct waitq) <= WQ_OPAQUE_SIZE, "waitq structure size mismatch");
198 static_assert(__alignof(struct waitq) == WQ_OPAQUE_ALIGN, "waitq structure alignment mismatch");
199
200 static KALLOC_TYPE_DEFINE(waitq_sellink_zone, struct waitq_sellink, KT_PRIV_ACCT);
201 static KALLOC_TYPE_DEFINE(waitq_link_zone, struct waitq_link, KT_PRIV_ACCT);
202 ZONE_DEFINE_ID(ZONE_ID_SELECT_SET, "select_set", struct select_set,
203 ZC_SEQUESTER | ZC_ZFREE_CLEARMEM);
204
205 static LCK_GRP_DECLARE(waitq_lck_grp, "waitq");
206
207 static uint64_t PERCPU_DATA(select_setid);
208 struct waitq select_conflict_queue;
209
210 #pragma mark waitq links
211
212 static inline bool
waitq_is_sellink(waitq_type_t type)213 waitq_is_sellink(waitq_type_t type)
214 {
215 return type == WQT_SELECT || type == WQT_SELECT_SET;
216 }
217
218 static inline bool
wql_sellink_valid(struct select_set * selset,struct waitq_sellink * link)219 wql_sellink_valid(struct select_set *selset, struct waitq_sellink *link)
220 {
221 return waitq_valid(selset) && selset->selset_id == link->wql_setid;
222 }
223
224 static waitq_t
wql_wqs(waitq_link_t link)225 wql_wqs(waitq_link_t link)
226 {
227 return (waitq_t){ (void *)(link.wqlh->wql_wqs & ~WQL_PREPOST_MARKER) };
228 }
229
230 static bool
wql_wqs_preposted(waitq_link_t link)231 wql_wqs_preposted(waitq_link_t link)
232 {
233 return link.wqlh->wql_wqs & WQL_PREPOST_MARKER;
234 }
235
236 static void
wql_wqs_mark_preposted(waitq_link_t link)237 wql_wqs_mark_preposted(waitq_link_t link)
238 {
239 assert(!wql_wqs_preposted(link));
240 link.wqlh->wql_wqs |= WQL_PREPOST_MARKER;
241 }
242
243 static void
wql_wqs_clear_preposted(waitq_link_t link)244 wql_wqs_clear_preposted(waitq_link_t link)
245 {
246 assert(wql_wqs_preposted(link));
247 link.wqlh->wql_wqs &= ~WQL_PREPOST_MARKER;
248 }
249
250 static circle_queue_t
wql_wqs_queue(struct waitq_set * wqs,struct waitq_link * link)251 wql_wqs_queue(struct waitq_set *wqs, struct waitq_link *link)
252 {
253 return wql_wqs_preposted(link) ? &wqs->wqset_preposts : &wqs->wqset_links;
254 }
255
256 static void
wql_list_push(waitq_link_list_t * list,waitq_link_t link)257 wql_list_push(waitq_link_list_t *list, waitq_link_t link)
258 {
259 link.wqls->wql_next.next = list->next;
260 list->next = &link.wqls->wql_next;
261 }
262
263 static inline struct waitq_sellink *
wql_list_elem(struct waitq_link_list_entry * e)264 wql_list_elem(struct waitq_link_list_entry *e)
265 {
266 return e ? __container_of(e, struct waitq_sellink, wql_next) : NULL;
267 }
268
269 /*!
270 * @function wql_list_next()
271 *
272 * @brief
273 * Helper function to implement wait queue link list enumeration.
274 *
275 * @param e in: pointer to the current element,
276 * out: pointer to the next element or NULL
277 * @param end which element to stop enumeration at (NULL for lists,
278 * or the first element enumerated for circle queues).
279 * @returns true (makes writing for(;;) based enumerators easier).
280 */
281 static inline bool
wql_list_next(struct waitq_link_list_entry ** e,struct waitq_link_list_entry * end)282 wql_list_next(struct waitq_link_list_entry **e, struct waitq_link_list_entry *end)
283 {
284 if (*e == NULL || (*e)->next == end) {
285 *e = NULL;
286 } else {
287 *e = (*e)->next;
288 }
289 return true;
290 }
291
292 #define __wql_list_foreach(it, head, end) \
293 for (struct waitq_link_list_entry *__it = (head)->next, *__end = end; \
294 ((it) = wql_list_elem(__it)); wql_list_next(&__it, __end))
295
296 #define wql_list_foreach(it, head) \
297 __wql_list_foreach(it, head, NULL)
298
299 #define wql_list_foreach_safe(it, head) \
300 for (struct waitq_link_list_entry *__it = (head)->next; \
301 ((it) = wql_list_elem(__it)) && wql_list_next(&__it, NULL); )
302
303 /*
304 * Gross hack: passing `__it` to `__wql_list_foreach` makes it stop whether
305 * we circle back to the first element or NULL (whichever comes first).
306 *
307 * This allows to have a single enumeration function oblivious to whether
308 * we enumerate a circle queue or a sellink list.
309 */
310 #define waitq_link_foreach(link, waitq) \
311 __wql_list_foreach((link).wqls, &(waitq).wq_q->waitq_sellinks, __it)
312
313 static_assert(offsetof(struct waitq, waitq_sellinks) ==
314 offsetof(struct waitq, waitq_links));
315 static_assert(offsetof(struct waitq_sellink, wql_next) ==
316 offsetof(struct waitq_link, wql_qlink.next));
317
318 static struct waitq_link *
wql_find(struct waitq * waitq,waitq_t wqset)319 wql_find(struct waitq *waitq, waitq_t wqset)
320 {
321 struct waitq_link *link;
322
323 cqe_foreach_element(link, &waitq->waitq_links, wql_qlink) {
324 if (waitq_same(wql_wqs(link), wqset)) {
325 return link;
326 }
327 }
328
329 return NULL;
330 }
331
332 waitq_link_t
waitq_link_alloc(waitq_type_t type)333 waitq_link_alloc(waitq_type_t type)
334 {
335 waitq_link_t link;
336
337 if (waitq_is_sellink(type)) {
338 link.wqls = zalloc_flags(waitq_sellink_zone, Z_WAITOK | Z_ZERO);
339 } else {
340 link.wqll = zalloc_flags(waitq_link_zone, Z_WAITOK | Z_ZERO);
341 }
342 return link;
343 }
344
345 void
waitq_link_free(waitq_type_t type,waitq_link_t link)346 waitq_link_free(waitq_type_t type, waitq_link_t link)
347 {
348 if (waitq_is_sellink(type)) {
349 return zfree(waitq_sellink_zone, link.wqls);
350 } else {
351 return zfree(waitq_link_zone, link.wqll);
352 }
353 }
354
355 void
waitq_link_free_list(waitq_type_t type,waitq_link_list_t * free_l)356 waitq_link_free_list(waitq_type_t type, waitq_link_list_t *free_l)
357 {
358 waitq_link_t link;
359
360 wql_list_foreach_safe(link.wqls, free_l) {
361 waitq_link_free(type, link);
362 }
363
364 free_l->next = NULL;
365 }
366
367
368 #pragma mark global wait queues
369
370 static __startup_data struct waitq g_boot_waitq;
371 static SECURITY_READ_ONLY_LATE(struct waitq *) global_waitqs = &g_boot_waitq;
372 static SECURITY_READ_ONLY_LATE(uint32_t) g_num_waitqs = 1;
373
374 /*
375 * Zero out the used MSBs of the event.
376 */
377 #define _CAST_TO_EVENT_MASK(event) \
378 ((waitq_flags_t)(uintptr_t)(event) & ((1ul << _EVENT_MASK_BITS) - 1ul))
379
380 /* return a global waitq pointer corresponding to the given event */
381 struct waitq *
_global_eventq(event64_t event)382 _global_eventq(event64_t event)
383 {
384 /*
385 * this doesn't use os_hash_kernel_pointer() because
386 * some clients use "numbers" here.
387 */
388 #if HAS_MTE
389 event = vm_memtag_canonicalize_kernel((vm_offset_t)event);
390 #endif
391 return &global_waitqs[os_hash_uint64(event) & (g_num_waitqs - 1)];
392 }
393
394 bool
waitq_is_valid(waitq_t waitq)395 waitq_is_valid(waitq_t waitq)
396 {
397 return waitq_valid(waitq);
398 }
399
400 static inline bool
waitq_is_global(waitq_t waitq)401 waitq_is_global(waitq_t waitq)
402 {
403 if (waitq_type(waitq) != WQT_QUEUE) {
404 return false;
405 }
406 return waitq.wq_q >= global_waitqs && waitq.wq_q < global_waitqs + g_num_waitqs;
407 }
408
409 static inline bool
waitq_empty(waitq_t wq)410 waitq_empty(waitq_t wq)
411 {
412 struct turnstile *ts;
413
414 switch (waitq_type(wq)) {
415 case WQT_TURNSTILE:
416 return priority_queue_empty(&wq.wq_q->waitq_prio_queue);
417 case WQT_PORT:
418 ts = wq.wq_q->waitq_ts;
419 return ts == TURNSTILE_NULL ||
420 priority_queue_empty(&ts->ts_waitq.waitq_prio_queue);
421 case WQT_QUEUE:
422 case WQT_SELECT:
423 case WQT_PORT_SET:
424 case WQT_SELECT_SET:
425 return circle_queue_empty(&wq.wq_q->waitq_queue);
426
427 default:
428 return true;
429 }
430 }
431
432 #if CONFIG_WAITQ_STATS
433 #define NWAITQ_BTFRAMES 5
434
435 struct wq_stats {
436 uint64_t waits;
437 uint64_t wakeups;
438 uint64_t clears;
439 uint64_t failed_wakeups;
440
441 uintptr_t last_wait[NWAITQ_BTFRAMES];
442 uintptr_t last_wakeup[NWAITQ_BTFRAMES];
443 uintptr_t last_failed_wakeup[NWAITQ_BTFRAMES];
444 };
445
446 /* this global is for lldb */
447 const uint32_t g_nwaitq_btframes = NWAITQ_BTFRAMES;
448 struct wq_stats g_boot_stats;
449 struct wq_stats *g_waitq_stats = &g_boot_stats;
450
451 static __inline__ void
waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES],unsigned skip)452 waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES], unsigned skip)
453 {
454 uintptr_t buf[NWAITQ_BTFRAMES + skip];
455
456 memset(buf, 0, (NWAITQ_BTFRAMES + skip) * sizeof(uintptr_t));
457 backtrace(buf, g_nwaitq_btframes + skip, NULL, NULL);
458 memcpy(&bt[0], &buf[skip], NWAITQ_BTFRAMES * sizeof(uintptr_t));
459 }
460
461 static __inline__ struct wq_stats *
waitq_global_stats(waitq_t waitq)462 waitq_global_stats(waitq_t waitq)
463 {
464 struct wq_stats *wqs;
465 uint32_t idx;
466
467 if (!waitq_is_global(waitq)) {
468 return NULL;
469 }
470
471 idx = (uint32_t)(waitq.wq_q - global_waitqs);
472 assert(idx < g_num_waitqs);
473 wqs = &g_waitq_stats[idx];
474 return wqs;
475 }
476
477 static __inline__ void
waitq_stats_count_wait(waitq_t waitq)478 waitq_stats_count_wait(waitq_t waitq)
479 {
480 struct wq_stats *wqs = waitq_global_stats(waitq);
481 if (wqs != NULL) {
482 wqs->waits++;
483 waitq_grab_backtrace(wqs->last_wait, 2);
484 }
485 }
486
487 static __inline__ void
waitq_stats_count_wakeup(waitq_t waitq,int n)488 waitq_stats_count_wakeup(waitq_t waitq, int n)
489 {
490 struct wq_stats *wqs = waitq_global_stats(waitq);
491 if (wqs != NULL) {
492 if (n > 0) {
493 wqs->wakeups += n;
494 waitq_grab_backtrace(wqs->last_wakeup, 2);
495 } else {
496 wqs->failed_wakeups++;
497 waitq_grab_backtrace(wqs->last_failed_wakeup, 2);
498 }
499 }
500 }
501
502 static __inline__ void
waitq_stats_count_clear_wakeup(waitq_t waitq)503 waitq_stats_count_clear_wakeup(waitq_t waitq)
504 {
505 struct wq_stats *wqs = waitq_global_stats(waitq);
506 if (wqs != NULL) {
507 wqs->wakeups++;
508 wqs->clears++;
509 waitq_grab_backtrace(wqs->last_wakeup, 2);
510 }
511 }
512 #else /* !CONFIG_WAITQ_STATS */
513 #define waitq_stats_count_wait(q) do { } while (0)
514 #define waitq_stats_count_wakeup(q, n) do { } while (0)
515 #define waitq_stats_count_clear_wakeup(q) do { } while (0)
516 #endif
517
518 static struct waitq *
waitq_get_safeq(waitq_t waitq)519 waitq_get_safeq(waitq_t waitq)
520 {
521 if (waitq_type(waitq) == WQT_PORT) {
522 struct turnstile *ts = waitq.wq_q->waitq_ts;
523 return ts ? &ts->ts_waitq : NULL;
524 }
525
526 uint32_t hash = os_hash_kernel_pointer(waitq.wq_q);
527 return &global_waitqs[hash & (g_num_waitqs - 1)];
528 }
529
530 /*
531 * Since the priority ordered waitq uses basepri as the
532 * ordering key assert that this value fits in a uint8_t.
533 */
534 static_assert(MAXPRI <= UINT8_MAX);
535
536 static inline void
waitq_thread_insert(struct waitq * safeq,thread_t thread,waitq_t wq,event64_t event)537 waitq_thread_insert(struct waitq *safeq, thread_t thread,
538 waitq_t wq, event64_t event)
539 {
540 if (waitq_type(safeq) == WQT_TURNSTILE) {
541 turnstile_stats_update(0, TSU_TURNSTILE_BLOCK_COUNT, NULL);
542 turnstile_waitq_add_thread_priority_queue(safeq, thread);
543 } else {
544 turnstile_stats_update(0, TSU_REGULAR_WAITQ_BLOCK_COUNT, NULL);
545 /*
546 * This is the extent to which we currently take scheduling
547 * attributes into account:
548 *
549 * - If the thread is vm privileged, we stick it at the front
550 * of the queue, later, these queues will honor the policy
551 * value set at waitq_init time.
552 *
553 * - Realtime threads get priority for wait queue placements.
554 * This allows wait_queue_wakeup_one to prefer a waiting
555 * realtime thread, similar in principle to performing
556 * a wait_queue_wakeup_all and allowing scheduler
557 * prioritization to run the realtime thread, but without
558 * causing the lock contention of that scenario.
559 */
560 if (thread->sched_pri >= BASEPRI_REALTIME ||
561 !safeq->waitq_fifo ||
562 (thread->options & TH_OPT_VMPRIV)) {
563 circle_enqueue_head(&safeq->waitq_queue, &thread->wait_links);
564 } else {
565 circle_enqueue_tail(&safeq->waitq_queue, &thread->wait_links);
566 }
567 }
568
569 /* mark the event and real waitq, even if enqueued on a global safeq */
570 thread->wait_event = event;
571 thread->waitq = wq;
572 }
573
574 /**
575 * clear the thread-related waitq state, moving the thread from
576 * TH_WAIT to TH_WAIT | TH_WAKING, where it is no longer on a waitq and
577 * can expect to be go'ed in the near future.
578 *
579 * Clearing the waitq prevents further propagation of a turnstile boost
580 * on the thread and stops a clear_wait from succeeding.
581 *
582 * Conditions:
583 * 'thread' is locked, thread is waiting
584 */
585 static inline void
thread_clear_waitq_state(thread_t thread)586 thread_clear_waitq_state(thread_t thread)
587 {
588 assert(thread->state & TH_WAIT);
589
590 thread->waitq.wq_q = NULL;
591 thread->wait_event = NO_EVENT64;
592 thread->at_safe_point = FALSE;
593 thread->block_hint = kThreadWaitNone;
594 thread->state |= TH_WAKING;
595 }
596
597 static inline void
waitq_thread_remove(waitq_t wq,thread_t thread)598 waitq_thread_remove(waitq_t wq, thread_t thread)
599 {
600 if (waitq_type(wq) == WQT_TURNSTILE) {
601 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
602 (TURNSTILE_CODE(TURNSTILE_HEAP_OPERATIONS,
603 (THREAD_REMOVED_FROM_TURNSTILE_WAITQ))) | DBG_FUNC_NONE,
604 VM_KERNEL_UNSLIDE_OR_PERM(waitq_to_turnstile(wq.wq_q)),
605 thread_tid(thread), 0, 0, 0);
606 priority_queue_remove(&wq.wq_q->waitq_prio_queue,
607 &thread->wait_prioq_links);
608 } else {
609 circle_dequeue(&wq.wq_q->waitq_queue, &thread->wait_links);
610 if (waitq_is_global(wq) && waitq_empty(wq)) {
611 wq.wq_q->waitq_eventmask = 0;
612 }
613 }
614
615 thread_clear_waitq_state(thread);
616 }
617
618 bool
waitq_wait_possible(thread_t thread)619 waitq_wait_possible(thread_t thread)
620 {
621 return waitq_is_null(thread->waitq) &&
622 ((thread->state & TH_WAKING) == 0);
623 }
624
625 __static_testable void waitq_bootstrap(void);
626
627 __startup_func
628 __static_testable void
waitq_bootstrap(void)629 waitq_bootstrap(void)
630 {
631 const uint32_t qsz = sizeof(struct waitq);
632 vm_offset_t whsize;
633 int cpu = 0;
634
635 /*
636 * Determine the amount of memory we're willing to reserve for
637 * the waitqueue hash table
638 */
639 if (!PE_parse_boot_argn("wqsize", &whsize, sizeof(whsize))) {
640 whsize = round_page(thread_max * qsz / 5);
641 }
642
643 /*
644 * Determine the number of waitqueues we can fit.
645 * The hash algorithm requires that this be a power of 2.
646 */
647 g_num_waitqs = 0x80000000u >> __builtin_clzl(whsize / qsz);
648 assert(g_num_waitqs > 0);
649 whsize = round_page(g_num_waitqs * qsz);
650
651 kmem_alloc(kernel_map, (vm_offset_t *)&global_waitqs, whsize,
652 KMA_NOFAIL | KMA_KOBJECT | KMA_NOPAGEWAIT | KMA_PERMANENT,
653 VM_KERN_MEMORY_WAITQ);
654
655 #if CONFIG_WAITQ_STATS
656 whsize = round_page(g_num_waitqs * sizeof(struct wq_stats));
657 kmem_alloc(kernel_map, (vm_offset_t *)&g_waitq_stats, whsize,
658 KMA_NOFAIL | KMA_KOBJECT | KMA_NOPAGEWAIT | KMA_ZERO | KMA_PERMANENT,
659 VM_KERN_MEMORY_WAITQ);
660 #endif
661
662 for (uint32_t i = 0; i < g_num_waitqs; i++) {
663 waitq_init(&global_waitqs[i], WQT_QUEUE, SYNC_POLICY_FIFO);
664 }
665
666 waitq_init(&select_conflict_queue, WQT_SELECT, SYNC_POLICY_FIFO);
667
668 percpu_foreach(setid, select_setid) {
669 /* is not cpu_number() but CPUs haven't been numbered yet */
670 *setid = cpu++;
671 }
672 }
673 STARTUP(MACH_IPC, STARTUP_RANK_FIRST, waitq_bootstrap);
674
675
676 #pragma mark locking
677
678 static hw_spin_timeout_status_t
waitq_timeout_handler(void * _lock,hw_spin_timeout_t to,hw_spin_state_t st)679 waitq_timeout_handler(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
680 {
681 lck_spinlock_to_info_t lsti;
682 hw_lck_ticket_t tmp;
683 struct waitq *wq = _lock;
684
685 if (machine_timeout_suspended()) {
686 return HW_LOCK_TIMEOUT_CONTINUE;
687 }
688
689 lsti = lck_spinlock_timeout_hit(&wq->waitq_interlock, 0);
690 tmp.tcurnext = os_atomic_load(&wq->waitq_interlock.tcurnext, relaxed);
691
692 panic("waitq(%p) lock " HW_SPIN_TIMEOUT_FMT "; cpu=%d, "
693 "cticket: 0x%x, nticket: 0x%x, waiting for 0x%x, "
694 HW_SPIN_TIMEOUT_DETAILS_FMT,
695 wq, HW_SPIN_TIMEOUT_ARG(to, st), cpu_number(),
696 tmp.cticket, tmp.nticket, lsti->extra,
697 HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
698 }
699
700 static const struct hw_spin_policy waitq_spin_policy = {
701 .hwsp_name = "waitq",
702 #if defined(__i386__) || defined(__x86_64__)
703 .hwsp_timeout = &LockTimeOutTSC,
704 #else
705 .hwsp_timeout_atomic = &LockTimeOut,
706 #endif
707 /*
708 * Double the standard lock timeout, because wait queues tend
709 * to iterate over a number of threads - locking each. If there is
710 * a problem with a thread lock, it normally times out at the wait
711 * queue level first, hiding the real problem.
712 */
713 .hwsp_timeout_shift = 1,
714 .hwsp_lock_offset = offsetof(struct waitq, waitq_interlock),
715 .hwsp_op_timeout = waitq_timeout_handler,
716 };
717
718 __mockable void
waitq_invalidate(waitq_t waitq)719 waitq_invalidate(waitq_t waitq)
720 {
721 hw_lck_ticket_invalidate(&waitq.wq_q->waitq_interlock);
722 }
723
724 __mockable bool
waitq_held(waitq_t wq)725 waitq_held(waitq_t wq)
726 {
727 return hw_lck_ticket_held(&wq.wq_q->waitq_interlock);
728 }
729
730 __mockable void
waitq_lock(waitq_t wq)731 waitq_lock(waitq_t wq)
732 {
733 (void)hw_lck_ticket_lock_to(&wq.wq_q->waitq_interlock,
734 &waitq_spin_policy, &waitq_lck_grp);
735 #if defined(__x86_64__)
736 pltrace(FALSE);
737 #endif
738 }
739
740 __mockable bool
waitq_lock_try(waitq_t wq)741 waitq_lock_try(waitq_t wq)
742 {
743 bool rc = hw_lck_ticket_lock_try(&wq.wq_q->waitq_interlock, &waitq_lck_grp);
744
745 #if defined(__x86_64__)
746 if (rc) {
747 pltrace(FALSE);
748 }
749 #endif
750 return rc;
751 }
752
753 bool
waitq_lock_reserve(waitq_t wq,uint32_t * ticket)754 waitq_lock_reserve(waitq_t wq, uint32_t *ticket)
755 {
756 return hw_lck_ticket_reserve(&wq.wq_q->waitq_interlock, ticket, &waitq_lck_grp);
757 }
758
759 __mockable void
waitq_lock_wait(waitq_t wq,uint32_t ticket)760 waitq_lock_wait(waitq_t wq, uint32_t ticket)
761 {
762 (void)hw_lck_ticket_wait(&wq.wq_q->waitq_interlock, ticket,
763 &waitq_spin_policy, &waitq_lck_grp);
764 #if defined(__x86_64__)
765 pltrace(FALSE);
766 #endif
767 }
768
769 bool
waitq_lock_allow_invalid(waitq_t wq)770 waitq_lock_allow_invalid(waitq_t wq)
771 {
772 hw_lock_status_t rc;
773
774 rc = hw_lck_ticket_lock_allow_invalid(&wq.wq_q->waitq_interlock,
775 &waitq_spin_policy, &waitq_lck_grp);
776
777 #if defined(__x86_64__)
778 if (rc == HW_LOCK_ACQUIRED) {
779 pltrace(FALSE);
780 }
781 #endif
782 return rc == HW_LOCK_ACQUIRED;
783 }
784
785 __mockable void
waitq_unlock(waitq_t wq)786 waitq_unlock(waitq_t wq)
787 {
788 assert(waitq_held(wq));
789 #if defined(__x86_64__)
790 pltrace(TRUE);
791 #endif
792 hw_lck_ticket_unlock(&wq.wq_q->waitq_interlock);
793 }
794
795
796 #pragma mark assert_wait / wakeup
797
798 struct waitq_select_args {
799 /* input parameters */
800 event64_t event;
801 wait_result_t result;
802 waitq_wakeup_flags_t flags;
803 uint32_t max_threads;
804 bool is_identified;
805
806 /* output parameters */
807 /* counts all woken threads, may have more threads than on threadq */
808 uint32_t nthreads;
809 /* preemption is disabled while threadq is non-empty */
810 circle_queue_head_t threadq;
811 };
812
813 static inline void
maybe_adjust_thread_pri(thread_t thread,waitq_wakeup_flags_t flags,__kdebug_only waitq_t waitq)814 maybe_adjust_thread_pri(
815 thread_t thread,
816 waitq_wakeup_flags_t flags,
817 __kdebug_only waitq_t waitq)
818 {
819 /*
820 * If the caller is requesting the waitq subsystem to promote the
821 * priority of the awoken thread, then boost the thread's priority to
822 * the default WAITQ_BOOST_PRIORITY (if it's not already equal or
823 * higher priority). This boost must be removed via a call to
824 * waitq_clear_promotion_locked before the thread waits again.
825 */
826 if (flags & WAITQ_PROMOTE_PRIORITY) {
827 uintptr_t trace_waitq = 0;
828 if (__improbable(kdebug_enable)) {
829 trace_waitq = VM_KERNEL_UNSLIDE_OR_PERM(waitq.wq_q);
830 }
831
832 sched_thread_promote_reason(thread, TH_SFLAG_WAITQ_PROMOTED, trace_waitq);
833 }
834 }
835
836 static void
waitq_select_queue_add(waitq_t waitq,thread_t thread,struct waitq_select_args * args)837 waitq_select_queue_add(waitq_t waitq, thread_t thread, struct waitq_select_args *args)
838 {
839 spl_t s = splsched();
840
841 thread_lock(thread);
842 thread_clear_waitq_state(thread);
843
844 if (!args->is_identified && thread->state & TH_RUN) {
845 /*
846 * A thread that is currently on core may try to clear its own
847 * wait with clear wait or by waking its own event instead of
848 * calling thread_block as is normally expected. After doing
849 * this, it expects to be able to immediately wait again.
850 *
851 * If we are currently on a different CPU and waking that
852 * thread, as soon as we unlock the waitq and thread, that
853 * operation could complete, but we would still be holding the
854 * thread on our flush queue, leaving it in the waking state
855 * where it can't yet assert another wait.
856 *
857 * Since we know that we won't actually need to enqueue the
858 * thread on the runq due to it being on core, we can just
859 * immediately unblock it here so that the thread will be in a
860 * waitable state after we release its thread lock from this
861 * lock hold.
862 *
863 * Wakeups using *_identify can't be allowed to pass
864 * thread block until they're resumed, so they can't use
865 * this path. That means they are not allowed to skip calling
866 * thread_block.
867 */
868 maybe_adjust_thread_pri(thread, args->flags, waitq);
869 thread_go(thread, args->result, false);
870 } else {
871 if (circle_queue_empty(&args->threadq)) {
872 /*
873 * preemption is disabled while threads are
874 * on threadq - balanced in:
875 * waitq_resume_identified_thread
876 * waitq_select_queue_flush
877 */
878 disable_preemption();
879 }
880
881 circle_enqueue_tail(&args->threadq, &thread->wait_links);
882 }
883
884 thread_unlock(thread);
885
886 splx(s);
887 }
888
889
890 #if SCHED_HYGIENE_DEBUG
891
892 TUNABLE_DEV_WRITEABLE(uint32_t, waitq_flush_excess_threads, "waitq_flush_excess_threads", 20);
893 TUNABLE_DEV_WRITEABLE(uint32_t, waitq_flush_excess_time_mt, "waitq_flush_excess_time_mt", 7200); /* 300us */
894
895 #endif /* SCHED_HYGIENE_DEBUG */
896
897
898 static void
waitq_select_queue_flush(waitq_t waitq,struct waitq_select_args * args)899 waitq_select_queue_flush(waitq_t waitq, struct waitq_select_args *args)
900 {
901 thread_t thread = THREAD_NULL;
902
903 assert(!circle_queue_empty(&args->threadq));
904
905 int flushed_threads = 0;
906
907 #if SCHED_HYGIENE_DEBUG
908 uint64_t start_time = ml_get_sched_hygiene_timebase();
909 disable_preemption();
910 #endif /* SCHED_HYGIENE_DEBUG */
911
912 cqe_foreach_element_safe(thread, &args->threadq, wait_links) {
913 circle_dequeue(&args->threadq, &thread->wait_links);
914 assert_thread_magic(thread);
915
916 spl_t s = splsched();
917
918 thread_lock(thread);
919 maybe_adjust_thread_pri(thread, args->flags, waitq);
920 thread_go(thread, args->result, args->flags & WAITQ_HANDOFF);
921 thread_unlock(thread);
922
923 splx(s);
924
925 flushed_threads++;
926 }
927
928 #if SCHED_HYGIENE_DEBUG
929 uint64_t end_time = ml_get_sched_hygiene_timebase();
930
931 /*
932 * Check for a combination of excess threads and long time,
933 * so that a single thread wakeup that gets stuck is still caught
934 */
935 if (waitq_flush_excess_threads && waitq_flush_excess_time_mt &&
936 flushed_threads > waitq_flush_excess_threads &&
937 (end_time - start_time) > waitq_flush_excess_time_mt) {
938 /*
939 * Hack alert:
940 *
941 * If a wakeup-all is done with interrupts disabled, or if
942 * there are enough threads / lock contention to pass the
943 * preemption disable threshold, it can take Too Long to get
944 * through waking up all the threads, leading to
945 * the watchdog going off.
946 *
947 * While we are working on a change to break up this
948 * giant glob of work into smaller chunks, remove this
949 * time region from the watchdog's memory to avoid
950 * unit tests that wake up hundreds of threads on
951 * one semaphore from causing this to blow up.
952 *
953 * We only trigger this when seeing a combination of
954 * excess threads and long time, so that a single
955 * thread wakeup that gets stuck is still caught.
956 *
957 * This was improved with
958 * rdar://90325140
959 * to enable interrupts during most wakeup-all's
960 * and will be removed with
961 * rdar://101110793
962 */
963 if (ml_get_interrupts_enabled() == false) {
964 ml_spin_debug_reset(current_thread());
965 ml_irq_debug_abandon();
966 }
967 abandon_preemption_disable_measurement();
968
969 KDBG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_INT_MASKED_RESET), flushed_threads, end_time - start_time);
970 }
971
972 enable_preemption();
973
974 #endif /* SCHED_HYGIENE_DEBUG */
975
976 /*
977 * match the disable when making threadq nonempty from
978 * waitq_select_queue_add
979 */
980 enable_preemption();
981 }
982
983 /**
984 * Routine to iterate over the waitq for non-priority ordered waitqs
985 *
986 * Conditions:
987 * args->waitq (and the posted waitq) is locked
988 *
989 * Notes:
990 * If one or more threads are selected, this may disable preemption,
991 * which is balanced when the threadq is flushed in
992 * waitq_resume_identified_thread or waitq_select_queue_flush.
993 */
994 static waitq_flags_t
waitq_queue_iterate_locked(struct waitq * safeq,struct waitq * waitq,struct waitq_select_args * args)995 waitq_queue_iterate_locked(struct waitq *safeq, struct waitq *waitq,
996 struct waitq_select_args *args)
997 {
998 thread_t thread = THREAD_NULL;
999 waitq_flags_t eventmask = 0;
1000
1001 cqe_foreach_element_safe(thread, &safeq->waitq_queue, wait_links) {
1002 assert_thread_magic(thread);
1003
1004 /*
1005 * For non-priority ordered waitqs, we allow multiple events to be
1006 * mux'ed into the same waitq. Also safeqs may contain threads from
1007 * multiple waitqs. Only pick threads that match the
1008 * requested wait event.
1009 */
1010 if (waitq_same(thread->waitq, waitq) && thread->wait_event == args->event) {
1011 /* We found a matching thread! Pull it from the queue. */
1012
1013 circle_dequeue(&safeq->waitq_queue, &thread->wait_links);
1014
1015 waitq_select_queue_add(waitq, thread, args);
1016
1017 if (++args->nthreads >= args->max_threads) {
1018 break;
1019 }
1020 } else {
1021 /* thread wasn't selected so track its event */
1022 eventmask |= waitq_same(thread->waitq, safeq)
1023 ? _CAST_TO_EVENT_MASK(thread->wait_event)
1024 : _CAST_TO_EVENT_MASK(thread->waitq.wq_q);
1025 }
1026 }
1027
1028 return eventmask;
1029 }
1030
1031 /**
1032 * Routine to iterate and remove threads from priority ordered waitqs
1033 *
1034 * Conditions:
1035 * args->waitq (and the posted waitq) is locked
1036 *
1037 * Notes:
1038 * The priority ordered waitqs only support maximum priority element removal.
1039 *
1040 * Also, the implementation makes sure that all threads in a priority ordered
1041 * waitq are waiting on the same wait event. This is not necessarily true for
1042 * non-priority ordered waitqs. If one or more threads are selected, this may
1043 * disable preemption.
1044 */
1045 static void
waitq_prioq_iterate_locked(struct waitq * ts_wq,struct waitq * waitq,struct waitq_select_args * args)1046 waitq_prioq_iterate_locked(
1047 struct waitq *ts_wq,
1048 struct waitq *waitq,
1049 struct waitq_select_args *args)
1050 {
1051 struct turnstile *ts = waitq_to_turnstile(ts_wq);
1052 bool update_inheritor = (args->flags & WAITQ_UPDATE_INHERITOR);
1053
1054 if (update_inheritor && args->max_threads == UINT32_MAX) {
1055 /*
1056 * If we are going to wake up all threads,
1057 * go ahead and set the inheritor to NULL.
1058 */
1059 turnstile_kernel_update_inheritor_on_wake_locked(ts,
1060 TURNSTILE_INHERITOR_NULL, TURNSTILE_INHERITOR_THREAD);
1061 update_inheritor = false;
1062 }
1063
1064 while (!priority_queue_empty(&ts_wq->waitq_prio_queue)) {
1065 thread_t thread;
1066
1067 thread = priority_queue_remove_max(&ts_wq->waitq_prio_queue,
1068 struct thread, wait_prioq_links);
1069
1070 assert_thread_magic(thread);
1071
1072 /*
1073 * Ensure the wait event matches since priority ordered waitqs do not
1074 * support multiple events in the same waitq.
1075 */
1076 assert(waitq_same(thread->waitq, waitq) && (thread->wait_event == args->event));
1077
1078 if (update_inheritor) {
1079 turnstile_inheritor_t inheritor = thread;
1080
1081 if (priority_queue_empty(&ts_wq->waitq_prio_queue)) {
1082 inheritor = TURNSTILE_INHERITOR_NULL;
1083 }
1084 turnstile_kernel_update_inheritor_on_wake_locked(ts,
1085 inheritor, TURNSTILE_INHERITOR_THREAD);
1086 update_inheritor = false;
1087 }
1088
1089 waitq_select_queue_add(waitq, thread, args);
1090
1091 if (++args->nthreads >= args->max_threads) {
1092 break;
1093 }
1094 }
1095 }
1096
1097 /**
1098 * @function do_waitq_select_n_locked_queue
1099 *
1100 * @brief
1101 * Selects threads waiting on a wait queue.
1102 *
1103 * @discussion
1104 * @c waitq is locked.
1105 * If @c waitq is a set, then the wait queue posting to it is locked too.
1106 *
1107 * If one or more threads are selected, this may disable preemption.
1108 */
1109 static void
do_waitq_select_n_locked_queue(waitq_t waitq,struct waitq_select_args * args)1110 do_waitq_select_n_locked_queue(waitq_t waitq, struct waitq_select_args *args)
1111 {
1112 spl_t s = 0;
1113
1114 struct waitq *safeq;
1115 waitq_flags_t eventmask, remaining_eventmask;
1116
1117 if (waitq_irq_safe(waitq)) {
1118 eventmask = _CAST_TO_EVENT_MASK(args->event);
1119 safeq = waitq.wq_q;
1120 } else {
1121 /* JMM - add flag to waitq to avoid global lookup if no waiters */
1122 eventmask = _CAST_TO_EVENT_MASK(waitq.wq_q);
1123 safeq = waitq_get_safeq(waitq);
1124 if (safeq == NULL) {
1125 return;
1126 }
1127
1128 s = splsched();
1129 waitq_lock(safeq);
1130 }
1131
1132 /*
1133 * If the safeq doesn't have an eventmask (not global) or the event
1134 * we're looking for IS set in its eventmask, then scan the threads
1135 * in that queue for ones that match the original <waitq,event> pair.
1136 */
1137 if (waitq_type(safeq) == WQT_TURNSTILE) {
1138 waitq_prioq_iterate_locked(safeq, waitq.wq_q, args);
1139 } else if (!waitq_is_global(safeq)) {
1140 waitq_queue_iterate_locked(safeq, waitq.wq_q, args);
1141 } else if ((safeq->waitq_eventmask & eventmask) == eventmask) {
1142 remaining_eventmask = waitq_queue_iterate_locked(safeq,
1143 waitq.wq_q, args);
1144
1145 /*
1146 * Update the eventmask of global queues we just scanned:
1147 * - If we selected all the threads in the queue,
1148 * we can clear its eventmask.
1149 *
1150 * - If we didn't find enough threads to fill our needs,
1151 * then we can assume we looked at every thread in the queue
1152 * and the mask we computed is complete - so reset it.
1153 */
1154 if (waitq_empty(safeq)) {
1155 safeq->waitq_eventmask = 0;
1156 } else if (args->nthreads < args->max_threads) {
1157 safeq->waitq_eventmask = remaining_eventmask;
1158 }
1159 }
1160
1161 /* unlock the safe queue if we locked one above */
1162 if (!waitq_same(waitq, safeq)) {
1163 waitq_unlock(safeq);
1164 splx(s);
1165 }
1166 }
1167
1168 /**
1169 * @function do_waitq_link_select_n_locked()
1170 *
1171 * @brief
1172 * Selects threads waiting on any set a wait queue belongs to,
1173 * or preposts the wait queue onto them.
1174 *
1175 * @discussion
1176 * @c waitq is locked.
1177 */
1178 __attribute__((noinline))
1179 static void
do_waitq_select_n_locked_sets(waitq_t waitq,struct waitq_select_args * args)1180 do_waitq_select_n_locked_sets(waitq_t waitq, struct waitq_select_args *args)
1181 {
1182 waitq_type_t wq_type = waitq_type(waitq);
1183 waitq_link_t link;
1184
1185 assert(args->event == NO_EVENT64);
1186 assert(waitq_preposts(waitq));
1187
1188 waitq_link_foreach(link, waitq) {
1189 waitq_t wqset = wql_wqs(link);
1190
1191 if (wql_wqs_preposted(link)) {
1192 /*
1193 * The wql_wqs_preposted() bit is cleared
1194 * under both the wq/wqset lock.
1195 *
1196 * If the wqset is still preposted,
1197 * we really won't find threads there.
1198 *
1199 * Just mark the waitq as preposted and move on.
1200 */
1201 if (wq_type == WQT_PORT) {
1202 waitq.wq_q->waitq_preposted = true;
1203 }
1204 continue;
1205 }
1206
1207 if (wq_type == WQT_SELECT) {
1208 if (!wqset.wqs_sel) {
1209 continue;
1210 }
1211 if (!waitq_lock_allow_invalid(wqset)) {
1212 continue;
1213 }
1214 if (!wql_sellink_valid(wqset.wqs_sel, link.wqls)) {
1215 goto out_unlock;
1216 }
1217 } else {
1218 waitq_lock(wqset);
1219 if (!waitq_valid(wqset)) {
1220 goto out_unlock;
1221 }
1222 }
1223
1224 /*
1225 * Find any threads waiting on this wait queue set as a queue.
1226 */
1227 do_waitq_select_n_locked_queue(wqset, args);
1228
1229 if (args->nthreads == 0) {
1230 /* No thread selected: prepost 'waitq' to 'wqset' */
1231 wql_wqs_mark_preposted(link);
1232 if (wq_type == WQT_SELECT) {
1233 wqset.wqs_sel->selset_preposted = true;
1234 } else {
1235 waitq.wq_q->waitq_preposted = true;
1236 circle_dequeue(&wqset.wqs_set->wqset_links,
1237 &link.wqll->wql_slink);
1238 circle_enqueue_tail(&wqset.wqs_set->wqset_preposts,
1239 &link.wqll->wql_slink);
1240 ipc_pset_prepost(wqset.wqs_set, waitq.wq_q);
1241 }
1242 }
1243
1244 out_unlock:
1245 waitq_unlock(wqset);
1246
1247 if (args->nthreads >= args->max_threads) {
1248 break;
1249 }
1250 }
1251 }
1252
1253 /**
1254 * @function do_waitq_select_n_locked
1255 *
1256 * @brief
1257 * Selects threads waiting on a wait queue, or preposts it.
1258 *
1259 * @discussion
1260 * @c waitq is locked.
1261 *
1262 * Recurses into all sets this wait queue belongs to.
1263 */
1264 static void
do_waitq_select_n_locked(waitq_t waitq,struct waitq_select_args * args)1265 do_waitq_select_n_locked(waitq_t waitq, struct waitq_select_args *args)
1266 {
1267 do_waitq_select_n_locked_queue(waitq, args);
1268
1269 if (args->nthreads >= args->max_threads) {
1270 /* already enough threads found */
1271 return;
1272 }
1273
1274 if (args->event != NO_EVENT64 || !waitq_preposts(waitq)) {
1275 /* this wakeup should not recurse into sets */
1276 return;
1277 }
1278
1279 do_waitq_select_n_locked_sets(waitq, args);
1280 }
1281
1282 static inline bool
waitq_is_preposted_set(waitq_t waitq)1283 waitq_is_preposted_set(waitq_t waitq)
1284 {
1285 switch (waitq_type(waitq)) {
1286 case WQT_PORT_SET:
1287 return waitq_set_first_prepost(waitq.wqs_set, WQS_PREPOST_PEEK) != NULL;
1288
1289 case WQT_SELECT_SET:
1290 return waitq.wqs_sel->selset_preposted;
1291
1292 default:
1293 return false;
1294 }
1295 }
1296
1297 wait_result_t
waitq_assert_wait64_locked(waitq_t waitq,event64_t wait_event,wait_interrupt_t interruptible,wait_timeout_urgency_t urgency,uint64_t deadline,uint64_t leeway,thread_t thread)1298 waitq_assert_wait64_locked(waitq_t waitq,
1299 event64_t wait_event,
1300 wait_interrupt_t interruptible,
1301 wait_timeout_urgency_t urgency,
1302 uint64_t deadline,
1303 uint64_t leeway,
1304 thread_t thread)
1305 {
1306 wait_result_t wait_result;
1307 struct waitq *safeq;
1308 uintptr_t eventmask;
1309 spl_t s;
1310
1311 switch (waitq_type(waitq)) {
1312 case WQT_PORT:
1313 case WQT_SELECT:
1314 case WQT_PORT_SET:
1315 case WQT_SELECT_SET:
1316 assert(wait_event == NO_EVENT64);
1317 break;
1318 default:
1319 assert(wait_event != NO_EVENT64);
1320 break;
1321 }
1322
1323 /*
1324 * Warning: Do _not_ place debugging print statements here.
1325 * The waitq is locked!
1326 */
1327 assert(!thread->started || thread == current_thread());
1328
1329 if (!waitq_wait_possible(thread)) {
1330 panic("thread already waiting on %p", thread->waitq.wq_q);
1331 }
1332
1333 s = splsched();
1334
1335 /*
1336 * early-out if the thread is waiting on a wait queue set
1337 * that has already been pre-posted.
1338 *
1339 * Note: waitq_is_preposted_set() may unlock the waitq-set
1340 */
1341 if (waitq_is_preposted_set(waitq)) {
1342 thread_lock(thread);
1343 thread->wait_result = THREAD_AWAKENED;
1344 thread_unlock(thread);
1345 splx(s);
1346 return THREAD_AWAKENED;
1347 }
1348
1349 /*
1350 * If already dealing with an irq safe wait queue, we are all set.
1351 * Otherwise, determine a global queue to use and lock it.
1352 */
1353 if (waitq_irq_safe(waitq)) {
1354 safeq = waitq.wq_q;
1355 eventmask = _CAST_TO_EVENT_MASK(wait_event);
1356 } else {
1357 safeq = waitq_get_safeq(waitq);
1358 if (__improbable(safeq == NULL)) {
1359 panic("Trying to assert_wait on a turnstile proxy "
1360 "that hasn't been donated one (waitq: %p)", waitq.wq_q);
1361 }
1362 eventmask = _CAST_TO_EVENT_MASK(waitq.wq_q);
1363 waitq_lock(safeq);
1364 }
1365
1366 /* lock the thread now that we have the irq-safe waitq locked */
1367 thread_lock(thread);
1368
1369 wait_result = thread_mark_wait_locked(thread, interruptible);
1370 /* thread->wait_result has been set */
1371 if (wait_result == THREAD_WAITING) {
1372 waitq_thread_insert(safeq, thread, waitq, wait_event);
1373
1374 if (deadline != 0) {
1375 bool was_active;
1376
1377 was_active = timer_call_enter_with_leeway(thread->wait_timer,
1378 NULL,
1379 deadline, leeway,
1380 urgency, FALSE);
1381 if (!was_active) {
1382 thread->wait_timer_active++;
1383 }
1384 thread->wait_timer_armed = true;
1385 }
1386
1387 if (waitq_is_global(safeq)) {
1388 safeq->waitq_eventmask |= (waitq_flags_t)eventmask;
1389 }
1390
1391 waitq_stats_count_wait(waitq);
1392 }
1393
1394 /* unlock the thread */
1395 thread_unlock(thread);
1396
1397 /* update the inheritor's thread priority if the waitq is embedded in turnstile */
1398 if (waitq_type(safeq) == WQT_TURNSTILE && wait_result == THREAD_WAITING) {
1399 turnstile_recompute_priority_locked(waitq_to_turnstile(safeq));
1400 turnstile_update_inheritor_locked(waitq_to_turnstile(safeq));
1401 }
1402
1403 /* unlock the safeq if we locked it here */
1404 if (!waitq_same(waitq, safeq)) {
1405 waitq_unlock(safeq);
1406 }
1407
1408 splx(s);
1409
1410 return wait_result;
1411 }
1412
1413 bool
waitq_pull_thread_locked(waitq_t waitq,thread_t thread)1414 waitq_pull_thread_locked(waitq_t waitq, thread_t thread)
1415 {
1416 struct waitq *safeq;
1417 uint32_t ticket;
1418
1419 assert_thread_magic(thread);
1420
1421 /* Find the interrupts disabled queue thread is waiting on */
1422 if (waitq_irq_safe(waitq)) {
1423 safeq = waitq.wq_q;
1424 } else {
1425 safeq = waitq_get_safeq(waitq);
1426 if (__improbable(safeq == NULL)) {
1427 panic("Trying to clear_wait on a turnstile proxy "
1428 "that hasn't been donated one (waitq: %p)", waitq.wq_q);
1429 }
1430 }
1431
1432 /*
1433 * thread is already locked so have to try for the waitq lock.
1434 *
1435 * We can't wait for the waitq lock under the thread lock,
1436 * however we can reserve our slot in the lock queue,
1437 * and if that reservation requires waiting, we are guaranteed
1438 * that this waitq can't die until we got our turn!
1439 */
1440 if (!waitq_lock_reserve(safeq, &ticket)) {
1441 thread_unlock(thread);
1442 waitq_lock_wait(safeq, ticket);
1443 thread_lock(thread);
1444
1445 if (!waitq_same(waitq, thread->waitq)) {
1446 /*
1447 * While we were waiting for our reservation the thread
1448 * stopped waiting on this waitq, bail out.
1449 */
1450 waitq_unlock(safeq);
1451 return false;
1452 }
1453 }
1454
1455 waitq_thread_remove(safeq, thread);
1456 waitq_stats_count_clear_wakeup(waitq);
1457 waitq_unlock(safeq);
1458 return true;
1459 }
1460
1461
1462 void
waitq_clear_promotion_locked(waitq_t waitq,thread_t thread)1463 waitq_clear_promotion_locked(waitq_t waitq, thread_t thread)
1464 {
1465 spl_t s = 0;
1466
1467 assert(waitq_held(waitq));
1468 assert(thread != THREAD_NULL);
1469 assert(thread == current_thread());
1470
1471 /* This flag is only cleared by the thread itself, so safe to check outside lock */
1472 if ((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) != TH_SFLAG_WAITQ_PROMOTED) {
1473 return;
1474 }
1475
1476 if (!waitq_irq_safe(waitq)) {
1477 s = splsched();
1478 }
1479 thread_lock(thread);
1480
1481 sched_thread_unpromote_reason(thread, TH_SFLAG_WAITQ_PROMOTED, 0);
1482
1483 thread_unlock(thread);
1484 if (!waitq_irq_safe(waitq)) {
1485 splx(s);
1486 }
1487 }
1488
1489 static inline bool
waitq_should_unlock(waitq_wakeup_flags_t flags)1490 waitq_should_unlock(waitq_wakeup_flags_t flags)
1491 {
1492 return (flags & (WAITQ_UNLOCK | WAITQ_KEEP_LOCKED)) == WAITQ_UNLOCK;
1493 }
1494
1495 static inline bool
waitq_should_enable_interrupts(waitq_wakeup_flags_t flags)1496 waitq_should_enable_interrupts(waitq_wakeup_flags_t flags)
1497 {
1498 return (flags & (WAITQ_UNLOCK | WAITQ_KEEP_LOCKED | WAITQ_ENABLE_INTERRUPTS)) == (WAITQ_UNLOCK | WAITQ_ENABLE_INTERRUPTS);
1499 }
1500
1501 __mockable uint32_t
waitq_wakeup64_nthreads_locked(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags,uint32_t nthreads)1502 waitq_wakeup64_nthreads_locked(
1503 waitq_t waitq,
1504 event64_t wake_event,
1505 wait_result_t result,
1506 waitq_wakeup_flags_t flags,
1507 uint32_t nthreads)
1508 {
1509 struct waitq_select_args args = {
1510 .event = wake_event,
1511 .result = result,
1512 .flags = (nthreads == 1) ? flags : (flags & ~WAITQ_HANDOFF),
1513 .max_threads = nthreads,
1514 };
1515
1516 assert(waitq_held(waitq));
1517
1518 if (flags & WAITQ_ENABLE_INTERRUPTS) {
1519 assert(waitq_should_unlock(flags));
1520 assert(ml_get_interrupts_enabled() == false);
1521 }
1522
1523 do_waitq_select_n_locked(waitq, &args);
1524 waitq_stats_count_wakeup(waitq, args.nthreads);
1525
1526 if (waitq_should_unlock(flags)) {
1527 waitq_unlock(waitq);
1528 }
1529
1530 if (waitq_should_enable_interrupts(flags)) {
1531 ml_set_interrupts_enabled(true);
1532 }
1533
1534 if (!circle_queue_empty(&args.threadq)) {
1535 waitq_select_queue_flush(waitq, &args);
1536 }
1537
1538 return args.nthreads;
1539 }
1540
1541 kern_return_t
waitq_wakeup64_all_locked(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)1542 waitq_wakeup64_all_locked(
1543 waitq_t waitq,
1544 event64_t wake_event,
1545 wait_result_t result,
1546 waitq_wakeup_flags_t flags)
1547 {
1548 uint32_t count;
1549
1550 count = waitq_wakeup64_nthreads_locked(waitq, wake_event, result,
1551 flags, UINT32_MAX);
1552 return count ? KERN_SUCCESS : KERN_NOT_WAITING;
1553 }
1554
1555 kern_return_t
waitq_wakeup64_one_locked(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)1556 waitq_wakeup64_one_locked(
1557 waitq_t waitq,
1558 event64_t wake_event,
1559 wait_result_t result,
1560 waitq_wakeup_flags_t flags)
1561 {
1562 uint32_t count;
1563
1564 count = waitq_wakeup64_nthreads_locked(waitq, wake_event, result,
1565 flags, 1);
1566 return count ? KERN_SUCCESS : KERN_NOT_WAITING;
1567 }
1568
1569 __mockable thread_t
waitq_wakeup64_identify_locked(waitq_t waitq,event64_t wake_event,waitq_wakeup_flags_t flags)1570 waitq_wakeup64_identify_locked(
1571 waitq_t waitq,
1572 event64_t wake_event,
1573 waitq_wakeup_flags_t flags)
1574 {
1575 struct waitq_select_args args = {
1576 .event = wake_event,
1577 .result = THREAD_AWAKENED, /* this won't be used */
1578 .flags = flags,
1579 .max_threads = 1,
1580 .is_identified = true,
1581 };
1582
1583 assert(waitq_held(waitq));
1584
1585 do_waitq_select_n_locked(waitq, &args);
1586 waitq_stats_count_wakeup(waitq, args.nthreads);
1587
1588 if (waitq_should_unlock(flags)) {
1589 waitq_unlock(waitq);
1590 }
1591
1592 if (waitq_should_enable_interrupts(flags)) {
1593 ml_set_interrupts_enabled(true);
1594 }
1595
1596 if (args.nthreads > 0) {
1597 thread_t thread = cqe_dequeue_head(&args.threadq, struct thread, wait_links);
1598
1599 assert(args.nthreads == 1 && circle_queue_empty(&args.threadq));
1600
1601 /* Thread is off waitq, not unblocked yet */
1602
1603 return thread;
1604 }
1605
1606 return THREAD_NULL;
1607 }
1608
1609 __mockable void
waitq_resume_identified_thread(waitq_t waitq,thread_t thread,wait_result_t result,waitq_wakeup_flags_t flags)1610 waitq_resume_identified_thread(
1611 waitq_t waitq,
1612 thread_t thread,
1613 wait_result_t result,
1614 waitq_wakeup_flags_t flags)
1615 {
1616 spl_t spl = splsched();
1617
1618 thread_lock(thread);
1619
1620 assert((thread->state & (TH_WAIT | TH_WAKING)) == (TH_WAIT | TH_WAKING));
1621
1622 maybe_adjust_thread_pri(thread, flags, waitq);
1623 thread_go(thread, result, (flags & WAITQ_HANDOFF));
1624
1625 thread_unlock(thread);
1626 splx(spl);
1627
1628 enable_preemption(); // balance disable upon pulling thread
1629 }
1630
1631 void
waitq_resume_and_bind_identified_thread(waitq_t waitq,thread_t thread,processor_t processor,wait_result_t result,waitq_wakeup_flags_t flags)1632 waitq_resume_and_bind_identified_thread(
1633 waitq_t waitq,
1634 thread_t thread,
1635 processor_t processor,
1636 wait_result_t result,
1637 waitq_wakeup_flags_t flags)
1638 {
1639 spl_t spl = splsched();
1640
1641 thread_lock(thread);
1642
1643 assert((thread->state & (TH_WAIT | TH_WAKING)) == (TH_WAIT | TH_WAKING));
1644
1645 maybe_adjust_thread_pri(thread, flags, waitq);
1646 thread_bind_during_wakeup(thread, processor);
1647 thread_go(thread, result, (flags & WAITQ_HANDOFF));
1648
1649 thread_unlock(thread);
1650 splx(spl);
1651
1652 enable_preemption(); // balance disable upon pulling thread
1653 }
1654
1655 kern_return_t
waitq_wakeup64_thread_and_unlock(struct waitq * waitq,event64_t event,thread_t thread,wait_result_t result)1656 waitq_wakeup64_thread_and_unlock(
1657 struct waitq *waitq,
1658 event64_t event,
1659 thread_t thread,
1660 wait_result_t result)
1661 {
1662 kern_return_t ret = KERN_NOT_WAITING;
1663
1664 assert(waitq_irq_safe(waitq));
1665 assert(waitq_held(waitq));
1666 assert_thread_magic(thread);
1667
1668 /*
1669 * See if the thread was still waiting there. If so, it got
1670 * dequeued and returned locked.
1671 *
1672 * By holding the thread locked across the go, a thread on another CPU
1673 * can't see itself in 'waking' state, even if it uses clear_wait.
1674 */
1675 thread_lock(thread);
1676
1677 if (waitq_same(thread->waitq, waitq) && thread->wait_event == event) {
1678 waitq_thread_remove(waitq, thread);
1679 ret = KERN_SUCCESS;
1680 }
1681 waitq_stats_count_wakeup(waitq, ret == KERN_SUCCESS ? 1 : 0);
1682
1683 waitq_unlock(waitq);
1684
1685 if (ret == KERN_SUCCESS) {
1686 thread_go(thread, result, /* handoff */ false);
1687 }
1688
1689 thread_unlock(thread);
1690
1691 return ret;
1692 }
1693
1694
1695 #pragma mark waitq
1696
1697 __attribute__((always_inline))
1698 void
waitq_init(waitq_t waitq,waitq_type_t type,int policy)1699 waitq_init(waitq_t waitq, waitq_type_t type, int policy)
1700 {
1701 assert((policy & SYNC_POLICY_FIXED_PRIORITY) == 0);
1702
1703 *waitq.wq_q = (struct waitq){
1704 .waitq_type = type,
1705 .waitq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0),
1706 };
1707
1708 switch (type) {
1709 case WQT_INVALID:
1710 __builtin_trap();
1711
1712 case WQT_TURNSTILE:
1713 /* For turnstile, initialize it as a priority queue */
1714 priority_queue_init(&waitq.wq_q->waitq_prio_queue);
1715 assert(waitq.wq_q->waitq_fifo == 0);
1716 break;
1717
1718 case WQT_PORT:
1719 waitq.wq_q->waitq_ts = TURNSTILE_NULL;
1720 break;
1721
1722 case WQT_PORT_SET:
1723 circle_queue_init(&waitq.wqs_set->wqset_preposts);
1724 OS_FALLTHROUGH;
1725 case WQT_SELECT_SET:
1726 case WQT_QUEUE:
1727 case WQT_SELECT:
1728 circle_queue_init(&waitq.wq_q->waitq_queue);
1729 break;
1730 }
1731
1732 if (policy & SYNC_POLICY_INIT_LOCKED) {
1733 hw_lck_ticket_init_locked(&waitq.wq_q->waitq_interlock, &waitq_lck_grp);
1734 } else {
1735 hw_lck_ticket_init(&waitq.wq_q->waitq_interlock, &waitq_lck_grp);
1736 }
1737 }
1738
1739 void
waitq_deinit(waitq_t waitq)1740 waitq_deinit(waitq_t waitq)
1741 {
1742 waitq_type_t type = waitq_type(waitq);
1743
1744 switch (type) {
1745 case WQT_QUEUE:
1746 assert(circle_queue_empty(&waitq.wq_q->waitq_queue));
1747 waitq_invalidate(waitq);
1748 break;
1749
1750 case WQT_TURNSTILE:
1751 assert(priority_queue_empty(&waitq.wq_q->waitq_prio_queue));
1752 assert(waitq.wq_q->waitq_inheritor == TURNSTILE_INHERITOR_NULL);
1753 waitq_invalidate(waitq);
1754 break;
1755
1756 case WQT_PORT:
1757 assert(waitq.wq_q->waitq_ts == TURNSTILE_NULL);
1758 assert(circle_queue_empty(&waitq.wq_q->waitq_links));
1759 break;
1760
1761 case WQT_SELECT:
1762 assert(waitq.wq_q->waitq_sellinks.next == NULL);
1763 assert(circle_queue_empty(&waitq.wqs_set->wqset_queue));
1764 break;
1765
1766 case WQT_PORT_SET:
1767 assert(circle_queue_empty(&waitq.wqs_set->wqset_queue));
1768 assert(circle_queue_empty(&waitq.wqs_set->wqset_links));
1769 assert(circle_queue_empty(&waitq.wqs_set->wqset_preposts));
1770 break;
1771
1772 default:
1773 panic("invalid wait type: %p/%d", waitq.wq_q, type);
1774 }
1775
1776 /*
1777 * The waitq must have been invalidated, or hw_lck_ticket_destroy()
1778 * below won't wait for reservations from waitq_lock_reserve(),
1779 * or waitq_lock_allow_invalid().
1780 */
1781 assert(!waitq_valid(waitq.wqs_set));
1782 hw_lck_ticket_destroy(&waitq.wq_q->waitq_interlock, &waitq_lck_grp);
1783 }
1784
1785
1786 #pragma mark port-set sets
1787
1788 void
waitq_set_unlink_all_locked(struct waitq_set * wqset,waitq_link_list_t * free_l)1789 waitq_set_unlink_all_locked(struct waitq_set *wqset, waitq_link_list_t *free_l)
1790 {
1791 uint32_t batch = waitq_set_unlink_batch;
1792
1793 waitq_invalidate(wqset);
1794
1795 for (;;) {
1796 struct waitq_link *link;
1797 queue_entry_t elt;
1798 circle_queue_t q;
1799 struct waitq *wq;
1800 uint32_t ticket;
1801 bool stable = true;
1802
1803 if (!circle_queue_empty(&wqset->wqset_links)) {
1804 q = &wqset->wqset_links;
1805 } else if (!circle_queue_empty(&wqset->wqset_preposts)) {
1806 q = &wqset->wqset_preposts;
1807 } else {
1808 break;
1809 }
1810
1811 if (batch-- == 0) {
1812 waitq_unlock(wqset);
1813 waitq_lock(wqset);
1814 batch = waitq_set_unlink_batch;
1815 continue;
1816 }
1817
1818 elt = circle_queue_first(q);
1819 link = cqe_element(elt, struct waitq_link, wql_slink);
1820 wq = link->wql_wq;
1821
1822 if (__improbable(!waitq_lock_reserve(wq, &ticket))) {
1823 waitq_unlock(wqset);
1824 waitq_lock_wait(wq, ticket);
1825 waitq_lock(wqset);
1826 stable = (elt == circle_queue_first(q) && link->wql_wq == wq);
1827 }
1828
1829 if (stable) {
1830 circle_dequeue(q, &link->wql_slink);
1831 circle_dequeue(&wq->waitq_links, &link->wql_qlink);
1832 wql_list_push(free_l, link);
1833 }
1834
1835 waitq_unlock(wq);
1836 }
1837 }
1838
1839 void
waitq_clear_prepost_locked(struct waitq * waitq)1840 waitq_clear_prepost_locked(struct waitq *waitq)
1841 {
1842 assert(waitq_type(waitq) == WQT_PORT);
1843 waitq->waitq_preposted = false;
1844 }
1845
1846 void
1847 waitq_set_foreach_member_locked(struct waitq_set *wqs, void (^cb)(struct waitq *))
1848 {
1849 struct waitq_link *link;
1850
1851 cqe_foreach_element(link, &wqs->wqset_links, wql_slink) {
1852 cb(link->wql_wq);
1853 }
1854
1855 cqe_foreach_element(link, &wqs->wqset_preposts, wql_slink) {
1856 cb(link->wql_wq);
1857 }
1858 }
1859
1860 __abortlike
1861 static void
__waitq_link_arguments_panic(struct waitq * waitq,struct waitq_set * wqset)1862 __waitq_link_arguments_panic(struct waitq *waitq, struct waitq_set *wqset)
1863 {
1864 if (!waitq_valid(waitq)) {
1865 panic("Invalid waitq: %p", waitq);
1866 }
1867 if (waitq_type(waitq) != WQT_PORT) {
1868 panic("Invalid waitq type: %p:%d", waitq, waitq->waitq_type);
1869 }
1870 panic("Invalid waitq-set: %p", wqset);
1871 }
1872
1873 static inline void
__waitq_link_arguments_validate(struct waitq * waitq,struct waitq_set * wqset)1874 __waitq_link_arguments_validate(struct waitq *waitq, struct waitq_set *wqset)
1875 {
1876 if (!waitq_valid(waitq) ||
1877 waitq_type(waitq) != WQT_PORT ||
1878 waitq_type(wqset) != WQT_PORT_SET) {
1879 __waitq_link_arguments_panic(waitq, wqset);
1880 }
1881 }
1882
1883 __abortlike
1884 static void
__waitq_invalid_panic(waitq_t waitq)1885 __waitq_invalid_panic(waitq_t waitq)
1886 {
1887 panic("Invalid waitq: %p", waitq.wq_q);
1888 }
1889
1890 static void
__waitq_validate(waitq_t waitq)1891 __waitq_validate(waitq_t waitq)
1892 {
1893 if (!waitq_valid(waitq)) {
1894 __waitq_invalid_panic(waitq);
1895 }
1896 }
1897
1898 kern_return_t
waitq_link_locked(struct waitq * waitq,struct waitq_set * wqset,waitq_link_t * linkp)1899 waitq_link_locked(struct waitq *waitq, struct waitq_set *wqset,
1900 waitq_link_t *linkp)
1901 {
1902 assert(linkp->wqlh);
1903
1904 __waitq_link_arguments_validate(waitq, wqset);
1905
1906 if (wql_find(waitq, wqset)) {
1907 return KERN_ALREADY_IN_SET;
1908 }
1909
1910 linkp->wqll->wql_wq = waitq;
1911 linkp->wqll->wql_wqs = (uintptr_t)wqset;
1912
1913 if (waitq_valid(wqset)) {
1914 circle_enqueue_tail(&wqset->wqset_links, &linkp->wqll->wql_slink);
1915 circle_enqueue_tail(&waitq->waitq_links, &linkp->wqll->wql_qlink);
1916 *linkp = WQL_NULL;
1917 }
1918
1919 return KERN_SUCCESS;
1920 }
1921
1922 kern_return_t
waitq_link_prepost_locked(struct waitq * waitq,struct waitq_set * wqset)1923 waitq_link_prepost_locked(struct waitq *waitq, struct waitq_set *wqset)
1924 {
1925 struct waitq_link *link;
1926
1927 __waitq_link_arguments_validate(waitq, wqset);
1928
1929 link = wql_find(waitq, wqset);
1930 if (link == NULL) {
1931 return KERN_NOT_IN_SET;
1932 }
1933
1934 if (!wql_wqs_preposted(link)) {
1935 wql_wqs_mark_preposted(link);
1936 waitq->waitq_preposted = true;
1937 circle_dequeue(&wqset->wqset_links, &link->wql_slink);
1938 circle_enqueue_tail(&wqset->wqset_preposts, &link->wql_slink);
1939 ipc_pset_prepost(wqset, waitq);
1940 }
1941
1942 return KERN_SUCCESS;
1943 }
1944
1945 waitq_link_t
waitq_unlink_locked(struct waitq * waitq,struct waitq_set * wqset)1946 waitq_unlink_locked(struct waitq *waitq, struct waitq_set *wqset)
1947 {
1948 struct waitq_link *link;
1949
1950 __waitq_link_arguments_validate(waitq, wqset);
1951
1952 link = wql_find(waitq, wqset);
1953 if (link) {
1954 circle_dequeue(wql_wqs_queue(wqset, link), &link->wql_slink);
1955 circle_dequeue(&waitq->waitq_links, &link->wql_qlink);
1956 }
1957
1958 return (waitq_link_t){ .wqll = link };
1959 }
1960
1961 void
waitq_unlink_all_locked(struct waitq * waitq,struct waitq_set * except_wqset,waitq_link_list_t * free_l)1962 waitq_unlink_all_locked(struct waitq *waitq, struct waitq_set *except_wqset,
1963 waitq_link_list_t *free_l)
1964 {
1965 struct waitq_link *kept_link = NULL;
1966 struct waitq_link *link;
1967
1968 assert(waitq_type(waitq) == WQT_PORT);
1969
1970 cqe_foreach_element_safe(link, &waitq->waitq_links, wql_qlink) {
1971 waitq_t wqs = wql_wqs(link);
1972
1973 if (wqs.wqs_set == except_wqset) {
1974 kept_link = link;
1975 continue;
1976 }
1977
1978 waitq_lock(wqs);
1979 circle_dequeue(wql_wqs_queue(wqs.wqs_set, link),
1980 &link->wql_slink);
1981 wql_list_push(free_l, link);
1982 waitq_unlock(wqs);
1983 }
1984
1985 circle_queue_init(&waitq->waitq_links);
1986 if (kept_link) {
1987 circle_enqueue_tail(&waitq->waitq_links, &kept_link->wql_qlink);
1988 }
1989 }
1990
1991 struct waitq *
waitq_set_first_prepost(struct waitq_set * wqset,wqs_prepost_flags_t flags)1992 waitq_set_first_prepost(struct waitq_set *wqset, wqs_prepost_flags_t flags)
1993 {
1994 circle_queue_t q = &wqset->wqset_preposts;
1995 queue_entry_t elt;
1996 struct waitq_link *link;
1997 struct waitq *wq;
1998 uint32_t ticket;
1999
2000 if (__improbable(!waitq_valid(wqset))) {
2001 return NULL;
2002 }
2003
2004 while (!circle_queue_empty(q)) {
2005 elt = circle_queue_first(q);
2006 link = cqe_element(elt, struct waitq_link, wql_slink);
2007 wq = link->wql_wq;
2008
2009 if (__improbable(!waitq_lock_reserve(wq, &ticket))) {
2010 waitq_unlock(wqset);
2011 waitq_lock_wait(wq, ticket);
2012 waitq_lock(wqset);
2013 if (!waitq_valid(wqset)) {
2014 waitq_unlock(wq);
2015 return NULL;
2016 }
2017
2018 if (elt != circle_queue_first(q) || link->wql_wq != wq) {
2019 waitq_unlock(wq);
2020 continue;
2021 }
2022 }
2023
2024 if (wq->waitq_preposted) {
2025 if ((flags & WQS_PREPOST_PEEK) == 0) {
2026 circle_queue_rotate_head_forward(q);
2027 }
2028 if ((flags & WQS_PREPOST_LOCK) == 0) {
2029 waitq_unlock(wq);
2030 }
2031 return wq;
2032 }
2033
2034 /*
2035 * We found a link that is no longer preposted,
2036 * someone must have called waitq_clear_prepost_locked()
2037 * and this set just only noticed.
2038 */
2039 wql_wqs_clear_preposted(link);
2040 waitq_unlock(wq);
2041
2042 circle_dequeue(q, &link->wql_slink);
2043 circle_enqueue_tail(&wqset->wqset_links, &link->wql_slink);
2044 }
2045
2046 return NULL;
2047 }
2048
2049
2050 #pragma mark select sets
2051
2052 /**
2053 * @function select_set_nextid()
2054 *
2055 * @brief
2056 * Generate a unique ID for a select set "generation"
2057 *
2058 * @discussion
2059 * This mixes the CPU number with a monotonic clock
2060 * (in order to avoid contention on a global atomic).
2061 *
2062 * In order for select sets to be invalidated very quickly,
2063 * they do not have backward linkages to their member queues.
2064 *
2065 * Instead, each time a new @c select() "pass" is initiated,
2066 * a new ID is generated, which is copied onto the @c waitq_sellink
2067 * links at the time of link.
2068 *
2069 * The zone for select sets is sequestered, which allows for select
2070 * wait queues to speculatively lock their set during prepost
2071 * and use this ID to debounce wakeups and avoid spurious wakeups
2072 * (as an "optimization" because select recovers from spurious wakeups,
2073 * we just want those to be very rare).
2074 */
2075 __attribute__((always_inline))
2076 static inline uint64_t
select_set_nextid(bool preemption_enabled)2077 select_set_nextid(bool preemption_enabled)
2078 {
2079 /* waitq_bootstrap() set the low byte to a unique value per CPU */
2080 static_assert(MAX_CPUS <= 256);
2081 const uint64_t inc = 256;
2082 uint64_t id;
2083
2084 #ifdef __x86_64__
2085 /* uncontended atomics are slower than disabling preemption on Intel */
2086 if (preemption_enabled) {
2087 disable_preemption();
2088 }
2089 id = (*PERCPU_GET(select_setid) += inc);
2090 if (preemption_enabled) {
2091 enable_preemption();
2092 }
2093 #else
2094 /*
2095 * if preemption is enabled this might update another CPU's
2096 * setid, which will be rare but is acceptable, it still
2097 * produces a unique select ID.
2098 *
2099 * We chose this because the uncontended atomics on !intel
2100 * are faster than disabling/reenabling preemption.
2101 */
2102 (void)preemption_enabled;
2103 id = os_atomic_add(PERCPU_GET(select_setid), inc, relaxed);
2104 #endif
2105
2106 return id;
2107 }
2108
2109 struct select_set *
select_set_alloc(void)2110 select_set_alloc(void)
2111 {
2112 struct select_set *selset;
2113 selset = zalloc_id(ZONE_ID_SELECT_SET, Z_ZERO | Z_WAITOK | Z_NOFAIL);
2114
2115 waitq_init(selset, WQT_SELECT_SET, SYNC_POLICY_FIFO);
2116 selset->selset_id = select_set_nextid(true);
2117
2118 return selset;
2119 }
2120
2121 __abortlike
2122 static void
__select_set_link_arguments_panic(struct waitq * waitq,struct select_set * set)2123 __select_set_link_arguments_panic(struct waitq *waitq, struct select_set *set)
2124 {
2125 if (!waitq_valid(waitq)) {
2126 panic("Invalid waitq: %p", waitq);
2127 }
2128 if (waitq_type(waitq) != WQT_SELECT) {
2129 panic("Invalid waitq type: %p:%d", waitq, waitq->waitq_type);
2130 }
2131 panic("Invalid waitq-set: %p", set);
2132 }
2133
2134 static inline void
__select_set_link_arguments_validate(struct waitq * waitq,struct select_set * set)2135 __select_set_link_arguments_validate(struct waitq *waitq, struct select_set *set)
2136 {
2137 if (!waitq_valid(waitq) ||
2138 waitq_type(waitq) != WQT_SELECT ||
2139 waitq_type(set) != WQT_SELECT_SET) {
2140 __select_set_link_arguments_panic(waitq, set);
2141 }
2142 }
2143
2144 void
select_set_link(struct waitq * waitq,struct select_set * set,waitq_link_t * linkp)2145 select_set_link(struct waitq *waitq, struct select_set *set,
2146 waitq_link_t *linkp)
2147 {
2148 struct waitq_sellink *link;
2149
2150 __select_set_link_arguments_validate(waitq, set);
2151
2152 waitq_lock(waitq);
2153
2154 if (waitq == &select_conflict_queue) {
2155 waitq_lock(set);
2156 set->selset_conflict = true;
2157 waitq_unlock(set);
2158 }
2159
2160 wql_list_foreach(link, &waitq->waitq_sellinks) {
2161 if (waitq_same(wql_wqs(link), set)) {
2162 goto found;
2163 }
2164 }
2165
2166 link = linkp->wqls;
2167 *linkp = WQL_NULL;
2168 wql_list_push(&waitq->waitq_sellinks, link);
2169
2170 found:
2171 link->wql_wqs = (uintptr_t)set;
2172 link->wql_setid = set->selset_id;
2173 waitq_unlock(waitq);
2174 }
2175
2176 static void
select_set_unlink_conflict_queue(struct select_set * set)2177 select_set_unlink_conflict_queue(struct select_set *set)
2178 {
2179 struct waitq_link_list_entry **prev;
2180 struct waitq_sellink *link;
2181
2182 waitq_lock(&select_conflict_queue);
2183
2184 /*
2185 * We know the conflict queue is hooked,
2186 * so find the linkage and free it.
2187 */
2188 prev = &select_conflict_queue.waitq_sellinks.next;
2189 for (;;) {
2190 assert(*prev);
2191 link = wql_list_elem(*prev);
2192 if (waitq_same(wql_wqs(link), set)) {
2193 *prev = link->wql_next.next;
2194 break;
2195 }
2196 prev = &link->wql_next.next;
2197 }
2198
2199 waitq_unlock(&select_conflict_queue);
2200
2201 waitq_link_free(WQT_SELECT_SET, link);
2202 }
2203
2204 static void
__select_set_reset(struct select_set * set,bool invalidate)2205 __select_set_reset(struct select_set *set, bool invalidate)
2206 {
2207 if (set->selset_conflict) {
2208 select_set_unlink_conflict_queue(set);
2209 }
2210
2211 waitq_lock(set);
2212 if (invalidate) {
2213 waitq_invalidate(set);
2214 }
2215 set->selset_id = select_set_nextid(false);
2216 set->selset_preposted = 0;
2217 set->selset_conflict = 0;
2218 waitq_unlock(set);
2219 }
2220
2221 void
select_set_reset(struct select_set * set)2222 select_set_reset(struct select_set *set)
2223 {
2224 __select_set_reset(set, false);
2225 }
2226
2227 void
select_set_free(struct select_set * set)2228 select_set_free(struct select_set *set)
2229 {
2230 __select_set_reset(set, true);
2231 hw_lck_ticket_destroy(&set->selset_interlock, &waitq_lck_grp);
2232 zfree_id(ZONE_ID_SELECT_SET, set);
2233 }
2234
2235 void
select_waitq_wakeup_and_deinit(struct waitq * waitq,event64_t wake_event,wait_result_t result)2236 select_waitq_wakeup_and_deinit(
2237 struct waitq *waitq,
2238 event64_t wake_event,
2239 wait_result_t result)
2240 {
2241 waitq_link_list_t free_l = { };
2242
2243 if (waitq_is_valid(waitq)) {
2244 assert(waitq_type(waitq) == WQT_SELECT);
2245
2246 waitq_lock(waitq);
2247
2248 waitq_wakeup64_all_locked(waitq, wake_event, result,
2249 WAITQ_KEEP_LOCKED);
2250
2251 waitq_invalidate(waitq);
2252 free_l = waitq->waitq_sellinks;
2253 waitq->waitq_sellinks.next = NULL;
2254
2255 waitq_unlock(waitq);
2256
2257 waitq_link_free_list(WQT_SELECT, &free_l);
2258
2259 waitq_deinit(waitq);
2260 }
2261 }
2262
2263 #pragma mark assert_wait / wakeup (high level)
2264
2265 wait_result_t
waitq_assert_wait64(struct waitq * waitq,event64_t wait_event,wait_interrupt_t interruptible,uint64_t deadline)2266 waitq_assert_wait64(struct waitq *waitq,
2267 event64_t wait_event,
2268 wait_interrupt_t interruptible,
2269 uint64_t deadline)
2270 {
2271 thread_t thread = current_thread();
2272 wait_result_t ret;
2273 spl_t s = 0;
2274
2275 __waitq_validate(waitq);
2276
2277 if (waitq_irq_safe(waitq)) {
2278 s = splsched();
2279 }
2280 waitq_lock(waitq);
2281
2282 ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible,
2283 TIMEOUT_URGENCY_SYS_NORMAL, deadline, TIMEOUT_NO_LEEWAY, thread);
2284
2285 waitq_unlock(waitq);
2286 if (waitq_irq_safe(waitq)) {
2287 splx(s);
2288 }
2289
2290 return ret;
2291 }
2292
2293 wait_result_t
waitq_assert_wait64_leeway(struct waitq * waitq,event64_t wait_event,wait_interrupt_t interruptible,wait_timeout_urgency_t urgency,uint64_t deadline,uint64_t leeway)2294 waitq_assert_wait64_leeway(struct waitq *waitq,
2295 event64_t wait_event,
2296 wait_interrupt_t interruptible,
2297 wait_timeout_urgency_t urgency,
2298 uint64_t deadline,
2299 uint64_t leeway)
2300 {
2301 wait_result_t ret;
2302 thread_t thread = current_thread();
2303 spl_t s = 0;
2304
2305 __waitq_validate(waitq);
2306
2307 if (waitq_irq_safe(waitq)) {
2308 s = splsched();
2309 }
2310 waitq_lock(waitq);
2311
2312 ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible,
2313 urgency, deadline, leeway, thread);
2314
2315 waitq_unlock(waitq);
2316 if (waitq_irq_safe(waitq)) {
2317 splx(s);
2318 }
2319
2320 return ret;
2321 }
2322
2323 uint32_t
waitq_wakeup64_nthreads(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags,uint32_t nthreads)2324 waitq_wakeup64_nthreads(
2325 waitq_t waitq,
2326 event64_t wake_event,
2327 wait_result_t result,
2328 waitq_wakeup_flags_t flags,
2329 uint32_t nthreads)
2330 {
2331 __waitq_validate(waitq);
2332
2333 spl_t spl = 0;
2334
2335 if (waitq_irq_safe(waitq)) {
2336 spl = splsched();
2337 }
2338
2339 waitq_lock(waitq);
2340
2341 /* waitq is unlocked upon return, splx is handled */
2342 return waitq_wakeup64_nthreads_locked(waitq, wake_event, result,
2343 flags | waitq_flags_splx(spl) | WAITQ_UNLOCK, nthreads);
2344 }
2345
2346 kern_return_t
waitq_wakeup64_all(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)2347 waitq_wakeup64_all(
2348 waitq_t waitq,
2349 event64_t wake_event,
2350 wait_result_t result,
2351 waitq_wakeup_flags_t flags)
2352 {
2353 uint32_t count;
2354
2355 count = waitq_wakeup64_nthreads(waitq, wake_event, result,
2356 flags, UINT32_MAX);
2357 return count ? KERN_SUCCESS : KERN_NOT_WAITING;
2358 }
2359
2360 kern_return_t
waitq_wakeup64_one(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)2361 waitq_wakeup64_one(
2362 waitq_t waitq,
2363 event64_t wake_event,
2364 wait_result_t result,
2365 waitq_wakeup_flags_t flags)
2366 {
2367 uint32_t count;
2368
2369 count = waitq_wakeup64_nthreads(waitq, wake_event, result, flags, 1);
2370 return count ? KERN_SUCCESS : KERN_NOT_WAITING;
2371 }
2372
2373 kern_return_t
waitq_wakeup64_thread(struct waitq * waitq,event64_t event,thread_t thread,wait_result_t result)2374 waitq_wakeup64_thread(
2375 struct waitq *waitq,
2376 event64_t event,
2377 thread_t thread,
2378 wait_result_t result)
2379 {
2380 spl_t s = splsched();
2381 kern_return_t ret;
2382
2383 __waitq_validate(waitq);
2384 assert(waitq_irq_safe(waitq));
2385 waitq_lock(waitq);
2386
2387 ret = waitq_wakeup64_thread_and_unlock(waitq, event, thread, result);
2388
2389 splx(s);
2390
2391 return ret;
2392 }
2393
2394 thread_t
waitq_wakeup64_identify(waitq_t waitq,event64_t wake_event,wait_result_t result,waitq_wakeup_flags_t flags)2395 waitq_wakeup64_identify(
2396 waitq_t waitq,
2397 event64_t wake_event,
2398 wait_result_t result,
2399 waitq_wakeup_flags_t flags)
2400 {
2401 __waitq_validate(waitq);
2402
2403 spl_t spl = 0;
2404
2405 if (waitq_irq_safe(waitq)) {
2406 spl = splsched();
2407 }
2408
2409 waitq_lock(waitq);
2410
2411 thread_t thread = waitq_wakeup64_identify_locked(waitq, wake_event,
2412 flags | waitq_flags_splx(spl) | WAITQ_UNLOCK);
2413 /* waitq is unlocked, thread is not go-ed yet */
2414 /* preemption disabled if thread non-null */
2415 /* splx is handled */
2416
2417 if (thread != THREAD_NULL) {
2418 thread_reference(thread);
2419 waitq_resume_identified_thread(waitq, thread, result, flags);
2420 /* preemption enabled, thread go-ed */
2421 /* returns +1 ref to running thread */
2422 return thread;
2423 }
2424
2425 return THREAD_NULL;
2426 }
2427
2428
2429 #pragma mark tests
2430 #if DEBUG || DEVELOPMENT
2431
2432 #include <ipc/ipc_space.h>
2433 #include <ipc/ipc_pset.h>
2434 #include <sys/errno.h>
2435
2436 #define MAX_GLOBAL_TEST_QUEUES 64
2437 static struct waitq wqt_waitq_array[MAX_GLOBAL_TEST_QUEUES];
2438 static bool wqt_running;
2439 static bool wqt_init;
2440
2441 static bool
wqt_start(const char * test,int64_t * out)2442 wqt_start(const char *test, int64_t *out)
2443 {
2444 if (os_atomic_xchg(&wqt_running, true, acquire)) {
2445 *out = 0;
2446 return false;
2447 }
2448
2449 if (!wqt_init) {
2450 wqt_init = true;
2451 for (int i = 0; i < MAX_GLOBAL_TEST_QUEUES; i++) {
2452 waitq_init(&wqt_waitq_array[i], WQT_PORT, SYNC_POLICY_FIFO);
2453 }
2454 }
2455
2456 printf("[WQ] starting %s\n", test);
2457 return true;
2458 }
2459
2460 static int
wqt_end(const char * test,int64_t * out)2461 wqt_end(const char *test, int64_t *out)
2462 {
2463 os_atomic_store(&wqt_running, false, release);
2464 printf("[WQ] done %s\n", test);
2465 *out = 1;
2466 return 0;
2467 }
2468
2469 static struct waitq *
wqt_wq(uint32_t index)2470 wqt_wq(uint32_t index)
2471 {
2472 return &wqt_waitq_array[index];
2473 }
2474
2475 static uint32_t
wqt_idx(struct waitq * waitq)2476 wqt_idx(struct waitq *waitq)
2477 {
2478 assert(waitq >= wqt_waitq_array &&
2479 waitq < wqt_waitq_array + MAX_GLOBAL_TEST_QUEUES);
2480 return (uint32_t)(waitq - wqt_waitq_array);
2481 }
2482
2483 __attribute__((overloadable))
2484 static uint64_t
wqt_bit(uint32_t index)2485 wqt_bit(uint32_t index)
2486 {
2487 return 1ull << index;
2488 }
2489
2490 __attribute__((overloadable))
2491 static uint64_t
wqt_bit(struct waitq * waitq)2492 wqt_bit(struct waitq *waitq)
2493 {
2494 return wqt_bit(wqt_idx(waitq));
2495 }
2496
2497 static struct waitq_set *
wqt_wqset_create(void)2498 wqt_wqset_create(void)
2499 {
2500 struct waitq_set *wqset;
2501
2502 wqset = &ipc_pset_alloc_special(ipc_space_kernel)->ips_wqset;
2503 waitq_unlock(wqset);
2504
2505 printf("[WQ]: created waitq set %p\n", wqset);
2506 return wqset;
2507 }
2508
2509 static void
wqt_wqset_free(struct waitq_set * wqset)2510 wqt_wqset_free(struct waitq_set *wqset)
2511 {
2512 printf("[WQ]: destroying waitq set %p\n", wqset);
2513 waitq_lock(wqset);
2514 ipc_pset_destroy(ipc_space_kernel,
2515 __container_of(wqset, struct ipc_pset, ips_wqset));
2516 }
2517
2518 static void
wqt_link(uint32_t index,struct waitq_set * wqset,kern_return_t want)2519 wqt_link(uint32_t index, struct waitq_set *wqset, kern_return_t want)
2520 {
2521 struct waitq *waitq = wqt_wq(index);
2522 waitq_link_t link = waitq_link_alloc(WQT_PORT_SET);
2523 kern_return_t kr;
2524
2525 printf("[WQ]: linking waitq [%d] to global wqset (%p)\n", index, wqset);
2526
2527 waitq_lock(waitq);
2528 waitq_lock(wqset);
2529 kr = waitq_link_locked(waitq, wqset, &link);
2530 waitq_unlock(wqset);
2531 waitq_unlock(waitq);
2532
2533 if (link.wqlh) {
2534 waitq_link_free(WQT_PORT_SET, link);
2535 }
2536
2537 printf("[WQ]:\tkr=%d\texpected=%d\n", kr, want);
2538 assert(kr == want);
2539 }
2540
2541 static void
wqt_unlink(uint32_t index,struct waitq_set * wqset,__assert_only kern_return_t want)2542 wqt_unlink(uint32_t index, struct waitq_set *wqset, __assert_only kern_return_t want)
2543 {
2544 struct waitq *waitq = wqt_wq(index);
2545 waitq_link_t link;
2546 kern_return_t kr;
2547
2548 printf("[WQ]: unlinking waitq [%d] from global wqset (%p)\n",
2549 index, wqset);
2550
2551 waitq_lock(waitq);
2552 waitq_lock(wqset);
2553 link = waitq_unlink_locked(waitq, wqset);
2554 waitq_unlock(wqset);
2555 waitq_unlock(waitq);
2556
2557 if (link.wqlh) {
2558 waitq_link_free(WQT_PORT_SET, link);
2559 kr = KERN_SUCCESS;
2560 } else {
2561 kr = KERN_NOT_IN_SET;
2562 }
2563
2564 printf("[WQ]: \tkr=%d\n", kr);
2565 assert(kr == want);
2566 }
2567
2568 static void
wqt_wakeup_one(uint32_t index,event64_t event64,__assert_only kern_return_t want)2569 wqt_wakeup_one(uint32_t index, event64_t event64, __assert_only kern_return_t want)
2570 {
2571 kern_return_t kr;
2572
2573 printf("[WQ]: Waking one thread on waitq [%d] event:0x%llx\n",
2574 index, event64);
2575 kr = waitq_wakeup64_one(wqt_wq(index), event64,
2576 THREAD_AWAKENED, WAITQ_WAKEUP_DEFAULT);
2577 printf("[WQ]: \tkr=%d\n", kr);
2578 assert(kr == want);
2579 }
2580
2581 static void
wqt_clear_preposts(uint32_t idx)2582 wqt_clear_preposts(uint32_t idx)
2583 {
2584 waitq_lock(wqt_wq(idx));
2585 (void)waitq_clear_prepost_locked(wqt_wq(idx));
2586 waitq_unlock(wqt_wq(idx));
2587 }
2588
2589 static void
wqt_preposts_gc_locked(struct waitq_set * wqset)2590 wqt_preposts_gc_locked(struct waitq_set *wqset)
2591 {
2592 circle_queue_t q = &wqset->wqset_preposts;
2593 struct waitq_link *link;
2594 uint32_t ticket;
2595
2596 again:
2597 cqe_foreach_element_safe(link, q, wql_slink) {
2598 struct waitq *wq = link->wql_wq;
2599
2600 if (!waitq_lock_reserve(wq, &ticket)) {
2601 waitq_unlock(wqset);
2602 waitq_lock_wait(wq, ticket);
2603 waitq_lock(wqset);
2604 waitq_unlock(wq);
2605 /* the list was possibly mutated, restart */
2606 goto again;
2607 }
2608
2609 if (!wq->waitq_preposted) {
2610 wql_wqs_clear_preposted(link);
2611 circle_dequeue(q, &link->wql_slink);
2612 circle_enqueue_tail(&wqset->wqset_links, &link->wql_slink);
2613 }
2614
2615 waitq_unlock(wq);
2616 }
2617 }
2618
2619 static void
wqt_expect_preposts(struct waitq_set * wqset,__assert_only uint64_t preposts)2620 wqt_expect_preposts(struct waitq_set *wqset, __assert_only uint64_t preposts)
2621 {
2622 struct waitq_link *link;
2623 uint64_t found = 0;
2624
2625 waitq_lock(wqset);
2626
2627 wqt_preposts_gc_locked(wqset);
2628
2629 cqe_foreach_element(link, &wqset->wqset_preposts, wql_slink) {
2630 struct waitq *waitq = link->wql_wq;
2631
2632 printf("[WQ]: found prepost %d\n", wqt_idx(waitq));
2633 assertf((found & wqt_bit(waitq)) == 0,
2634 "found waitq %d twice", wqt_idx(waitq));
2635 found |= wqt_bit(waitq);
2636 }
2637
2638 waitq_unlock(wqset);
2639
2640 assertf(found == preposts, "preposts expected 0x%llx, but got 0x%llx",
2641 preposts, found);
2642 }
2643
2644 static int
waitq_basic_test(__unused int64_t in,int64_t * out)2645 waitq_basic_test(__unused int64_t in, int64_t *out)
2646 {
2647 struct waitq_set *wqset;
2648
2649 if (!wqt_start(__func__, out)) {
2650 return EBUSY;
2651 }
2652
2653 wqset = wqt_wqset_create();
2654 wqt_link(10, wqset, KERN_SUCCESS);
2655 wqt_link(10, wqset, KERN_ALREADY_IN_SET);
2656 wqt_link(11, wqset, KERN_SUCCESS);
2657 wqt_link(11, wqset, KERN_ALREADY_IN_SET);
2658 wqt_link(12, wqset, KERN_SUCCESS);
2659 wqt_link(12, wqset, KERN_ALREADY_IN_SET);
2660
2661 wqt_wakeup_one(10, NO_EVENT64, KERN_NOT_WAITING);
2662 wqt_wakeup_one(12, NO_EVENT64, KERN_NOT_WAITING);
2663
2664 wqt_expect_preposts(wqset, wqt_bit(10) | wqt_bit(12));
2665 wqt_clear_preposts(10);
2666
2667 wqt_expect_preposts(wqset, wqt_bit(12));
2668 wqt_clear_preposts(12);
2669
2670 wqt_expect_preposts(wqset, 0);
2671
2672 wqt_unlink(12, wqset, KERN_SUCCESS);
2673 wqt_unlink(12, wqset, KERN_NOT_IN_SET);
2674 wqt_unlink(11, wqset, KERN_SUCCESS);
2675 wqt_unlink(10, wqset, KERN_SUCCESS);
2676 wqt_wqset_free(wqset);
2677
2678 return wqt_end(__func__, out);
2679 }
2680 SYSCTL_TEST_REGISTER(waitq_basic, waitq_basic_test);
2681 #endif /* DEBUG || DEVELOPMENT */
2682