1 /*
2 * Copyright (c) 2015-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_FREE_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or [email protected]
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57 /*
58 * un-comment the following lines to debug the link/prepost tables
59 * NOTE: this expands each element by ~40 bytes
60 */
61 //#define KEEP_WAITQ_LINK_STATS
62 //#define KEEP_WAITQ_PREPOST_STATS
63
64 #include <kern/ast.h>
65 #include <kern/backtrace.h>
66 #include <kern/kern_types.h>
67 #include <kern/ltable.h>
68 #include <kern/mach_param.h>
69 #include <kern/percpu.h>
70 #include <kern/queue.h>
71 #include <kern/sched_prim.h>
72 #include <kern/simple_lock.h>
73 #include <kern/spl.h>
74 #include <kern/waitq.h>
75 #include <kern/zalloc.h>
76 #include <kern/policy_internal.h>
77 #include <kern/turnstile.h>
78
79 #include <os/hash.h>
80 #include <libkern/section_keywords.h>
81 #include <mach/sync_policy.h>
82 #include <vm/vm_kern.h>
83
84 #include <sys/kdebug.h>
85
86 #if defined(KEEP_WAITQ_LINK_STATS) || defined(KEEP_WAITQ_PREPOST_STATS)
87 # if !CONFIG_LTABLE_STATS
88 # error "You must configure LTABLE_STATS to use WAITQ_[LINK|PREPOST]_STATS"
89 # endif
90 # if !CONFIG_WAITQ_STATS
91 # error "You must configure WAITQ_STATS to use WAITQ_[LINK|PREPOST]_STATS"
92 # endif
93 #endif
94
95 #if CONFIG_WAITQ_DEBUG
96 #define wqdbg(fmt, ...) \
97 printf("WQ[%s]: " fmt "\n", __func__, ## __VA_ARGS__)
98 #else
99 #define wqdbg(fmt, ...) do { } while (0)
100 #endif
101
102 #ifdef WAITQ_VERBOSE_DEBUG
103 #define wqdbg_v(fmt, ...) \
104 printf("WQ[v:%s]: " fmt "\n", __func__, ## __VA_ARGS__)
105 #else
106 #define wqdbg_v(fmt, ...) do { } while (0)
107 #endif
108
109 #define wqinfo(fmt, ...) \
110 printf("WQ[%s]: " fmt "\n", __func__, ## __VA_ARGS__)
111
112 #define wqerr(fmt, ...) \
113 printf("WQ[%s] ERROR: " fmt "\n", __func__, ## __VA_ARGS__)
114
/* waitq prepost cache */
#define WQP_CACHE_MAX 50
/* per-CPU stash of reserved prepost elements, chained through lt_id's */
struct wqp_cache {
	uint64_t head;          /* lt_id of first cached element (0 == empty) */
	unsigned int avail;     /* number of elements currently cached */
};
static struct wqp_cache PERCPU_DATA(wqp_cache);

/* round 'x' up to a multiple of 'align' (align must be a power of two) */
#define P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
/* round 'x' down to a multiple of 'y' */
#define ROUNDDOWN(x, y) (((x)/(y))*(y))


#if CONFIG_LTABLE_STATS || CONFIG_WAITQ_STATS
static __inline__ void waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES], int skip);
#endif

LCK_GRP_DECLARE(waitq_lck_grp, "waitq");

/* free-element watermarks used by wq_prepost_ensure_free_space() */
#define DEFAULT_MIN_FREE_TABLE_ELEM 100
static uint32_t g_min_free_table_elem;
static uint32_t g_min_free_cache;
136
137
138 /* ----------------------------------------------------------------------
139 *
140 * waitq reference helpers
141 *
142 * ---------------------------------------------------------------------- */
143
144 /* note: WAITQ_REF_NULL is considered a pointer */
145 static inline bool
wqr_is_ptr(waitq_ref_t ref)146 wqr_is_ptr(waitq_ref_t ref)
147 {
148 return (ref.wqr_value & 1) == 0;
149 }
150
151 __attribute__((overloadable))
152 static inline waitq_ref_t
wqr_make(const void * ptr)153 wqr_make(const void *ptr)
154 {
155 return (waitq_ref_t){ .wqr_value = (intptr_t)ptr };
156 }
157
158 __attribute__((overloadable))
159 static inline waitq_ref_t
wqr_make(uint64_t lid)160 wqr_make(uint64_t lid)
161 {
162 return (waitq_ref_t){ .wqr_value = lid };
163 }
164
165 static inline bool
wqr_is_equal(waitq_ref_t ref,uint64_t ltable_id)166 wqr_is_equal(waitq_ref_t ref, uint64_t ltable_id)
167 {
168 return ref.wqr_value == ltable_id;
169 }
170
171 static inline bool
wqr_is_null(waitq_ref_t ref)172 wqr_is_null(waitq_ref_t ref)
173 {
174 return ref.wqr_value == 0;
175 }
176
177 static inline void *
wqr_ptr_raw(waitq_ref_t ref)178 wqr_ptr_raw(waitq_ref_t ref)
179 {
180 return (void *)(intptr_t)ref.wqr_value;
181 }
182
183 static inline void *
wqr_ptr(waitq_ref_t ref)184 wqr_ptr(waitq_ref_t ref)
185 {
186 if (wqr_is_ptr(ref)) {
187 return wqr_ptr_raw(ref);
188 }
189 return NULL;
190 }
191
192 /* ----------------------------------------------------------------------
193 *
194 * SetID Link Table Implementation
195 *
196 * ---------------------------------------------------------------------- */
/* global table backing all waitq set-link elements */
static struct link_table g_wqlinktable;

enum wq_link_type {
	WQL_WQS = LT_ELEM,    /* leaf: element refers to a waitq_set */
	WQL_LINK = LT_LINK,   /* interior: chains further set memberships */
};

/*
 * One element of g_wqlinktable. The embedded lt_elem must come first so
 * the generic ltable code can manage the element.
 */
struct waitq_link {
	struct lt_elem wqte;

	union {
		/* wqt_type == WQL_WQS (LT_ELEM) */
		struct waitq_set *wql_set;

		/* wqt_type == WQL_LINK (LT_LINK) */
		struct {
			waitq_ref_t wql_next;   /* next link, or final set ID (see diagram below) */
			uint64_t wql_node;      /* set ID this link refers to */
		};
	};
#ifdef KEEP_WAITQ_LINK_STATS
	/* debug-only allocation / validation history */
	thread_t sl_alloc_th;
	task_t sl_alloc_task;
	uintptr_t sl_alloc_bt[NWAITQ_BTFRAMES];
	uint64_t sl_alloc_ts;
	uintptr_t sl_invalidate_bt[NWAITQ_BTFRAMES];
	uint64_t sl_invalidate_ts;
	uintptr_t sl_mkvalid_bt[NWAITQ_BTFRAMES];
	uint64_t sl_mkvalid_ts;
	uint64_t sl_free_ts;
#endif
};
#if !defined(KEEP_WAITQ_LINK_STATS)
static_assert((sizeof(struct waitq_link) & (sizeof(struct waitq_link) - 1)) == 0,
    "waitq_link struct must be a power of two!");
#endif
233
/* reference count kept in the element's lt_bits */
#define wql_refcnt(link) \
	(lt_bits_refcnt((link)->wqte.lt_bits))

/* WQL_WQS or WQL_LINK (see enum wq_link_type) */
#define wql_type(link) \
	(lt_bits_type((link)->wqte.lt_bits))

/* mark the element valid, recording stats when enabled */
#define wql_mkvalid(link) \
	do { \
	        lt_elem_mkvalid(&(link)->wqte); \
	        wql_do_mkvalid_stats(&(link)->wqte); \
	} while (0)

#define wql_is_valid(link) \
	lt_bits_valid((link)->wqte.lt_bits)

/* convenience accessors for embedded lt_elem fields */
#define wql_setid wqte.lt_id
#define wql_count wqte.lt_next_idx

/* poison values written into freed elements to catch stale use */
#define WQL_WQS_POISON ((void *)(0xf00df00d))
#define WQL_LINK_POISON (0x0bad0badffffffffull)

static KALLOC_TYPE_DEFINE(waitq_link_zone, struct waitq_link, KT_PRIV_ACCT);
256
257 static void
wql_poison(struct link_table * table,struct lt_elem * elem)258 wql_poison(struct link_table *table, struct lt_elem *elem)
259 {
260 struct waitq_link *link = (struct waitq_link *)elem;
261 (void)table;
262
263 switch (wql_type(link)) {
264 case WQL_WQS:
265 link->wql_set = WQL_WQS_POISON;
266 break;
267 case WQL_LINK:
268 link->wql_next.wqr_value = WQL_LINK_POISON;
269 link->wql_node = WQL_LINK_POISON;
270 break;
271 default:
272 break;
273 }
274 #ifdef KEEP_WAITQ_LINK_STATS
275 memset(link->sl_alloc_bt, 0, sizeof(link->sl_alloc_bt));
276 link->sl_alloc_ts = 0;
277 memset(link->sl_mkvalid_bt, 0, sizeof(link->sl_mkvalid_bt));
278 link->sl_mkvalid_ts = 0;
279
280 link->sl_alloc_th = THREAD_NULL;
281 /* leave the sl_alloc_task in place for debugging */
282
283 link->sl_free_ts = mach_absolute_time();
284 #endif
285 }
286
287 #ifdef KEEP_WAITQ_LINK_STATS
/* record allocation backtrace / owner on a freshly allocated link (debug only) */
static __inline__ void
wql_do_alloc_stats(struct lt_elem *elem)
{
	if (elem) {
		struct waitq_link *link = (struct waitq_link *)elem;
		memset(link->sl_alloc_bt, 0, sizeof(link->sl_alloc_bt));
		waitq_grab_backtrace(link->sl_alloc_bt, 0);
		link->sl_alloc_th = current_thread();
		link->sl_alloc_task = current_task();

		/* poison/free path must have cleared the previous timestamp */
		assert(link->sl_alloc_ts == 0);
		link->sl_alloc_ts = mach_absolute_time();

		memset(link->sl_invalidate_bt, 0, sizeof(link->sl_invalidate_bt));
		link->sl_invalidate_ts = 0;
	}
}
305
/* record the backtrace / timestamp of a link invalidation (debug only) */
static __inline__ void
wql_do_invalidate_stats(struct lt_elem *elem)
{
	struct waitq_link *link = (struct waitq_link *)elem;

	if (!elem) {
		return;
	}

	/* an element must have been made valid before it can be invalidated */
	assert(link->sl_mkvalid_ts > 0);

	memset(link->sl_invalidate_bt, 0, sizeof(link->sl_invalidate_bt));
	link->sl_invalidate_ts = mach_absolute_time();
	waitq_grab_backtrace(link->sl_invalidate_bt, 0);
}
321
/* record the backtrace / timestamp of a link being made valid (debug only) */
static __inline__ void
wql_do_mkvalid_stats(struct lt_elem *elem)
{
	struct waitq_link *link = (struct waitq_link *)elem;

	if (!elem) {
		return;
	}

	memset(link->sl_mkvalid_bt, 0, sizeof(link->sl_mkvalid_bt));
	link->sl_mkvalid_ts = mach_absolute_time();
	waitq_grab_backtrace(link->sl_mkvalid_bt, 0);
}
335 #else
336 #define wql_do_alloc_stats(e)
337 #define wql_do_invalidate_stats(e)
338 #define wql_do_mkvalid_stats(e)
339 #endif /* KEEP_WAITQ_LINK_STATS */
340
341 static void
wql_init(void)342 wql_init(void)
343 {
344 uint32_t tablesz = 0, max_links = 0;
345
346 if (PE_parse_boot_argn("wql_tsize", &tablesz, sizeof(tablesz)) != TRUE) {
347 tablesz = (uint32_t)g_lt_max_tbl_size;
348 }
349
350 tablesz = P2ROUNDUP(tablesz, PAGE_SIZE);
351 max_links = tablesz / sizeof(struct waitq_link);
352 assert(max_links > 0 && tablesz > 0);
353
354 /* we have a restricted index range */
355 if (max_links > (LT_IDX_MAX + 1)) {
356 max_links = LT_IDX_MAX + 1;
357 }
358
359 wqinfo("init linktable with max:%d elements (%d bytes)",
360 max_links, tablesz);
361 ltable_init(&g_wqlinktable, "wqslab.wql", max_links,
362 sizeof(struct waitq_link), wql_poison);
363 }
364
365 static struct waitq_link *
wql_alloc_link(int type)366 wql_alloc_link(int type)
367 {
368 struct lt_elem *elem;
369
370 elem = ltable_alloc_elem(&g_wqlinktable, type, 1, 0);
371 wql_do_alloc_stats(elem);
372 return (struct waitq_link *)elem;
373 }
374
/* re-type an already-allocated link element and reset its debug stats */
static void
wql_realloc_link(struct waitq_link *link, int type)
{
	ltable_realloc_elem(&g_wqlinktable, &link->wqte, type);
#ifdef KEEP_WAITQ_LINK_STATS
	/* clear the old timestamp so wql_do_alloc_stats' assert holds */
	memset(link->sl_alloc_bt, 0, sizeof(link->sl_alloc_bt));
	link->sl_alloc_ts = 0;
	wql_do_alloc_stats(&link->wqte);

	memset(link->sl_invalidate_bt, 0, sizeof(link->sl_invalidate_bt));
	link->sl_invalidate_ts = 0;
#endif
}
388
389 static void
wql_invalidate(struct waitq_link * link)390 wql_invalidate(struct waitq_link *link)
391 {
392 lt_elem_invalidate(&link->wqte);
393 wql_do_invalidate_stats(&link->wqte);
394 }
395
396 static bool
wql_link_valid(uint64_t setid)397 wql_link_valid(uint64_t setid)
398 {
399 return ltable_elem_valid(&g_wqlinktable, setid);
400 }
401
402 static struct waitq_link *
wql_get_link(uint64_t setid)403 wql_get_link(uint64_t setid)
404 {
405 struct lt_elem *elem;
406
407 elem = ltable_get_elem(&g_wqlinktable, setid);
408 if (!elem) {
409 return NULL;
410 }
411 return __container_of(elem, struct waitq_link, wqte);
412 }
413
414 static void
wql_put_link(struct waitq_link * link)415 wql_put_link(struct waitq_link *link)
416 {
417 if (!link) {
418 return;
419 }
420 ltable_put_elem(&g_wqlinktable, (struct lt_elem *)link);
421 }
422
423 /*
424 * waitq links form a list hanging from the waitq `waitq_set_id` field.
425 *
426 * When the waitq is member of 0 or 1 set, it looks like this:
427 *
428 * ┌───────────────┐
429 * │ │
430 * │ waitq_set_id ┼───> set or WAITQ_REF_NULL
431 * │ │
432 * └───────────────┘
433 *
434 * When the waitq is member of 2 or more sets, then it looks like this:
435 *
436 * ┌───────────────┐
437 * │ │
438 * │ waitq_set_id │
439 * │ │
440 * └───────┼───────┘
441 * │
442 * v
443 * ┌───────────┐ ┌───────────┐ ┌───────────┐
444 * │ wql_count │ │ │ │ │
445 * │ wql_next ┼──>│ wql_next ┼──>│ wql_next ┼──────┐
446 * │ wql_node │ │ wql_node │ │ wql_node │ │
447 * └─────┼─────┘ └─────┼─────┘ └─────┼─────┘ │
448 * │ │ │ │
449 * v v v v
450 * set set set set
451 *
452 *
453 * when WQL_LINK elements are used that way, they are never made valid,
454 * and have their refcount to 1. No one should try to resolve those
455 * using ltable_get_elem().
456 */
/* walk the WQL_LINK chain from 'ref'; stops at the first non-pointer ref */
#define waitq_foreach_link(it, ref) \
	for (struct waitq_link *it = wqr_ptr(ref); it; it = wqr_ptr(it->wql_next))

/* as above, but safe against the body unlinking/freeing 'it' */
#define waitq_foreach_link_safe(it, ref) \
	for (struct waitq_link *__tmp_##it = NULL, *it = wqr_ptr(ref); \
	    (it ? (__tmp_##it = wqr_ptr(it->wql_next), 1) : 0); \
	    it = __tmp_##it)
464
465 static int
466 __wql_found_set(uint64_t setid, int (^cb)(struct waitq_link *))
467 {
468 struct waitq_link *link = wql_get_link(setid);
469 int ret = WQ_ITERATE_CONTINUE;
470
471 if (link) {
472 ret = cb(link);
473 wql_put_link(link);
474 }
475
476 return ret;
477 }
478
/*
 * Invoke 'cb' once for each set 'waitq' is a member of, stopping at the
 * first return value other than WQ_ITERATE_CONTINUE (which is returned).
 */
static int
wql_walk_sets(struct waitq *waitq, int (^cb)(struct waitq_link *))
{
	waitq_ref_t root_ref = waitq->waitq_set_id;
	int ret = WQ_ITERATE_CONTINUE;

	/* member of no set at all */
	if (wqr_is_null(root_ref)) {
		return ret;
	}

	/* member of exactly one set: the ref holds the set ID directly */
	if (!wqr_is_ptr(root_ref)) {
		return __wql_found_set(root_ref.wqr_value, cb);
	}

	/*
	 * Member of 2+ sets: each WQL_LINK carries one set ID in wql_node,
	 * and the final link's wql_next holds the last set ID (non-pointer)
	 * -- see the diagram above -- so the loop always returns from inside.
	 */
	waitq_foreach_link(link, root_ref) {
		ret = __wql_found_set(link->wql_node, cb);
		if (ret != WQ_ITERATE_CONTINUE) {
			return ret;
		}
		if (!wqr_is_ptr(link->wql_next)) {
			return __wql_found_set(link->wql_next.wqr_value, cb);
		}
	}

	__builtin_unreachable();
}
505
506
507 /* ----------------------------------------------------------------------
508 *
509 * Prepost Link Table Implementation
510 *
511 * ---------------------------------------------------------------------- */
/* global table backing all prepost elements */
static struct link_table g_prepost_table;

enum wq_prepost_type {
	WQP_FREE = LT_FREE,
	WQP_WQ = LT_ELEM,     /* element refers directly to a waitq */
	WQP_POST = LT_LINK,   /* element is a node on a circular prepost list */
};

/*
 * One element of g_prepost_table. The embedded lt_elem must come first
 * so the generic ltable code can manage the element.
 */
struct wq_prepost {
	struct lt_elem wqte;

	union {
		/* wqt_type == WQP_WQ (LT_ELEM) */
		struct {
			struct waitq *wqp_wq_ptr;
		} wqp_wq;
		/* wqt_type == WQP_POST (LT_LINK) */
		struct {
			uint64_t wqp_next_id;   /* next WQP_POST on the (circular) list */
			uint64_t wqp_wq_id;     /* ID of the WQP_WQ this post refers to */
		} wqp_post;
	};
#ifdef KEEP_WAITQ_PREPOST_STATS
	/* debug-only allocation history */
	thread_t wqp_alloc_th;
	task_t wqp_alloc_task;
	uintptr_t wqp_alloc_bt[NWAITQ_BTFRAMES];
#endif
};
#if !defined(KEEP_WAITQ_PREPOST_STATS)
static_assert((sizeof(struct wq_prepost) & (sizeof(struct wq_prepost) - 1)) == 0,
    "wq_prepost struct must be a power of two!");
#endif
544
/* reference count kept in the element's lt_bits */
#define wqp_refcnt(wqp) \
	(lt_bits_refcnt((wqp)->wqte.lt_bits))

/* WQP_WQ or WQP_POST (see enum wq_prepost_type) */
#define wqp_type(wqp) \
	(lt_bits_type((wqp)->wqte.lt_bits))

#define wqp_set_valid(wqp) \
	lt_elem_mkvalid(&(wqp)->wqte)

#define wqp_is_valid(wqp) \
	lt_bits_valid((wqp)->wqte.lt_bits)

/* convenience accessor for the element's table ID */
#define wqp_prepostid wqte.lt_id

/* poison values written into freed elements to catch stale use */
#define WQP_WQ_POISON (0x0bad0badffffffffull)
#define WQP_POST_POISON (0xf00df00df00df00d)
561
562 static void
wqp_poison(struct link_table * table,struct lt_elem * elem)563 wqp_poison(struct link_table *table, struct lt_elem *elem)
564 {
565 struct wq_prepost *wqp = (struct wq_prepost *)elem;
566 (void)table;
567
568 switch (wqp_type(wqp)) {
569 case WQP_WQ:
570 break;
571 case WQP_POST:
572 wqp->wqp_post.wqp_next_id = WQP_POST_POISON;
573 wqp->wqp_post.wqp_wq_id = WQP_POST_POISON;
574 break;
575 default:
576 break;
577 }
578 }
579
580 #ifdef KEEP_WAITQ_PREPOST_STATS
581 static __inline__ void
wqp_do_alloc_stats(struct lt_elem * elem)582 wqp_do_alloc_stats(struct lt_elem *elem)
583 {
584 if (!elem) {
585 return;
586 }
587
588 struct wq_prepost *wqp = (struct wq_prepost *)elem;
589 uintptr_t alloc_bt[sizeof(wqp->wqp_alloc_bt)];
590
591 waitq_grab_backtrace(alloc_bt, NWAITQ_BTFRAMES);
592
593 /* be sure the take stats for _all_ allocated objects */
594 for (;;) {
595 memcpy(wqp->wqp_alloc_bt, alloc_bt, sizeof(alloc_bt));
596 wqp->wqp_alloc_th = current_thread();
597 wqp->wqp_alloc_task = current_task();
598 wqp = (struct wq_prepost *)lt_elem_list_next(&g_prepost_table, &wqp->wqte);
599 if (!wqp) {
600 break;
601 }
602 }
603 }
604 #else
605 #define wqp_do_alloc_stats(e)
#endif /* KEEP_WAITQ_PREPOST_STATS */
607
608 static void
wqp_init(void)609 wqp_init(void)
610 {
611 uint32_t tablesz = 0, max_wqp = 0;
612
613 if (PE_parse_boot_argn("wqp_tsize", &tablesz, sizeof(tablesz)) != TRUE) {
614 tablesz = (uint32_t)g_lt_max_tbl_size;
615 }
616
617 tablesz = P2ROUNDUP(tablesz, PAGE_SIZE);
618 max_wqp = tablesz / sizeof(struct wq_prepost);
619 assert(max_wqp > 0 && tablesz > 0);
620
621 /* we have a restricted index range */
622 if (max_wqp > (LT_IDX_MAX + 1)) {
623 max_wqp = LT_IDX_MAX + 1;
624 }
625
626 wqinfo("init prepost table with max:%d elements (%d bytes)",
627 max_wqp, tablesz);
628 ltable_init(&g_prepost_table, "wqslab.prepost", max_wqp,
629 sizeof(struct wq_prepost), wqp_poison);
630 }
631
/*
 * Refill the per-CPU cache with 'nalloc' reserved prepost elements.
 * Quietly does nothing if preemption is disabled (allocation may block)
 * or if the table has no elements to give.
 */
static void
wq_prepost_refill_cpu_cache(uint32_t nalloc)
{
	struct lt_elem *new_head, *old_head;
	struct wqp_cache *cache;

	/* require preemption enabled to allocate elements */
	if (get_preemption_level() != 0) {
		return;
	}

	new_head = ltable_alloc_elem(&g_prepost_table,
	    LT_RESERVED, nalloc, 1);
	if (new_head == NULL) {
		return;
	}

	/* preemption stays off while we touch this CPU's cache */
	disable_preemption();
	cache = PERCPU_GET(wqp_cache);

	/* check once more before putting these elements on the list */
	if (cache->avail >= WQP_CACHE_MAX) {
		lt_elem_list_release(&g_prepost_table, new_head, LT_RESERVED);
		goto out;
	}

	/* an empty cache must have no head, and vice versa */
	assert((cache->avail == 0) == (cache->head == 0));

	cache->avail += nalloc;
	if (cache->head == 0) {
		cache->head = new_head->lt_id.id;
		goto out;
	}

	/* splice the new list in front of the cache's existing list */
	old_head = lt_elem_list_first(&g_prepost_table, cache->head);
	(void)lt_elem_list_link(&g_prepost_table, new_head, old_head);
	cache->head = new_head->lt_id.id;

out:
	enable_preemption();
}
676
/*
 * Top up this CPU's reserved-element cache and, if the global prepost
 * table is running low, force it to grow.
 */
static void
wq_prepost_ensure_free_space(void)
{
	uint32_t free_elem;
	uint32_t min_free;
	struct wqp_cache *cache;

	/* lazily compute the global watermark: per-CPU max times CPU count */
	if (g_min_free_cache == 0) {
		g_min_free_cache = (WQP_CACHE_MAX * ml_wait_max_cpus());
	}

	/*
	 * Ensure that we always have a pool of per-CPU prepost elements
	 */
	disable_preemption();
	cache = PERCPU_GET(wqp_cache);
	free_elem = cache->avail;
	enable_preemption();

	/* refill when below one third of the per-CPU maximum */
	if (free_elem < (WQP_CACHE_MAX / 3)) {
		wq_prepost_refill_cpu_cache(WQP_CACHE_MAX - free_elem);
	}

	/*
	 * Now ensure that we have a sufficient amount of free table space
	 */
	free_elem = g_prepost_table.nelem - g_prepost_table.used_elem;
	min_free = g_min_free_table_elem + g_min_free_cache;
	if (free_elem < min_free) {
		/*
		 * we don't hold locks on these values, so check for underflow
		 */
		if (g_prepost_table.used_elem <= g_prepost_table.nelem) {
			wqdbg_v("Forcing table growth: nelem=%d, used=%d, min_free=%d+%d",
			    g_prepost_table.nelem, g_prepost_table.used_elem,
			    g_min_free_table_elem, g_min_free_cache);
			ltable_grow(&g_prepost_table, min_free);
		}
	}
}
717
/*
 * Allocate 'nelem' prepost elements of 'type'.
 *
 * LT_RESERVED requests are satisfied from the per-CPU cache when it holds
 * enough elements; anything else (or a cache miss) falls back to the main
 * table allocator. Returns NULL when no elements are available.
 */
static struct wq_prepost *
wq_prepost_alloc(int type, int nelem)
{
	struct lt_elem *elem;
	struct wq_prepost *wqp;
	struct wqp_cache *cache;

	if (type != LT_RESERVED) {
		goto do_alloc;
	}
	if (nelem == 0) {
		return NULL;
	}

	/*
	 * First try to grab the elements from the per-CPU cache if we are
	 * allocating RESERVED elements
	 */
	disable_preemption();
	cache = PERCPU_GET(wqp_cache);
	if (nelem <= (int)cache->avail) {
		struct lt_elem *first, *next = NULL;
		int nalloc = nelem;

		cache->avail -= nelem;

		/* grab the first element */
		first = lt_elem_list_first(&g_prepost_table, cache->head);

		/* find the last element and re-adjust the cache head */
		for (elem = first; elem != NULL && nalloc > 0; elem = next) {
			next = lt_elem_list_next(&g_prepost_table, elem);
			if (--nalloc == 0) {
				/* terminate the allocated list */
				elem->lt_next_idx = LT_IDX_MAX;
				break;
			}
		}
		assert(nalloc == 0);
		/* cache->head now points past the chunk we took */
		if (!next) {
			cache->head = 0;
		} else {
			cache->head = next->lt_id.id;
		}
		/* assert that we don't have mis-matched book keeping */
		assert((cache->avail == 0) == (cache->head == 0));
		enable_preemption();
		elem = first;
		goto out;
	}
	enable_preemption();

do_alloc:
	/* fall-back to standard table allocation */
	elem = ltable_alloc_elem(&g_prepost_table, type, nelem, 0);
	if (!elem) {
		return NULL;
	}

out:
	wqp = (struct wq_prepost *)elem;
	wqp_do_alloc_stats(elem);
	return wqp;
}
782
783 static void
wq_prepost_invalidate(struct wq_prepost * wqp)784 wq_prepost_invalidate(struct wq_prepost *wqp)
785 {
786 lt_elem_invalidate(&wqp->wqte);
787 }
788
789 static struct wq_prepost *
wq_prepost_get(uint64_t wqp_id)790 wq_prepost_get(uint64_t wqp_id)
791 {
792 struct lt_elem *elem;
793
794 elem = ltable_get_elem(&g_prepost_table, wqp_id);
795 return (struct wq_prepost *)elem;
796 }
797
798 static void
wq_prepost_put(struct wq_prepost * wqp)799 wq_prepost_put(struct wq_prepost *wqp)
800 {
801 ltable_put_elem(&g_prepost_table, (struct lt_elem *)wqp);
802 }
803
804 static int
wq_prepost_rlink(struct wq_prepost * parent,struct wq_prepost * child)805 wq_prepost_rlink(struct wq_prepost *parent, struct wq_prepost *child)
806 {
807 return lt_elem_list_link(&g_prepost_table, &parent->wqte, &child->wqte);
808 }
809
/*
 * Follow 'head's reverse (lt_next_idx) link and return that element with
 * a reference held. Returns NULL if there is no such element, or if the
 * candidate fails validation: its ID went stale between lookup steps, it
 * is not a WQP_POST, or its wqp_next_id no longer points back at 'head'.
 */
static struct wq_prepost *
wq_prepost_get_rnext(struct wq_prepost *head)
{
	struct lt_elem *elem;
	struct wq_prepost *wqp;
	uint64_t id;

	elem = lt_elem_list_next(&g_prepost_table, &head->wqte);
	if (!elem) {
		return NULL;
	}
	/* re-resolve by ID to take a proper reference */
	id = elem->lt_id.id;
	elem = ltable_get_elem(&g_prepost_table, id);

	if (!elem) {
		return NULL;
	}
	wqp = (struct wq_prepost *)elem;
	if (elem->lt_id.id != id ||
	    wqp_type(wqp) != WQP_POST ||
	    wqp->wqp_post.wqp_next_id != head->wqp_prepostid.id) {
		ltable_put_elem(&g_prepost_table, elem);
		return NULL;
	}

	return wqp;
}
837
838 static void
wq_prepost_reset_rnext(struct wq_prepost * wqp)839 wq_prepost_reset_rnext(struct wq_prepost *wqp)
840 {
841 wqp->wqte.lt_next_idx = LT_IDX_MAX;
842 }
843
844
845 /**
846 * remove 'wqp' from the prepost list on 'wqset'
847 *
848 * Conditions:
849 * wqset is locked
850 * caller holds a reference on wqp (and is responsible to release it)
851 *
852 * Result:
853 * wqp is invalidated, wqset is potentially updated with a new
854 * prepost ID, and the next element of the prepost list may be
855 * consumed as well (if the list contained only 2 objects)
856 */
static int
wq_prepost_remove(struct waitq_set *wqset, struct wq_prepost *wqp)
{
	int more_posts = 1;
	uint64_t next_id = wqp->wqp_post.wqp_next_id;
	uint64_t wqp_id = wqp->wqp_prepostid.id;
	struct wq_prepost *prev_wqp, *next_wqp;

	assert(wqset->wqset_q.waitq_portset);
	assert(wqp_type(wqp) == WQP_POST);

	/* a WQP_POST that points at itself is the sole list member */
	if (next_id == wqp_id) {
		/* the list is singular and becoming empty */
		wqset->wqset_prepost_id = 0;
		more_posts = 0;
		goto out;
	}

	/* find wqp's predecessor on the circular list */
	prev_wqp = wq_prepost_get_rnext(wqp);
	assert(prev_wqp != NULL);
	assert(prev_wqp->wqp_post.wqp_next_id == wqp_id);
	assert(prev_wqp->wqp_prepostid.id != wqp_id);
	assert(wqp_type(prev_wqp) == WQP_POST);

	if (prev_wqp->wqp_prepostid.id == next_id) {
		/*
		 * There are two items in the list, and we're removing one. We
		 * only need to keep the WQP_WQ pointer from 'prev_wqp'
		 */
		wqset->wqset_prepost_id = prev_wqp->wqp_post.wqp_wq_id;
		wq_prepost_invalidate(prev_wqp);
		wq_prepost_put(prev_wqp);
		more_posts = 0;
		goto out;
	}

	/* prev->next = next */
	prev_wqp->wqp_post.wqp_next_id = next_id;

	/* next->prev = prev */
	next_wqp = wq_prepost_get(next_id);
	assert(next_wqp != NULL);
	assert(next_wqp != wqp);
	assert(next_wqp != prev_wqp);
	assert(wqp_type(next_wqp) == WQP_POST);

	wq_prepost_reset_rnext(next_wqp);
	wq_prepost_rlink(next_wqp, prev_wqp);

	/* If we remove the head of the list, update the wqset */
	if (wqp_id == wqset->wqset_prepost_id) {
		wqset->wqset_prepost_id = next_id;
	}

	wq_prepost_put(prev_wqp);
	wq_prepost_put(next_wqp);

out:
	/* detach and kill 'wqp' itself; the caller still holds its ref */
	wq_prepost_reset_rnext(wqp);
	wq_prepost_invalidate(wqp);
	return more_posts;
}
919
920 static struct wq_prepost *
wq_prepost_rfirst(uint64_t id)921 wq_prepost_rfirst(uint64_t id)
922 {
923 struct lt_elem *elem;
924 elem = lt_elem_list_first(&g_prepost_table, id);
925 wqp_do_alloc_stats(elem);
926 return (struct wq_prepost *)(void *)elem;
927 }
928
929 static struct wq_prepost *
wq_prepost_rpop(uint64_t * id,int type)930 wq_prepost_rpop(uint64_t *id, int type)
931 {
932 struct lt_elem *elem;
933 elem = lt_elem_list_pop(&g_prepost_table, id, type);
934 wqp_do_alloc_stats(elem);
935 return (struct wq_prepost *)(void *)elem;
936 }
937
/*
 * Release a list of reserved (LT_RESERVED) prepost elements headed by
 * 'wqp': prefer stashing them in this CPU's cache, otherwise return them
 * to the main table.
 */
static void
wq_prepost_release_rlist(struct wq_prepost *wqp)
{
	int nelem = 0;
	struct wqp_cache *cache;
	struct lt_elem *elem;

	if (!wqp) {
		return;
	}

	elem = &wqp->wqte;

	/*
	 * These are reserved elements: release them back to the per-cpu pool
	 * if our cache is running low.
	 */
	disable_preemption();
	cache = PERCPU_GET(wqp_cache);
	if (cache->avail < WQP_CACHE_MAX) {
		struct lt_elem *tmp = NULL;
		/* splice the released list in front of the cached list */
		if (cache->head != 0) {
			tmp = lt_elem_list_first(&g_prepost_table, cache->head);
		}
		nelem = lt_elem_list_link(&g_prepost_table, elem, tmp);
		cache->head = elem->lt_id.id;
		cache->avail += nelem;
		enable_preemption();
		return;
	}
	enable_preemption();

	/* release these elements back to the main table */
	nelem = lt_elem_list_release(&g_prepost_table, elem, LT_RESERVED);

#if CONFIG_WAITQ_STATS
	g_prepost_table.nreserved_releases += 1;
	OSDecrementAtomic64(&g_prepost_table.nreservations);
#endif
}
978
979 typedef int (^wqp_callback_t)(struct wq_prepost *wqp, struct waitq *waitq);
980
981 /**
982 * iterate over a chain of preposts associated with a waitq set.
983 *
984 * Conditions:
985 * wqset is locked
986 *
987 * Notes:
988 * This loop performs automatic prepost chain management / culling, and
989 * may reset or adjust the waitq set's prepost ID pointer. If you don't
990 * want this extra processing, you can use wq_prepost_iterate().
991 */
static int
wq_prepost_foreach_locked(struct waitq_set *wqset, wqp_callback_t cb)
{
	int ret = WQ_ITERATE_SUCCESS;
	struct wq_prepost *wqp, *tmp_wqp;

	assert(cb != NULL);
	assert(wqset->wqset_q.waitq_portset);

	if (!wqset->wqset_prepost_id) {
		return WQ_ITERATE_SUCCESS;
	}

restart:
	/* (re-)resolve the head of the prepost chain from the set */
	wqp = wq_prepost_get(wqset->wqset_prepost_id);
	if (!wqp) {
		/*
		 * The prepost object is no longer valid, reset the waitq
		 * set's prepost id.
		 */
		wqset->wqset_prepost_id = 0;
		return WQ_ITERATE_SUCCESS;
	}

	/* a lone WQP_WQ element: single callback, no list walk needed */
	if (wqp_type(wqp) == WQP_WQ) {
		uint64_t __assert_only wqp_id = wqp->wqp_prepostid.id;

		ret = cb(wqp, wqp->wqp_wq.wqp_wq_ptr);

		switch (ret) {
		case WQ_ITERATE_INVALIDATE_CONTINUE:
			/* the caller wants to remove the only prepost here */
			assert(wqp_id == wqset->wqset_prepost_id);
			wqset->wqset_prepost_id = 0;
			OS_FALLTHROUGH;
		case WQ_ITERATE_CONTINUE:
			wq_prepost_put(wqp);
			ret = WQ_ITERATE_SUCCESS;
			break;
		case WQ_ITERATE_RESTART:
			wq_prepost_put(wqp);
			OS_FALLTHROUGH;
		case WQ_ITERATE_DROPPED:
			goto restart;
		default:
			wq_prepost_put(wqp);
			break;
		}
		return ret;
	}

	assert(wqp->wqp_prepostid.id == wqset->wqset_prepost_id);
	assert(wqp_type(wqp) == WQP_POST);

	/*
	 * At this point we know we have a list of POST objects.
	 * Grab a handle to the last element in the list and start
	 * the iteration.
	 */
	tmp_wqp = wq_prepost_get_rnext(wqp);
	assert(tmp_wqp != NULL && wqp_type(tmp_wqp) == WQP_POST);

	uint64_t last_id = tmp_wqp->wqp_prepostid.id;
	wq_prepost_put(tmp_wqp);

	ret = WQ_ITERATE_SUCCESS;
	for (;;) {
		uint64_t wqp_id, first_id, next_id;

		/* snapshot list state so we can detect changes after 'cb' */
		wqp_id = wqp->wqp_prepostid.id;
		first_id = wqset->wqset_prepost_id;
		next_id = wqp->wqp_post.wqp_next_id;

		/* grab the WQP_WQ object this _POST points to */
		tmp_wqp = wq_prepost_get(wqp->wqp_post.wqp_wq_id);
		if (!tmp_wqp) {
			/*
			 * This WQP_POST object points to an invalid
			 * WQP_WQ object - remove the POST object from
			 * the list.
			 */
			if (wq_prepost_remove(wqset, wqp) == 0) {
				wq_prepost_put(wqp);
				goto restart;
			}
			goto next_prepost;
		}
		assert(wqp_type(tmp_wqp) == WQP_WQ);
		/*
		 * make the callback: note that this could remove 'wqp' or
		 * drop the lock on our waitq set. We need to re-validate
		 * our state when this function returns.
		 */
		ret = cb(wqp, tmp_wqp->wqp_wq.wqp_wq_ptr);
		wq_prepost_put(tmp_wqp);

		switch (ret) {
		case WQ_ITERATE_CONTINUE:
			/* continue iteration */
			break;
		case WQ_ITERATE_INVALIDATE_CONTINUE:
			assert(next_id == wqp->wqp_post.wqp_next_id);
			if (wq_prepost_remove(wqset, wqp) == 0) {
				wq_prepost_put(wqp);
				goto restart;
			}
			goto next_prepost;
		case WQ_ITERATE_RESTART:
			wq_prepost_put(wqp);
			OS_FALLTHROUGH;
		case WQ_ITERATE_DROPPED:
			/* the callback dropped the ref to wqp: just restart */
			goto restart;
		default:
			/* break out of the iteration for some other reason */
			goto finish_prepost_foreach;
		}

		/*
		 * the set lock may have been dropped during callback,
		 * if something looks different, restart the prepost iteration
		 */
		if (!wqp_is_valid(wqp) ||
		    (wqp->wqp_post.wqp_next_id != next_id) ||
		    wqset->wqset_prepost_id != first_id) {
			wq_prepost_put(wqp);
			goto restart;
		}

next_prepost:
		/* this was the last object in the list */
		if (wqp_id == last_id) {
			break;
		}

		/* get the next object */
		tmp_wqp = wq_prepost_get(next_id);
		if (!tmp_wqp) {
			/*
			 * At this point we've already checked our state
			 * after the callback (which may have dropped the set
			 * lock). If we find an invalid member of the list
			 * then something is wrong.
			 */
			panic("Invalid WQP_POST member 0x%llx in waitq set "
			    "0x%llx prepost list (first:%llx, "
			    "wqp:%p)",
			    next_id, wqset->wqset_id, first_id, wqp);
		}
		wq_prepost_put(wqp);
		wqp = tmp_wqp;

		assert(wqp_type(wqp) == WQP_POST);
	}

finish_prepost_foreach:
	wq_prepost_put(wqp);
	if (ret == WQ_ITERATE_CONTINUE) {
		ret = WQ_ITERATE_SUCCESS;
	}

	return ret;
}
1155
1156 /**
1157 * Perform a simple loop over a chain of prepost objects
1158 *
1159 * Conditions:
1160 * If 'prepost_id' is associated with a waitq (set) then that object must
1161 * be locked before calling this function.
1162 * Callback function, 'cb', must be able to handle a NULL wqset pointer
1163 * and a NULL waitq pointer!
1164 *
1165 * Notes:
1166 * This prepost chain iteration will _not_ automatically adjust any chain
1167 * element or linkage. This is the responsibility of the caller! If you
1168 * want automatic prepost chain management (at a cost of extra CPU time),
1169 * you can use: wq_prepost_foreach_locked().
1170 */
static int
wq_prepost_iterate(uint64_t prepost_id, wqp_callback_t cb)
{
	int ret;
	struct wq_prepost *wqp;

	/* an ID of 0 means "no preposts": trivially successful iteration */
	if (!prepost_id) {
		return WQ_ITERATE_SUCCESS;
	}

	/* a stale/invalid ID is also treated as an empty chain */
	wqp = wq_prepost_get(prepost_id);
	if (!wqp) {
		return WQ_ITERATE_SUCCESS;
	}

	if (wqp_type(wqp) == WQP_WQ) {
		/* single WQP_WQ object: one callback, then done */
		ret = cb(wqp, wqp->wqp_wq.wqp_wq_ptr);
		if (ret != WQ_ITERATE_DROPPED) {
			wq_prepost_put(wqp);
		}
		return ret;
	}

	assert(wqp->wqp_prepostid.id == prepost_id);
	assert(wqp_type(wqp) == WQP_POST);

	/* at this point we know we have a list of POST objects */
	uint64_t next_id;

	ret = WQ_ITERATE_CONTINUE;
	do {
		struct wq_prepost *tmp_wqp;
		struct waitq *wq = NULL;

		/* capture next link before the callback can disturb anything */
		next_id = wqp->wqp_post.wqp_next_id;

		/* grab the WQP_WQ object this _POST points to */
		tmp_wqp = wq_prepost_get(wqp->wqp_post.wqp_wq_id);
		if (tmp_wqp) {
			assert(wqp_type(tmp_wqp) == WQP_WQ);
			wq = tmp_wqp->wqp_wq.wqp_wq_ptr;
		}

		/* note: 'wq' may be NULL here; the callback must tolerate it */
		ret = cb(wqp, wq);
		if (tmp_wqp) {
			wq_prepost_put(tmp_wqp);
		}

		if (ret != WQ_ITERATE_CONTINUE) {
			break;
		}

		tmp_wqp = wq_prepost_get(next_id);
		if (!tmp_wqp) {
			/*
			 * the chain is broken: nothing we can do here besides
			 * bail from the iteration.
			 */
			ret = WQ_ITERATE_ABORTED;
			break;
		}

		/* drop the ref on the current element, advance to the next */
		wq_prepost_put(wqp);
		wqp = tmp_wqp;

		assert(wqp_type(wqp) == WQP_POST);
	} while (next_id != prepost_id);

	/* WQ_ITERATE_DROPPED means the callback consumed our reference */
	if (ret != WQ_ITERATE_DROPPED) {
		wq_prepost_put(wqp);
	}

	if (ret == WQ_ITERATE_CONTINUE) {
		ret = WQ_ITERATE_SUCCESS;
	}
	return ret;
}
1248
1249
1250 /**
1251 * checks if 'waitq' has already preposted on 'wqset'
1252 *
1253 * Parameters:
1254 * waitq The waitq that's preposting
1255 * wqset The set onto which waitq may be preposted
1256 *
1257 * Conditions:
1258 * both waitq and wqset are locked
1259 *
1260 * Returns non-zero if 'waitq' has already preposted to 'wqset'
1261 */
static bool
wq_is_preposted_on_set(struct waitq *waitq, struct waitq_set *wqset)
{
	__block bool did_prepost = false;

	/* callers must only ask this of port-set backed waitq sets */
	assert(wqset->wqset_q.waitq_portset);

	/*
	 * If the set's only prepost matches the waitq's prepost ID,
	 * then it obviously already preposted to the set.
	 */
	if (waitq->waitq_prepost_id != 0 &&
	    wqset->wqset_prepost_id == waitq->waitq_prepost_id) {
		return true;
	}

	/* use full prepost iteration: always trim the list */
	wq_prepost_foreach_locked(wqset,
	    ^(struct wq_prepost *wqp __unused, struct waitq *found_wq) {
		if (found_wq == waitq) {
		        did_prepost = true;
		}
		return WQ_ITERATE_CONTINUE;
	});

	return did_prepost;
}
1289
1290 static struct wq_prepost *
wq_get_prepost_obj(uint64_t * reserved,int type)1291 wq_get_prepost_obj(uint64_t *reserved, int type)
1292 {
1293 struct wq_prepost *wqp = NULL;
1294 /*
1295 * don't fail just because the caller doesn't have enough
1296 * reservations, we've kept a low-water mark on the prepost table,
1297 * so there should be some available for us.
1298 */
1299 if (reserved && *reserved) {
1300 wqp = wq_prepost_rpop(reserved, type);
1301 assert(wqp->wqte.lt_id.idx < g_prepost_table.nelem);
1302 } else {
1303 /*
1304 * TODO: if in interrupt context, grab from a special
1305 * region / reserved list!
1306 */
1307 wqp = wq_prepost_alloc(type, 1);
1308 }
1309
1310 if (wqp == NULL) {
1311 panic("Couldn't allocate prepost object!");
1312 }
1313 return wqp;
1314 }
1315
1316
1317 /**
1318 * prepost a waitq onto a waitq set
1319 *
1320 * Parameters:
1321 * wqset The set onto which waitq will be preposted
1322 * waitq The waitq that's preposting
1323 * reserved List (lt_elem_list_ style) of pre-allocated prepost elements
1324 * Could be NULL
1325 *
1326 * Conditions:
1327 * both wqset and waitq are locked
1328 *
1329 * Notes:
1330 * If reserved is NULL, this may block on prepost table growth.
1331 */
static void
wq_prepost_do_post_locked(struct waitq_set *wqset,
    struct waitq *waitq,
    uint64_t *reserved)
{
	struct wq_prepost *wqp_post, *wqp_head, *wqp_tail;

	assert(waitq_held(waitq) && waitq_held(&wqset->wqset_q));

	/* non port-set sets only track a marker, not a prepost list */
	if (!wqset->wqset_q.waitq_portset) {
		wqset->wqset_prepost_id = WQSET_PREPOSTED_ANON;
		return;
	}

	/*
	 * nothing to do if it's already preposted:
	 * note that this also culls any invalid prepost objects
	 */
	if (wq_is_preposted_on_set(waitq, wqset)) {
		return;
	}

	assert(waitqs_is_linked(wqset));

	/*
	 * This function is called because an event is being posted to 'waitq'.
	 * We need a prepost object associated with this queue. Allocate one
	 * now if the waitq isn't already associated with one.
	 */
	if (waitq->waitq_prepost_id == 0) {
		struct wq_prepost *wqp;
		wqp = wq_get_prepost_obj(reserved, WQP_WQ);
		wqp->wqp_wq.wqp_wq_ptr = waitq;
		wqp_set_valid(wqp);
		waitq->waitq_prepost_id = wqp->wqp_prepostid.id;
		wq_prepost_put(wqp);
	}

#if CONFIG_LTABLE_STATS
	g_prepost_table.npreposts += 1;
#endif

	wqdbg_v("preposting waitq %p (0x%llx) to set 0x%llx",
	    (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq),
	    waitq->waitq_prepost_id, wqset->wqset_id);

	if (wqset->wqset_prepost_id == 0) {
		/* the set has no previous preposts */
		wqset->wqset_prepost_id = waitq->waitq_prepost_id;
		return;
	}

	wqp_head = wq_prepost_get(wqset->wqset_prepost_id);
	if (!wqp_head) {
		/* the previous prepost has become invalid */
		wqset->wqset_prepost_id = waitq->waitq_prepost_id;
		return;
	}

	assert(wqp_head->wqp_prepostid.id == wqset->wqset_prepost_id);

	/*
	 * If we get here, we're going to need at least one new wq_prepost
	 * object. If the previous wqset_prepost_id points to a WQP_WQ, we
	 * actually need to allocate 2 wq_prepost objects because the WQP_WQ
	 * is tied to the waitq and shared across all sets.
	 */
	wqp_post = wq_get_prepost_obj(reserved, WQP_POST);

	wqp_post->wqp_post.wqp_wq_id = waitq->waitq_prepost_id;
	wqdbg_v("POST 0x%llx :: WQ 0x%llx", wqp_post->wqp_prepostid.id,
	    waitq->waitq_prepost_id);

	if (wqp_type(wqp_head) == WQP_WQ) {
		/*
		 * We must replace the wqset_prepost_id with a pointer
		 * to two new WQP_POST objects
		 */
		uint64_t wqp_id = wqp_head->wqp_prepostid.id;
		wqdbg_v("set 0x%llx previous had 1 WQ prepost (0x%llx): "
		    "replacing with two POST preposts",
		    wqset->wqset_id, wqp_id);

		/* drop the old reference */
		wq_prepost_put(wqp_head);

		/* grab another new object (the 2nd of two) */
		wqp_head = wq_get_prepost_obj(reserved, WQP_POST);

		/* point this one to the original WQP_WQ object */
		wqp_head->wqp_post.wqp_wq_id = wqp_id;
		wqdbg_v("POST 0x%llx :: WQ 0x%llx",
		    wqp_head->wqp_prepostid.id, wqp_id);

		/* link it to the new wqp_post object allocated earlier */
		wqp_head->wqp_post.wqp_next_id = wqp_post->wqp_prepostid.id;
		/* make the list a double-linked and circular */
		wq_prepost_rlink(wqp_head, wqp_post);

		/*
		 * Finish setting up the new prepost: point it back to the
		 * POST object we allocated to replace the original wqset
		 * WQ prepost object
		 */
		wqp_post->wqp_post.wqp_next_id = wqp_head->wqp_prepostid.id;
		wq_prepost_rlink(wqp_post, wqp_head);

		/* mark objects valid, and reset the wqset prepost list head */
		wqp_set_valid(wqp_head);
		wqp_set_valid(wqp_post);
		wqset->wqset_prepost_id = wqp_head->wqp_prepostid.id;

		/* release both references */
		wq_prepost_put(wqp_head);
		wq_prepost_put(wqp_post);

		wqdbg_v("set 0x%llx: 0x%llx/0x%llx -> 0x%llx/0x%llx -> 0x%llx",
		    wqset->wqset_id, wqset->wqset_prepost_id,
		    wqp_head->wqp_prepostid.id, wqp_head->wqp_post.wqp_next_id,
		    wqp_post->wqp_prepostid.id,
		    wqp_post->wqp_post.wqp_next_id);
		return;
	}

	assert(wqp_type(wqp_head) == WQP_POST);

	/*
	 * Add the new prepost to the end of the prepost list
	 */
	wqp_tail = wq_prepost_get_rnext(wqp_head);
	assert(wqp_tail != NULL);
	assert(wqp_tail->wqp_post.wqp_next_id == wqset->wqset_prepost_id);

	/*
	 * link the head to the new tail
	 * NOTE: this needs to happen first in case wqp_tail == wqp_head
	 */
	wq_prepost_reset_rnext(wqp_head);
	wq_prepost_rlink(wqp_head, wqp_post);

	/* point the new object to the list head, and list tail */
	wqp_post->wqp_post.wqp_next_id = wqp_head->wqp_prepostid.id;
	wq_prepost_rlink(wqp_post, wqp_tail);

	/* point the last item in the waitq set's list to the new object */
	wqp_tail->wqp_post.wqp_next_id = wqp_post->wqp_prepostid.id;

	wqp_set_valid(wqp_post);

	wq_prepost_put(wqp_head);
	wq_prepost_put(wqp_tail);
	wq_prepost_put(wqp_post);

	wqdbg_v("set 0x%llx (wqp:0x%llx) last_prepost:0x%llx, "
	    "new_prepost:0x%llx->0x%llx", wqset->wqset_id,
	    wqset->wqset_prepost_id, wqp_head->wqp_prepostid.id,
	    wqp_post->wqp_prepostid.id, wqp_post->wqp_post.wqp_next_id);
}
1490
1491
1492 /* ----------------------------------------------------------------------
1493 *
1494 * Stats collection / reporting
1495 *
1496 * ---------------------------------------------------------------------- */
1497 #if CONFIG_LTABLE_STATS && CONFIG_WAITQ_STATS
1498 static void
wq_table_stats(struct link_table * table,struct wq_table_stats * stats)1499 wq_table_stats(struct link_table *table, struct wq_table_stats *stats)
1500 {
1501 stats->version = WAITQ_STATS_VERSION;
1502 stats->table_elements = table->nelem;
1503 stats->table_used_elems = table->used_elem;
1504 stats->table_elem_sz = table->elem_sz;
1505 stats->table_slabs = table->nslabs;
1506 stats->table_slab_sz = table->slab_sz;
1507
1508 stats->table_num_allocs = table->nallocs;
1509 stats->table_num_preposts = table->npreposts;
1510 stats->table_num_reservations = table->nreservations;
1511
1512 stats->table_max_used = table->max_used;
1513 stats->table_avg_used = table->avg_used;
1514 stats->table_max_reservations = table->max_reservations;
1515 stats->table_avg_reservations = table->avg_reservations;
1516 }
1517
1518 void
waitq_link_stats(struct wq_table_stats * stats)1519 waitq_link_stats(struct wq_table_stats *stats)
1520 {
1521 if (!stats) {
1522 return;
1523 }
1524 wq_table_stats(&g_wqlinktable, stats);
1525 }
1526
1527 void
waitq_prepost_stats(struct wq_table_stats * stats)1528 waitq_prepost_stats(struct wq_table_stats *stats)
1529 {
1530 wq_table_stats(&g_prepost_table, stats);
1531 }
1532 #endif
1533
1534
1535 /* ----------------------------------------------------------------------
1536 *
1537 * Global Wait Queues
1538 *
1539 * ---------------------------------------------------------------------- */
1540
/* boot-time placeholder queue, replaced once waitq_bootstrap() allocates the table */
static struct waitq g_boot_waitq;
/* hash table of global wait queues (read-only after boot) */
static SECURITY_READ_ONLY_LATE(struct waitq *) global_waitqs = &g_boot_waitq;
/* number of entries in global_waitqs; forced to a power of 2 in waitq_bootstrap() */
static SECURITY_READ_ONLY_LATE(uint32_t) g_num_waitqs = 1;

/*
 * Zero out the used MSBs of the event.
 */
#define _CAST_TO_EVENT_MASK(event) ((waitq_flags_t)(event) & ((1ul << _EVENT_MASK_BITS) - 1ul))
1549
1550 static __inline__ uint32_t
waitq_hash(char * key,size_t length)1551 waitq_hash(char *key, size_t length)
1552 {
1553 uint32_t hash = os_hash_jenkins(key, length);
1554
1555 hash &= (g_num_waitqs - 1);
1556 return hash;
1557 }
1558
1559 /* return a global waitq pointer corresponding to the given event */
1560 struct waitq *
_global_eventq(char * event,size_t event_length)1561 _global_eventq(char *event, size_t event_length)
1562 {
1563 return &global_waitqs[waitq_hash(event, event_length)];
1564 }
1565
1566 /* return an indexed global waitq pointer */
1567 struct waitq *
global_waitq(int index)1568 global_waitq(int index)
1569 {
1570 return &global_waitqs[index % g_num_waitqs];
1571 }
1572
1573
1574 #if CONFIG_LTABLE_STATS || CONFIG_WAITQ_STATS
1575 /* this global is for lldb */
1576 const uint32_t g_nwaitq_btframes = NWAITQ_BTFRAMES;
1577
1578 static __inline__ void
waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES],int skip)1579 waitq_grab_backtrace(uintptr_t bt[NWAITQ_BTFRAMES], int skip)
1580 {
1581 uintptr_t buf[NWAITQ_BTFRAMES + skip];
1582 if (skip < 0) {
1583 skip = 0;
1584 }
1585 memset(buf, 0, (NWAITQ_BTFRAMES + skip) * sizeof(uintptr_t));
1586 backtrace(buf, g_nwaitq_btframes + skip, NULL, NULL);
1587 memcpy(&bt[0], &buf[skip], NWAITQ_BTFRAMES * sizeof(uintptr_t));
1588 }
1589 #else /* no stats */
1590 #define waitq_grab_backtrace(...)
1591 #endif
1592
1593 #if CONFIG_WAITQ_STATS
1594
1595 struct wq_stats g_boot_stats;
1596 struct wq_stats *g_waitq_stats = &g_boot_stats;
1597
1598 static __inline__ struct wq_stats *
waitq_global_stats(struct waitq * waitq)1599 waitq_global_stats(struct waitq *waitq)
1600 {
1601 struct wq_stats *wqs;
1602 uint32_t idx;
1603
1604 if (!waitq_is_global(waitq)) {
1605 return NULL;
1606 }
1607
1608 idx = (uint32_t)(((uintptr_t)waitq - (uintptr_t)global_waitqs) / sizeof(*waitq));
1609 assert(idx < g_num_waitqs);
1610 wqs = &g_waitq_stats[idx];
1611 return wqs;
1612 }
1613
1614 static __inline__ void
waitq_stats_count_wait(struct waitq * waitq)1615 waitq_stats_count_wait(struct waitq *waitq)
1616 {
1617 struct wq_stats *wqs = waitq_global_stats(waitq);
1618 if (wqs != NULL) {
1619 wqs->waits++;
1620 waitq_grab_backtrace(wqs->last_wait, 2);
1621 }
1622 }
1623
1624 static __inline__ void
waitq_stats_count_wakeup(struct waitq * waitq,int n)1625 waitq_stats_count_wakeup(struct waitq *waitq, int n)
1626 {
1627 struct wq_stats *wqs = waitq_global_stats(waitq);
1628 if (wqs != NULL) {
1629 if (n > 0) {
1630 wqs->wakeups += n;
1631 waitq_grab_backtrace(wqs->last_wakeup, 2);
1632 } else {
1633 wqs->failed_wakeups++;
1634 waitq_grab_backtrace(wqs->last_failed_wakeup, 2);
1635 }
1636 }
1637 }
1638
1639 static __inline__ void
waitq_stats_count_clear_wakeup(struct waitq * waitq)1640 waitq_stats_count_clear_wakeup(struct waitq *waitq)
1641 {
1642 struct wq_stats *wqs = waitq_global_stats(waitq);
1643 if (wqs != NULL) {
1644 wqs->wakeups++;
1645 wqs->clears++;
1646 waitq_grab_backtrace(wqs->last_wakeup, 2);
1647 }
1648 }
1649 #else /* !CONFIG_WAITQ_STATS */
1650 #define waitq_stats_count_wait(q) do { } while (0)
1651 #define waitq_stats_count_wakeup(q, n) do { } while (0)
1652 #define waitq_stats_count_clear_wakeup(q) do { } while (0)
1653 #endif
1654
/* External wrapper: report whether 'waitq' is initialized and valid. */
bool
waitq_is_valid(struct waitq *waitq)
{
	return waitq_valid(waitq);
}
1660
/* A valid set is a valid underlying waitq that is actually marked as a set. */
bool
waitq_set_is_valid(struct waitq_set *wqset)
{
	return waitq_valid(&wqset->wqset_q) && waitqs_is_set(wqset);
}
1666
/* True iff 'waitq' lives inside the global_waitqs hash array. */
bool
waitq_is_global(struct waitq *waitq)
{
	return waitq >= global_waitqs && waitq < global_waitqs + g_num_waitqs;
}
1672
/* True iff this waitq may be manipulated from interrupt context. */
bool
waitq_irq_safe(struct waitq *waitq)
{
	/* global wait queues have this bit set on initialization */
	return waitq->waitq_irq;
}
1679
1680 static inline bool
waitq_empty(struct waitq * wq)1681 waitq_empty(struct waitq *wq)
1682 {
1683 if (waitq_is_turnstile_queue(wq)) {
1684 return priority_queue_empty(&wq->waitq_prio_queue);
1685 } else if (waitq_is_turnstile_proxy(wq)) {
1686 struct turnstile *ts = wq->waitq_ts;
1687 return ts == TURNSTILE_NULL ||
1688 priority_queue_empty(&ts->ts_waitq.waitq_prio_queue);
1689 } else {
1690 return queue_empty(&wq->waitq_queue);
1691 }
1692 }
1693
1694 static struct waitq *
waitq_get_safeq(struct waitq * waitq)1695 waitq_get_safeq(struct waitq *waitq)
1696 {
1697 /* Check if it's a port waitq */
1698 if (waitq_is_turnstile_proxy(waitq)) {
1699 struct turnstile *ts = waitq->waitq_ts;
1700 return ts ? &ts->ts_waitq : NULL;
1701 }
1702 return global_eventq(waitq);
1703 }
1704
1705 static uint32_t
waitq_hash_size(void)1706 waitq_hash_size(void)
1707 {
1708 uint32_t hsize, queues;
1709
1710 if (PE_parse_boot_argn("wqsize", &hsize, sizeof(hsize))) {
1711 return hsize;
1712 }
1713
1714 queues = thread_max / 5;
1715 hsize = P2ROUNDUP(queues * sizeof(struct waitq), PAGE_SIZE);
1716
1717 return hsize;
1718 }
1719
1720 /*
1721 * Since the priority ordered waitq uses basepri as the
1722 * ordering key assert that this value fits in a uint8_t.
1723 */
1724 static_assert(MAXPRI <= UINT8_MAX);
1725
1726 static inline void
waitq_thread_insert(struct waitq * safeq,thread_t thread,struct waitq * wq,event64_t event)1727 waitq_thread_insert(struct waitq *safeq, thread_t thread,
1728 struct waitq *wq, event64_t event)
1729 {
1730 if (waitq_is_turnstile_queue(safeq)) {
1731 turnstile_stats_update(0, TSU_TURNSTILE_BLOCK_COUNT, NULL);
1732 turnstile_waitq_add_thread_priority_queue(safeq, thread);
1733 } else {
1734 turnstile_stats_update(0, TSU_REGULAR_WAITQ_BLOCK_COUNT, NULL);
1735 /*
1736 * Realtime threads get priority for wait queue placements.
1737 * This allows wait_queue_wakeup_one to prefer a waiting
1738 * realtime thread, similar in principle to performing
1739 * a wait_queue_wakeup_all and allowing scheduler prioritization
1740 * to run the realtime thread, but without causing the
1741 * lock contention of that scenario.
1742 */
1743 if (thread->sched_pri >= BASEPRI_REALTIME ||
1744 !safeq->waitq_fifo ||
1745 thread->options & TH_OPT_VMPRIV) {
1746 enqueue_head(&safeq->waitq_queue, &thread->wait_links);
1747 } else {
1748 enqueue_tail(&safeq->waitq_queue, &thread->wait_links);
1749 }
1750 }
1751
1752 /* mark the event and real waitq, even if enqueued on a global safeq */
1753 thread->wait_event = event;
1754 thread->waitq = wq;
1755 }
1756
1757 /**
1758 * clear the thread-related waitq state
1759 *
1760 * Conditions:
1761 * 'thread' is locked
1762 */
static inline void
thread_clear_waitq_state(thread_t thread)
{
	/* forget which queue/event the thread was waiting on */
	thread->waitq = NULL;
	thread->wait_event = NO_EVENT64;
	/* the thread is no longer parked at an abort-safe point */
	thread->at_safe_point = FALSE;
}
1770
/*
 * Remove 'thread' from waitq 'wq' and clear its waitq-related state.
 * 'wq' is the queue the thread is physically enqueued on.
 */
static inline void
waitq_thread_remove(struct waitq *wq, thread_t thread)
{
	if (waitq_is_turnstile_queue(wq)) {
		/* trace the removal for turnstile heap analysis tools */
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		    (TURNSTILE_CODE(TURNSTILE_HEAP_OPERATIONS,
		    (THREAD_REMOVED_FROM_TURNSTILE_WAITQ))) | DBG_FUNC_NONE,
		    VM_KERNEL_UNSLIDE_OR_PERM(waitq_to_turnstile(wq)),
		    thread_tid(thread), 0, 0, 0);
		priority_queue_remove(&wq->waitq_prio_queue,
		    &thread->wait_prioq_links);
	} else {
		remqueue(&thread->wait_links);
		/* last waiter gone: clear the global queue's event mask */
		if (waitq_is_global(wq) && waitq_empty(wq)) {
			wq->waitq_eventmask = 0;
		}
	}

	thread_clear_waitq_state(thread);
}
1791
1792 void
waitq_bootstrap(void)1793 waitq_bootstrap(void)
1794 {
1795 kern_return_t kret;
1796 uint32_t whsize, qsz, tmp32;
1797
1798 g_min_free_table_elem = DEFAULT_MIN_FREE_TABLE_ELEM;
1799 if (PE_parse_boot_argn("wqt_min_free", &tmp32, sizeof(tmp32)) == TRUE) {
1800 g_min_free_table_elem = tmp32;
1801 }
1802 wqdbg("Minimum free table elements: %d", tmp32);
1803
1804 /*
1805 * Determine the amount of memory we're willing to reserve for
1806 * the waitqueue hash table
1807 */
1808 whsize = waitq_hash_size();
1809
1810 /* Determine the number of waitqueues we can fit. */
1811 qsz = sizeof(struct waitq);
1812 whsize = ROUNDDOWN(whsize, qsz);
1813 g_num_waitqs = whsize / qsz;
1814
1815 /*
1816 * The hash algorithm requires that this be a power of 2, so we
1817 * just mask off all the low-order bits.
1818 */
1819 for (uint32_t i = 0; i < 31; i++) {
1820 uint32_t bit = (1 << i);
1821 if ((g_num_waitqs & bit) == g_num_waitqs) {
1822 break;
1823 }
1824 g_num_waitqs &= ~bit;
1825 }
1826 assert(g_num_waitqs > 0);
1827
1828 /* Now determine how much memory we really need. */
1829 whsize = P2ROUNDUP(g_num_waitqs * qsz, PAGE_SIZE);
1830
1831 wqdbg("allocating %d global queues (%d bytes)", g_num_waitqs, whsize);
1832 kret = kernel_memory_allocate(kernel_map, (vm_offset_t *)&global_waitqs,
1833 whsize, 0, KMA_KOBJECT | KMA_NOPAGEWAIT, VM_KERN_MEMORY_WAITQ);
1834 if (kret != KERN_SUCCESS || global_waitqs == NULL) {
1835 panic("kernel_memory_allocate() failed to alloc global_waitqs"
1836 ", error: %d, whsize: 0x%x", kret, whsize);
1837 }
1838
1839 #if CONFIG_WAITQ_STATS
1840 whsize = P2ROUNDUP(g_num_waitqs * sizeof(struct wq_stats), PAGE_SIZE);
1841 kret = kernel_memory_allocate(kernel_map, (vm_offset_t *)&g_waitq_stats,
1842 whsize, 0, KMA_KOBJECT | KMA_NOPAGEWAIT, VM_KERN_MEMORY_WAITQ);
1843 if (kret != KERN_SUCCESS || global_waitqs == NULL) {
1844 panic("kernel_memory_allocate() failed to alloc g_waitq_stats"
1845 ", error: %d, whsize: 0x%x", kret, whsize);
1846 }
1847 memset(g_waitq_stats, 0, whsize);
1848 #endif
1849
1850 for (uint32_t i = 0; i < g_num_waitqs; i++) {
1851 waitq_init(&global_waitqs[i], SYNC_POLICY_FIFO | SYNC_POLICY_DISABLE_IRQ);
1852 }
1853
1854 /* initialize the global waitq link table */
1855 wql_init();
1856
1857 /* initialize the global waitq prepost table */
1858 wqp_init();
1859 }
1860
1861
1862 /* ----------------------------------------------------------------------
1863 *
1864 * Wait Queue Implementation
1865 *
1866 * ---------------------------------------------------------------------- */
1867
1868 /*
1869 * Double the standard lock timeout, because wait queues tend
1870 * to iterate over a number of threads - locking each. If there is
1871 * a problem with a thread lock, it normally times out at the wait
1872 * queue level first, hiding the real problem.
1873 */
1874 /* For x86, the hardware timeout is in TSC units. */
1875 #if defined(__i386__) || defined(__x86_64__)
1876 #define waitq_timeout (2 * LockTimeOutTSC)
1877 #else
1878 #define waitq_timeout (2 * os_atomic_load(&LockTimeOut, relaxed))
1879 #endif
1880
1881 static hw_lock_timeout_status_t
waitq_timeout_handler(void * _lock,uint64_t timeout,uint64_t start,uint64_t now,uint64_t interrupt_time)1882 waitq_timeout_handler(void *_lock, uint64_t timeout, uint64_t start, uint64_t now, uint64_t interrupt_time)
1883 {
1884 #pragma unused(interrupt_time)
1885
1886 lck_spinlock_to_info_t lsti;
1887 hw_lck_ticket_t *lck = _lock;
1888 hw_lck_ticket_t tmp;
1889 struct waitq *wq = __container_of(lck, struct waitq, waitq_interlock);
1890
1891 if (machine_timeout_suspended()) {
1892 return HW_LOCK_TIMEOUT_CONTINUE;
1893 }
1894
1895 lsti = lck_spinlock_timeout_hit(lck, 0);
1896 tmp.tcurnext = os_atomic_load(&lck->tcurnext, relaxed);
1897
1898 panic("waitq(%p) lock timeout after %llu ticks; cpu=%d, "
1899 "cticket: 0x%x, nticket: 0x%x, waiting for 0x%x"
1900 #if INTERRUPT_MASKED_DEBUG
1901 "interrupt time: %llu, "
1902 #endif /* INTERRUPT_MASKED_DEBUG */
1903 "start time: %llu, now: %llu, timeout: %llu",
1904 wq, now - start, cpu_number(),
1905 tmp.cticket, tmp.nticket, lsti->extra,
1906 #if INTERRUPT_MASKED_DEBUG
1907 interrupt_time,
1908 #endif /* INTERRUPT_MASKED_DEBUG */
1909 start, now, timeout);
1910 }
1911
/* Acquire the waitq interlock, panicking via waitq_timeout_handler on timeout. */
void
waitq_lock(struct waitq *wq)
{
	(void)hw_lck_ticket_lock_to(&wq->waitq_interlock,
	    waitq_timeout, waitq_timeout_handler, &waitq_lck_grp);
#if defined(__x86_64__)
	pltrace(FALSE);
#endif
}
1921
1922 bool
waitq_lock_allow_invalid(struct waitq * wq)1923 waitq_lock_allow_invalid(struct waitq *wq)
1924 {
1925 hw_lock_status_t rc;
1926
1927 rc = hw_lck_ticket_lock_allow_invalid(&wq->waitq_interlock,
1928 waitq_timeout, waitq_timeout_handler, &waitq_lck_grp);
1929
1930 #if defined(__x86_64__)
1931 if (rc == HW_LOCK_ACQUIRED) {
1932 pltrace(FALSE);
1933 }
1934 #endif
1935 return rc == HW_LOCK_ACQUIRED;
1936 }
1937
/* Release the waitq interlock; caller must currently hold it. */
void
waitq_unlock(struct waitq *wq)
{
	assert(waitq_held(wq));
#if defined(__x86_64__)
	pltrace(TRUE);
#endif
	hw_lck_ticket_unlock(&wq->waitq_interlock);
}
1947
1948
/*
 * Optional per-thread refinement callback used during selection:
 * returns the thread to select, or THREAD_NULL to skip this thread.
 */
typedef thread_t (^waitq_select_cb)(struct waitq *waitq, thread_t thread);

/* Bundle of parameters/results threaded through the waitq selection walk. */
struct waitq_select_args {
	/* input parameters */
	event64_t event;                /* wait event to match */
	waitq_select_cb select_cb;      /* optional refinement callback */
	int priority;
	wait_result_t result;
	waitq_options_t options;

	/* pre-allocated prepost elements, consumed if a prepost is needed */
	uint64_t *reserved_preposts;

	/* output parameters */
	queue_head_t threadq;           /* selected (thread-locked) threads */
	uint32_t max_threads;           /* selection stops at this count */
	uint32_t nthreads;              /* number of threads selected so far */
	spl_t spl;                      /* saved interrupt state, when set */
};
1967
1968 static void do_waitq_select_n_locked(struct waitq *, struct waitq_select_args *args);
1969 static void waitq_select_queue_flush(struct waitq *, struct waitq_select_args *args);
1970
1971 /**
1972 * callback invoked once for every waitq set to which a waitq belongs
1973 *
1974 * Conditions:
1975 * the posted waitq is locked.
1976 * 'link' points to a valid waitq set
1977 *
1978 * Notes:
1979 * Takes the waitq set lock on the set pointed to by 'link'
1980 * Calls do_waitq_select_n_locked().
1981 * If no threads were selected, it preposts the input waitq
1982 * onto the waitq set pointed to by 'link'.
1983 */
static int
waitq_select_walk_cb(struct waitq_link *link, struct waitq *waitq,
    struct waitq_select_args *args)
{
	struct waitq_set *wqset = link->wql_set;
	int ret = WQ_ITERATE_CONTINUE;

	assert(!waitq_irq_safe(&wqset->wqset_q));

	/* safe to block on the set lock only while no threads are queued */
	if (queue_empty(&args->threadq)) {
		waitq_set_lock(wqset);
	} else if (!waitq_set_lock_try(wqset)) {
		/*
		 * We are holding several thread locks,
		 * and failed to acquire this waitq set lock.
		 *
		 * It is possible that another core is holding that
		 * (non IRQ-safe) waitq set lock, while an interrupt
		 * is trying to grab the thread lock of ones of those threads.
		 *
		 * In order to avoid deadlocks, let's flush out the queue
		 * of threads, and then we know we're safe.
		 *
		 * Note: this code will never run if `max_threads` is 1
		 * because we should not even reach this point.
		 *
		 * It is critical because the `identify` variants
		 * will not want their queue flushed.
		 */
		assert(args->max_threads > 1);
		waitq_select_queue_flush(waitq, args);
		waitq_set_lock(wqset);
	}

	/*
	 * verify that the link wasn't invalidated just before
	 * we were able to take the lock.
	 */
	if (wqset->wqset_id != link->wql_setid.id) {
		goto out_unlock;
	}

	assert(waitqs_is_linked(wqset));

	/*
	 * Find any threads waiting on this wait queue set.
	 */
	do_waitq_select_n_locked(&wqset->wqset_q, args);

	if (args->nthreads == 0 && args->event == NO_EVENT64) {
		/* No thread selected: prepost 'waitq' to 'wqset' */
		wq_prepost_do_post_locked(wqset, waitq, args->reserved_preposts);

		/* If this is a port-set, callout to the IPC subsystem */
		if (wqset->wqset_q.waitq_portset) {
			ipc_pset_prepost(wqset, waitq);
		}
	} else if (args->nthreads >= args->max_threads) {
		/* break out of the setid walk */
		ret = WQ_ITERATE_FOUND;
	}

out_unlock:
	waitq_set_unlock(wqset);
	return ret;
}
2050
2051 /**
2052 * Routine to iterate over the waitq for non-priority ordered waitqs
2053 *
2054 * Conditions:
2055 * args->waitq (and the posted waitq) is locked
2056 *
2057 * Notes:
2058 * Uses the optional select callback function to refine the selection
2059 * of one or more threads from a waitq. The select callback is invoked
2060 * once for every thread that is found to be waiting on the input args->waitq.
2061 *
2062 * If one or more threads are selected, this may disable interrupts.
2063 * The previous interrupt state is returned in args->spl and should
2064 * be used in a call to splx() if threads are returned to the caller.
2065 */
static thread_t
waitq_queue_iterate_locked(struct waitq *safeq, struct waitq *waitq,
    struct waitq_select_args *args, waitq_flags_t *remaining_eventmask)
{
	thread_t thread = THREAD_NULL;
	thread_t first_thread = THREAD_NULL;

	/* safe variant: selected threads are re-queued during iteration */
	qe_foreach_element_safe(thread, &safeq->waitq_queue, wait_links) {
		thread_t t = THREAD_NULL;
		assert_thread_magic(thread);

		/*
		 * For non-priority ordered waitqs, we allow multiple events to be
		 * mux'ed into the same waitq. Also safeqs may contain threads from
		 * multiple waitqs. Only pick threads that match the
		 * requested wait event.
		 */
		if (thread->waitq == waitq && thread->wait_event == args->event) {
			t = thread;
			if (first_thread == THREAD_NULL) {
				first_thread = thread;
			}

			/* allow the caller to further refine the selection */
			if (args->select_cb) {
				t = args->select_cb(waitq, thread);
			}
			if (t != THREAD_NULL) {
				args->nthreads += 1;
				/* first selection on a non-global safeq: save SPL */
				if (args->nthreads == 1 && safeq == waitq) {
					args->spl = splsched();
				}
				thread_lock(t);
				thread_clear_waitq_state(t);
				re_queue_tail(&args->threadq, &t->wait_links);
				/* only enqueue up to 'max' threads */
				if (args->nthreads >= args->max_threads) {
					break;
				}
			}
		}
		/* thread wasn't selected so track its event */
		if (t == THREAD_NULL) {
			*remaining_eventmask |= (thread->waitq != safeq) ?
			    _CAST_TO_EVENT_MASK(thread->waitq) : _CAST_TO_EVENT_MASK(thread->wait_event);
		}
	}

	return first_thread;
}
2116
2117 /**
2118 * Routine to iterate and remove threads from priority ordered waitqs
2119 *
2120 * Conditions:
2121 * args->waitq (and the posted waitq) is locked
2122 *
2123 * Notes:
2124 * The priority ordered waitqs only support maximum priority element removal.
2125 *
2126 * Also, the implementation makes sure that all threads in a priority ordered
2127 * waitq are waiting on the same wait event. This is not necessarily true for
2128 * non-priority ordered waitqs. If one or more threads are selected, this may
2129 * disable interrupts. The previous interrupt state is returned in args->spl
2130 * and should be used in a call to splx() if threads are returned to the caller.
2131 *
2132 * In the future, we could support priority ordered waitqs with multiple wait
2133 * events in the same queue. The way to implement that would be to keep removing
2134 * elements from the waitq and if the event does not match the requested one,
2135 * add it to a local list. This local list of elements needs to be re-inserted
2136 * into the priority queue at the end and the select_cb return value &
2137 * remaining_eventmask would need to be handled appropriately. The implementation
2138 * is not very efficient but would work functionally.
2139 */
static thread_t
waitq_prioq_iterate_locked(struct waitq *safeq, struct waitq *waitq,
    struct waitq_select_args *args, waitq_flags_t *remaining_eventmask)
{
	thread_t first_thread = THREAD_NULL;
	thread_t thread = THREAD_NULL;

	/*
	 * The only possible values for remaining_eventmask for the priority queue
	 * waitq are either 0 (for the remove all threads case) or the original
	 * safeq->waitq_eventmask (for the lookup/remove one thread cases).
	 */
	*remaining_eventmask = safeq->waitq_eventmask;

	while (args->nthreads < args->max_threads) {
		if (priority_queue_empty(&(safeq->waitq_prio_queue))) {
			/* queue fully drained: no waiters, so no events remain posted */
			*remaining_eventmask = 0;
			break;
		}

		/* priority ordered waitqs only support max-priority removal */
		thread = priority_queue_remove_max(&safeq->waitq_prio_queue,
		    struct thread, wait_prioq_links);

		/*
		 * Ensure the wait event matches since priority ordered waitqs do not
		 * support multiple events in the same waitq.
		 */
		assert((thread->waitq == waitq) && (thread->wait_event == args->event));

		if (args->select_cb) {
			/*
			 * Call the select_cb passed into the waitq_select args. The callback
			 * updates the select_ctx with information about the highest priority
			 * thread which is eventually used by the caller.
			 */
			thread_t __assert_only ret_thread = args->select_cb(waitq, thread);
			/* the callback must accept the max-priority thread on a prioq */
			assert(ret_thread == thread);
		}

		if (first_thread == THREAD_NULL) {
			first_thread = thread;
			/*
			 * turnstile_kernel_update_inheritor_on_wake_locked will lock
			 * first_thread, so call it before locking it.
			 */
			if (args->priority == WAITQ_PROMOTE_ON_WAKE &&
			    first_thread != THREAD_NULL &&
			    waitq_is_turnstile_queue(safeq)) {
				turnstile_kernel_update_inheritor_on_wake_locked(waitq_to_turnstile(safeq),
				    (turnstile_inheritor_t)first_thread, TURNSTILE_INHERITOR_THREAD);
			}
		}

		/* Add the thread to the result thread list */
		args->nthreads += 1;
		if (args->nthreads == 1 && safeq == waitq) {
			/* first selection from an irq-safe queue: disable interrupts */
			args->spl = splsched();
		}
		thread_lock(thread);
		thread_clear_waitq_state(thread);
		/* thread stays locked on args->threadq; caller wakes and unlocks it */
		enqueue_tail(&args->threadq, &(thread->wait_links));
	}

	return first_thread;
}
2205
2206 /**
2207 * generic thread selection from a waitq (and sets to which the waitq belongs)
2208 *
2209 * Conditions:
2210 * 'waitq' (and the posted waitq) is locked
2211 *
2212 * Notes:
2213 * Uses the optional select callback function to refine the selection
2214 * of one or more threads from a waitq and any set to which the waitq
2215 * belongs. The select callback is invoked once for every thread that
2216 * is found to be waiting on the input args->waitq.
2217 *
2218 * If one or more threads are selected, this may disable interrupts.
2219 * The previous interrupt state is returned in args->spl and should
2220 * be used in a call to splx() if threads are returned to the caller.
2221 */
static void
do_waitq_select_n_locked(struct waitq *waitq, struct waitq_select_args *args)
{
	thread_t first_thread = THREAD_NULL;
	struct waitq *safeq;
	waitq_flags_t remaining_eventmask = 0;
	waitq_flags_t eventmask;

	if (!waitq_irq_safe(waitq)) {
		/* JMM - add flag to waitq to avoid global lookup if no waiters */
		eventmask = _CAST_TO_EVENT_MASK(waitq);
		safeq = waitq_get_safeq(waitq);
		if (safeq == NULL) {
			/*
			 * in the WQT_TSPROXY case, if there's no turnstile,
			 * there's no queue and no waiters, so we can move straight
			 * to the waitq set recursion
			 */
			goto handle_waitq_set;
		}

		/* disable interrupts before taking the irq-safe safeq lock */
		if (args->nthreads == 0) {
			args->spl = splsched();
		}
		waitq_lock(safeq);
	} else {
		/* an irq-safe waitq is its own safe queue; events key the mask */
		eventmask = _CAST_TO_EVENT_MASK(args->event);
		safeq = waitq;
	}

	/*
	 * If the safeq doesn't have an eventmask (not global) or the event
	 * we're looking for IS set in its eventmask, then scan the threads
	 * in that queue for ones that match the original <waitq,event> pair.
	 */
	if (!waitq_is_global(safeq) ||
	    (safeq->waitq_eventmask & eventmask) == eventmask) {
		if (waitq_is_turnstile_queue(safeq)) {
			first_thread = waitq_prioq_iterate_locked(safeq, waitq,
			    args, &remaining_eventmask);
		} else {
			first_thread = waitq_queue_iterate_locked(safeq, waitq,
			    args, &remaining_eventmask);
		}

		/*
		 * Update the eventmask of global queues we just scanned:
		 * - If we selected all the threads in the queue, we can clear its
		 *   eventmask.
		 *
		 * - If we didn't find enough threads to fill our needs, then we can
		 *   assume we looked at every thread in the queue and the mask we
		 *   computed is complete - so reset it.
		 */
		if (waitq_is_global(safeq)) {
			if (waitq_empty(safeq)) {
				safeq->waitq_eventmask = 0;
			} else if (args->nthreads < args->max_threads) {
				safeq->waitq_eventmask = remaining_eventmask;
			}
		}
	}

	/*
	 * Grab the first thread in the queue if no other thread was selected.
	 * We can guarantee that no one has manipulated this thread because
	 * it's waiting on the given waitq, and we have that waitq locked.
	 */
	if (args->nthreads == 0 && first_thread != THREAD_NULL) {
		/* we know this is the first (and only) thread */
		args->nthreads += 1;
		if (safeq == waitq) {
			args->spl = splsched();
		}

		thread_lock(first_thread);
		waitq_thread_remove(safeq, first_thread);
		enqueue_tail(&args->threadq, &first_thread->wait_links);
	}

	/* unlock the safe queue if we locked one above */
	if (safeq != waitq) {
		waitq_unlock(safeq);
		if (args->nthreads == 0) {
			/* nothing selected: restore the interrupt state saved above */
			splx(args->spl);
			args->spl = 0;
		}
	}

	if (args->nthreads >= args->max_threads) {
		return;
	}

handle_waitq_set:
	/*
	 * If this waitq is a member of any wait queue sets, we need to look
	 * for waiting thread(s) in any of those sets, and prepost all sets that
	 * don't have active waiters.
	 *
	 * We do not need to recurse into sets for non NO_EVENT64 events,
	 * threads never wait on sets with a non 0 event.
	 */
	if (args->event == NO_EVENT64) {
		wql_walk_sets(waitq, ^(struct waitq_link *lnk){
			return waitq_select_walk_cb(lnk, waitq, args);
		});
	}
}
2330
2331 static void
waitq_select_args_prepare(struct waitq_select_args * args)2332 waitq_select_args_prepare(struct waitq_select_args *args)
2333 {
2334 queue_init(&args->threadq);
2335 }
2336
2337 /**
2338 * link walk callback invoked once for each set to which a waitq belongs
2339 *
2340 * Conditions:
2341 * initial waitq is locked
2342 * thread is unlocked
2343 *
2344 * Notes:
2345 * This may disable interrupts and early-out of the full DAG link walk by
2346 * returning WQ_ITERATE_FOUND. In this case, the returned thread has
 * been removed from the waitq, its waitq state has been reset, and the
2348 * caller is responsible to call splx() with the returned interrupt state
2349 * in ctx->spl.
2350 */
static int
waitq_select_thread_cb(struct waitq_set *wqset, thread_t thread,
    event64_t event, spl_t *spl)
{
	struct waitq *wqsetq;
	struct waitq *safeq;
	spl_t s;

	wqsetq = &wqset->wqset_q;
	/* wait queue sets are never irq-safe; they always have a safeq */
	assert(!waitq_irq_safe(wqsetq));

	waitq_set_lock(wqset);

	s = splsched();

	/* find and lock the interrupt-safe waitq the thread is thought to be on */
	safeq = waitq_get_safeq(wqsetq);
	waitq_lock(safeq);

	thread_lock(thread);

	if ((thread->waitq == wqsetq) && (thread->wait_event == event)) {
		/* found it waiting on this set: pull it off the queue */
		waitq_thread_remove(wqsetq, thread);
		waitq_unlock(safeq);
		waitq_set_unlock(wqset);
		/*
		 * thread still locked,
		 * return non-zero to break out of WQS walk
		 */
		*spl = s;
		return WQ_ITERATE_FOUND;
	}

	/* not waiting here: undo all locking and keep walking the sets */
	thread_unlock(thread);
	waitq_set_unlock(wqset);
	waitq_unlock(safeq);
	splx(s);

	return WQ_ITERATE_CONTINUE;
}
2391
2392 /**
2393 * returns KERN_SUCCESS and locks 'thread' if-and-only-if 'thread' is waiting
2394 * on 'waitq' (or any set to which waitq belongs) for 'event'
2395 *
2396 * Conditions:
2397 * 'waitq' is locked
2398 * 'thread' is unlocked
2399 */
2400 static kern_return_t
waitq_select_thread_locked(struct waitq * waitq,event64_t event,thread_t thread,spl_t * spl)2401 waitq_select_thread_locked(struct waitq *waitq, event64_t event,
2402 thread_t thread, spl_t *spl)
2403 {
2404 struct waitq *safeq;
2405 kern_return_t kr;
2406 spl_t s;
2407
2408 /* Find and lock the interrupts disabled queue the thread is actually on */
2409 if (!waitq_irq_safe(waitq)) {
2410 safeq = waitq_get_safeq(waitq);
2411 if (safeq == NULL) {
2412 /*
2413 * in the WQT_TSPROXY case, if there's no turnstile,
2414 * there's no queue and no waiters, so we can move straight
2415 * to the waitq set recursion
2416 */
2417 goto handle_waitq_set;
2418 }
2419
2420 s = splsched();
2421 waitq_lock(safeq);
2422 } else {
2423 s = splsched();
2424 safeq = waitq;
2425 }
2426
2427 thread_lock(thread);
2428
2429 if ((thread->waitq == waitq) && (thread->wait_event == event)) {
2430 waitq_thread_remove(safeq, thread);
2431 *spl = s;
2432 /* thread still locked */
2433 return KERN_SUCCESS;
2434 }
2435
2436 thread_unlock(thread);
2437
2438 if (safeq != waitq) {
2439 waitq_unlock(safeq);
2440 }
2441
2442 splx(s);
2443
2444 handle_waitq_set:
2445 if (event != NO_EVENT64) {
2446 /*
2447 * We do not need to recurse into sets for non NO_EVENT64
2448 * events, threads never wait on sets with a non 0 event.
2449 */
2450 return KERN_NOT_WAITING;
2451 }
2452
2453 /*
2454 * The thread may be waiting on a wait queue set to which
2455 * the input 'waitq' belongs. Go look for the thread in
2456 * all wait queue sets. If it's there, we'll remove it
2457 * because it's equivalent to waiting directly on the input waitq.
2458 */
2459 kr = wql_walk_sets(waitq, ^(struct waitq_link *lnk){
2460 return waitq_select_thread_cb(lnk->wql_set,
2461 thread, event, spl);
2462 });
2463
2464 /* we found a thread, return success */
2465 return kr == WQ_ITERATE_FOUND ? KERN_SUCCESS : KERN_NOT_WAITING;
2466 }
2467
2468 /**
2469 * declare a thread's intent to wait on 'waitq' for 'wait_event'
2470 *
2471 * Conditions:
2472 * 'waitq' is locked
2473 */
wait_result_t
waitq_assert_wait64_locked(struct waitq *waitq,
    event64_t wait_event,
    wait_interrupt_t interruptible,
    wait_timeout_urgency_t urgency,
    uint64_t deadline,
    uint64_t leeway,
    thread_t thread)
{
	wait_result_t wait_result;
	struct waitq *safeq;
	uintptr_t eventmask;
	spl_t s;

	/*
	 * Warning: Do _not_ place debugging print statements here.
	 * The waitq is locked!
	 */
	assert(!thread->started || thread == current_thread());

	if (thread->waitq != NULL) {
		panic("thread already waiting on %p", thread->waitq);
	}

	if (waitq_is_set(waitq)) {
		struct waitq_set *wqset = (struct waitq_set *)waitq;
		bool found = false;

		/* threads only ever wait on a set with the NULL event */
		assert(wait_event == NO_EVENT64);

		/*
		 * early-out if the thread is waiting on a wait queue set
		 * that has already been pre-posted.
		 */
		if (wqset->wqset_q.waitq_portset) {
			int ret;

			/*
			 * Run through the list of potential preposts. Because
			 * this is a hot path, we short-circuit the iteration
			 * if we find just one prepost object.
			 */
			ret = wq_prepost_foreach_locked(wqset,
			    ^(struct wq_prepost *wqp, struct waitq *wq) {
				(void)wqp; (void)wq;
				return WQ_ITERATE_FOUND;
			});
			found = ret == WQ_ITERATE_FOUND;
		} else if (wqset->wqset_prepost_id == WQSET_PREPOSTED_ANON) {
			found = true;
		}

		if (found) {
			/* a prepost is pending: don't block, report awakened */
			s = splsched();
			thread_lock(thread);
			thread->wait_result = THREAD_AWAKENED;
			thread_unlock(thread);
			splx(s);
			return THREAD_AWAKENED;
		}
	}

	s = splsched();

	/*
	 * If already dealing with an irq safe wait queue, we are all set.
	 * Otherwise, determine a global queue to use and lock it.
	 */
	if (!waitq_irq_safe(waitq)) {
		safeq = waitq_get_safeq(waitq);
		if (__improbable(safeq == NULL)) {
			panic("Trying to assert_wait on a turnstile proxy "
			    "that hasn't been donated one (waitq: %p)", waitq);
		}
		eventmask = _CAST_TO_EVENT_MASK(waitq);
		waitq_lock(safeq);
	} else {
		safeq = waitq;
		eventmask = _CAST_TO_EVENT_MASK(wait_event);
	}

	/* lock the thread now that we have the irq-safe waitq locked */
	thread_lock(thread);

	/*
	 * This is the extent to which we currently take scheduling attributes
	 * into account. If the thread is vm privileged, we stick it at
	 * the front of the queue. Later, these queues will honor the policy
	 * value set at waitq_init time.
	 */
	wait_result = thread_mark_wait_locked(thread, interruptible);
	/* thread->wait_result has been set */
	if (wait_result == THREAD_WAITING) {
		waitq_thread_insert(safeq, thread, waitq, wait_event);

		if (deadline != 0) {
			boolean_t act;

			/* arm the wait timer for the requested deadline/leeway */
			act = timer_call_enter_with_leeway(thread->wait_timer,
			    NULL,
			    deadline, leeway,
			    urgency, FALSE);
			if (!act) {
				thread->wait_timer_active++;
			}
			thread->wait_timer_is_set = TRUE;
		}

		if (waitq_is_global(safeq)) {
			/* record that this event now has a waiter on the global queue */
			safeq->waitq_eventmask |= eventmask;
		}

		waitq_stats_count_wait(waitq);
	}

	/* unlock the thread */
	thread_unlock(thread);

	/* update the inheritor's thread priority if the waitq is embedded in turnstile */
	if (waitq_is_turnstile_queue(safeq) && wait_result == THREAD_WAITING) {
		turnstile_recompute_priority_locked(waitq_to_turnstile(safeq));
		turnstile_update_inheritor_locked(waitq_to_turnstile(safeq));
	}

	/* unlock the safeq if we locked it here */
	if (safeq != waitq) {
		waitq_unlock(safeq);
	}

	splx(s);

	return wait_result;
}
2608
2609 /**
2610 * remove 'thread' from its current blocking state on 'waitq'
2611 *
2612 * Conditions:
2613 * 'thread' is locked
2614 *
2615 * Notes:
2616 * This function is only used by clear_wait_internal in sched_prim.c
2617 * (which itself is called by the timer wakeup path and clear_wait()).
2618 *
 * If true is returned, then the thread has been pulled successfully.
2620 *
2621 * If false is returned, then behavior depends on the
2622 * CONFIG_WAITQ_IRQSAFE_ALLOW_INVALID being enabled or not.
2623 * When CONFIG_WAITQ_IRQSAFE_ALLOW_INVALID is set,
2624 * then waitq_pull_thread_locked() failing is final,
2625 * else it just means the waitq lock couldn't be taken
2626 * and it needs to be retried.
2627 */
bool
waitq_pull_thread_locked(struct waitq *waitq, thread_t thread)
{
	struct waitq *safeq;

	assert_thread_magic(thread);
	assert(thread->waitq == waitq);

	/* Find the interrupts disabled queue thread is waiting on */
	if (!waitq_irq_safe(waitq)) {
		safeq = waitq_get_safeq(waitq);
		if (__improbable(safeq == NULL)) {
			panic("Trying to clear_wait on a turnstile proxy "
			    "that hasn't been donated one (waitq: %p)", waitq);
		}
	} else {
		safeq = waitq;
	}

	/* thread is already locked so have to try for the waitq lock */
	if (!waitq_lock_try(safeq)) {
#if CONFIG_WAITQ_IRQSAFE_ALLOW_INVALID
		/*
		 * When CONFIG_WAITQ_IRQSAFE_ALLOW_INVALID is on,
		 * all IRQ-safe wait queues are either global,
		 * or allocated from zones which support using
		 * waitq_lock_allow_invalid().
		 *
		 * We hence can resolve the locking inversion safely.
		 */
		bool locked;

		/* drop the thread lock to take the waitq lock in canonical order */
		thread_unlock(thread);
		locked = waitq_lock_allow_invalid(safeq);
		thread_lock(thread);
		/* the thread may have been woken/requeued while unlocked: recheck */
		if (waitq != thread->waitq) {
			/*
			 * the waitq this thread was waiting on either is invalid,
			 * or changed, or both. Either way, we're done.
			 */
			if (locked) {
				waitq_unlock(safeq);
			}
			return false;
		}
		if (__improbable(!locked)) {
			panic("Thread %p is blocked on an invalid waitq %p",
			    thread, waitq);
		}
#else
		/* lock inversion couldn't be resolved: caller must retry */
		return false;
#endif /* CONFIG_WAITQ_IRQSAFE_ALLOW_INVALID */
	}

	waitq_thread_remove(safeq, thread);
	waitq_stats_count_clear_wakeup(waitq);
	waitq_unlock(safeq);
	return true;
}
2687
2688
2689 static inline void
maybe_adjust_thread_pri(thread_t thread,int priority,__kdebug_only struct waitq * waitq)2690 maybe_adjust_thread_pri(thread_t thread, int priority,
2691 __kdebug_only struct waitq *waitq)
2692 {
2693 /*
2694 * If the caller is requesting the waitq subsystem to promote the
2695 * priority of the awoken thread, then boost the thread's priority to
2696 * the default WAITQ_BOOST_PRIORITY (if it's not already equal or
2697 * higher priority). This boost must be removed via a call to
2698 * waitq_clear_promotion_locked before the thread waits again.
2699 *
2700 * WAITQ_PROMOTE_PRIORITY is -2.
2701 * Anything above 0 represents a mutex promotion.
2702 * The default 'no action' value is -1.
2703 * TODO: define this in a header
2704 */
2705 if (priority == WAITQ_PROMOTE_PRIORITY) {
2706 uintptr_t trace_waitq = 0;
2707 if (__improbable(kdebug_enable)) {
2708 trace_waitq = VM_KERNEL_UNSLIDE_OR_PERM(waitq);
2709 }
2710
2711 sched_thread_promote_reason(thread, TH_SFLAG_WAITQ_PROMOTED, trace_waitq);
2712 }
2713 }
2714
static void
waitq_select_queue_flush(struct waitq *waitq, struct waitq_select_args *args)
{
	thread_t thread = THREAD_NULL;
	__assert_only kern_return_t kr;

	/*
	 * Make every selected thread runnable. Each thread on args->threadq
	 * was left locked by the selection pass and is unlocked here after
	 * thread_go().
	 */
	qe_foreach_element_safe(thread, &args->threadq, wait_links) {
		remqueue(&thread->wait_links);
		/* apply the WAITQ_PROMOTE_PRIORITY boost if it was requested */
		maybe_adjust_thread_pri(thread, args->priority, waitq);
		kr = thread_go(thread, args->result, args->options);
		assert(kr == KERN_SUCCESS);
		thread_unlock(thread);
	}
}
2729
2730 /*
2731 * Clear a potential thread priority promotion from a waitq wakeup
2732 * with WAITQ_PROMOTE_PRIORITY.
2733 *
2734 * This must be called on the thread which was woken up with TH_SFLAG_WAITQ_PROMOTED.
2735 */
2736 void
waitq_clear_promotion_locked(struct waitq * waitq,thread_t thread)2737 waitq_clear_promotion_locked(struct waitq *waitq, thread_t thread)
2738 {
2739 spl_t s = 0;
2740
2741 assert(waitq_held(waitq));
2742 assert(thread != THREAD_NULL);
2743 assert(thread == current_thread());
2744
2745 /* This flag is only cleared by the thread itself, so safe to check outside lock */
2746 if ((thread->sched_flags & TH_SFLAG_WAITQ_PROMOTED) != TH_SFLAG_WAITQ_PROMOTED) {
2747 return;
2748 }
2749
2750 if (!waitq_irq_safe(waitq)) {
2751 s = splsched();
2752 }
2753 thread_lock(thread);
2754
2755 sched_thread_unpromote_reason(thread, TH_SFLAG_WAITQ_PROMOTED, 0);
2756
2757 thread_unlock(thread);
2758 if (!waitq_irq_safe(waitq)) {
2759 splx(s);
2760 }
2761 }
2762
2763 /**
2764 * wakeup all threads waiting on 'waitq' for 'wake_event'
2765 *
2766 * Conditions:
2767 * 'waitq' is locked
2768 *
2769 * Notes:
2770 * May temporarily disable and re-enable interrupts
2771 * and re-adjust thread priority of each awoken thread.
2772 *
2773 * If the input 'lock_state' == WAITQ_UNLOCK then the waitq will have
2774 * been unlocked before calling thread_go() on any returned threads, and
2775 * is guaranteed to be unlocked upon function return.
2776 */
2777 kern_return_t
waitq_wakeup64_all_locked(struct waitq * waitq,event64_t wake_event,wait_result_t result,uint64_t * reserved_preposts,int priority,waitq_lock_state_t lock_state)2778 waitq_wakeup64_all_locked(struct waitq *waitq,
2779 event64_t wake_event,
2780 wait_result_t result,
2781 uint64_t *reserved_preposts,
2782 int priority,
2783 waitq_lock_state_t lock_state)
2784 {
2785 struct waitq_select_args args = {
2786 .event = wake_event,
2787 .priority = priority,
2788 .reserved_preposts = reserved_preposts,
2789 .max_threads = UINT32_MAX,
2790 .result = result,
2791 .options = WQ_OPTION_NONE,
2792 };
2793
2794 assert(waitq_held(waitq));
2795
2796 waitq_select_args_prepare(&args);
2797 do_waitq_select_n_locked(waitq, &args);
2798 waitq_stats_count_wakeup(waitq, args.nthreads);
2799
2800 if (lock_state == WAITQ_UNLOCK) {
2801 waitq_unlock(waitq);
2802 }
2803
2804 waitq_select_queue_flush(waitq, &args);
2805
2806 if (args.nthreads > 0) {
2807 splx(args.spl);
2808 return KERN_SUCCESS;
2809 }
2810
2811 return KERN_NOT_WAITING;
2812 }
2813
2814 /**
2815 * wakeup one thread waiting on 'waitq' for 'wake_event'
2816 *
2817 * Conditions:
2818 * 'waitq' is locked
2819 *
2820 * Notes:
2821 * May temporarily disable and re-enable interrupts.
2822 */
2823 kern_return_t
waitq_wakeup64_one_locked(struct waitq * waitq,event64_t wake_event,wait_result_t result,uint64_t * reserved_preposts,int priority,waitq_lock_state_t lock_state,waitq_options_t option)2824 waitq_wakeup64_one_locked(struct waitq *waitq,
2825 event64_t wake_event,
2826 wait_result_t result,
2827 uint64_t *reserved_preposts,
2828 int priority,
2829 waitq_lock_state_t lock_state,
2830 waitq_options_t option)
2831 {
2832 struct waitq_select_args args = {
2833 .event = wake_event,
2834 .priority = priority,
2835 .reserved_preposts = reserved_preposts,
2836 .max_threads = 1,
2837 .result = result,
2838 .options = option,
2839 };
2840
2841 assert(waitq_held(waitq));
2842
2843 waitq_select_args_prepare(&args);
2844 do_waitq_select_n_locked(waitq, &args);
2845 waitq_stats_count_wakeup(waitq, args.nthreads);
2846
2847 if (lock_state == WAITQ_UNLOCK) {
2848 waitq_unlock(waitq);
2849 }
2850
2851 waitq_select_queue_flush(waitq, &args);
2852
2853 if (args.nthreads > 0) {
2854 splx(args.spl);
2855 return KERN_SUCCESS;
2856 }
2857
2858 return KERN_NOT_WAITING;
2859 }
2860
2861 /**
2862 * wakeup one thread waiting on 'waitq' for 'wake_event'
2863 *
2864 * Conditions:
2865 * 'waitq' is locked
2866 *
2867 * Returns:
2868 * A locked, runnable thread.
2869 * If return value is non-NULL, interrupts have also
2870 * been disabled, and the caller is responsible to call
2871 * splx() with the returned '*spl' value.
2872 */
thread_t
waitq_wakeup64_identify_locked(struct waitq *waitq,
    event64_t wake_event,
    wait_result_t result,
    spl_t *spl,
    uint64_t *reserved_preposts,
    int priority,
    waitq_lock_state_t lock_state)
{
	struct waitq_select_args args = {
		.event = wake_event,
		.priority = priority,
		.reserved_preposts = reserved_preposts,
		.max_threads = 1,
	};
	thread_t thread = THREAD_NULL;

	assert(waitq_held(waitq));

	waitq_select_args_prepare(&args);
	do_waitq_select_n_locked(waitq, &args);
	waitq_stats_count_wakeup(waitq, args.nthreads);

	if (lock_state == WAITQ_UNLOCK) {
		waitq_unlock(waitq);
	}

	if (args.nthreads > 0) {
		kern_return_t __assert_only ret;

		/* max_threads == 1, so exactly one thread was queued */
		thread = qe_dequeue_head(&args.threadq, struct thread, wait_links);
		assert(args.nthreads == 1 && queue_empty(&args.threadq));

		/* apply the WAITQ_PROMOTE_PRIORITY boost if it was requested */
		maybe_adjust_thread_pri(thread, priority, waitq);
		ret = thread_go(thread, result, WQ_OPTION_NONE);
		assert(ret == KERN_SUCCESS);
		/* hand the saved interrupt state back; caller must splx(*spl) */
		*spl = args.spl;
	}

	return thread; /* locked if not NULL (caller responsible for spl) */
}
2914
2915 /**
2916 * wakeup a specific thread iff it's waiting on 'waitq' for 'wake_event'
2917 *
2918 * Conditions:
2919 * 'waitq' is locked
2920 * 'thread' is unlocked
2921 *
2922 * Notes:
2923 * May temporarily disable and re-enable interrupts
2924 *
2925 * If the input lock_state == WAITQ_UNLOCK then the waitq will have been
2926 * unlocked before calling thread_go() if 'thread' is to be awoken, and
2927 * is guaranteed to be unlocked upon function return.
2928 */
kern_return_t
waitq_wakeup64_thread_locked(struct waitq *waitq,
    event64_t wake_event,
    thread_t thread,
    wait_result_t result,
    waitq_lock_state_t lock_state)
{
	kern_return_t ret;
	spl_t th_spl;

	assert(waitq_held(waitq));
	assert_thread_magic(thread);

	/*
	 * See if the thread was still waiting there. If so, it got
	 * dequeued and returned locked.
	 */
	ret = waitq_select_thread_locked(waitq, wake_event, thread, &th_spl);
	waitq_stats_count_wakeup(waitq, ret == KERN_SUCCESS ? 1 : 0);

	if (lock_state == WAITQ_UNLOCK) {
		waitq_unlock(waitq);
	}

	if (ret != KERN_SUCCESS) {
		return KERN_NOT_WAITING;
	}

	/* thread came back locked with interrupts disabled (th_spl) */
	ret = thread_go(thread, result, WQ_OPTION_NONE);
	assert(ret == KERN_SUCCESS);
	thread_unlock(thread);
	splx(th_spl);

	return ret;
}
2964
2965
2966
2967 /* ----------------------------------------------------------------------
2968 *
2969 * In-Kernel API
2970 *
2971 * ---------------------------------------------------------------------- */
2972
2973 #if CONFIG_WAITQ_IRQSAFE_ALLOW_INVALID && MACH_ASSERT
2974 /*
2975 * CONFIG_WAITQ_IRQSAFE_ALLOW_INVALID relies on waitq memory to always
2976 * be one of:
2977 * - a waitq
2978 * - zeroed memory
2979 * - unmapped memory
2980 *
2981 * This function allows to assert that this generally looks true.
2982 */
2983 #include <kern/zalloc_internal.h>
2984
2985 extern char __data_segment_start[] __SEGMENT_START_SYM("__DATA");
2986 extern char __data_segment_end[] __SEGMENT_END_SYM("__DATA");
2987
static inline bool
waitq_is_in_kernel_data_or_adequate_zone(struct waitq *waitq)
{
	zone_id_t zid;

	/* global waitqs are statically allocated and always acceptable */
	if (waitq_is_global(waitq)) {
		return true;
	}

	/*
	 * Zone-backed waitqs qualify only if the zone sequesters its VA and
	 * skips the KASAN quarantine, per the memory requirements above.
	 */
	zid = zone_id_for_native_element(waitq, sizeof(*waitq));
	if (zid != ZONE_ID_INVALID) {
		return zone_security_array[zid].z_va_sequester &&
		       zone_array[zid].kasan_noquarantine;
	}

	/* otherwise the waitq must be embedded in the kernel __DATA segment */
	const char *dataStart;
	const char *dataEnd;

#if PLATFORM_MACOS // 78481451
	unsigned long sz;
	dataStart = getsegdatafromheader(&_mh_execute_header, "__DATA", &sz);
	dataEnd = dataStart + sz;
#else
	dataStart = __data_segment_start;
	dataEnd = __data_segment_end;
#endif

	/* sfi, thread calls, ... */
	return dataStart <= (char *)waitq && (char *)(waitq + 1) <= dataEnd;
}
3018 #endif
3019
3020 /**
3021 * initialize a waitq object
3022 */
void
waitq_init(struct waitq *waitq, int policy)
{
	assert(waitq != NULL);
	/* SYNC_POLICY_FIXED_PRIORITY is not supported by this implementation */
	assert((policy & SYNC_POLICY_FIXED_PRIORITY) == 0);

#if CONFIG_WAITQ_IRQSAFE_ALLOW_INVALID
	/* irq-safe waitqs must live in memory meeting the requirements above */
	if (policy & SYNC_POLICY_DISABLE_IRQ) {
		assert(waitq_is_in_kernel_data_or_adequate_zone(waitq));
	}
#endif

	waitq->waitq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0);
	waitq->waitq_irq = !!(policy & SYNC_POLICY_DISABLE_IRQ);
	if (policy & SYNC_POLICY_TURNSTILE_PROXY) {
		waitq->waitq_type = WQT_TSPROXY;
	} else {
		waitq->waitq_type = WQT_QUEUE;
	}
	waitq->waitq_turnstile = !!(policy & SYNC_POLICY_TURNSTILE);
	waitq->waitq_eventmask = 0;

	waitq->waitq_set_id = WAITQ_REF_NULL;
	waitq->waitq_prepost_id = 0;

	if (waitq_is_turnstile_queue(waitq)) {
		/* For turnstile, initialize it as a priority queue */
		priority_queue_init(&waitq->waitq_prio_queue);
		/* priority queues cannot also claim FIFO ordering */
		assert(waitq->waitq_fifo == 0);
	} else if (policy & SYNC_POLICY_TURNSTILE_PROXY) {
		/* a proxy has no queue of its own; it refers to a donated turnstile */
		waitq->waitq_ts = TURNSTILE_NULL;
		waitq->waitq_tspriv = NULL;
	} else {
		queue_init(&waitq->waitq_queue);
	}

	if (policy & SYNC_POLICY_INIT_LOCKED) {
		hw_lck_ticket_init_locked(&waitq->waitq_interlock, &waitq_lck_grp);
	} else {
		hw_lck_ticket_init(&waitq->waitq_interlock, &waitq_lck_grp);
	}
}
3065
3066 /**
3067 * cleanup any link/prepost table resources associated with a waitq
3068 */
void
waitq_deinit(struct waitq *waitq)
{
	/* sets must be torn down with waitq_set_deinit* instead */
	assert(!waitq_is_set(waitq));

	if (waitq_valid(waitq)) {
		/*
		 * We must invalidate under the lock as many waitqs
		 * use this invalidation state for their logic (see ports)
		 * and changing it outside of a lock hold might mess
		 * the state machine of the enclosing object.
		 */
		waitq_lock(waitq);
		waitq_invalidate(waitq);
		if (waitq_irq_safe(waitq)) {
			waitq_unlock(waitq);
		} else {
			/* also sever set memberships; this drops the lock */
			waitq_unlink_all_unlock(waitq);
		}
	}

	hw_lck_ticket_destroy(&waitq->waitq_interlock, true, &waitq_lck_grp);

	/*
	 * it is the responsibility of the waitq client to wake up all waiters
	 */
#if MACH_ASSERT
	if (waitq_is_turnstile_queue(waitq)) {
		assert(priority_queue_empty(&waitq->waitq_prio_queue));
	} else if (waitq_is_turnstile_proxy(waitq)) {
		assert(waitq->waitq_ts == TURNSTILE_NULL);
	} else if (waitq_is_queue(waitq)) {
		assert(queue_empty(&waitq->waitq_queue));
	} else {
		assert(waitq->waitq_type == WQT_INVALID);
	}
#endif // MACH_ASSERT
}
3107
3108 /**
3109 * Invalidate a waitq.
3110 *
3111 * It is the responsibility of the caller to make sure that:
3112 * - all waiters are woken up
3113 * - linkages and preposts are cleared (non IRQ Safe waitqs).
3114 */
3115 void
waitq_invalidate(struct waitq * waitq)3116 waitq_invalidate(struct waitq *waitq)
3117 {
3118 hw_lck_ticket_invalidate(&waitq->waitq_interlock);
3119 }
3120
3121 /**
3122 * invalidate the given wq_prepost chain
3123 */
3124 static void
wqset_clear_prepost_chain(uint64_t prepost_id)3125 wqset_clear_prepost_chain(uint64_t prepost_id)
3126 {
3127 if (prepost_id == WQSET_PREPOSTED_ANON) {
3128 return;
3129 }
3130
3131 (void)wq_prepost_iterate(prepost_id,
3132 ^(struct wq_prepost *wqp, struct waitq __unused *waitq) {
3133 if (wqp_type(wqp) == WQP_POST) {
3134 wq_prepost_invalidate(wqp);
3135 }
3136 return WQ_ITERATE_CONTINUE;
3137 });
3138 }
3139
/**
 * initialize a waitq set object
 *
 * Zeroes the structure, initializes the embedded wait queue with the
 * given policy, then tags the queue as WQT_SET (after waitq_init so the
 * type sticks). The waitq_portset bit records whether the caller asked
 * for SYNC_POLICY_PORT_SET behavior.
 */
void
waitq_set_init(struct waitq_set *wqset, int policy)
{
	memset(wqset, 0, sizeof(*wqset));

	waitq_init(&wqset->wqset_q, policy);
	wqset->wqset_q.waitq_portset = (policy & SYNC_POLICY_PORT_SET) != 0;
	wqset->wqset_q.waitq_type = WQT_SET;

	/* Lazy allocate the link only when an actual id is needed. */
	wqset->wqset_id = WQSET_NOT_LINKED;
}
3155
void
waitq_set_reset_anon_prepost(struct waitq_set *wqset)
{
	/*
	 * Only meaningful for non-port-set sets, whose wqset_prepost_id
	 * acts as an anonymous "was preposted" marker rather than the
	 * head of a prepost object chain.
	 */
	assert(waitq_set_is_valid(wqset) && !wqset->wqset_q.waitq_portset);
	wqset->wqset_prepost_id = 0;
}
3162
3163 #if DEVELOPMENT || DEBUG
/* sysctl helper: number of elements currently used in the link table */
int
sysctl_helper_waitq_set_nelem(void)
{
	return ltable_nelem(&g_wqlinktable);
}
3169 #endif
3170
3171 /**
3172 * initialize a waitq set link.
3173 *
3174 * Conditions:
3175 * may block
 * locks and unlocks the waitq set lock
3177 *
3178 */
void
waitq_set_lazy_init_link(struct waitq_set *wqset)
{
	struct waitq_link *link;

	/* the allocation below may block: must not be in atomic context */
	assert(get_preemption_level() == 0 && waitq_wait_possible(current_thread()));

	/* quick check under the lock: nothing to do if already linked */
	waitq_set_lock(wqset);
	if (wqset->wqset_id != WQSET_NOT_LINKED) {
		waitq_set_unlock(wqset);
		return;
	}

	/* drop the lock to allocate: wql_alloc_link() may block */
	waitq_set_unlock(wqset);

	link = wql_alloc_link(WQL_WQS);
	if (!link) {
		panic("Can't allocate link object for waitq set: %p", wqset);
	}

	link->wql_set = wqset;

	/*
	 * Re-check under the lock: another thread may have raced us and
	 * done the same lazy initialization. Only the winner publishes
	 * its link's id; the loser's reference is dropped below.
	 */
	waitq_set_lock(wqset);
	if (wqset->wqset_id == WQSET_NOT_LINKED) {
		assert(waitq_set_is_valid(wqset));
		wql_mkvalid(link);
		wqset->wqset_id = link->wql_setid.id;
	}
	waitq_set_unlock(wqset);

	wql_put_link(link);
}
3211
3212 /**
3213 * clear out / release any resources associated with a waitq set
3214 *
3215 * Conditions:
3216 * may block
3217 * waitqset is locked
3218 * Note:
3219 * This will render the waitq set invalid, and it must
3220 * be re-initialized with waitq_set_init before it can be used again
3221 */
void
waitq_set_deinit_and_unlock(struct waitq_set *wqset)
{
	struct waitq_link *link = NULL;
	uint64_t set_id, prepost_id;

	assert(waitqs_is_set(wqset));

	set_id = wqset->wqset_id;

	if (waitqs_is_linked(wqset)) {
		/* grab the set's link object and invalidate it */
		link = wql_get_link(set_id);
		if (link) {
			wql_invalidate(link);
		}
	}

	/*
	 * always clear the wqset_id, including WQSET_NOT_LINKED,
	 * so that waitq_set_lazy_init_link() does nothing
	 * once a set is invalidated (because of course,
	 * port-sets do that).
	 */
	wqset->wqset_id = 0;

	/*
	 * This set may have a lot of preposts, or may have been a member of
	 * many other sets. To minimize spinlock hold times, we clear out the
	 * waitq set data structure under the lock-hold, but don't clear any
	 * table objects. We keep handles to the prepost and set linkage
	 * objects and free those outside the critical section.
	 */
	prepost_id = wqset->wqset_prepost_id;
	if (prepost_id) {
		/* a prepost chain implies the set had a link object */
		assert(link != NULL);
		wqset->wqset_prepost_id = 0;
	}

	wqset->wqset_q.waitq_fifo = 0;
	waitq_invalidate(&wqset->wqset_q);

	/* don't clear the 'waitq_irq' bit: it's used in locking! */
	wqset->wqset_q.waitq_eventmask = 0;

	waitq_set_unlock(wqset);

	/* drop / unlink all the prepost table objects */
	if (prepost_id) {
		wqset_clear_prepost_chain(prepost_id);
	}

	if (link) {
		/*
		 * wql_walk_sets may race with us for access to the waitq set.
		 * If wql_walk_sets has a reference to the set, then we should wait
		 * until the link's refcount goes to 1 (our reference) before we exit
		 * this function. That way we ensure that the waitq set memory will
		 * remain valid even though it's been cleared out.
		 */
		while (wql_refcnt(link) > 1) {
			delay(1);
		}
		wql_put_link(link);
	}

	/* the interlock was invalidated above; now tear down the lock itself */
	hw_lck_ticket_destroy(&wqset->wqset_q.waitq_interlock, true, &waitq_lck_grp);
}
3290
3291
3292 /**
3293 * clear out / release any resources associated with a waitq set
3294 *
3295 * Conditions:
3296 * may block
3297 * Note:
3298 * This will render the waitq set invalid, and it must
3299 * be re-initialized with waitq_set_init before it can be used again
3300 */
void
waitq_set_deinit(struct waitq_set *wqset)
{
	if (!waitqs_is_set(wqset)) {
		panic("trying to de-initialize an invalid wqset @%p", wqset);
	}

	/* only non IRQ-safe sets may be torn down through this path */
	assert(!waitq_irq_safe(&wqset->wqset_q));

	waitq_set_lock(wqset);

	/* drops the set lock */
	waitq_set_deinit_and_unlock(wqset);
}
3314
3315 /**
3316 * clear all preposts originating from 'waitq'
3317 *
3318 * Conditions:
3319 * 'waitq' locked
3320 * may (rarely) spin waiting for another on-core thread to
3321 * release the last reference to the waitq's prepost link object
3322 *
3323 * NOTE:
3324 * If this function needs to spin, it will drop the waitq lock!
3325 * The return value of the function indicates whether or not this
3326 * happened: 1 == lock was dropped, 0 == lock held
3327 */
int
waitq_clear_prepost_locked(struct waitq *waitq)
{
	struct wq_prepost *wqp;
	int dropped_lock = 0;

	assert(!waitq_irq_safe(waitq));

	/* fast path: this waitq never preposted */
	if (waitq->waitq_prepost_id == 0) {
		return 0;
	}

	wqp = wq_prepost_get(waitq->waitq_prepost_id);
	waitq->waitq_prepost_id = 0;
	if (wqp) {
		wqdbg_v("invalidate prepost 0x%llx (refcnt:%d)",
		    wqp->wqp_prepostid.id, wqp_refcnt(wqp));
		wq_prepost_invalidate(wqp);
		while (wqp_refcnt(wqp) > 1) {
			/*
			 * Some other thread must have raced us to grab a link
			 * object reference before we invalidated it. This
			 * means that they are probably trying to access the
			 * waitq to which the prepost object points. We need
			 * to wait here until the other thread drops their
			 * reference. We know that no one else can get a
			 * reference (the object has been invalidated), and
			 * that prepost references are short-lived (dropped on
			 * a call to wq_prepost_put). We also know that no one
			 * blocks while holding a reference therefore the
			 * other reference holder must be on-core. We'll just
			 * sit and wait for the other reference to be dropped.
			 */
			disable_preemption();

			waitq_unlock(waitq);
			dropped_lock = 1;
			/*
			 * don't yield here, just spin and assume the other
			 * consumer is already on core...
			 */
			delay(1);

			waitq_lock(waitq);

			enable_preemption();
		}

		/* drop the reference obtained by wq_prepost_get() above */
		wq_prepost_put(wqp);
	}

	return dropped_lock;
}
3381
3382 /**
 * return the waitq's prepost object ID (allocating one if necessary)
3384 *
3385 * Conditions:
3386 * 'waitq' is unlocked
3387 */
uint64_t
waitq_get_prepost_id(struct waitq *waitq)
{
	struct wq_prepost *wqp;
	uint64_t wqp_id = 0;

	if (!waitq_valid(waitq)) {
		return 0;
	}

	assert(waitq_is_queue(waitq) || waitq_is_turnstile_proxy(waitq));
	assert(!waitq_irq_safe(waitq));

	waitq_lock(waitq);

	/* the waitq may have been invalidated while we took the lock */
	if (!waitq_valid(waitq)) {
		goto out_unlock;
	}

	/* fast path: a prepost object already exists for this waitq */
	if (waitq->waitq_prepost_id) {
		wqp_id = waitq->waitq_prepost_id;
		goto out_unlock;
	}

	/* don't hold a spinlock while allocating a prepost object */
	waitq_unlock(waitq);

	wqp = wq_prepost_alloc(WQP_WQ, 1);
	if (!wqp) {
		return 0;
	}

	/* re-acquire the waitq lock */
	waitq_lock(waitq);

	if (!waitq_valid(waitq)) {
		/* the waitq was torn down while we were allocating */
		wq_prepost_put(wqp);
		wqp_id = 0;
		goto out_unlock;
	}

	if (waitq->waitq_prepost_id) {
		/* we were beat by someone else */
		wq_prepost_put(wqp);
		wqp_id = waitq->waitq_prepost_id;
		goto out_unlock;
	}

	/* publish the new prepost object for this waitq */
	wqp->wqp_wq.wqp_wq_ptr = waitq;

	wqp_set_valid(wqp);
	wqp_id = wqp->wqp_prepostid.id;
	waitq->waitq_prepost_id = wqp_id;

	wq_prepost_put(wqp);

out_unlock:
	waitq_unlock(waitq);

	return wqp_id;
}
3449
3450
3451 /**
3452 * determine if 'waitq' is a member of 'wqset'
3453 *
3454 * Conditions:
3455 * 'waitq' is locked
3456 * 'wqset' is not locked
3457 * may disable and re-enable interrupts while locking 'waitq'
3458 */
bool
waitq_member_locked(struct waitq *waitq, struct waitq_set *wqset)
{
	waitq_ref_t root_ref = waitq->waitq_set_id;
	uint64_t setid = wqset->wqset_id;

	/*
	 * A set with no allocated link has never had members; a waitq
	 * with a null set ref belongs to no set.
	 */
	if (!waitqs_is_linked(wqset) || wqr_is_null(root_ref)) {
		return false;
	}

	/* single membership: the ref holds one set id inline */
	if (!wqr_is_ptr(root_ref)) {
		return wqr_is_equal(root_ref, setid);
	}

	/*
	 * Multiple memberships: walk the link chain. Each link holds one
	 * set id in wql_node; the final link's wql_next carries the last
	 * set id inline (not a pointer), which terminates the walk.
	 */
	waitq_foreach_link(link, root_ref) {
		if (link->wql_node == setid) {
			return true;
		}
		if (!wqr_is_ptr(link->wql_next)) {
			return wqr_is_equal(link->wql_next, setid);
		}
	}

	__builtin_unreachable();
}
3484
__abortlike
static void
__waitq_link_arguments_panic(struct waitq *waitq, struct waitq_set *wqset)
{
	/*
	 * Re-run the individual checks, in the same order as the
	 * validation, to produce the most specific panic message.
	 * Only reached when __waitq_link_arguments_validate() failed.
	 */
	if (!waitq_valid(waitq) || waitq_irq_safe(waitq)) {
		panic("Invalid waitq: %p", waitq);
	}
	if (!waitq_is_queue(waitq) && !waitq_is_turnstile_proxy(waitq)) {
		panic("Invalid waitq type: %p:%d", waitq, waitq->waitq_type);
	}
	panic("Invalid waitq-set: %p", wqset);
}
3497
3498 static inline void
__waitq_link_arguments_validate(struct waitq * waitq,struct waitq_set * wqset)3499 __waitq_link_arguments_validate(struct waitq *waitq, struct waitq_set *wqset)
3500 {
3501 if (!waitq_valid(waitq) || waitq_irq_safe(waitq) ||
3502 (!waitq_is_queue(waitq) && !waitq_is_turnstile_proxy(waitq)) ||
3503 !waitqs_is_set(wqset)) {
3504 __waitq_link_arguments_panic(waitq, wqset);
3505 }
3506 }
3507
__abortlike
static void
__waitq_invalid_panic(struct waitq *waitq)
{
	/* out-of-line panic keeps __waitq_validate()'s fast path small */
	panic("Invalid waitq: %p", waitq);
}
3514
3515
static void
__waitq_validate(struct waitq *waitq)
{
	/* panic (out of line) unless 'waitq' is valid */
	if (!waitq_valid(waitq)) {
		__waitq_invalid_panic(waitq);
	}
}
3523
/**
 * pre-allocate a waitq link structure from the link table
 *
 * May block (Z_WAITOK). The returned ref wraps a zero-filled link from
 * waitq_link_zone; it is later consumed by waitq_link() or returned
 * with waitq_link_release().
 */
waitq_ref_t
waitq_link_reserve(void)
{
	return wqr_make(zalloc_flags(waitq_link_zone, Z_WAITOK | Z_ZERO));
}
3532
3533 /**
3534 * release a pre-allocated waitq link structure
3535 */
3536 void
waitq_link_release(waitq_ref_t ref)3537 waitq_link_release(waitq_ref_t ref)
3538 {
3539 struct waitq_link *link = wqr_ptr_raw(ref);
3540
3541 if (link) {
3542 zfree(waitq_link_zone, link);
3543 #if CONFIG_LTABLE_STATS
3544 g_wqlinktable.nreserved_releases += 1;
3545 #endif
3546 }
3547 }
3548
3549 /**
3550 * link 'waitq' to 'wqset' using the 'link' structure
3551 *
3552 * Conditions:
3553 * 'waitq' is locked
3554 * caller should have a reference to the 'link' object,
3555 * that this function consumes
3556 */
static kern_return_t
waitq_link_internal(struct waitq *waitq, struct waitq_set *wqset,
    waitq_ref_t link_ref)
{
	waitq_ref_t *refp;
	uint64_t setid = wqset->wqset_id;
	uint64_t *dead_ref = NULL;

	assert(waitq_held(waitq));
	assert(waitqs_is_linked(wqset));

	/*
	 * If the waitq_set_id field is empty, then this waitq is not
	 * a member of any other set. All we have to do is update the
	 * field.
	 */
	refp = &waitq->waitq_set_id;
	if (wqr_is_null(*refp)) {
		*refp = wqr_make(setid);
		waitq_link_release(link_ref);
		return KERN_SUCCESS;
	}

	/*
	 * Check to see if it's already a member of the set.
	 * Similar to waitq_member_locked() but remember
	 * the last invalid ref we met to try to reuse it.
	 *
	 * This allows us not to have to do any expensive
	 * ltable get/set operations, while still being able to "GC" links.
	 */
	for (;;) {
		if (wqr_is_ptr(*refp)) {
			struct waitq_link *tmp = wqr_ptr_raw(*refp);

			if (!wql_link_valid(tmp->wql_node)) {
				/* dead set id in a link node: reusable slot */
				dead_ref = &tmp->wql_node;
			} else if (tmp->wql_node == setid) {
				waitq_link_release(link_ref);
				return KERN_ALREADY_IN_SET;
			}

			refp = &tmp->wql_next;
		} else {
			/* tail of the chain: an inline set id, not a pointer */
			if (!wql_link_valid(refp->wqr_value)) {
				dead_ref = &refp->wqr_value;
			} else if (wqr_is_equal(*refp, setid)) {
				waitq_link_release(link_ref);
				return KERN_ALREADY_IN_SET;
			}
			break;
		}
	}

	/*
	 * This wait queue is _not_ a member of the given set.
	 *
	 * If we found an empty "ref" during traversal, reuse it,
	 * else use our previously allocated link object,
	 * and hook it up to the wait queue.
	 *
	 * Note that it's possible that one or more of the wait queue sets to
	 * which the wait queue belongs was invalidated before we allocated
	 * this link object. That's OK because the next time we use that
	 * object we'll just ignore it.
	 */

	if (dead_ref) {
		/* overwrite the dead set id in place: no new link needed */
		*dead_ref = setid;
		waitq_link_release(link_ref);
		return KERN_SUCCESS;
	}

	waitq_ref_t root_ref = waitq->waitq_set_id;
	struct waitq_link *link = wqr_ptr_raw(link_ref);

	/*
	 * Push the new link at the head of the chain. Invariant: the head
	 * link tracks the total membership count; interior links carry
	 * LT_IDX_MAX.
	 */
	if (wqr_is_ptr(root_ref)) {
		struct waitq_link *root = wqr_ptr_raw(root_ref);

		link->wql_count = root->wql_count + 1;
		root->wql_count = LT_IDX_MAX;
	} else {
		/* growing from one inline id to one link + inline tail */
		link->wql_count = 2;
	}
	link->wql_next = root_ref;
	link->wql_node = setid;
	link->wqte.lt_bits = WQL_LINK << LT_BITS_TYPE_SHIFT;

	waitq->waitq_set_id = link_ref;

	return KERN_SUCCESS;
}
3649
3650 /**
3651 * link 'waitq' to 'wqset'
3652 *
3653 * Conditions:
3654 * if 'lock_state' contains WAITQ_SHOULD_LOCK, 'waitq' must be unlocked.
3655 * Otherwise, 'waitq' must be locked.
3656 *
3657 * may (rarely) block on link table allocation if the table has to grow,
3658 * and no 'reserved_link' object is passed.
3659 *
3660 * may block and acquire wqset lock if the wqset passed has no link.
3661 *
3662 * Notes:
3663 * The caller can guarantee that this function will never block by
3664 * - pre-allocating a link table object and passing its ID in 'reserved_link'
3665 * - and pre-allocating the waitq set link calling waitq_set_lazy_init_link.
3666 * It is not possible to provide a reserved_link without having also linked
3667 * the wqset.
3668 */
kern_return_t
waitq_link(struct waitq *waitq, struct waitq_set *wqset,
    waitq_lock_state_t lock_state, waitq_ref_t *reserved_link)
{
	kern_return_t kr;
	waitq_ref_t link;
	int should_lock = (lock_state == WAITQ_SHOULD_LOCK);

	__waitq_link_arguments_validate(waitq, wqset);

	wqdbg_v("Link waitq %p to wqset 0x%llx",
	    (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), wqset->wqset_id);

	/*
	 * We _might_ need a new link object here, so we'll grab outside
	 * the lock because the alloc call _might_ block.
	 *
	 * If the caller reserved a link beforehand, then wql_get_link
	 * is guaranteed not to block because the caller holds an extra
	 * reference to the link which, in turn, hold a reference to the
	 * link table.
	 */
	if (!reserved_link || wqr_is_null(*reserved_link)) {
		/* may block: ensure the set is linked, then reserve ours */
		if (!waitqs_is_linked(wqset)) {
			waitq_set_lazy_init_link(wqset);
		}

		link = waitq_link_reserve();
	} else {
		link = *reserved_link;
		/* always consume the caller's reference */
		*reserved_link = WAITQ_REF_NULL;
	}

	if (should_lock) {
		waitq_lock(waitq);
	}

	/* consumes link */
	kr = waitq_link_internal(waitq, wqset, link);

	if (should_lock) {
		waitq_unlock(waitq);
	}

	return kr;
}
3716
3717 /**
3718 * unlink 'waitq' from all wqsets and then link to 'newset'
3719 *
3720 * Conditions:
3721 * waitq locked on entry, unlocked on return
3722 */
static void
waitq_unlink_all_unlock_internal(struct waitq *waitq, waitq_ref_t newset)
{
	waitq_ref_t old_set_id;

	assert(!waitq_irq_safe(waitq));

	/* swap the membership chain for the new (possibly null) ref */
	old_set_id = waitq->waitq_set_id;
	waitq->waitq_set_id = newset;

	if (wqr_is_null(old_set_id)) {
		waitq_unlock(waitq);
	} else {
		/*
		 * invalidate the prepost entry for this waitq.
		 *
		 * This may drop and re-acquire the waitq lock,
		 * but that's OK because if it was added to another set
		 * and preposted to that set in the time we drop the lock,
		 * the state will remain consistent.
		 */
		(void)waitq_clear_prepost_locked(waitq);
		waitq_unlock(waitq);

		/* free every link object in the old membership chain */
		waitq_foreach_link_safe(link, old_set_id) {
			waitq_link_release(wqr_make(link));
		}
	}
}
3752
3753 /**
3754 * unlink 'waitq' from all sets to which it belongs
3755 *
3756 * Conditions:
3757 * 'waitq' is locked on entry
3758 * returns with waitq lock dropped
3759 *
3760 * Notes:
3761 * may (rarely) spin (see waitq_clear_prepost_locked)
3762 */
void
waitq_unlink_all_unlock(struct waitq *waitq)
{
	assert(!waitq_irq_safe(waitq));

	wqdbg_v("unlink waitq %p from all sets",
	    (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq));

	/* replace the membership list with the null ref; drops the lock */
	waitq_unlink_all_unlock_internal(waitq, WAITQ_REF_NULL);
}
3773
3774 /**
3775 * unlink 'waitq' from all wqsets and then link to 'wqset'
3776 *
3777 * Conditions:
3778 * waitq locked on entry, unlocked on return
3779 */
void
waitq_unlink_all_relink_unlock(struct waitq *waitq, struct waitq_set *wqset)
{
	__waitq_link_arguments_validate(waitq, wqset);

	wqdbg_v("Link waitq %p to wqset 0x%llx",
	    (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), wqset->wqset_id);

	/*
	 * the new membership is the single inline set id; the old chain
	 * (and any prepost) is torn down by the helper, which also
	 * drops the waitq lock.
	 */
	waitq_unlink_all_unlock_internal(waitq, wqr_make(wqset->wqset_id));
}
3790
3791 /**
3792 * clear out any prepost from waitq into wqset
3793 *
3794 * TODO: this could be more efficient than a linear search of
3795 * the waitq set's prepost list.
3796 */
static void
waitq_unlink_prepost(struct waitq_set *wqset, struct waitq *unlink_wq)
{
	assert(!waitq_irq_safe(&wqset->wqset_q));

	/*
	 * Non-port-set sets only use the anonymous prepost marker, so
	 * there is no per-waitq prepost entry to remove.
	 */
	if (!wqset->wqset_q.waitq_portset) {
		assert(wqset->wqset_prepost_id == 0 ||
		    wqset->wqset_prepost_id == WQSET_PREPOSTED_ANON);
		return;
	}

	waitq_set_lock(wqset);

	/* linear scan of the set's prepost list for 'unlink_wq' */
	(void)wq_prepost_iterate(wqset->wqset_prepost_id,
	    ^(struct wq_prepost *wqp, struct waitq *waitq) {
		if (waitq != unlink_wq) {
			return WQ_ITERATE_CONTINUE;
		}

		if (wqp_type(wqp) == WQP_WQ) {
			/* this is the only prepost on this wait queue set */
			assert(wqp->wqp_prepostid.id == wqset->wqset_prepost_id);
			wqdbg_v("unlink wqp (WQ) 0x%llx", wqp->wqp_prepostid.id);
			wqset->wqset_prepost_id = 0;
		} else {
			/*
			 * The prepost object 'wqp' points to a waitq which
			 * should no longer be preposted to 'wqset'.
			 *
			 * We can remove the prepost object from the list and
			 * break out of the iteration.
			 */
			wq_prepost_remove(wqset, wqp);
		}
		return WQ_ITERATE_BREAK;
	});

	waitq_set_unlock(wqset);
}
3836
3837 /**
3838 * unlink 'waitq' from 'wqset'
3839 *
3840 * Conditions:
3841 * 'waitq' is locked
3842 * 'wqset' is _not_ locked
3843 * may (rarely) spin in prepost clear and drop/re-acquire 'waitq' lock
3844 * (see waitq_clear_prepost_locked)
3845 */
kern_return_t
waitq_unlink_locked(struct waitq *waitq, struct waitq_set *wqset)
{
	waitq_ref_t root_ref;
	uint64_t setid;

	__waitq_link_arguments_validate(waitq, wqset);

	root_ref = waitq->waitq_set_id;

	if (wqr_is_null(root_ref)) {
		/* not in any set, hence cannot have preposted anywhere */
		assert(waitq->waitq_prepost_id == 0);
		return KERN_NOT_IN_SET;
	}

	if (!waitqs_is_linked(wqset)) {
		/*
		 * No link has been allocated for the wqset,
		 * so no waitq could have been linked to it.
		 */
		return KERN_NOT_IN_SET;
	}

	setid = wqset->wqset_id;
	if (wqr_is_equal(root_ref, setid)) {
		/*
		 * This was the only set to which the waitq belonged: we can
		 * safely release the waitq's prepost object. It doesn't
		 * matter if this function drops and re-acquires the lock
		 * because we're not manipulating waitq state any more.
		 */
		waitq->waitq_set_id = WAITQ_REF_NULL;
		(void)waitq_clear_prepost_locked(waitq);
		return KERN_SUCCESS;
	}

	if (!wqr_is_ptr(root_ref)) {
		/* a single, different, inline set id: not a member */
		return KERN_NOT_IN_SET;
	}

	/*
	 * This waitq is a member of strictly more than one set.
	 * Walk them all, in a fashion similar to SLIST_FOREACH_PREVPTR().
	 */

	waitq_ref_t *prev_next_p;
	struct waitq_link *root, *link, *next;
	uint32_t n;

	prev_next_p = &waitq->waitq_set_id;
	link = wqr_ptr_raw(root_ref);
	/* the head link tracks the total membership count */
	n = link->wql_count;

	for (;;) {
		if (link->wql_node == setid) {
			break;
		}
		if (wqr_is_equal(link->wql_next, setid)) {
			break;
		}
		if (!wqr_is_ptr(link->wql_next)) {
			return KERN_NOT_IN_SET;
		}

		next = wqr_ptr_raw(link->wql_next);
		if (wql_link_valid(link->wql_node)) {
			prev_next_p = &link->wql_next;
		} else {
			/*
			 * Opportunistically cull the list from dead nodes.
			 *
			 * This is about making sure the list doesn't
			 * grow unbounded, so we take shortcuts:
			 *
			 * - we use the racy wql_link_valid() rather than
			 *   a get/put() pair which is more expensive,
			 *
			 * - we don't try removing the link if the dead setid
			 *   is the tail one as it's slightly more complicated
			 *   to unlink.
			 */
			assert(n >= 3);
			root = wqr_ptr_raw(waitq->waitq_set_id);
			root->wql_count = --n;

			waitq_link_release(wqr_make(link));
			*prev_next_p = wqr_make(next);
		}
		link = next;
	}

	/*
	 * We found a link matching this waitq set
	 *
	 * 1. pop and free the element
	 * 2. update the wql_count (if we will keep a list)
	 * 3. cleanup the possible prepost of the waitq into the set
	 */

	if (link->wql_node == setid) {
		/* match in the node slot: splice in the successor */
		*prev_next_p = link->wql_next;
	} else {
		/* match is the inline tail: keep the node id inline */
		*prev_next_p = wqr_make(link->wql_node);
	}

	assert(n >= 2);
	if (n > 2) {
		root = wqr_ptr_raw(waitq->waitq_set_id);
		root->wql_count = n - 1;
	}

	waitq_link_release(wqr_make(link));

	waitq_unlink_prepost(wqset, waitq);
	return KERN_SUCCESS;
}
3962
3963 /**
3964 * unlink 'waitq' from 'wqset'
3965 *
3966 * Conditions:
3967 * neither 'waitq' nor 'wqset' is locked
3968 * may disable and re-enable interrupts
3969 * may (rarely) spin in prepost clear
3970 * (see waitq_clear_prepost_locked)
3971 */
3972 kern_return_t
waitq_unlink(struct waitq * waitq,struct waitq_set * wqset)3973 waitq_unlink(struct waitq *waitq, struct waitq_set *wqset)
3974 {
3975 kern_return_t kr = KERN_SUCCESS;
3976
3977 assert(waitqs_is_set(wqset));
3978
3979 /*
3980 * we allow the waitq to be invalid because the caller may be trying
3981 * to clear out old/dirty state
3982 */
3983 if (!waitq_valid(waitq)) {
3984 return KERN_INVALID_ARGUMENT;
3985 }
3986
3987 wqdbg_v("unlink waitq %p from set 0x%llx",
3988 (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), wqset->wqset_id);
3989
3990 assert(!waitq_irq_safe(waitq));
3991
3992 waitq_lock(waitq);
3993
3994 kr = waitq_unlink_locked(waitq, wqset);
3995
3996 waitq_unlock(waitq);
3997 return kr;
3998 }
3999
4000 /**
4001 * unlink a waitq from a waitq set, but reference the waitq by its prepost ID
4002 *
4003 * Conditions:
4004 * 'wqset' is unlocked
4005 * wqp_id may be valid or invalid
4006 */
void
waitq_unlink_by_prepost_id(uint64_t wqp_id, struct waitq_set *wqset)
{
	struct wq_prepost *wqp;

	/* stay on core while we hold the prepost reference */
	disable_preemption();
	wqp = wq_prepost_get(wqp_id);
	if (wqp) {
		struct waitq *wq;

		wq = wqp->wqp_wq.wqp_wq_ptr;

		/*
		 * lock the waitq, then release our prepost ID reference, then
		 * unlink the waitq from the wqset: this ensures that we don't
		 * hold a prepost ID reference during the unlink, but we also
		 * complete the unlink operation atomically to avoid a race
		 * with waitq_unlink[_all].
		 */
		assert(!waitq_irq_safe(wq));

		waitq_lock(wq);
		wq_prepost_put(wqp);

		if (!waitq_valid(wq)) {
			/* someone already tore down this waitq! */
			waitq_unlock(wq);
			enable_preemption();
			return;
		}

		/* this _may_ drop the wq lock, but that's OK */
		waitq_unlink_locked(wq, wqset);

		waitq_unlock(wq);
	}
	enable_preemption();
}
4045
4046
4047 /**
4048 * reference and lock a waitq by its prepost ID
4049 *
4050 * Conditions:
4051 * wqp_id may be valid or invalid
4052 *
4053 * Returns:
4054 * a locked waitq if wqp_id was valid
4055 * NULL on failure
4056 */
struct waitq *
waitq_lock_by_prepost_id(uint64_t wqp_id)
{
	struct waitq *wq = NULL;
	struct wq_prepost *wqp;

	/* stay on core while we hold the prepost reference */
	disable_preemption();
	wqp = wq_prepost_get(wqp_id);
	if (wqp) {
		wq = wqp->wqp_wq.wqp_wq_ptr;

		assert(!waitq_irq_safe(wq));

		/* take the waitq lock first, then drop the prepost ref */
		waitq_lock(wq);
		wq_prepost_put(wqp);

		if (!waitq_valid(wq)) {
			/* someone already tore down this waitq! */
			waitq_unlock(wq);
			enable_preemption();
			return NULL;
		}
	}
	enable_preemption();
	return wq;
}
4083
4084 /**
4085 * unlink all waitqs from 'wqset'
4086 *
4087 * Conditions:
4088 * 'wqset' is locked on entry
4089 * 'wqset' is unlocked on exit and spl is restored
4090 *
4091 * Note:
4092 * may (rarely) spin/block (see waitq_clear_prepost_locked)
4093 */
kern_return_t
waitq_set_unlink_all_unlock(struct waitq_set *wqset)
{
	struct waitq_link *link;
	uint64_t prepost_id;

	wqdbg_v("unlink all queues from set 0x%llx", wqset->wqset_id);

	/*
	 * This operation does not require interaction with any of the set's
	 * constituent wait queues. All we have to do is invalidate the SetID
	 */

	if (waitqs_is_linked(wqset)) {
		/* invalidate and re-alloc the link object first */
		link = wql_get_link(wqset->wqset_id);

		/* we may have raced with a waitq_set_deinit: handle this */
		if (!link) {
			waitq_set_unlock(wqset);
			return KERN_SUCCESS;
		}

		wql_invalidate(link);

		/* re-alloc the object to get a new generation ID */
		wql_realloc_link(link, WQL_WQS);
		link->wql_set = wqset;

		/* publish the fresh id, then make the link valid again */
		wqset->wqset_id = link->wql_setid.id;
		wql_mkvalid(link);
		wql_put_link(link);
	}

	/* clear any preposts attached to this set */
	prepost_id = wqset->wqset_prepost_id;
	wqset->wqset_prepost_id = 0;

	waitq_set_unlock(wqset);

	/* drop / unlink all the prepost table objects */
	if (prepost_id) {
		wqset_clear_prepost_chain(prepost_id);
	}

	return KERN_SUCCESS;
}
4141
/*
 * Allocate 'nalloc' reserved prepost objects, chaining them onto *wqp.
 * If 'waitq' is non-NULL and the per-CPU cache cannot satisfy the
 * request, the waitq is unlocked for the allocation (*did_unlock = 1)
 * and re-locked before returning. Panics on allocation failure.
 */
static int
waitq_alloc_prepost_reservation(int nalloc, struct waitq *waitq,
    int *did_unlock, struct wq_prepost **wqp)
{
	struct wq_prepost *tmp;
	struct wqp_cache *cache;

	*did_unlock = 0;

	/*
	 * Before we unlock the waitq, check the per-processor prepost object
	 * cache to see if there's enough there for us. If so, do the
	 * allocation, keep the lock and save an entire iteration over the set
	 * linkage!
	 */
	if (waitq) {
		disable_preemption();
		cache = PERCPU_GET(wqp_cache);
		if (nalloc <= (int)cache->avail) {
			/*
			 * Jump to the allocation with preemption still
			 * disabled; the matching enable_preemption() is
			 * the (*did_unlock == 0) branch at the bottom.
			 */
			goto do_alloc;
		}
		enable_preemption();

		/* unlock the waitq to perform the allocation */
		*did_unlock = 1;
		waitq_unlock(waitq);
	}

do_alloc:
	tmp = wq_prepost_alloc(LT_RESERVED, nalloc);
	if (!tmp) {
		panic("Couldn't reserve %d preposts for waitq @%p (wqp@%p)",
		    nalloc, waitq, *wqp);
	}
	if (*wqp) {
		/* link the two lists */
		int __assert_only rc;
		rc = wq_prepost_rlink(tmp, *wqp);
		assert(rc == nalloc);
	}
	*wqp = tmp;

	/*
	 * If the caller can block, then enforce a minimum-free table element
	 * policy here. This helps ensure that we will have enough prepost
	 * objects for callers such as selwakeup() that can be called with
	 * spin locks held.
	 */
	if (get_preemption_level() == 0) {
		wq_prepost_ensure_free_space();
	}

	if (waitq) {
		if (*did_unlock == 0) {
			/* decrement the preemption count if alloc from cache */
			enable_preemption();
		} else {
			/* otherwise: re-lock the waitq */
			waitq_lock(waitq);
		}
	}

	return nalloc;
}
4206
4207 static int
waitq_count_prepost_reservation(struct waitq * waitq,int extra,int keep_locked)4208 waitq_count_prepost_reservation(struct waitq *waitq, int extra, int keep_locked)
4209 {
4210 waitq_ref_t root_ref;
4211 int npreposts = extra;
4212
4213 /*
4214 * If the waitq is not currently part of a set, and we're not asked to
4215 * keep the waitq locked then we'll want to have 3 in reserve
4216 * just-in-case it becomes part of a set while we unlock and reserve.
4217 * We may need up to 1 object for the waitq, and 2 for the set.
4218 */
4219 root_ref = waitq->waitq_set_id;
4220 if (wqr_is_null(root_ref)) {
4221 if (!keep_locked) {
4222 npreposts += 3;
4223 }
4224 } else {
4225 /* this queue has never been preposted before */
4226 if (waitq->waitq_prepost_id == 0) {
4227 npreposts += 3;
4228 }
4229
4230 /*
4231 * Count the worst-case number of prepost objects that
4232 * may be needed during a wakeup_all.
4233 */
4234 if (wqr_is_ptr(root_ref)) {
4235 struct waitq_link *link = wqr_ptr(root_ref);
4236
4237 npreposts += 2 * link->wql_count;
4238 } else {
4239 npreposts += 2;
4240 }
4241 }
4242
4243 return npreposts;
4244 }
4245
4246
4247 /**
4248 * pre-allocate prepost objects for 'waitq'
4249 *
4250 * Conditions:
4251 * 'waitq' is not locked
4252 *
4253 * Returns:
4254 * panic on error
4255 *
4256 * 0 on success, '*reserved' is set to the head of a singly-linked
4257 * list of pre-allocated prepost objects.
4258 *
4259 * Notes:
4260 * If 'lock_state' is WAITQ_KEEP_LOCKED, this function performs the pre-allocation
4261 * atomically and returns 'waitq' locked.
4262 *
4263 * This function attempts to pre-allocate precisely enough prepost
4264 * objects based on the current set membership of 'waitq'. If the
4265 * operation is performed atomically, then the caller
4266 * is guaranteed to have enough pre-allocated prepost object to avoid
4267 * any (rare) blocking in the wakeup path.
4268 */
uint64_t
waitq_prepost_reserve(struct waitq *waitq, int extra,
    waitq_lock_state_t lock_state)
{
	uint64_t reserved = 0;
	struct wq_prepost *wqp = NULL;
	int nalloc = 0, npreposts = 0;
	int keep_locked = (lock_state == WAITQ_KEEP_LOCKED);
	int unlocked = 0;

	wqdbg_v("Attempting to reserve prepost linkages for waitq %p (extra:%d)",
	    (void *)VM_KERNEL_UNSLIDE_OR_PERM(waitq), extra);

	if (waitq == NULL && extra > 0) {
		/*
		 * Simple prepost object allocation:
		 * we'll add 2 more because the waitq might need an object,
		 * and the set itself may need a new POST object in addition
		 * to the number of preposts requested by the caller
		 */
		nalloc = waitq_alloc_prepost_reservation(extra + 2, NULL,
		    &unlocked, &wqp);
		assert(nalloc == extra + 2);
		return wqp->wqp_prepostid.id;
	}

	assert(lock_state == WAITQ_KEEP_LOCKED || lock_state == WAITQ_UNLOCK);

	assert(!waitq_irq_safe(waitq));

	waitq_lock(waitq);

	/*
	 * Count, then allocate; repeat until the reservation is stable.
	 * The allocation may unlock the waitq, and the set membership
	 * (hence the required count) can change while it is unlocked.
	 */
	npreposts = waitq_count_prepost_reservation(waitq, extra, keep_locked);
	if (npreposts) {
		do {
			/* this _may_ unlock and relock the waitq! */
			nalloc = waitq_alloc_prepost_reservation(npreposts - nalloc,
			    waitq, &unlocked, &wqp);

			if (!unlocked) {
				/* allocation was atomic: the count still holds */
				break;
			}

			npreposts = waitq_count_prepost_reservation(waitq, extra,
			    keep_locked);
		} while (npreposts > nalloc);
	}

	if (!keep_locked) {
		waitq_unlock(waitq);
	}
	if (wqp) {
		reserved = wqp->wqp_prepostid.id;
	}

	return reserved;
}
4326
4327 /**
4328 * release a linked list of prepost objects allocated via _prepost_reserve
4329 *
4330 * Conditions:
4331 * may (rarely) spin waiting for prepost table growth memcpy
4332 */
4333 void
waitq_prepost_release_reserve(uint64_t id)4334 waitq_prepost_release_reserve(uint64_t id)
4335 {
4336 struct wq_prepost *wqp;
4337
4338 wqdbg_v("releasing reserved preposts starting at: 0x%llx", id);
4339
4340 wqp = wq_prepost_rfirst(id);
4341 if (!wqp) {
4342 return;
4343 }
4344
4345 wq_prepost_release_rlist(wqp);
4346 }
4347
4348
4349 /* ----------------------------------------------------------------------
4350 *
4351 * Iteration: waitq -> sets / waitq_set -> preposts
4352 *
4353 * ---------------------------------------------------------------------- */
4354
4355 /**
4356 * call external iterator function for each prepost object in wqset
4357 *
4358 * Conditions:
4359 * Called from wq_prepost_foreach_locked
4360 * (wqset locked, waitq _not_ locked)
4361 */
4362 static int
4363 wqset_iterate_prepost_cb(struct waitq_set *wqset,
4364 struct wq_prepost *wqp, struct waitq *waitq, int (^it)(struct waitq *))
4365 {
4366 uint64_t wqp_id;
4367 int ret;
4368
4369 (void)wqp;
4370
4371 /*
4372 * This is a bit tricky. The 'wqset' is locked, but the 'waitq' is not.
4373 * Taking the 'waitq' lock is a lock order violation, so we need to be
4374 * careful. We also must realize that we may have taken a reference to
4375 * the 'wqp' just as the associated waitq was being torn down (or
4376 * clearing all its preposts) - see waitq_clear_prepost_locked(). If
4377 * the 'wqp' is valid and we can get the waitq lock, then we are good
4378 * to go. If not, we need to back off, check that the 'wqp' hasn't
4379 * been invalidated, and try to re-take the locks.
4380 */
4381 assert(!waitq_irq_safe(waitq));
4382
4383 if (waitq_lock_try(waitq)) {
4384 goto call_iterator;
4385 }
4386
4387 if (!wqp_is_valid(wqp)) {
4388 return WQ_ITERATE_RESTART;
4389 }
4390
4391 /* We are passed a prepost object with a reference on it. If neither
4392 * the waitq set nor the waitq require interrupts disabled, then we
4393 * may block on the delay(1) call below. We can't hold a prepost
4394 * object reference while blocking, so we have to give that up as well
4395 * and re-acquire it when we come back.
4396 */
4397 wqp_id = wqp->wqp_prepostid.id;
4398 wq_prepost_put(wqp);
4399 waitq_set_unlock(wqset);
4400 wqdbg_v("dropped set:%p lock waiting for wqp:%p (0x%llx -> wq:%p)",
4401 wqset, wqp, wqp->wqp_prepostid.id, waitq);
4402 delay(1);
4403 waitq_set_lock(wqset);
4404 wqp = wq_prepost_get(wqp_id);
4405 if (!wqp) {
4406 /* someone cleared preposts while we slept! */
4407 return WQ_ITERATE_DROPPED;
4408 }
4409
4410 /*
4411 * TODO:
4412 * This differs slightly from the logic in ipc_mqueue.c:
4413 * ipc_mqueue_receive_on_thread(). There, if the waitq lock
4414 * can't be obtained, the prepost link is placed on the back of
4415 * the chain, and the iteration starts from the beginning. Here,
4416 * we just restart from the beginning.
4417 */
4418 return WQ_ITERATE_RESTART;
4419
4420 call_iterator:
4421 if (!wqp_is_valid(wqp)) {
4422 ret = WQ_ITERATE_RESTART;
4423 goto out_unlock;
4424 }
4425
4426 /* call the external callback */
4427 ret = it(waitq);
4428
4429 if (ret == WQ_ITERATE_BREAK_KEEP_LOCKED) {
4430 ret = WQ_ITERATE_BREAK;
4431 goto out;
4432 }
4433
4434 out_unlock:
4435 waitq_unlock(waitq);
4436 out:
4437 return ret;
4438 }
4439
4440 /**
4441 * iterator over all preposts in the given wqset
4442 *
4443 * Conditions:
4444 * 'wqset' is locked
4445 */
int
waitq_set_iterate_preposts(struct waitq_set *wqset, waitq_iterator_t it)
{
	/* caller must hold the wqset lock for the duration of the walk */
	assert(waitq_held(&wqset->wqset_q));

	/* adapt the per-prepost callback to the caller's waitq iterator */
	return wq_prepost_foreach_locked(wqset,
	    ^(struct wq_prepost *wqp, struct waitq *waitq){
		return wqset_iterate_prepost_cb(wqset, wqp, waitq, it);
	});
}
4456
4457
4458 /* ----------------------------------------------------------------------
4459 *
4460 * Higher-level APIs
4461 *
4462 * ---------------------------------------------------------------------- */
4463
4464
4465 /**
4466 * declare a thread's intent to wait on 'waitq' for 'wait_event'
4467 *
4468 * Conditions:
4469 * 'waitq' is not locked
4470 */
4471 wait_result_t
waitq_assert_wait64(struct waitq * waitq,event64_t wait_event,wait_interrupt_t interruptible,uint64_t deadline)4472 waitq_assert_wait64(struct waitq *waitq,
4473 event64_t wait_event,
4474 wait_interrupt_t interruptible,
4475 uint64_t deadline)
4476 {
4477 thread_t thread = current_thread();
4478 wait_result_t ret;
4479 spl_t s = 0;
4480
4481 __waitq_validate(waitq);
4482
4483 if (waitq_irq_safe(waitq)) {
4484 s = splsched();
4485 }
4486
4487 waitq_lock(waitq);
4488 ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible,
4489 TIMEOUT_URGENCY_SYS_NORMAL,
4490 deadline, TIMEOUT_NO_LEEWAY, thread);
4491 waitq_unlock(waitq);
4492
4493 if (waitq_irq_safe(waitq)) {
4494 splx(s);
4495 }
4496
4497 return ret;
4498 }
4499
4500 /**
4501 * declare a thread's intent to wait on 'waitq' for 'wait_event'
4502 *
4503 * Conditions:
4504 * 'waitq' is not locked
4505 * will disable and re-enable interrupts while locking current_thread()
4506 */
4507 wait_result_t
waitq_assert_wait64_leeway(struct waitq * waitq,event64_t wait_event,wait_interrupt_t interruptible,wait_timeout_urgency_t urgency,uint64_t deadline,uint64_t leeway)4508 waitq_assert_wait64_leeway(struct waitq *waitq,
4509 event64_t wait_event,
4510 wait_interrupt_t interruptible,
4511 wait_timeout_urgency_t urgency,
4512 uint64_t deadline,
4513 uint64_t leeway)
4514 {
4515 wait_result_t ret;
4516 thread_t thread = current_thread();
4517 spl_t s = 0;
4518
4519 __waitq_validate(waitq);
4520
4521 if (waitq_irq_safe(waitq)) {
4522 s = splsched();
4523 }
4524
4525 waitq_lock(waitq);
4526 ret = waitq_assert_wait64_locked(waitq, wait_event, interruptible,
4527 urgency, deadline, leeway, thread);
4528 waitq_unlock(waitq);
4529
4530 if (waitq_irq_safe(waitq)) {
4531 splx(s);
4532 }
4533
4534 return ret;
4535 }
4536
4537 /**
4538 * wakeup a single thread from a waitq that's waiting for a given event
4539 *
4540 * Conditions:
4541 * 'waitq' is not locked
4542 * may (rarely) block if 'waitq' is non-global and a member of 1 or more sets
4543 * may disable and re-enable interrupts
4544 *
4545 * Notes:
4546 * will _not_ block if waitq is global (or not a member of any set)
4547 */
4548 kern_return_t
waitq_wakeup64_one(struct waitq * waitq,event64_t wake_event,wait_result_t result,int priority)4549 waitq_wakeup64_one(struct waitq *waitq, event64_t wake_event,
4550 wait_result_t result, int priority)
4551 {
4552 kern_return_t kr;
4553 uint64_t reserved_preposts = 0;
4554 spl_t spl = 0;
4555
4556 __waitq_validate(waitq);
4557
4558 if (waitq_irq_safe(waitq)) {
4559 spl = splsched();
4560 waitq_lock(waitq);
4561 } else if (wake_event != NO_EVENT64) {
4562 waitq_lock(waitq);
4563 } else {
4564 /*
4565 * reserve preposts in addition to locking waitq
4566 * only non global wait queues for the NO_EVENT64
4567 * will prepost.
4568 */
4569 reserved_preposts = waitq_prepost_reserve(waitq, 0,
4570 WAITQ_KEEP_LOCKED);
4571 }
4572
4573
4574 /* waitq is locked upon return */
4575 kr = waitq_wakeup64_one_locked(waitq, wake_event, result,
4576 &reserved_preposts, priority, WAITQ_UNLOCK, WQ_OPTION_NONE);
4577
4578 if (waitq_irq_safe(waitq)) {
4579 splx(spl);
4580 }
4581
4582 /* release any left-over prepost object (won't block/lock anything) */
4583 waitq_prepost_release_reserve(reserved_preposts);
4584
4585 return kr;
4586 }
4587
4588 /**
4589 * wakeup all threads from a waitq that are waiting for a given event
4590 *
4591 * Conditions:
4592 * 'waitq' is not locked
4593 * may (rarely) block if 'waitq' is non-global and a member of 1 or more sets
4594 * may disable and re-enable interrupts
4595 *
4596 * Notes:
4597 * will _not_ block if waitq is global (or not a member of any set)
4598 */
4599 kern_return_t
waitq_wakeup64_all(struct waitq * waitq,event64_t wake_event,wait_result_t result,int priority)4600 waitq_wakeup64_all(struct waitq *waitq, event64_t wake_event,
4601 wait_result_t result, int priority)
4602 {
4603 kern_return_t ret;
4604 uint64_t reserved_preposts = 0;
4605 spl_t spl = 0;
4606
4607 __waitq_validate(waitq);
4608
4609 if (waitq_irq_safe(waitq)) {
4610 spl = splsched();
4611 waitq_lock(waitq);
4612 } else if (wake_event != NO_EVENT64) {
4613 waitq_lock(waitq);
4614 } else {
4615 /*
4616 * reserve preposts in addition to locking waitq
4617 * only non global wait queues for the NO_EVENT64
4618 * will prepost.
4619 */
4620 reserved_preposts = waitq_prepost_reserve(waitq, 0,
4621 WAITQ_KEEP_LOCKED);
4622 }
4623
4624 ret = waitq_wakeup64_all_locked(waitq, wake_event, result,
4625 &reserved_preposts, priority, WAITQ_UNLOCK);
4626
4627 if (waitq_irq_safe(waitq)) {
4628 splx(spl);
4629 }
4630
4631 waitq_prepost_release_reserve(reserved_preposts);
4632
4633 return ret;
4634 }
4635
4636 /**
4637 * wakeup a specific thread iff it's waiting on 'waitq' for 'wake_event'
4638 *
4639 * Conditions:
4640 * 'waitq' is not locked
4641 *
4642 * Notes:
4643 * May temporarily disable and re-enable interrupts
4644 */
kern_return_t
waitq_wakeup64_thread(struct waitq *waitq, event64_t wake_event,
    thread_t thread, wait_result_t result)
{
	kern_return_t ret;
	/* th_spl is only set by a successful waitq_select_thread_locked() */
	spl_t s, th_spl;

	__waitq_validate(waitq);

	/* IRQ-safe queues must be manipulated with interrupts disabled */
	if (waitq_irq_safe(waitq)) {
		s = splsched();
	}
	waitq_lock(waitq);

	ret = waitq_select_thread_locked(waitq, wake_event, thread, &th_spl);
	waitq_stats_count_wakeup(waitq, ret == KERN_SUCCESS ? 1 : 0);

	/* on success, returns 'thread' locked */

	waitq_unlock(waitq);

	if (ret == KERN_SUCCESS) {
		/* make the thread runnable, then drop its lock and spl */
		ret = thread_go(thread, result, WQ_OPTION_NONE);
		assert(ret == KERN_SUCCESS);
		thread_unlock(thread);
		splx(th_spl);
	} else {
		/* 'thread' was not waiting on this waitq for this event */
		ret = KERN_NOT_WAITING;
	}

	if (waitq_irq_safe(waitq)) {
		splx(s);
	}

	return ret;
}
4681
4682 /**
4683 * wakeup a single thread from a waitq that's waiting for a given event
4684 * and return a reference to that thread
4685 * returns THREAD_NULL if no thread was waiting
4686 *
4687 * Conditions:
4688 * 'waitq' is not locked
4689 * may (rarely) block if 'waitq' is non-global and a member of 1 or more sets
4690 * may disable and re-enable interrupts
4691 *
4692 * Notes:
4693 * will _not_ block if waitq is global (or not a member of any set)
4694 */
4695 thread_t
waitq_wakeup64_identify(struct waitq * waitq,event64_t wake_event,wait_result_t result,int priority)4696 waitq_wakeup64_identify(struct waitq *waitq, event64_t wake_event,
4697 wait_result_t result, int priority)
4698 {
4699 uint64_t reserved_preposts = 0;
4700 spl_t thread_spl = 0;
4701 thread_t thread;
4702 spl_t spl = 0;
4703
4704 __waitq_validate(waitq);
4705
4706 if (waitq_irq_safe(waitq)) {
4707 spl = splsched();
4708 waitq_lock(waitq);
4709 } else if (wake_event != NO_EVENT64) {
4710 waitq_lock(waitq);
4711 } else {
4712 /*
4713 * reserve preposts in addition to locking waitq
4714 * only non global wait queues for the NO_EVENT64
4715 * will prepost.
4716 */
4717 reserved_preposts = waitq_prepost_reserve(waitq, 0,
4718 WAITQ_KEEP_LOCKED);
4719 }
4720
4721 thread = waitq_wakeup64_identify_locked(waitq, wake_event, result,
4722 &thread_spl, &reserved_preposts,
4723 priority, WAITQ_UNLOCK);
4724 /* waitq is unlocked, thread is locked */
4725
4726 if (thread != THREAD_NULL) {
4727 thread_reference(thread);
4728 thread_unlock(thread);
4729 splx(thread_spl);
4730 }
4731
4732 if (waitq_irq_safe(waitq)) {
4733 splx(spl);
4734 }
4735
4736 /* release any left-over prepost object (won't block/lock anything) */
4737 waitq_prepost_release_reserve(reserved_preposts);
4738
4739 /* returns +1 ref to running thread or THREAD_NULL */
4740 return thread;
4741 }
4742
4743 #pragma mark - tests
4744 #if DEBUG || DEVELOPMENT
4745
4746 #include <ipc/ipc_pset.h>
4747 #include <sys/errno.h>
4748
4749 #define MAX_GLOBAL_TEST_QUEUES 64
4750 static struct waitq wqt_waitq_array[MAX_GLOBAL_TEST_QUEUES];
4751 static bool wqt_running;
4752 static bool wqt_init;
4753
4754 static bool
wqt_start(const char * test,int64_t * out)4755 wqt_start(const char *test, int64_t *out)
4756 {
4757 if (os_atomic_xchg(&wqt_running, true, acquire)) {
4758 *out = 0;
4759 return false;
4760 }
4761
4762 if (!wqt_init) {
4763 wqt_init = true;
4764 for (int i = 0; i < MAX_GLOBAL_TEST_QUEUES; i++) {
4765 waitq_init(&wqt_waitq_array[i], SYNC_POLICY_FIFO);
4766 }
4767 }
4768
4769 printf("[WQ] starting %s\n", test);
4770 return true;
4771 }
4772
4773 static int
wqt_end(const char * test,int64_t * out)4774 wqt_end(const char *test, int64_t *out)
4775 {
4776 os_atomic_store(&wqt_running, false, release);
4777 printf("[WQ] done %s\n", test);
4778 *out = 1;
4779 return 0;
4780 }
4781
4782 static struct waitq *
wqt_wq(uint32_t index)4783 wqt_wq(uint32_t index)
4784 {
4785 return &wqt_waitq_array[index];
4786 }
4787
4788 static uint32_t
wqt_idx(struct waitq * waitq)4789 wqt_idx(struct waitq *waitq)
4790 {
4791 assert(waitq >= wqt_waitq_array &&
4792 waitq < wqt_waitq_array + MAX_GLOBAL_TEST_QUEUES);
4793 return (uint32_t)(waitq - wqt_waitq_array);
4794 }
4795
4796 __attribute__((overloadable))
4797 static uint64_t
wqt_bit(uint32_t index)4798 wqt_bit(uint32_t index)
4799 {
4800 return 1ull << index;
4801 }
4802
4803 __attribute__((overloadable))
4804 static uint64_t
wqt_bit(struct waitq * waitq)4805 wqt_bit(struct waitq *waitq)
4806 {
4807 return wqt_bit(wqt_idx(waitq));
4808 }
4809
4810 static struct waitq_set *
wqt_wqset_create(void)4811 wqt_wqset_create(void)
4812 {
4813 struct waitq_set *wqset;
4814
4815 wqset = &ipc_pset_alloc_special(ipc_space_kernel)->ips_wqset;
4816 if (!waitqs_is_linked(wqset)) {
4817 waitq_set_lazy_init_link(wqset);
4818 }
4819 printf("[WQ]: created waitq set 0x%llx\n", wqset->wqset_id);
4820 return wqset;
4821 }
4822
4823 static void
wqt_wqset_free(struct waitq_set * wqset)4824 wqt_wqset_free(struct waitq_set *wqset)
4825 {
4826 waitq_set_lock(wqset);
4827 ipc_pset_destroy(ipc_space_kernel,
4828 __container_of(wqset, struct ipc_pset, ips_wqset));
4829 }
4830
4831 static void
wqt_link(uint32_t index,struct waitq_set * wqset,kern_return_t want)4832 wqt_link(uint32_t index, struct waitq_set *wqset, kern_return_t want)
4833 {
4834 struct waitq *waitq = wqt_wq(index);
4835 waitq_ref_t reserved_link;
4836 kern_return_t kr;
4837
4838 printf("[WQ]: linking waitq [%d] to global wqset (0x%llx)\n",
4839 index, wqset->wqset_id);
4840 reserved_link = waitq_link_reserve();
4841 kr = waitq_link(waitq, wqset, WAITQ_SHOULD_LOCK, &reserved_link);
4842 waitq_link_release(reserved_link);
4843
4844 printf("[WQ]:\tkr=%d\texpected=%d\n", kr, want);
4845 assert(kr == want);
4846 }
4847
4848 static void
wqt_unlink(uint32_t index,struct waitq_set * wqset,kern_return_t want)4849 wqt_unlink(uint32_t index, struct waitq_set *wqset, kern_return_t want)
4850 {
4851 struct waitq *waitq = wqt_wq(index);
4852 kern_return_t kr;
4853
4854 printf("[WQ]: unlinking waitq [%d] from global wqset (0x%llx)\n",
4855 index, wqset->wqset_id);
4856 kr = waitq_unlink(waitq, wqset);
4857 printf("[WQ]: \tkr=%d\n", kr);
4858 assert(kr == want);
4859 }
4860
4861 static void
wqt_wakeup_one(uint32_t index,event64_t event64,kern_return_t want)4862 wqt_wakeup_one(uint32_t index, event64_t event64, kern_return_t want)
4863 {
4864 kern_return_t kr;
4865
4866 printf("[WQ]: Waking one thread on waitq [%d] event:0x%llx\n",
4867 index, event64);
4868 kr = waitq_wakeup64_one(wqt_wq(index), event64,
4869 THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
4870 printf("[WQ]: \tkr=%d\n", kr);
4871 assert(kr == want);
4872 }
4873
4874 static void
wqt_clear_preposts(uint32_t idx)4875 wqt_clear_preposts(uint32_t idx)
4876 {
4877 waitq_lock(wqt_wq(idx));
4878 (void)waitq_clear_prepost_locked(wqt_wq(idx));
4879 waitq_unlock(wqt_wq(idx));
4880 }
4881
4882 static void
wqt_expect_preposts(struct waitq_set * wqset,uint64_t preposts)4883 wqt_expect_preposts(struct waitq_set *wqset, uint64_t preposts)
4884 {
4885 /* make sure we find all preposts on wqset1 */
4886 __block uint64_t found = 0;
4887
4888 waitq_set_lock(wqset);
4889 waitq_set_iterate_preposts(wqset, ^(struct waitq *waitq) {
4890 printf("[WQ]: found prepost %d\n", wqt_idx(waitq));
4891 assertf((found & wqt_bit(waitq)) == 0,
4892 "found waitq %d twice", wqt_idx(waitq));
4893 found |= wqt_bit(waitq);
4894 return WQ_ITERATE_CONTINUE;
4895 });
4896 waitq_set_unlock(wqset);
4897
4898 assertf(found == preposts, "preposts expected 0x%llx, but got 0x%llx",
4899 preposts, found);
4900 }
4901
4902 static int
waitq_basic_test(__unused int64_t in,int64_t * out)4903 waitq_basic_test(__unused int64_t in, int64_t *out)
4904 {
4905 struct waitq_set *wqset;
4906
4907 if (!wqt_start(__func__, out)) {
4908 return EBUSY;
4909 }
4910
4911 wqset = wqt_wqset_create();
4912 wqt_link(10, wqset, KERN_SUCCESS);
4913 wqt_link(10, wqset, KERN_ALREADY_IN_SET);
4914 wqt_link(11, wqset, KERN_SUCCESS);
4915 wqt_link(11, wqset, KERN_ALREADY_IN_SET);
4916 wqt_link(12, wqset, KERN_SUCCESS);
4917 wqt_link(12, wqset, KERN_ALREADY_IN_SET);
4918
4919 wqt_wakeup_one(10, NO_EVENT64, KERN_NOT_WAITING);
4920 wqt_wakeup_one(12, NO_EVENT64, KERN_NOT_WAITING);
4921
4922 wqt_expect_preposts(wqset, wqt_bit(10) | wqt_bit(12));
4923 wqt_clear_preposts(10);
4924
4925 wqt_expect_preposts(wqset, wqt_bit(12));
4926 wqt_clear_preposts(12);
4927
4928 wqt_expect_preposts(wqset, 0);
4929
4930 wqt_unlink(12, wqset, KERN_SUCCESS);
4931 wqt_unlink(12, wqset, KERN_NOT_IN_SET);
4932 wqt_unlink(11, wqset, KERN_SUCCESS);
4933 wqt_unlink(10, wqset, KERN_SUCCESS);
4934 wqt_wqset_free(wqset);
4935
4936 return wqt_end(__func__, out);
4937 }
4938 SYSCTL_TEST_REGISTER(waitq_basic, waitq_basic_test);
4939 #endif /* DEBUG || DEVELOPMENT */
4940