1 /*
2 * Copyright (c) 2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #ifndef _KERN_MPSC_QUEUE_H_
30 #define _KERN_MPSC_QUEUE_H_
31
32 #ifdef XNU_KERNEL_PRIVATE
33
34 #include <machine/atomic.h>
35 #include <kern/macro_help.h>
36 #include <kern/thread_call.h>
37
38 #endif // XNU_KERNEL_PRIVATE
39
40 #include <sys/cdefs.h>
41
42 __BEGIN_DECLS __ASSUME_PTR_ABI_SINGLE_BEGIN
43
44 /*!
45 * @typedef struct mpsc_queue_chain
46 *
47 * @brief
48 * Type for the intrusive linkage used by MPSC queues.
49 */
typedef struct mpsc_queue_chain {
#if XNU_BOUND_CHECKS // work around 78354145
	/* volatile stands in for _Atomic to work around rdar://78354145. */
	struct mpsc_queue_chain *volatile mpqc_next;
#else
	/* Atomically updated link to the next element; NULL at the tail. */
	struct mpsc_queue_chain *_Atomic mpqc_next;
#endif
} *mpsc_queue_chain_t;
57
58 /*!
59 * @typedef struct mpsc_queue_head
60 *
61 * @brief
62 * The type for a multi-producer single-consumer queue.
63 *
64 * @discussion
65 * MPSC queues allow for producers to not be affected by other producers or the
66 * consumer. Which means in turn that having producers in interrupt context
67 * does not require that other producers disable interrupts like a traditional
68 * spinlock based approach would require.
69 *
70 * These queues shine when data is produced from the entire system and is
71 * consumed from a single serial context (logging, tracing, ...).
72 * mpsc_daemon_queue_t is provided as a fully ready/easy-to-use pre-packaged
73 * solution for these common use cases.
74 *
75 * - mpsc_queue_append() can be used to append a single item
76 * - mpsc_queue_append_list() can be used to append a batch of items at once.
77 *
78 * Functions for the consumer side assume proper serialization that is not
79 * provided by the MPSC queue itself. Dequeuing doesn't require preemption
80 * to be disabled.
81 *
82 * <h2>Algorithm</h2>
83 *
84 * The base of the enqueue algorithm is a single atomic exchange (first half,
85 * called __mpsc_queue_append_update_tail) and a list fixup (2nd half, called
86 * __mpsc_queue_append_update_prev).
87 *
88 * Graphically, enqueuing `X` looks like this, with each step being done
89 * atomically (for the empty queue case, `tail` points to `head`):
90 *
91 * | orig state | update_tail | update_prev |
92 * +---------------------+---------------------+---------------------+
93 * | | | |
94 * | head -> e1 -> e2 -. | head -> e1 -> e2 -. | head -> e1 -> e2 -. |
95 * | | | | | | |
96 * | ,- ... <--' | ,- ... <--' | ,- ... <--' |
97 * | | | | | | |
98 * | v | v | v |
99 * | tail -> eN -> NULL | tail eN -> NULL | tail eN |
100 * | | | | | | |
101 * | | | | | v |
102 * | X -> NULL | `---> X -> NULL | '---> X -> NULL |
103 * | | | |
104 * +---------------------+---------------------+---------------------+
105 *
106 *
107 * There is a small 1-instruction gap of inconsistency which makes the chosen
108 * algorithm non linearizable, and requires enqueuers to disable preemption
109 * during the enqueue so as not to starve the consumer forever.
110 *
111 * As far as memory visibility is concerned, enqueuing uses a release fence in
112 * update_tail which pairs with memory fences in mpsc_queue_dequeue_batch().
113 *
114 * Note: as far as the data structure in memory, its layout is equivalent to
115 * a BSD <sys/queue.h> STAILQ. However because of this inconsistency
116 * window and memory ordering concerns, it is incorrect to use STAILQ
117 * macros on an MPSC queue.
118 */
typedef struct mpsc_queue_head {
	/* Sentinel linkage: mpqh_head.mpqc_next points at the first element. */
	struct mpsc_queue_chain mpqh_head;
#if XNU_BOUND_CHECKS // work around 78354145
	/* volatile stands in for _Atomic to work around rdar://78354145. */
	struct mpsc_queue_chain *volatile mpqh_tail;
#else
	/* Last element, or &mpqh_head when the queue is empty. */
	struct mpsc_queue_chain *_Atomic mpqh_tail;
#endif
} *mpsc_queue_head_t;
127
128 /*!
129 * @macro MPSC_QUEUE_INITIALIZER
130 *
131 * @brief
132 * Macro to use in static initializers for mpsc queues.
133 *
134 * @param head
135 * The name of the variable to initialize.
136 */
137 #define MPSC_QUEUE_INITIALIZER(head) { .mpqh_tail = &(head).mpqh_head }
138
139 #ifdef XNU_KERNEL_PRIVATE
140
141 /*!
142 * @function mpsc_queue_init
143 *
144 * @brief
145 * Dynamically initialize an mpsc queue.
146 *
147 * @discussion
148 * This initialization assumes that the object holding the queue head
149 * is initialized before it can be made visible to other threads/cores.
150 *
151 * @param q
152 * The queue to initialize.
153 */
154 static inline void
mpsc_queue_init(mpsc_queue_head_t q)155 mpsc_queue_init(mpsc_queue_head_t q)
156 {
157 os_atomic_init(&q->mpqh_head.mpqc_next, NULL);
158 os_atomic_init(&q->mpqh_tail, &q->mpqh_head);
159 }
160
161 /*!
162 * @typedef enum mpsc_queue_options
163 */
typedef enum mpsc_queue_options {
	MPSC_QUEUE_NONE = 0,
	/* Have the callee disable preemption around the enqueue
	 * (see mpsc_daemon_enqueue()). */
	MPSC_QUEUE_DISABLE_PREEMPTION = 1 << 0,
} mpsc_queue_options_t;
168
169 /*!
170 * @const MPSC_QUEUE_NOTQUEUED_MARKER
171 *
172 * @brief
173 * Magical marker that implementations can use to poison the chain pointer of
174 * elements not on any MPSC queue.
175 */
176 #define MPSC_QUEUE_NOTQUEUED_MARKER ((mpsc_queue_chain_t)~0ul)
177
178 /*!
179 * @macro mpsc_queue_element
180 *
181 * @brief
182 * Macro to find the pointer of an element back from its MPSC chain linkage.
183 */
184 #define mpsc_queue_element(ptr, type, field) __container_of(ptr, type, field)
185
186
187 #pragma mark Advanced Multi Producer calls
188
189 /**
190 * @function __mpsc_queue_append_update_tail
191 *
192 * @brief
193 * First half of the enqueue operation onto a multi-producer single-consumer
194 * queue.
195 *
196 * @discussion
197 * This function is available for algorithms that need to do things (such as
198 * taking a refcount) before calling __mpsc_queue_append_update_prev().
199 *
200 * Preemption should be disabled before calling
201 * __mpsc_queue_append_update_tail(), and until
202 * __mpsc_queue_append_update_prev() has returned.
203 *
204 * @param q
205 * The queue to update.
206 *
207 * @param elm
208 * The element to append to `q`.
209 *
210 * @returns
211 * A token to later pass to __mpsc_queue_append_update_prev()
212 * to complete the enqueue.
213 */
214 static inline mpsc_queue_chain_t
__mpsc_queue_append_update_tail(mpsc_queue_head_t q,mpsc_queue_chain_t elm)215 __mpsc_queue_append_update_tail(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
216 {
217 os_atomic_store(&elm->mpqc_next, NULL, relaxed);
218 return os_atomic_xchg(&q->mpqh_tail, elm, release);
219 }
220
221 /**
222 * @function __mpsc_queue_append_was_empty
223 *
224 * @brief
225 * Tests whether the queue was empty at the time
226 * __mpsc_queue_append_update_tail() was called.
227 *
228 * @param q
229 * The queue to test emptiness for.
230 *
231 * @param prev
232 * The token returned by __mpsc_queue_append_update_tail().
233 *
234 * @returns
235 * Whether the queue was empty (true) or not (false).
236 */
237 static inline bool
__mpsc_queue_append_was_empty(mpsc_queue_head_t q,mpsc_queue_chain_t prev)238 __mpsc_queue_append_was_empty(mpsc_queue_head_t q, mpsc_queue_chain_t prev)
239 {
240 return &q->mpqh_head == prev;
241 }
242
243 /**
244 * @function __mpsc_queue_append_update_prev
245 *
246 * @brief
247 * Second half of the enqueue operation onto a multi-producer single-consumer
248 * queue.
249 *
250 * @discussion
251 * This function is available for algorithms that need to do things (such as
252 * taking a refcount) before calling __mpsc_queue_append_update_prev().
253 *
254 * Preemption should be disabled before calling
255 * __mpsc_queue_append_update_tail(), and until
256 * __mpsc_queue_append_update_prev() has returned.
257 *
258 * @param prev
259 * The token returned by __mpsc_queue_append_update_tail().
260 *
261 * @param elm
262 * The element to append to the queue.
263 */
static inline void
__mpsc_queue_append_update_prev(mpsc_queue_chain_t prev, mpsc_queue_chain_t elm)
{
	/*
	 * Close the inconsistency window opened by
	 * __mpsc_queue_append_update_tail() by linking the old tail to the
	 * new element. The release exchange already provided ordering, so
	 * a relaxed store suffices here.
	 */
	os_atomic_store(&prev->mpqc_next, elm, relaxed);
}
269
270
271 #pragma mark Multi Producer calls
272
273 /**
274 * @function mpsc_queue_append_list
275 *
276 * @brief
277 * Enqueues a list of elements onto a queue.
278 *
279 * @discussion
280 * This enqueues a list that has to be fully formed from `first` to `last`
281 * at the end of `q`.
282 *
283 * Preemption should be disabled when calling mpsc_queue_append_list().
284 *
285 * @param q
286 * The queue to update.
287 *
288 * @param first
289 * The first of the list elements being appended.
290 *
291 * @param last
292 * The last of the list elements being appended.
293 */
294 static inline bool
mpsc_queue_append_list(mpsc_queue_head_t q,mpsc_queue_chain_t first,mpsc_queue_chain_t last)295 mpsc_queue_append_list(mpsc_queue_head_t q, mpsc_queue_chain_t first,
296 mpsc_queue_chain_t last)
297 {
298 mpsc_queue_chain_t prev = __mpsc_queue_append_update_tail(q, last);
299 __mpsc_queue_append_update_prev(prev, first);
300 return __mpsc_queue_append_was_empty(q, prev);
301 }
302
303 /**
 * @function mpsc_queue_append
305 *
306 * @brief
307 * Enqueues an element onto a queue.
308 *
309 * @discussion
310 * Preemption should be disabled when calling mpsc_queue_append().
311 *
312 * @param q the queue to update
313 * @param elm the element to append
314 */
315 static inline bool
mpsc_queue_append(mpsc_queue_head_t q,mpsc_queue_chain_t elm)316 mpsc_queue_append(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
317 {
318 return mpsc_queue_append_list(q, elm, elm);
319 }
320
321
322 #pragma mark Single Consumer calls
323
324 /**
325 * @function mpsc_queue_dequeue_batch()
326 *
327 * @brief
328 * Atomically empty a queue at once and return the batch head and tail.
329 *
330 * @discussion
331 * Consumer function, must be called in a serialized way with respect to any
332 * other consumer function.
333 *
334 * @param q
335 * The queue
336 *
337 * @param tail
338 * An out pointer filled with the last element captured.
339 *
340 * @param dependency
341 * A dependency token (to rely on consume / hardware dependencies)
342 * When not trying to take advantage of hardware dependencies, just pass NULL.
343 *
344 * @returns
 * The first element of the batch if any, or NULL if the queue was empty.
346 */
347 mpsc_queue_chain_t
348 mpsc_queue_dequeue_batch(mpsc_queue_head_t q, mpsc_queue_chain_t *tail,
349 os_atomic_dependency_t dependency);
350
351 /**
352 * @function mpsc_queue_batch_next()
353 *
354 * @brief
355 * Function used to consume an element from a batch dequeued with
356 * mpsc_queue_dequeue_batch().
357 *
358 * @discussion
359 * Once a batch has been dequeued, there is no need to hold the consumer lock
360 * anymore to consume it.
361 *
362 * mpsc_queue_batch_foreach_safe() is the preferred interface to consume
363 * the whole batch.
364 *
365 * @param cur
366 * The current inspected element of the batch (must be the batch head or
367 * a value returned by mpsc_queue_batch_next()).
368 *
369 * @param tail
370 * The last element of the batch.
371 *
372 * @returns
373 * The next element if any.
374 */
375 mpsc_queue_chain_t
376 mpsc_queue_batch_next(mpsc_queue_chain_t cur, mpsc_queue_chain_t tail);
377
378 /**
379 * @macro mpsc_queue_batch_foreach_safe
380 *
381 * @brief
382 * Macro used to enumerate a batch dequeued with mpsc_queue_dequeue_batch().
383 *
384 * @param item
385 * The item being currently visited.
386 *
387 * @param head
388 * The first element of the batch.
389 *
390 * @param tail
391 * The last element of the batch.
392 */
393 #define mpsc_queue_batch_foreach_safe(item, head, tail) \
394 for (mpsc_queue_chain_t __tmp, __item = (head), __tail = (tail); \
395 __tmp = mpsc_queue_batch_next(__item, __tail), (item) = __item; \
396 __item = __tmp)
397
398 /**
399 * @function mpsc_queue_restore_batch()
400 *
401 * @brief
402 * "Restore"s a batch at the head of the queue.
403 *
404 * @discussion
405 * Consumer function, must be called in a serialized way with respect to any
406 * other consumer function.
407 *
408 * @param q
409 * The queue
410 *
411 * @param first
412 * The first element to put back.
413 *
414 * @param last
415 * The last element to put back.
416 * It is the responsibility of the caller to ensure the linkages from first to
417 * last are properly set up before calling this function.
418 */
419 void
420 mpsc_queue_restore_batch(mpsc_queue_head_t q, mpsc_queue_chain_t first,
421 mpsc_queue_chain_t last);
422
423
424 #pragma mark "GCD"-like facilities
425
426 /*!
427 * @typedef struct mpsc_daemon_queue
428 *
429 * @brief
430 * Daemon queues are a ready-to use packaging of the low level MPSC queue
431 * primitive.
432 *
433 * @discussion
434 * mpsc_queue_t requires handling of state transitions of the queue and
435 * dequeuing yourself, which is a non trivial task.
436 *
437 * Daemon queues are a simple packaged solution that allows for mpsc_queue_t to
438 * form hierarchies (mostly for layering purposes), and be serviced at the
439 * bottom of such a hierarchy by a thread or a thread call.
440 *
 * Daemon queues assume homogeneous items, and are set up with an `invoke`
442 * callback that is called in the dequeuer on every item as they are dequeued.
443 */
444 typedef struct mpsc_daemon_queue *mpsc_daemon_queue_t;
445
446 #define MPSC_QUEUE_BATCH_END ((mpsc_queue_chain_t)~0ul)
447
448 /*!
 * @typedef mpsc_daemon_invoke_fn_t
450 *
451 * @brief
452 * The type for MPSC Daemon Queues invoke callbacks.
453 */
454 typedef void (*mpsc_daemon_invoke_fn_t)(mpsc_queue_chain_t elm,
455 mpsc_daemon_queue_t dq);
456
457 /*!
458 * @enum mpsc_daemon_queue_kind
459 *
460 * @brief
461 * Internal type, not to be used by clients.
462 */
__enum_decl(mpsc_daemon_queue_kind_t, uint16_t, {
	MPSC_QUEUE_KIND_UNKNOWN,         /* not yet initialized */
	MPSC_QUEUE_KIND_NESTED,          /* drained through a target queue
	                                  * (mpsc_daemon_queue_init_with_target) */
	MPSC_QUEUE_KIND_THREAD,          /* drained by a dedicated kernel thread
	                                  * (mpsc_daemon_queue_init_with_thread) */
	MPSC_QUEUE_KIND_THREAD_CRITICAL, /* presumably a higher-urgency thread
	                                  * variant — confirm against the .c */
	MPSC_QUEUE_KIND_THREAD_CALL,     /* drained by a thread call
	                                  * (mpsc_daemon_queue_init_with_thread_call) */
});
470
471 /*!
472 * @enum mpsc_daemon_queue_options
473 *
474 * @brief
475 * Options clients can set on their queue before first use.
476 *
477 * @const MPSC_QUEUE_OPTION_BATCH
478 * Call the `invoke` callback at the end of a batch
479 * with the magic @c MPSC_QUEUE_BATCH_END marker.
480 */
__options_decl(mpsc_daemon_queue_options_t, uint16_t, {
	/* invoke() is also called with MPSC_QUEUE_BATCH_END after each batch */
	MPSC_QUEUE_OPTION_BATCH = 0x0001,
});
484
485 /*!
486 * @enum mpsc_daemon_queue_state
487 *
488 * @brief
489 * Internal type, not to be used by clients.
490 */
__options_decl(mpsc_daemon_queue_state_t, uint32_t, {
	/*
	 * NOTE(review): flag semantics inferred from their names —
	 * confirm against the implementation in the .c file.
	 */
	MPSC_QUEUE_STATE_DRAINING = 0x0001, /* a drain appears to be in progress */
	MPSC_QUEUE_STATE_WAKEUP   = 0x0002, /* likely: new items arrived mid-drain */
	MPSC_QUEUE_STATE_CANCELED = 0x0004, /* set by cancel_and_wait */
});
496
struct mpsc_daemon_queue {
	mpsc_daemon_queue_kind_t      mpd_kind;    /* how this queue is drained */
	mpsc_daemon_queue_options_t   mpd_options; /* MPSC_QUEUE_OPTION_* flags */
	mpsc_daemon_queue_state_t _Atomic mpd_state; /* MPSC_QUEUE_STATE_* flags */
	mpsc_daemon_invoke_fn_t       mpd_invoke;  /* callback run per dequeued item */
	union {
		/* discriminated by mpd_kind */
		mpsc_daemon_queue_t mpd_target; /* MPSC_QUEUE_KIND_NESTED */
		struct thread      *mpd_thread; /* MPSC_QUEUE_KIND_THREAD[_CRITICAL] */
		struct thread_call *mpd_call;   /* MPSC_QUEUE_KIND_THREAD_CALL */
	};
	struct mpsc_queue_head  mpd_queue; /* the items enqueued on this queue */
	/* linkage, presumably onto the target's queue for the nested case —
	 * confirm against the .c */
	struct mpsc_queue_chain mpd_chain;
};
510
511 /*!
512 * @function mpsc_daemon_queue_init_with_thread
513 *
514 * @brief
515 * Sets up a daemon queue to be a base queue drained by a kernel thread.
516 *
517 * @discussion
518 * The function will allocate the thread and start it in assert_wait.
519 *
520 * @param dq
521 * The queue to initialize
522 *
523 * @param invoke
524 * The invoke function called on individual items on the queue during drain.
525 *
526 * @param pri
527 * The scheduler priority for the created thread.
528 *
529 * @param name
530 * The name to give to the created thread.
531 *
532 * @returns
533 * Whether creating the thread was successful.
534 */
535 kern_return_t
536 mpsc_daemon_queue_init_with_thread(mpsc_daemon_queue_t dq,
537 mpsc_daemon_invoke_fn_t invoke, int pri, const char *name);
538
539
540 /*!
541 * @function mpsc_daemon_queue_init_with_thread_call
542 *
543 * @brief
544 * Sets up a daemon queue to be a base queue drained by a thread call.
545 *
546 * @param dq
547 * The queue to initialize
548 *
549 * @param invoke
550 * The invoke function called on individual items on the queue during drain.
551 *
552 * @param pri
553 * The priority the thread call will run at.
554 */
555 void
556 mpsc_daemon_queue_init_with_thread_call(mpsc_daemon_queue_t dq,
557 mpsc_daemon_invoke_fn_t invoke, thread_call_priority_t pri);
558
559 /*!
560 * @function mpsc_daemon_queue_init_with_target
561 *
562 * @brief
563 * Sets up a daemon queue to target another daemon queue.
564 *
565 * @discussion
 * The targeting relationship is useful for subsystem layering purposes only.
567 * Because draining a given queue is atomic with respect to its target, target
568 * queue hierarchies are prone to starvation.
569 *
570 * @param dq
571 * The queue to initialize
572 *
573 * @param invoke
574 * The invoke function called on individual items on the queue during drain.
575 *
576 * @param target
577 * The target queue of the initialized queue, which has to be initialized with
578 * the mpsc_daemon_queue_nested_invoke invoke handler.
579 */
580 void
581 mpsc_daemon_queue_init_with_target(mpsc_daemon_queue_t dq,
582 mpsc_daemon_invoke_fn_t invoke, mpsc_daemon_queue_t target);
583
584 /*!
585 * @function mpsc_daemon_queue_nested_invoke
586 *
587 * @brief
588 * The invoke function to pass to mpsc_daemon_queue_init_* when a queue is meant
589 * to be targeted by other queues.
590 */
591 void
592 mpsc_daemon_queue_nested_invoke(mpsc_queue_chain_t elm,
593 mpsc_daemon_queue_t dq);
594
595 /*!
596 * @function mpsc_daemon_queue_cancel_and_wait
597 *
598 * @brief
599 * Cancels the queue so that the object owning it can be destroyed.
600 *
601 * @discussion
602 * This interface will cancel the queue and wait synchronously for the
603 * cancelation to have taken effect, possibly waiting on elements currently
604 * draining.
605 *
606 * Sending objects to the daemon queue after cancelation is undefined.
607 *
608 * Calling this function multiple times is undefined.
609 *
610 * Tearing down daemon queue hierarchies is the responsibility of the adopter.
611 */
612 void
613 mpsc_daemon_queue_cancel_and_wait(mpsc_daemon_queue_t dq);
614
615 /*!
616 * @function mpsc_daemon_enqueue
617 *
618 * @brief
619 * Send ("async") an item to a given daemon on a given queue.
620 *
621 * @discussion
622 * It is the responsibility of the caller to ensure preemption is disabled when
623 * this call is made.
624 *
625 * @param dq
626 * The daemon queue to enqueue the element onto.
627 *
628 * @param elm
629 * The item to enqueue.
630 *
631 * @param options
 * Options applicable to the enqueue. In particular passing
633 * MPSC_QUEUE_DISABLE_PREEMPTION makes sure preemption is properly disabled
634 * during the enqueue.
635 */
636 void
637 mpsc_daemon_enqueue(mpsc_daemon_queue_t dq, mpsc_queue_chain_t elm,
638 mpsc_queue_options_t options);
639
640
641 #pragma mark Deferred deallocation daemon
642
643 /*!
644 * @function thread_deallocate_daemon_init
645 *
646 * @brief
647 * Initializes the deferred deallocation daemon, called by thread_daemon_init().
648 *
649 * @discussion
650 * The deferred deallocation daemon is a kernel thread based daemon queue that
651 * is targeted by nested daemon queues.
652 *
653 * It is used to perform deferred deallocation for objects that can't safely be
654 * deallocated from the context where the deallocation should normally occur.
655 *
656 * Subsystems using it are for example: turnstiles, workqueues, threads.
657 *
658 * @warning
659 * New queues should be added to this daemon with great care,
660 * as abusing it can lead to unbounded amount of kernel work.
661 */
662 void
663 thread_deallocate_daemon_init(void);
664
665 /*!
666 * @function thread_deallocate_daemon_register_queue
667 *
668 * @brief
669 * Dynamically register a queue for deferred deletion with the deferred
670 * deallocation daemon.
671 *
672 * @param dq
673 * The daemon queue to register with the deferred deallocation daemon.
674 *
675 * @param invoke
676 * The callback called on every element of this queue by the deallocation
677 * daemon.
678 */
679 void
680 thread_deallocate_daemon_register_queue(mpsc_daemon_queue_t dq,
681 mpsc_daemon_invoke_fn_t invoke);
682
683
684 #pragma mark tests
685 #if DEBUG || DEVELOPMENT
686
687 int
688 mpsc_test_pingpong(uint64_t count, uint64_t *out);
689
690 #endif /* DEBUG || DEVELOPMENT */
691
692 #endif /* XNU_KERNEL_PRIVATE */
693
694 __ASSUME_PTR_ABI_SINGLE_END __END_DECLS
695
696 #endif /* _KERN_MPSC_QUEUE_H_ */
697