/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1999,2000 Jonathan Lemon <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$FreeBSD: src/sys/sys/eventvar.h,v 1.1.2.2 2000/07/18 21:49:12 jlemon Exp $
 */

#ifndef _SYS_EVENTVAR_H_
#define _SYS_EVENTVAR_H_

#include <sys/event.h>
#include <sys/select.h>
#include <kern/kern_types.h>
#include <kern/waitq.h>

#if defined(XNU_KERNEL_PRIVATE)

typedef int (*kevent_callback_t)(struct kevent_qos_s *, struct kevent_ctx_s *);

#include <stdint.h>
#include <kern/locks.h>
#include <mach/thread_policy.h>
#include <pthread/workqueue_internal.h>
#include <os/refcnt.h>

/*
 * Lock ordering:
 *
 * The kqueue locking order can follow a few different patterns:
 *
 * Standard file-based kqueues (from above):
 *     proc fd lock -> kq lock -> kq-waitq-set lock -> thread lock
 *
 * WorkQ/WorkLoop kqueues (from above):
 *     proc fd lock -> kq lock -> workq lock -> thread lock
 *
 * Whenever a kqueue interacts with source locks, it drops all of its own
 * locks in exchange for a use-reference on the knote used to synchronize
 * with the source code. When those sources post events from below, they
 * follow this lock hierarchy:
 *
 * Standard file-based kqueues (from below):
 *     XXX lock -> kq lock -> kq-waitq-set lock -> thread lock
 *
 * WorkQ/WorkLoop kqueues (from below):
 *     XXX lock -> kq lock -> workq lock -> thread lock
 */

#define KQEXTENT        256             /* linear growth by this amount */

struct knote_lock_ctx {
	struct knote               *knlc_knote;
	thread_t                    knlc_thread;
	uintptr_t                   knlc_waiters;
	LIST_ENTRY(knote_lock_ctx)  knlc_link;
#if DEBUG || DEVELOPMENT
#define KNOTE_LOCK_CTX_UNLOCKED 0
#define KNOTE_LOCK_CTX_LOCKED   1
#define KNOTE_LOCK_CTX_WAITING  2
	int knlc_state;
#endif
};
LIST_HEAD(knote_locks, knote_lock_ctx);

#if DEBUG || DEVELOPMENT
/*
 * KNOTE_LOCK_CTX(name) is a convenience macro to define a knote lock context
 * on the stack named `name`. In development kernels, it uses tricks to make
 * sure no locks are still held when exiting the C-scope that contains this
 * context.
 */
static inline void
knote_lock_ctx_chk(struct knote_lock_ctx *knlc)
{
	/* evil hackery to make sure no one forgets to unlock */
	assert(knlc->knlc_state == KNOTE_LOCK_CTX_UNLOCKED);
}
#define KNOTE_LOCK_CTX(n) \
	struct knote_lock_ctx n __attribute__((cleanup(knote_lock_ctx_chk))); \
	n.knlc_state = KNOTE_LOCK_CTX_UNLOCKED
#else
#define KNOTE_LOCK_CTX(n) \
	struct knote_lock_ctx n
#endif

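/*
 * Usage sketch (illustrative, not part of the original header), assuming a
 * hypothetical locked section around a knote `kn`: the context lives on the
 * stack for the duration of the section, and in DEBUG/DEVELOPMENT kernels the
 * cleanup attribute asserts it was left unlocked before it goes out of scope.
 * The example_knote_lock/unlock helpers below are stand-ins for the real
 * entry points in kern_event.c.
 *
 *	void
 *	example_locked_section(struct knote *kn)
 *	{
 *		KNOTE_LOCK_CTX(knlc);
 *
 *		example_knote_lock(kn, &knlc);
 *		// ... operate on the knote while it is locked ...
 *		example_knote_unlock(kn, &knlc);
 *	}
 */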

__options_decl(kq_state_t, uint16_t, {
	KQ_SLEEP          = 0x0002, /* thread is waiting for events */
	KQ_PROCWAIT       = 0x0004, /* thread waiting for processing */
	KQ_KEV32          = 0x0008, /* kq is used with 32-bit events */
	KQ_KEV64          = 0x0010, /* kq is used with 64-bit events */
	KQ_KEV_QOS        = 0x0020, /* kq events carry QoS info */
	KQ_WORKQ          = 0x0040, /* KQ is bound to process workq */
	KQ_WORKLOOP       = 0x0080, /* KQ is part of a workloop */
	KQ_PROCESSING     = 0x0100, /* KQ is being processed */
	KQ_DRAIN          = 0x0200, /* kq is draining */
	KQ_DYNAMIC        = 0x0800, /* kqueue is dynamically managed */
	KQ_R2K_ARMED      = 0x1000, /* ast notification armed */
	KQ_HAS_TURNSTILE  = 0x2000, /* this kqueue has a turnstile */
});

/*
 * kqueue - common core definition of a kqueue
 *
 *          No real structures of this type are allocated. They are
 *          either kqfile, kqworkq, or kqworkloop objects - each of
 *          which is derived from this definition.
 */
struct kqueue {
	lck_spin_t          kq_lock;      /* kqueue lock */
	kq_state_t          kq_state;     /* state of the kq */
	uint16_t            kq_level;     /* nesting level of the kqfile */
	uint32_t            kq_count;     /* number of queued events */
	struct proc        *kq_p;         /* process containing kqueue */
	struct knote_locks  kq_knlocks;   /* list of knote locks held */
};

/*
 * kqfile - definition of a typical kqueue opened as a file descriptor
 *          via the kqueue() system call.
 *
 *          Adds selinfo support to the base kqueue definition, as these
 *          fds can be fed into select().
 */
struct kqfile {
	struct kqueue       kqf_kqueue;     /* common kqueue core */
	struct kqtailq      kqf_queue;      /* queue of woken up knotes */
	struct kqtailq      kqf_suppressed; /* suppression queue */
	struct selinfo      kqf_sel;        /* parent select/kqueue info */
#define kqf_lock     kqf_kqueue.kq_lock
#define kqf_state    kqf_kqueue.kq_state
#define kqf_level    kqf_kqueue.kq_level
#define kqf_count    kqf_kqueue.kq_count
#define kqf_p        kqf_kqueue.kq_p
};
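
/*
 * Illustrative sketch (not part of the original header): each specialized
 * kqueue embeds the common struct kqueue as its first member, so a pointer
 * to the specialization can be converted to a pointer to the shared core,
 * which is what the kqf_* aliases above rely on.
 *
 *	static inline struct kqueue *
 *	example_kqfile_to_kqueue(struct kqfile *kqf)
 *	{
 *		return &kqf->kqf_kqueue;
 *	}
 */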

#define QOS_INDEX_KQFILE   0          /* number of qos levels in a file kq */

/*
 * WorkQ kqueues need to request threads to service the triggered
 * knotes in the queue.  These threads are brought up on an
 * effective-requested-QoS basis. Knotes are segregated based on
 * that value - calculated by computing max(event-QoS, kevent-QoS).
 * Only one servicing thread is requested at a time for all the
 * knotes at a given effective-requested-QoS.
 */

#if !defined(KQWQ_QOS_MANAGER)
#define KQWQ_QOS_MANAGER (THREAD_QOS_LAST)
#endif

#if !defined(KQWQ_NBUCKETS)
#define KQWQ_NBUCKETS    (KQWQ_QOS_MANAGER)
#endif

/*
 * kqworkq - definition of a private kqueue used to coordinate event
 *           handling for pthread work queues.
 *
 *           These have per-qos processing queues and state to coordinate with
 *           the pthread kext to ask for threads at corresponding pthread priority
 *           values.
 */
struct kqworkq {
	struct kqueue       kqwq_kqueue;
	struct kqtailq      kqwq_queue[KQWQ_NBUCKETS];       /* array of queues */
	struct kqtailq      kqwq_suppressed[KQWQ_NBUCKETS];  /* Per-QoS suppression queues */
	workq_threadreq_s   kqwq_request[KQWQ_NBUCKETS];     /* per-QoS request states */
};

#define kqwq_lock        kqwq_kqueue.kq_lock
#define kqwq_state       kqwq_kqueue.kq_state
#define kqwq_waitq_hook  kqwq_kqueue.kq_waitq_hook
#define kqwq_count       kqwq_kqueue.kq_count
#define kqwq_p           kqwq_kqueue.kq_p

/*
 * WorkLoop kqueues need to request a thread to service the triggered
 * knotes in the queue.  The thread is brought up on an
 * effective-requested-QoS basis. Knotes are segregated based on
 * that value. Once a request is made, it cannot be undone.  If
 * events with higher QoS arrive after, they are stored in their
 * own queues and an override applied to the original request based
 * on the delta between the two QoS values.
 */

#if !defined(KQWL_NBUCKETS)
#define KQWL_NBUCKETS    (THREAD_QOS_LAST - 1)
#endif

/*
 * kqworkloop - definition of a private kqueue used to coordinate event
 *              handling for pthread workloops.
 *
 *              Workloops vary from workqs in that only a single thread is ever
 *              requested to service a workloop at a time.  But unlike workqs,
 *              workloops may be "owned" by user-space threads that are
 *              synchronously draining an event off the workloop. In those cases,
 *              any overrides have to be applied to the owner until it relinquishes
 *              ownership.
 *
 *      NOTE:   "lane" support is TBD.
 */

#if CONFIG_PREADOPT_TG_DEBUG
__options_decl(kqwl_preadopt_tg_op_t, uint8_t, {
	KQWL_PREADOPT_OP_SERVICER_BIND   = 0x01,
	KQWL_PREADOPT_OP_SERVICER_REBIND = 0x02,
	KQWL_PREADOPT_OP_SERVICER_UNBIND = 0x03,
	KQWL_PREADOPT_OP_INCOMING_IPC    = 0x04,
});
#endif

#if CONFIG_PREADOPT_TG
/*
 * We have this typedef to distinguish when there is a thread_qos_t embedded
 * in the low 3 bits of the pointer.
 */
typedef struct thread_group *thread_group_qos_t;

/* The possible states for kqwl_preadopt_tg:
 *
 * 1) Valid thread group with a QoS masked into the low 3 bits.
 * 2) A known constant value (enumerated below). For these known constant
 * values, no QoS is merged into them.
 *
 * @const KQWL_PREADOPTED_TG_NULL
 *		NULL implies that the kqwl is capable of preadopting a thread group but
 *              does not yet have such a thread group to preadopt
 * @const KQWL_PREADOPTED_TG_SENTINEL
 *		SENTINEL is set when the kqwl is no longer capable of preadopting a thread
 *              group because it has bound to a servicer - the reference on the thread group
 *              is passed to the servicer
 * @const KQWL_PREADOPTED_TG_PROCESSED
 *		PROCESSED is set when the kqwl's servicer has processed and preadopted the
 *              thread group of the first EVFILT_MACHPORT knote that it is going to deliver
 *              to userspace.
 * @const KQWL_PREADOPTED_TG_NEVER
 *		NEVER is set when the kqwl is not capable of preadopting a thread
 *		group because it is an app
 */

#define KQWL_PREADOPTED_TG_NULL ((struct thread_group *) 0)
#define KQWL_PREADOPTED_TG_SENTINEL ((struct thread_group *) -1)
#define KQWL_PREADOPTED_TG_PROCESSED ((struct thread_group *) -2)
#define KQWL_PREADOPTED_TG_NEVER ((struct thread_group *) -3)

#define KQWL_ENCODE_PREADOPTED_TG_QOS(tg, qos) \
	        (struct thread_group *) ((uintptr_t) tg | (uintptr_t) qos);

#define KQWL_PREADOPT_TG_MASK ~((uint64_t) THREAD_QOS_LAST)
#define KQWL_GET_PREADOPTED_TG(tg) \
	        (struct thread_group *)(((uintptr_t) tg) & KQWL_PREADOPT_TG_MASK)

#define KQWL_PREADOPT_TG_QOS_MASK ((uint64_t) THREAD_QOS_LAST)
#define KQWL_GET_PREADOPTED_TG_QOS(tg) \
	        (thread_qos_t) (((uintptr_t) tg) & KQWL_PREADOPT_TG_QOS_MASK)

#define KQWL_CAN_ADOPT_PREADOPT_TG(tg) \
	        ((tg != KQWL_PREADOPTED_TG_SENTINEL) && \
	        (tg != KQWL_PREADOPTED_TG_NEVER) && \
	        (tg != KQWL_PREADOPTED_TG_PROCESSED))

#define KQWL_HAS_VALID_PREADOPTED_TG(tg) \
	        ((tg != KQWL_PREADOPTED_TG_NULL) && \
	        (tg != KQWL_PREADOPTED_TG_SENTINEL) && \
	        (tg != KQWL_PREADOPTED_TG_NEVER) && \
	        (tg != KQWL_PREADOPTED_TG_PROCESSED) && \
	        (KQWL_GET_PREADOPTED_TG(tg) != NULL))
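
/*
 * Illustrative sketch (not part of the original header): a thread group
 * pointer is sufficiently aligned that a thread_qos_t (at most
 * THREAD_QOS_LAST) fits in its low bits. `tg` below is an assumed,
 * already-referenced thread group.
 *
 *	thread_group_qos_t packed =
 *	    KQWL_ENCODE_PREADOPTED_TG_QOS(tg, THREAD_QOS_USER_INTERACTIVE);
 *
 *	struct thread_group *unpacked_tg  = KQWL_GET_PREADOPTED_TG(packed);
 *	thread_qos_t         unpacked_qos = KQWL_GET_PREADOPTED_TG_QOS(packed);
 *
 *	if (KQWL_HAS_VALID_PREADOPTED_TG(packed)) {
 *		// packed carries a real thread group, not one of the sentinels
 *	}
 */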

struct thread_group *
kqr_preadopt_thread_group(workq_threadreq_t req);

_Atomic(struct thread_group *) *
kqr_preadopt_thread_group_addr(workq_threadreq_t req);

#endif


struct kqworkloop {
	struct kqueue       kqwl_kqueue;                  /* queue of events */
	struct kqtailq      kqwl_queue[KQWL_NBUCKETS];    /* array of queues */
	struct kqtailq      kqwl_suppressed;              /* suppression queue */
	workq_threadreq_s   kqwl_request;                 /* thread request state */
#if CONFIG_PREADOPT_TG
	_Atomic thread_group_qos_t      kqwl_preadopt_tg;
#endif

	lck_spin_t          kqwl_statelock;               /* state/debounce lock */
	thread_t            kqwl_owner;                   /* current [sync] owner thread */
	os_ref_atomic_t     kqwl_retains;                 /* retain references */
	thread_qos_t        kqwl_wakeup_qos;              /* QoS/override woke */
	_Atomic uint8_t     kqwl_iotier_override;         /* iotier override */

#if CONFIG_PREADOPT_TG
	/* The point of the kqwl_preadopt_tg_needs_redrive bit is to be able to
	 * coordinate which thread is going to push information about modifications
	 * to the preadoption thread group on the kqwl down to the workqueue
	 * subsystem. This coordination is needed because the preadoption thread
	 * group is set on the kqwl in the filter call without the kqlock.
	 *
	 * As such, if there is another thread holding the kqlock at this time and
	 * it observes the write to the preadoption thread group and the need for a
	 * redrive request, that thread will take the responsibility of pushing that
	 * information down to the workqueue subsystem, thereby ack-ing the request.
	 *
	 * Otherwise, the original thread which modified the kqwl will do so when
	 * it gets the kqlock.
	 *
	 * Note: Only a single bit is required here, but these 2 bytes were
	 * otherwise wasted in struct packing, so a new atomic field was created
	 * for it. Only the bottom bit is being used; the remaining bits can be
	 * reused for other purposes. (An illustrative sketch of this handshake
	 * follows the structure definition below.)
	 */
#define KQWL_PREADOPT_TG_NEEDS_REDRIVE (uint16_t) 0x1
#define KQWL_PREADOPT_TG_CLEAR_REDRIVE (uint16_t) 0x0
	_Atomic uint16_t                        kqwl_preadopt_tg_needs_redrive;
#endif

#if CONFIG_PREADOPT_TG_DEBUG
	/* Keep track of the history of events that happened to the kqworkloop with regard to tg preadoption */
#define KQWL_PREADOPT_TG_HISTORY_COUNT 32
#define KQWL_PREADOPT_TG_HISTORY_WRITE_ENTRY(kqwl, ...)  ({\
	        struct kqworkloop *__kqwl = (kqwl); \
	        unsigned int __index = os_atomic_inc_orig(&__kqwl->kqwl_preadopt_tg_history_index, relaxed); \
	        struct kqwl_preadopt_tg _preadopt_tg = { mach_approximate_time(), __VA_ARGS__}; \
	        __kqwl->kqwl_preadopt_tg_history[__index % KQWL_PREADOPT_TG_HISTORY_COUNT] = \
	                        (struct kqwl_preadopt_tg) _preadopt_tg; \
	})

	struct kqwl_preadopt_tg {
		uint64_t time;
		kqwl_preadopt_tg_op_t op;
		struct thread_group *old_preadopt_tg;
		struct thread_group *new_preadopt_tg;
	} kqwl_preadopt_tg_history[KQWL_PREADOPT_TG_HISTORY_COUNT];
	unsigned int kqwl_preadopt_tg_history_index;
#else
#define KQWL_PREADOPT_TG_HISTORY_WRITE_ENTRY(kqwl, ...)
#endif /* CONFIG_PREADOPT_TG_DEBUG */

	struct turnstile   *kqwl_turnstile;               /* turnstile for sync IPC/waiters */
	kqueue_id_t         kqwl_dynamicid;               /* dynamic identity */
	uint64_t            kqwl_params;                  /* additional parameters */
	LIST_ENTRY(kqworkloop) kqwl_hashlink;             /* linkage for search list */
#if CONFIG_WORKLOOP_DEBUG
#define KQWL_HISTORY_COUNT 32
#define KQWL_HISTORY_WRITE_ENTRY(kqwl, ...) ({ \
	        struct kqworkloop *__kqwl = (kqwl); \
	        unsigned int __index = os_atomic_inc_orig(&__kqwl->kqwl_index, relaxed); \
	        __kqwl->kqwl_history[__index % KQWL_HISTORY_COUNT] = \
	                        (struct kqwl_history)__VA_ARGS__; \
	})
	struct kqwl_history {
		thread_t updater;  /* Note: updates can be reordered */
		thread_t servicer;
		thread_t old_owner;
		thread_t new_owner;

		uint64_t kev_ident;
		int16_t  error;
		uint16_t kev_flags;
		uint32_t kev_fflags;

		uint64_t kev_mask;
		uint64_t kev_value;
		uint64_t in_value;
	} kqwl_history[KQWL_HISTORY_COUNT];
	unsigned int kqwl_index;
#endif // CONFIG_WORKLOOP_DEBUG
};
LIST_HEAD(kqwllist, kqworkloop);
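
/*
 * Illustrative sketch (not part of the original header) of the redrive
 * handshake documented above kqwl_preadopt_tg_needs_redrive: the writer marks
 * the request after updating the preadoption thread group without the kqlock;
 * whichever thread next inspects the field with the kqlock held consumes it
 * and pushes the update to the workqueue subsystem. The real logic lives in
 * kern_event.c; the code below only shows the intended atomic pattern.
 *
 *	// filter side, kqlock not held
 *	os_atomic_store(&kqwl->kqwl_preadopt_tg_needs_redrive,
 *	    KQWL_PREADOPT_TG_NEEDS_REDRIVE, release);
 *
 *	// kqlock holder
 *	if (os_atomic_xchg(&kqwl->kqwl_preadopt_tg_needs_redrive,
 *	    KQWL_PREADOPT_TG_CLEAR_REDRIVE, acquire) == KQWL_PREADOPT_TG_NEEDS_REDRIVE) {
 *		// push the new preadoption thread group down to the workqueue subsystem
 *	}
 */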

typedef union {
	struct kqueue       *kq;
	struct kqworkq      *kqwq;
	struct kqfile       *kqf;
	struct kqworkloop   *kqwl;
} __attribute__((transparent_union)) kqueue_t;
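
/*
 * Illustrative sketch (not part of the original header): the transparent
 * union lets internal helpers accept any kqueue flavor without explicit
 * casts. The helper name below is hypothetical.
 *
 *	static inline bool
 *	example_kqueue_is_workloop(kqueue_t kqu)
 *	{
 *		return (kqu.kq->kq_state & KQ_WORKLOOP) != 0;
 *	}
 *
 *	// A caller may pass a struct kqueue *, struct kqfile *, struct kqworkq *
 *	// or struct kqworkloop * directly; the conversion is implicit.
 */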

#define kqwl_lock        kqwl_kqueue.kq_lock
#define kqwl_state       kqwl_kqueue.kq_state
#define kqwl_waitq_hook  kqwl_kqueue.kq_waitq_hook
#define kqwl_count       kqwl_kqueue.kq_count
#define kqwl_p           kqwl_kqueue.kq_p

#define KQ_WORKLOOP_RETAINS_MAX UINT32_MAX

extern void kqueue_threadreq_unbind(struct proc *p, workq_threadreq_t);

// called with the kq req held
#define KQUEUE_THREADERQ_BIND_NO_INHERITOR_UPDATE 0x1
extern void kqueue_threadreq_bind(struct proc *p, workq_threadreq_t req,
    thread_t thread, unsigned int flags);

struct turnstile *kqueue_threadreq_get_turnstile(workq_threadreq_t kqr);

// called with the wq lock held
extern void
kqueue_threadreq_bind_prepost(struct proc *p, workq_threadreq_t req,
    struct uthread *uth);

// called with no lock held
extern void kqueue_threadreq_bind_commit(struct proc *p, thread_t thread);

extern void kqueue_threadreq_cancel(struct proc *p, workq_threadreq_t req);

// lock not held as kqwl_params is immutable after creation
extern workq_threadreq_param_t kqueue_threadreq_workloop_param(workq_threadreq_t req);

extern struct kqueue *kqueue_alloc(struct proc *);
extern void kqueue_dealloc(struct kqueue *);
extern void kqworkq_dealloc(struct kqworkq *kqwq);

extern void knotes_dealloc(struct proc *);
extern void kqworkloops_dealloc(struct proc *);

extern int kevent_register(struct kqueue *, struct kevent_qos_s *,
    struct knote **);
extern int kqueue_scan(struct kqueue *, int flags,
    struct kevent_ctx_s *, kevent_callback_t);
extern int kqueue_stat(struct kqueue *, void *, int, proc_t);

extern void kevent_set_workq_quantum_expiry_user_tsd(proc_t p, thread_t t,
    uint64_t flags);

#endif /* XNU_KERNEL_PRIVATE */

#endif /* !_SYS_EVENTVAR_H_ */