/*
 * Copyright (c) 2003-2021 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$FreeBSD: src/sys/sys/event.h,v 1.5.2.5 2001/12/14 19:21:22 jlemon Exp $
 */

#ifndef _SYS_EVENT_PRIVATE_H_
#define _SYS_EVENT_PRIVATE_H_

#include <machine/types.h>
#include <stdint.h>
#include <sys/cdefs.h>
#include <sys/event.h>
#include <sys/queue.h>
#ifndef KERNEL_PRIVATE
#include <sys/types.h>
#endif
#ifdef XNU_KERNEL_PRIVATE
#include <kern/debug.h> /* panic */
#include <kern/kern_types.h>
#include <mach/vm_param.h>
#include <pthread/priority_private.h>
#include <sys/fcntl.h> /* FREAD, FWRITE */
#endif /* XNU_KERNEL_PRIVATE */

/*
 * Filter types
 */
/* Additional filter types in event.h */
#define EVFILT_UNUSED_11        (-11)   /* (-11) unused */
#define EVFILT_SOCK             (-13)   /* Socket events */
#define EVFILT_MEMORYSTATUS     (-14)   /* Memorystatus events */
#define EVFILT_NW_CHANNEL       (-16)   /* Skywalk channel events */
#define EVFILT_WORKLOOP         (-17)   /* Workloop events */
/* If additional filter types are added, make sure to update
 * EVFILT_SYSCOUNT in event.h!!!
 */

#ifdef KERNEL_PRIVATE

#pragma pack(4)

struct user64_kevent {
	uint64_t        ident;  /* identifier for this event */
	int16_t         filter; /* filter for event */
	uint16_t        flags;  /* general flags */
	uint32_t        fflags; /* filter-specific flags */
	int64_t         data;   /* filter-specific data */
	user_addr_t     udata;  /* opaque user data identifier */
};

struct user32_kevent {
	uint32_t        ident;  /* identifier for this event */
	int16_t         filter; /* filter for event */
	uint16_t        flags;  /* general flags */
	uint32_t        fflags; /* filter-specific flags */
	int32_t         data;   /* filter-specific data */
	user32_addr_t   udata;  /* opaque user data identifier */
};

#pragma pack()

#endif /* KERNEL_PRIVATE */

struct kevent_qos_s {
	uint64_t        ident;          /* identifier for this event */
	int16_t         filter;         /* filter for event */
	uint16_t        flags;          /* general flags */
	int32_t         qos;            /* quality of service */
	uint64_t        udata;          /* opaque user data identifier */
	uint32_t        fflags;         /* filter-specific flags */
	uint32_t        xflags;         /* extra filter-specific flags */
	int64_t         data;           /* filter-specific data */
	uint64_t        ext[4];         /* filter-specific extensions */
};

/*
 * Type definition for names/ids of dynamically allocated kqueues.
 */
typedef uint64_t kqueue_id_t;

/*
 * Rather than provide an EV_SET_QOS macro for kevent_qos_t structure
 * initialization, we encourage use of named field initialization support
 * instead.
 */
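
/*
 * For example, a kevent_qos_s can be filled in with designated initializers
 * (illustrative sketch only; field names come from the structure above,
 * unnamed fields are zero-initialized):
 *
 *	struct kevent_qos_s kev = {
 *		.ident  = (uint64_t)fd,                  // e.g. a file descriptor
 *		.filter = EVFILT_READ,
 *		.flags  = EV_ADD | EV_ENABLE,
 *		.udata  = (uint64_t)(uintptr_t)context,  // opaque, returned as-is
 *	};
 */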

// was  KEVENT_FLAG_STACK_EVENTS                 0x000004
#define KEVENT_FLAG_STACK_DATA                   0x000008   /* output data allocated as stack (grows down) */
//      KEVENT_FLAG_POLL                         0x000010
#define KEVENT_FLAG_WORKQ                        0x000020   /* interact with the default workq kq */
//      KEVENT_FLAG_LEGACY32                     0x000040
//      KEVENT_FLAG_LEGACY64                     0x000080
//      KEVENT_FLAG_PROC64                       0x000100
#define KEVENT_FLAG_WORKQ_MANAGER                0x000200   /* obsolete */
#define KEVENT_FLAG_WORKLOOP                     0x000400   /* interact with the specified workloop kq */
#define KEVENT_FLAG_PARKING                      0x000800   /* workq thread is parking */
//      KEVENT_FLAG_KERNEL                       0x001000
//      KEVENT_FLAG_DYNAMIC_KQUEUE               0x002000
//      KEVENT_FLAG_NEEDS_END_PROCESSING         0x004000
#define KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH     0x008000   /* obsolete */
#define KEVENT_FLAG_WORKLOOP_SERVICER_DETACH     0x010000   /* obsolete */
#define KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST        0x020000   /* kq lookup by id must exist */
#define KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST    0x040000   /* kq lookup by id must not exist */
#define KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD        0x080000   /* obsolete */

#ifdef XNU_KERNEL_PRIVATE

#define KEVENT_FLAG_POLL                         0x0010  /* Call is for poll() */
#define KEVENT_FLAG_LEGACY32                     0x0040  /* event data in legacy 32-bit format */
#define KEVENT_FLAG_LEGACY64                     0x0080  /* event data in legacy 64-bit format */
#define KEVENT_FLAG_PROC64                       0x0100  /* proc is 64bits */
#define KEVENT_FLAG_KERNEL                       0x1000  /* caller is in-kernel */
#define KEVENT_FLAG_DYNAMIC_KQUEUE               0x2000  /* kqueue is dynamically allocated */
#define KEVENT_FLAG_NEEDS_END_PROCESSING         0x4000  /* end processing required before returning */

#define KEVENT_ID_FLAG_USER (KEVENT_FLAG_WORKLOOP | \
	        KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST | KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST)

#define KEVENT_FLAG_USER (KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS | \
	        KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_WORKQ | KEVENT_FLAG_WORKLOOP | \
	        KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST | KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST)

/*
 * Since some filter ops are not part of the standard sysfilt_ops, we use
 * kn_filtid starting from EVFILT_SYSCOUNT to identify these cases.  This is to
 * let kn_fops() get the correct fops for all cases.
 */
#define EVFILTID_KQREAD            (EVFILT_SYSCOUNT)
#define EVFILTID_PIPE_N            (EVFILT_SYSCOUNT + 1)
#define EVFILTID_PIPE_R            (EVFILT_SYSCOUNT + 2)
#define EVFILTID_PIPE_W            (EVFILT_SYSCOUNT + 3)
#define EVFILTID_PTSD              (EVFILT_SYSCOUNT + 4)
#define EVFILTID_SOREAD            (EVFILT_SYSCOUNT + 5)
#define EVFILTID_SOWRITE           (EVFILT_SYSCOUNT + 6)
#define EVFILTID_SCK               (EVFILT_SYSCOUNT + 7)
#define EVFILTID_SOEXCEPT          (EVFILT_SYSCOUNT + 8)
#define EVFILTID_SPEC              (EVFILT_SYSCOUNT + 9)
#define EVFILTID_BPFREAD           (EVFILT_SYSCOUNT + 10)
#define EVFILTID_NECP_FD           (EVFILT_SYSCOUNT + 11)
#define EVFILTID_SKYWALK_CHANNEL_W (EVFILT_SYSCOUNT + 12)
#define EVFILTID_SKYWALK_CHANNEL_R (EVFILT_SYSCOUNT + 13)
#define EVFILTID_SKYWALK_CHANNEL_E (EVFILT_SYSCOUNT + 14)
#define EVFILTID_FSEVENT           (EVFILT_SYSCOUNT + 15)
#define EVFILTID_VN                (EVFILT_SYSCOUNT + 16)
#define EVFILTID_TTY               (EVFILT_SYSCOUNT + 17)
#define EVFILTID_PTMX              (EVFILT_SYSCOUNT + 18)

#define EVFILTID_DETACHED          (EVFILT_SYSCOUNT + 19)
#define EVFILTID_MAX               (EVFILT_SYSCOUNT + 20)

#endif /* defined(XNU_KERNEL_PRIVATE) */

#define EV_SET_QOS 0
/*
 * data/hint fflags for EVFILT_WORKLOOP, shared with userspace
 *
 * The ident for thread requests should be the dynamic ID of the workloop.
 * The ident for each sync waiter must be unique to that waiter [for this workloop].
 *
 *
 * Commands:
 *
 * @const NOTE_WL_THREAD_REQUEST [in/out]
 * The kevent represents asynchronous userspace work and its associated QoS.
 * There can only be a single knote with this flag set per workloop.
 *
 * @const NOTE_WL_SYNC_WAIT [in/out]
 * This bit is set when the caller is waiting to become the owner of a workloop.
 * If the NOTE_WL_SYNC_WAKE bit is already set then the caller is not blocked,
 * else it blocks until it is set.
 *
 * The QoS field of the knote is used to push on other owners or servicers.
 *
 * @const NOTE_WL_SYNC_WAKE [in/out]
 * Marks the waiter knote as being eligible to become an owner.
 * This bit can only be set once; trying to set it again fails with EALREADY.
 *
 * @const NOTE_WL_SYNC_IPC [in/out]
 * The knote is a sync IPC redirected turnstile push.
 *
 * Flags/Modifiers:
 *
 * @const NOTE_WL_UPDATE_QOS [in] (only NOTE_WL_THREAD_REQUEST)
 * For successful updates (EV_ADD only), learn the new userspace async QoS from
 * the kevent qos field.
 *
 * @const NOTE_WL_END_OWNERSHIP [in]
 * If the update is successful (including deletions) or returns ESTALE, and
 * the caller thread or the "suspended" thread currently owns the workloop,
 * then ownership is forgotten.
 *
 * @const NOTE_WL_DISCOVER_OWNER [in]
 * If the update is successful (including deletions), learn the owner identity
 * from the loaded value during debounce. This requires an address to have been
 * filled in the EV_EXTIDX_WL_ADDR ext field, but doesn't require a mask to have
 * been set in the EV_EXTIDX_WL_MASK.
 *
 * @const NOTE_WL_IGNORE_ESTALE [in]
 * If the operation would fail with ESTALE, mask the error and pretend the
 * update was successful. However the operation itself didn't happen, meaning
 * that:
 * - attaching a new knote will not happen
 * - dropping an existing knote will not happen
 * - NOTE_WL_UPDATE_QOS or NOTE_WL_DISCOVER_OWNER will have no effect
 *
 * This modifier doesn't affect NOTE_WL_END_OWNERSHIP.
 */
#define NOTE_WL_THREAD_REQUEST   0x00000001
#define NOTE_WL_SYNC_WAIT        0x00000004
#define NOTE_WL_SYNC_WAKE        0x00000008
#define NOTE_WL_SYNC_IPC         0x80000000
#define NOTE_WL_COMMANDS_MASK    0x8000000f /* Mask of all the [in] commands above */

#define NOTE_WL_UPDATE_QOS       0x00000010
#define NOTE_WL_END_OWNERSHIP    0x00000020
#define NOTE_WL_DISCOVER_OWNER   0x00000080
#define NOTE_WL_IGNORE_ESTALE    0x00000100
#define NOTE_WL_UPDATES_MASK     0x000001f0 /* Mask of all the [in] updates above */

#define NOTE_WL_UPDATE_OWNER     0 /* ... compatibility define ... */

/*
 * EVFILT_WORKLOOP ext[] array indexes/meanings.
 */
#define EV_EXTIDX_WL_LANE        0         /* lane identifier  [in: sync waiter]
	                                    *                  [out: thread request]     */
#define EV_EXTIDX_WL_ADDR        1         /* debounce address [in: NULL==no debounce]   */
#define EV_EXTIDX_WL_MASK        2         /* debounce mask    [in]                      */
#define EV_EXTIDX_WL_VALUE       3         /* debounce value   [in: not current->ESTALE]
	                                    *                  [out: new/debounce value] */
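
/*
 * Illustrative sketch only (the real protocol is owned by the
 * libpthread/libdispatch runtime): a sync waiter might be registered on a
 * dynamic workloop with a debounce address roughly as follows, where
 * workloop_id, waiter_id, owner_addr, owner_mask and expected are
 * caller-chosen values:
 *
 *	struct kevent_qos_s kev = {
 *		.ident  = waiter_id,
 *		.filter = EVFILT_WORKLOOP,
 *		.flags  = EV_ADD | EV_DISABLE,
 *		.fflags = NOTE_WL_SYNC_WAIT | NOTE_WL_DISCOVER_OWNER,
 *		.ext    = {
 *			[EV_EXTIDX_WL_ADDR]  = owner_addr,
 *			[EV_EXTIDX_WL_MASK]  = owner_mask,
 *			[EV_EXTIDX_WL_VALUE] = expected,
 *		},
 *	};
 *	int rc = kevent_id(workloop_id, &kev, 1, NULL, 0, NULL, NULL,
 *	    KEVENT_FLAG_WORKLOOP | KEVENT_FLAG_ERROR_EVENTS);
 */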

/*
 * If NOTE_EXIT_MEMORY is present, these bits indicate the specific jetsam condition.
 */
#define NOTE_EXIT_MEMORY_DETAIL_MASK    0xfe000000
#define NOTE_EXIT_MEMORY_VMPAGESHORTAGE 0x80000000      /* jetsam condition: lowest jetsam priority proc killed due to vm page shortage */
#define NOTE_EXIT_MEMORY_VMTHRASHING    0x40000000      /* jetsam condition: lowest jetsam priority proc killed due to vm thrashing */
#define NOTE_EXIT_MEMORY_HIWAT          0x20000000      /* jetsam condition: process reached its high water mark */
#define NOTE_EXIT_MEMORY_PID            0x10000000      /* jetsam condition: special pid kill requested */
#define NOTE_EXIT_MEMORY_IDLE           0x08000000      /* jetsam condition: idle process cleaned up */
#define NOTE_EXIT_MEMORY_VNODE          0x04000000      /* jetsam condition: virtual node kill */
#define NOTE_EXIT_MEMORY_FCTHRASHING    0x02000000      /* jetsam condition: lowest jetsam priority proc killed due to filecache thrashing */
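
/*
 * For example (sketch; NOTE_EXIT_MEMORY and NOTE_EXIT_DETAIL come from
 * <sys/event.h>), an EVFILT_PROC watcher that asked for exit details can
 * decode the jetsam reason from the returned fflags:
 *
 *	if (ev.fflags & NOTE_EXIT_MEMORY) {
 *		uint32_t detail = ev.fflags & NOTE_EXIT_MEMORY_DETAIL_MASK;
 *		if (detail & NOTE_EXIT_MEMORY_HIWAT) {
 *			// the process hit its memory high water mark
 *		}
 *	}
 */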

/*
 * data/hint fflags for EVFILT_MEMORYSTATUS, shared with userspace.
 */
#define NOTE_MEMORYSTATUS_PRESSURE_NORMAL       0x00000001      /* system memory pressure has returned to normal */
#define NOTE_MEMORYSTATUS_PRESSURE_WARN         0x00000002      /* system memory pressure has changed to the warning state */
#define NOTE_MEMORYSTATUS_PRESSURE_CRITICAL     0x00000004      /* system memory pressure has changed to the critical state */
#define NOTE_MEMORYSTATUS_LOW_SWAP              0x00000008      /* system is in a low-swap state */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN       0x00000010      /* process memory limit has hit a warning state */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL   0x00000020      /* process memory limit has hit a critical state - soft limit */
#define NOTE_MEMORYSTATUS_MSL_STATUS   0xf0000000      /* bits used to request change to process MSL status */
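
/*
 * Sketch of a userspace memory-pressure listener (illustrative only; it uses
 * the plain kqueue()/kevent() API from <sys/event.h>, and the ident is
 * unused and left as 0):
 *
 *	struct kevent reg;
 *	int kq = kqueue();
 *	EV_SET(&reg, 0, EVFILT_MEMORYSTATUS, EV_ADD | EV_CLEAR,
 *	    NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL,
 *	    0, NULL);
 *	(void)kevent(kq, &reg, 1, NULL, 0, NULL);   // register interest
 *	// later: kevent(kq, NULL, 0, &out, 1, NULL) reports the pressure
 *	// state in out.fflags
 */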

#ifdef KERNEL_PRIVATE
/*
 * data/hint fflags for EVFILT_MEMORYSTATUS, but not shared with userspace.
 */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE        0x00000040      /* Used to restrict sending a warn event only once, per active limit, soft limits only */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE      0x00000080      /* Used to restrict sending a warn event only once, per inactive limit, soft limit only */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE    0x00000100      /* Used to restrict sending a critical event only once per active limit, soft limit only */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE  0x00000200      /* Used to restrict sending a critical event only once per inactive limit, soft limit only */
#define NOTE_MEMORYSTATUS_JETSAM_FG_BAND                0x00000400      /* jetsam is approaching foreground band */

/*
 * Use this mask to protect the kernel private flags.
 */
#define EVFILT_MEMORYSTATUS_ALL_MASK \
	(NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP | \
	 NOTE_MEMORYSTATUS_PROC_LIMIT_WARN | NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL | NOTE_MEMORYSTATUS_MSL_STATUS)

#endif /* KERNEL_PRIVATE */

typedef enum vm_pressure_level {
	kVMPressureNormal   = 0,
	kVMPressureWarning  = 1,
	kVMPressureUrgent   = 2,
	kVMPressureCritical = 3,
	kVMPressureJetsam   = 4,  /* jetsam approaching FG bands */
} vm_pressure_level_t;

/*
 * data/hint fflags for EVFILT_SOCK, shared with userspace.
 *
 */
#define NOTE_CONNRESET          0x00000001 /* Received RST */
#define NOTE_READCLOSED         0x00000002 /* Read side is shutdown */
#define NOTE_WRITECLOSED        0x00000004 /* Write side is shutdown */
#define NOTE_TIMEOUT            0x00000008 /* timeout: rexmt, keep-alive or persist */
#define NOTE_NOSRCADDR          0x00000010 /* source address not available */
#define NOTE_IFDENIED           0x00000020 /* interface denied connection */
#define NOTE_SUSPEND            0x00000040 /* output queue suspended */
#define NOTE_RESUME             0x00000080 /* output queue resumed */
#define NOTE_KEEPALIVE          0x00000100 /* TCP Keepalive received */
#define NOTE_ADAPTIVE_WTIMO     0x00000200 /* TCP adaptive write timeout */
#define NOTE_ADAPTIVE_RTIMO     0x00000400 /* TCP adaptive read timeout */
#define NOTE_CONNECTED          0x00000800 /* socket is connected */
#define NOTE_DISCONNECTED       0x00001000 /* socket is disconnected */
#define NOTE_CONNINFO_UPDATED   0x00002000 /* connection info was updated */
#define NOTE_NOTIFY_ACK         0x00004000 /* notify acknowledgement */
#define NOTE_WAKE_PKT           0x00008000 /* received wake packet */

#define EVFILT_SOCK_LEVEL_TRIGGER_MASK \
	        (NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_SUSPEND | NOTE_RESUME | \
	         NOTE_CONNECTED | NOTE_DISCONNECTED)

#define EVFILT_SOCK_ALL_MASK \
	        (NOTE_CONNRESET | NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_TIMEOUT | \
	        NOTE_NOSRCADDR | NOTE_IFDENIED | NOTE_SUSPEND | NOTE_RESUME | \
	        NOTE_KEEPALIVE | NOTE_ADAPTIVE_WTIMO | NOTE_ADAPTIVE_RTIMO | \
	        NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_CONNINFO_UPDATED | \
	        NOTE_NOTIFY_ACK | NOTE_WAKE_PKT)
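
/*
 * Sketch of watching a socket for state transitions (illustrative only;
 * `so` is a connected socket descriptor and `kq` a kqueue owned by the
 * caller):
 *
 *	struct kevent reg;
 *	EV_SET(&reg, so, EVFILT_SOCK, EV_ADD | EV_CLEAR,
 *	    NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_CONNRESET, 0, NULL);
 *	(void)kevent(kq, &reg, 1, NULL, 0, NULL);
 *	// delivered events report the triggering NOTE_* bits in fflags
 */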

/*
 * data/hint fflags for EVFILT_NW_CHANNEL, shared with userspace.
 */
#define NOTE_FLOW_ADV_UPDATE    0x00000001 /* flow advisory update */
#define NOTE_CHANNEL_EVENT      0x00000002 /* generic channel event */
#define NOTE_IF_ADV_UPD         0x00000004 /* Interface advisory update */

#define EVFILT_NW_CHANNEL_ALL_MASK    \
    (NOTE_FLOW_ADV_UPDATE | NOTE_CHANNEL_EVENT | NOTE_IF_ADV_UPD)

#ifdef KERNEL_PRIVATE

#ifdef XNU_KERNEL_PRIVATE
LIST_HEAD(knote_list, knote);
TAILQ_HEAD(kqtailq, knote);     /* a list of "queued" events */

/* index into various kq queues */
typedef uint8_t kq_index_t;

/* lskq(1) knows about this type */
__options_decl(kn_status_t, uint16_t /* 12 bits really */, {
	KN_ACTIVE         = 0x001,  /* event has been triggered */
	KN_QUEUED         = 0x002,  /* event is on queue */
	KN_DISABLED       = 0x004,  /* event is disabled */
	KN_DROPPING       = 0x008,  /* knote is being dropped */
	KN_LOCKED         = 0x010,  /* knote is locked (kq_knlocks) */
	KN_POSTING        = 0x020,  /* f_event() in flight */
	// was KN_STAYACTIVE  = 0x040,
	KN_DEFERDELETE    = 0x080,  /* defer delete until re-enabled */
	KN_MERGE_QOS      = 0x100,  /* f_event() / f_* ran concurrently and overrides must merge */
	KN_REQVANISH      = 0x200,  /* requested EV_VANISH */
	KN_VANISHED       = 0x400,  /* has vanished */
	KN_SUPPRESSED     = 0x800,  /* event is suppressed during delivery */
});

#if __LP64__
#define KNOTE_KQ_PACKED_BITS   42
#define KNOTE_KQ_PACKED_SHIFT   0
#define KNOTE_KQ_PACKED_BASE    0
#else
#define KNOTE_KQ_PACKED_BITS   32
#define KNOTE_KQ_PACKED_SHIFT   0
#define KNOTE_KQ_PACKED_BASE    0
#endif

_Static_assert(!VM_PACKING_IS_BASE_RELATIVE(KNOTE_KQ_PACKED),
    "Make sure the knote pointer packing is based on arithmetic shifts");

struct kqueue;
struct knote {
	TAILQ_ENTRY(knote)       kn_tqe;            /* linkage for tail queue */
	SLIST_ENTRY(knote)       kn_link;           /* linkage for fd search list */
	SLIST_ENTRY(knote)       kn_selnext;        /* klist element chain */
#define KNOTE_AUTODETACHED ((struct knote *) -1)
#define KNOTE_IS_AUTODETACHED(kn) ((kn)->kn_selnext.sle_next == KNOTE_AUTODETACHED)

	kn_status_t              kn_status : 12;
	uintptr_t
	    kn_qos_index:4,                         /* in-use qos index */
	    kn_qos_override:3,                      /* qos override index */
	    kn_is_fd:1,                             /* knote is an fd */
	    kn_vnode_kqok:1,
	    kn_vnode_use_ofst:1;
#if __LP64__
	uintptr_t                   kn_kq_packed : KNOTE_KQ_PACKED_BITS;
#else
	uintptr_t                   kn_kq_packed;
#endif

	/* per filter stash of data (pointer, uint32_t or uint64_t) */
	union {
		uintptr_t           kn_hook; /* Manually PAC-ed, see knote_kn_hook_get_raw() */
		uint32_t            kn_hook32;
	};

	/* per filter pointer to the resource being watched */
	union {
		struct fileproc    *XNU_PTRAUTH_SIGNED_PTR("knote.fp") kn_fp;
		struct proc        *XNU_PTRAUTH_SIGNED_PTR("knote.proc") kn_proc;
		struct ipc_object  *XNU_PTRAUTH_SIGNED_PTR("knote.ipc_obj") kn_ipc_obj;
		struct thread_call *XNU_PTRAUTH_SIGNED_PTR("knote.thcall") kn_thcall;
		struct thread      *XNU_PTRAUTH_SIGNED_PTR("knote.thread") kn_thread;
	};
	/*
	 * Mimic kevent_qos so that knote_fill_kevent code is not horrid,
	 * but with subtleties:
	 *
	 * - kevent_qos_s::filter is 16 bits where ours is 8, and we use the top
	 *   bits to store the real specialized filter.
	 *   knote_fill_kevent* will always force the top bits to 0xff.
	 *
	 * - kevent_qos_s::xflags is not kept; kn_sfflags takes its place, and
	 *   knote_fill_kevent* will set xflags to 0.
	 *
	 * - kevent_qos_s::data is saved as kn_sdata and filters are encouraged
	 *   to use knote_fill_kevent; knote_fill_kevent_with_sdata will copy
	 *   kn_sdata as the output value.
	 *
	 * knote_fill_kevent_with_sdata() programmatically asserts
	 * that these aliasings are respected.
	 */
	struct kevent_internal_s {
		uint64_t    kei_ident;      /* identifier for this event */
#ifdef __LITTLE_ENDIAN__
		int8_t      kei_filter;     /* filter for event */
		uint8_t     kei_filtid;     /* actual filter for event */
#else
		uint8_t     kei_filtid;     /* actual filter for event */
		int8_t      kei_filter;     /* filter for event */
#endif
		uint16_t    kei_flags;      /* general flags */
		int32_t     kei_qos;        /* quality of service */
		uint64_t    kei_udata;      /* opaque user data identifier */
		uint32_t    kei_fflags;     /* filter-specific flags */
		uint32_t    kei_sfflags;    /* knote: saved fflags */
		int64_t     kei_sdata;      /* knote: filter-specific saved data */
		uint64_t    kei_ext[4];     /* filter-specific extensions */
	} kn_kevent;

#define kn_id           kn_kevent.kei_ident
#define kn_filtid       kn_kevent.kei_filtid
#define kn_filter       kn_kevent.kei_filter
#define kn_flags        kn_kevent.kei_flags
#define kn_qos          kn_kevent.kei_qos
#define kn_udata        kn_kevent.kei_udata
#define kn_fflags       kn_kevent.kei_fflags
#define kn_sfflags      kn_kevent.kei_sfflags
#define kn_sdata        kn_kevent.kei_sdata
#define kn_ext          kn_kevent.kei_ext
};

static inline struct kqueue *
knote_get_kq(struct knote *kn)
{
	vm_offset_t ptr = VM_UNPACK_POINTER(kn->kn_kq_packed, KNOTE_KQ_PACKED);
	return __unsafe_forge_single(struct kqueue *, ptr);
}

static inline int
knote_get_seltype(struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
		return FREAD;
	case EVFILT_WRITE:
		return FWRITE;
	default:
		panic("%s(%p): invalid filter %d\n",
		    __func__, kn, kn->kn_filter);
		return 0;
	}
}

struct kevent_ctx_s {
	uint64_t         kec_data_avail;    /* address of remaining data size */
	union {
		user_addr_t    kec_data_out;      /* extra data pointer */
		struct pollfd *kec_poll_fds;      /* poll fds */
	};
	user_size_t      kec_data_size;     /* total extra data size */
	user_size_t      kec_data_resid;    /* residual extra data size */
	uint64_t         kec_deadline;      /* wait deadline unless KEVENT_FLAG_IMMEDIATE */
	struct fileproc *kec_fp;            /* fileproc to pass to fp_drop or NULL */
	int              kec_fd;            /* fd to pass to fp_drop or -1 */

	/* the fields below are only set during process / scan */
	int              kec_process_nevents;       /* user-level event count */
	int              kec_process_noutputs;      /* number of events output */
	unsigned int     kec_process_flags;         /* kevent flags, only set for process  */
	user_addr_t      kec_process_eventlist;     /* user-level event list address */
};
typedef struct kevent_ctx_s *kevent_ctx_t;

kevent_ctx_t
kevent_get_context(thread_t thread);

/*
 * Filter operators
 *
 * These routines, provided by each filter, are called to attach, detach, deliver events,
 * change/update filter registration and process/deliver events:
 *
 * - the f_attach, f_touch, f_process and f_detach callbacks are always
 *   serialized with respect to each other for the same knote.
 *
 * - the f_event routine is called with a use-count taken on the knote to
 *   prolong its lifetime and protect against drop, but is not otherwise
 *   serialized with other routine calls.
 *
 * - the f_detach routine is always called last, and is serialized with all
 *   other callbacks, including f_event calls.
 *
 *
 * Here are more details:
 *
 * f_isfd -
 *        identifies if the "ident" field in the kevent structure is a file descriptor.
 *
 *        If so, the knote is associated with the file descriptor prior to attach and
 *        auto-removed when the file descriptor is closed (this latter behavior may change
 *        for EV_DISPATCH2 kevent types to allow delivery of events identifying unintended
 *        closes).
 *
 *        Otherwise the knote is hashed by the ident and has no auto-close behavior.
 *
 * f_adjusts_qos -
 *        identifies if the filter can adjust its QoS during its lifetime.
 *
 *        Filters using this facility should request the new overrides they want
 *        using the appropriate FILTER_{RESET,ADJUST}_EVENT_QOS extended codes.
 *
 *        Currently, EVFILT_MACHPORT is the only filter using this facility.
 *
 * f_extended_codes -
 *        identifies if the filter returns extended codes from its routines
 *        (see FILTER_ACTIVE, ...) or 0 / 1 values.
 *
 * f_attach -
 *        called to attach the knote to the underlying object that will be delivering events
 *        through it when EV_ADD is supplied and no existing matching event is found.
 *
 *        The filter is provided a knote that is pre-attached to the fd or hashed (see above)
 *        but is specially marked to avoid concurrent access until the attach is complete. The
 *        kevent structure embedded in this knote has been filled in with a sanitized
 *        version of the user-supplied kevent data.  However, the user-supplied filter-specific
 *        flags (fflags) and data fields have been moved into the knote's kn_sfflags and kn_sdata
 *        fields respectively.  These are usually interpreted as a set of "interest" flags and
 *        data by each filter - to be matched against delivered events.
 *
 *        The attach operator indicates errors by setting the EV_ERROR flag in the flags field
 *        embedded in the knote's kevent structure - with the specific error indicated in the
 *        corresponding data field.
 *
 *        The return value indicates if the knote should already be considered "activated" at
 *        the time of attach (one or more of the interest events has already occurred).
 *
 * f_detach -
 *        called to disassociate the knote from the underlying object delivering events.
 *        The filter should not attempt to deliver events through this knote after this
 *        operation returns control to the kq system.
 *
 * f_event -
 *        if the knote() function (or KNOTE() macro) is called against a list of knotes,
 *        this operator will be called on each knote in the list.
 *
 *        The "hint" parameter is completely filter-specific, but usually indicates an
 *        event or set of events that have occurred against the source object associated
 *        with the list.
 *
 *        The return value indicates if the knote should be considered "activated" as a
 *        result of this call (one or more of the interest events has occurred).
 *
 * f_process -
 *        called when attempting to deliver triggered events to user-space.
 *
 *        If the knote was previously activated, this operator will be called when a
 *        thread is trying to deliver events to user-space.  The filter gets one last
 *        chance to determine if the event/events are still interesting for this knote
 *        (are the conditions still right to deliver an event).  If so, the filter
 *        fills in the output kevent structure with the information to be delivered.
 *
 *        The input context/data parameter is used during event delivery.  Some
 *        filters allow additional data delivery as part of event delivery.  This
 *        context field indicates if space was made available for these additional
 *        items and how that space is to be allocated/carved-out.
 *
 *        The filter may set EV_CLEAR or EV_ONESHOT in the output flags field to indicate
 *        special post-delivery dispositions for the knote.
 *
 *        EV_CLEAR - indicates that all matching events have been delivered. Even
 *                   though there were events to deliver now, there will not be any
 *                   more until some additional events are delivered to the knote
 *                   via the f_event operator, or the interest set is changed via
 *                   the f_touch operator.  The knote can remain deactivated after
 *                   processing this event delivery.
 *
 *        EV_ONESHOT - indicates that this is the last event to be delivered via
 *                   this knote.  It will automatically be deleted upon delivery
 *                   (or if in dispatch-mode, upon re-enablement after this delivery).
 *
 *        The return value indicates if the knote has delivered an output event.
 *        Unless one of the special output flags was set in the output kevent, a non-
 *        zero return value ALSO indicates that the knote should be re-activated
 *        for future event processing (in case it delivers level-based events or
 *        multi-edge type events, like message queues, that already exist).
 *
 *        NOTE: In the future, the boolean may change to an enum that allows more
 *              explicit indication of just delivering a current event vs delivering
 *              an event with more events still pending.
 *
 * f_touch -
 *        called to update the knote with new state from the user during
 *        EV_ADD/EV_ENABLE/EV_DISABLE on an already-attached knote.
 *
 *        f_touch should copy relevant new data from the kevent into the knote.
 *
 *        The operator must lock against concurrent f_event operations.
 *
 *        A return value of 1 indicates that the knote should now be considered
 *        'activated'.
 *
 *        f_touch can set EV_ERROR with a specific error in the data field to
 *        return an error to the client. In that case, return 1 to indicate that
 *        the kevent needs to be activated and processed.
 *
 * f_allow_drop -
 *
 *        [OPTIONAL] If this function is non-null, then it indicates that the
 *        filter wants to validate EV_DELETE events. This is necessary if
 *        a particular filter needs to synchronize knote deletion with its own
 *        filter lock.
 *
 *        When true is returned, the EV_DELETE is allowed and can proceed.
 *
 *        If false is returned, the EV_DELETE doesn't proceed, and the passed in
 *        kevent is used for the copyout to userspace.
 *
 *        Currently, EVFILT_WORKLOOP is the only filter using this facility.
 *
 * f_post_register_wait -
 *        [OPTIONAL] called when attach or touch return the FILTER_REGISTER_WAIT
 *        extended code bit. It is possible to use this facility when the last
 *        register command wants to wait.
 *
 *        Currently, EVFILT_WORKLOOP is the only filter using this facility.
 *
 * f_sanitized_copyout -
 *        [OPTIONAL] If this function is non-null, then it should be used so
 *        that the filter can provide a sanitized copy of the current contents
 *        of a knote to userspace. This prevents leaking of any sensitive
 *        information like kernel pointers which might be stashed in filter
 *        specific data.
 *
 *        Currently, EVFILT_MACHPORT uses this facility.
 */

struct _kevent_register;
struct knote_lock_ctx;
struct proc;
struct uthread;
struct waitq;
struct thread_group;

struct filterops {
	bool    f_isfd;           /* true if ident == filedescriptor */
	bool    f_adjusts_qos;    /* true if the filter can override the knote */
	bool    f_extended_codes; /* hooks return extended codes */

	int     (*f_attach)(struct knote *kn, struct kevent_qos_s *kev);
	void    (*f_detach)(struct knote *kn);
	int     (*f_event)(struct knote *kn, long hint);
	int     (*f_touch)(struct knote *kn, struct kevent_qos_s *kev);
	int     (*f_process)(struct knote *kn, struct kevent_qos_s *kev);

	/* optional & advanced */
	bool    (*f_allow_drop)(struct knote *kn, struct kevent_qos_s *kev);
	void    (*f_post_register_wait)(struct uthread *uth, struct knote *kn,
	    struct _kevent_register *ss_kr);
	void    (*f_sanitized_copyout)(struct knote *kn, struct kevent_qos_s *kev);
};
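
/*
 * Illustrative sketch of a filter definition (hypothetical filter; real
 * filters are wired into the sysfilt_ops table or identified by EVFILTID_*
 * above, and filt_example_* are placeholder names):
 *
 *	static int
 *	filt_example_attach(struct knote *kn, struct kevent_qos_s *kev)
 *	{
 *		// attach kn to the watched object; return FILTER_ACTIVE if an
 *		// interest event is already pending, 0 otherwise
 *		return 0;
 *	}
 *
 *	const struct filterops example_filtops = {
 *		.f_isfd           = true,
 *		.f_extended_codes = true,
 *		.f_attach         = filt_example_attach,
 *		.f_detach         = filt_example_detach,
 *		.f_event          = filt_example_event,
 *		.f_process        = filt_example_process,
 *	};
 */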

/*
 * Extended codes returned by filter routines when f_extended_codes is set.
 *
 * FILTER_ACTIVE
 *     The filter is active and a call to f_process() may return an event.
 *
 *     For f_process() the meaning is slightly different: the knote will be
 *     activated again as long as f_process returns FILTER_ACTIVE, unless
 *     EV_CLEAR is set, which requires a new f_event to reactivate the knote.
 *
 *     Valid:    f_attach, f_event, f_touch, f_process
 *     Implicit: -
 *     Ignored:  -
 *
 * FILTER_REGISTER_WAIT
 *     The filter wants its f_post_register_wait() to be called.
 *
 *     Note: It is only valid to ask for this behavior for a workloop kqueue,
 *     and is really only meant to be used by EVFILT_WORKLOOP.
 *
 *     Valid:    f_attach, f_touch
 *     Implicit: -
 *     Ignored:  f_event, f_process
 *
 * FILTER_UPDATE_REQ_QOS
 *     The filter wants the passed in QoS to be updated as the new intrinsic qos
 *     for this knote. If the kevent `qos` field is 0, no update is performed.
 *
 *     This also will reset the event QoS, so FILTER_ADJUST_EVENT_QOS() must
 *     also be used if an override should be maintained.
 *
 *     Note: when this is used in f_touch, the incoming qos validation
 *           is the responsibility of the filter.
 *
 *     Valid:    f_touch
 *     Implicit: f_attach
 *     Ignored:  f_event, f_process
 *
 * FILTER_RESET_EVENT_QOS
 * FILTER_ADJUST_EVENT_QOS(qos)
 *     The filter wants the QoS of the next event delivery to be overridden
 *     to the specified QoS.  This allows the next event's QoS to be elevated
 *     from the knote requested qos (See FILTER_UPDATE_REQ_QOS).
 *
 *     Event QoS Overrides are reset when a particular knote is no longer
 *     active. Hence this is ignored if FILTER_ACTIVE isn't also returned.
 *
 *     Races between an f_event() and any other f_* routine asking for
 *     a specific QoS override are handled generically and the filters do not
 *     have to worry about them.
 *
 *     To use this facility, filters MUST set their f_adjusts_qos bit to true.
 *
 *     It is expected that filters will return the new QoS they expect to be
 *     applied from any f_* callback except for f_process() where no specific
 *     information should be provided. Filters should not try to hide no-ops;
 *     kevent will already optimize these away.
 *
 *     Valid:    f_touch, f_attach, f_event, f_process
 *     Implicit: -
 *     Ignored:  -
 *
 * FILTER_THREADREQ_NODEFEER
 *     The filter has moved a turnstile priority push away from the current
 *     thread, preemption has been disabled, and thread requests need to be
 *     committed before preemption is re-enabled.
 *
 *
 *     Valid:    f_attach, f_touch
 *     Implicit: -
 *     Invalid:  f_event, f_process
 */
#define FILTER_ACTIVE                       0x00000001
#define FILTER_REGISTER_WAIT                0x00000002
#define FILTER_UPDATE_REQ_QOS               0x00000004
#define FILTER_ADJUST_EVENT_QOS_BIT         0x00000008
#define FILTER_ADJUST_EVENT_QOS_MASK        0x00000070
#define FILTER_ADJUST_EVENT_QOS_SHIFT 4
#define FILTER_ADJUST_EVENT_QOS(qos) \
	        (((qos) << FILTER_ADJUST_EVENT_QOS_SHIFT) | FILTER_ADJUST_EVENT_QOS_BIT)
#define FILTER_GET_EVENT_QOS(result) \
	        ((result >> FILTER_ADJUST_EVENT_QOS_SHIFT) & THREAD_QOS_LAST)
#define FILTER_RESET_EVENT_QOS              FILTER_ADJUST_EVENT_QOS_BIT
#define FILTER_THREADREQ_NODEFEER           0x00000080
#define FILTER_ADJUST_EVENT_IOTIER_BIT      0x00000100

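/*
 * For example (sketch; THREAD_QOS_USER_INITIATED comes from the kernel's
 * thread QoS definitions), a filter with f_extended_codes set can both
 * activate the knote and request an event QoS override from one of its
 * hooks:
 *
 *	return FILTER_ACTIVE | FILTER_ADJUST_EVENT_QOS(THREAD_QOS_USER_INITIATED);
 */
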
#define filter_call(_ops, call)  \
	        ((_ops)->f_extended_codes ? (_ops)->call : !!((_ops)->call))

SLIST_HEAD(klist, knote);
extern void     knote_init(void);
extern void     klist_init(struct klist *list);

#define KNOTE(list, hint)       knote(list, hint, false)
#define KNOTE_ATTACH(list, kn)  knote_attach(list, kn)
#define KNOTE_DETACH(list, kn)  knote_detach(list, kn)

extern void knote(struct klist *list, long hint, bool autodetach);
extern int knote_attach(struct klist *list, struct knote *kn);
extern int knote_detach(struct klist *list, struct knote *kn);
extern void knote_vanish(struct klist *list, bool make_active);

extern void knote_set_error(struct knote *kn, int error);
extern int64_t knote_low_watermark(const struct knote *kn) __pure2;
extern void knote_fill_kevent_with_sdata(struct knote *kn, struct kevent_qos_s *kev);
extern void knote_fill_kevent(struct knote *kn, struct kevent_qos_s *kev, int64_t data);

extern void *knote_kn_hook_get_raw(struct knote *kn);
// Must be called after having specified the filtid + filter in the knote
extern void knote_kn_hook_set_raw(struct knote *kn, void *kn_hook);

extern void knote_fdclose(struct proc *p, int fd);
extern const struct filterops *knote_fops(struct knote *kn);

extern struct turnstile *kqueue_turnstile(struct kqueue *);
extern struct turnstile *kqueue_alloc_turnstile(struct kqueue *);
extern void kqueue_set_iotier_override(struct kqueue *kqu, uint8_t iotier_override);
extern uint8_t kqueue_get_iotier_override(struct kqueue *kqu);

int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);
#if CONFIG_PREADOPT_TG
extern void kqueue_set_preadopted_thread_group(struct kqueue *kq, struct thread_group *tg, thread_qos_t qos);
extern bool kqueue_process_preadopt_thread_group(thread_t t, struct kqueue *kq, struct thread_group *tg);
#endif

int kevent_copyout_proc_dynkqids(void *proc, user_addr_t ubuf,
    uint32_t ubufsize, int32_t *nkqueues_out);
int kevent_copyout_dynkqinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
    uint32_t ubufsize, int32_t *size_out);
int kevent_copyout_dynkqextinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
    uint32_t ubufsize, int32_t *nknotes_out);

extern int filt_wlattach_sync_ipc(struct knote *kn);
extern void filt_wldetach_sync_ipc(struct knote *kn);

extern int kevent_workq_internal(struct proc *p,
    user_addr_t changelist, int nchanges,
    user_addr_t eventlist, int nevents,
    user_addr_t data_out, user_size_t *data_available,
    unsigned int flags, int32_t *retval);

#elif defined(KERNEL_PRIVATE) /* !XNU_KERNEL_PRIVATE: kexts still need a klist structure definition */

struct proc;
struct knote;
SLIST_HEAD(klist, knote);

#endif /* !XNU_KERNEL_PRIVATE && KERNEL_PRIVATE */

#else   /* KERNEL_PRIVATE */

__BEGIN_DECLS
int     kevent_qos(int kq,
    const struct kevent_qos_s *changelist, int nchanges,
    struct kevent_qos_s *eventlist, int nevents,
    void *data_out, size_t *data_available,
    unsigned int flags);

int     kevent_id(kqueue_id_t id,
    const struct kevent_qos_s *changelist, int nchanges,
    struct kevent_qos_s *eventlist, int nevents,
    void *data_out, size_t *data_available,
    unsigned int flags);
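
/*
 * Minimal userspace sketch (illustrative only): register interest with
 * kevent_qos() on a kqueue and wait for a single event; `kq` is a descriptor
 * returned by kqueue() and `fd` a readable descriptor:
 *
 *	struct kevent_qos_s in = {
 *		.ident  = (uint64_t)fd,
 *		.filter = EVFILT_READ,
 *		.flags  = EV_ADD | EV_ENABLE,
 *	};
 *	struct kevent_qos_s out;
 *	int n = kevent_qos(kq, &in, 1, &out, 1, NULL, NULL, 0);
 */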

__END_DECLS


#endif /* KERNEL_PRIVATE */

/* Flags for pending events notified by kernel via return-to-kernel ast */
#define R2K_WORKLOOP_PENDING_EVENTS             0x1
#define R2K_WORKQ_PENDING_EVENTS                0x2

/* Flags for notifying what to do when there is a workqueue quantum expiry */
#define PTHREAD_WQ_QUANTUM_EXPIRY_NARROW 0x1
#define PTHREAD_WQ_QUANTUM_EXPIRY_SHUFFLE 0x2

#endif /* !_SYS_EVENT_PRIVATE_H_ */