1 /*
2 * Copyright (c) 2003-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*-
29 * Copyright (c) 1999,2000,2001 Jonathan Lemon <[email protected]>
30 * All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 *
41 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * $FreeBSD: src/sys/sys/event.h,v 1.5.2.5 2001/12/14 19:21:22 jlemon Exp $
54 */
55
56 #ifndef _SYS_EVENT_PRIVATE_H_
57 #define _SYS_EVENT_PRIVATE_H_
58
59 #include <machine/types.h>
60 #include <stdint.h>
61 #include <sys/cdefs.h>
62 #include <sys/event.h>
63 #include <sys/queue.h>
64 #ifndef KERNEL_PRIVATE
65 #include <sys/types.h>
66 #endif
67 #ifdef XNU_KERNEL_PRIVATE
68 #include <kern/debug.h> /* panic */
69 #include <kern/kern_types.h>
70 #include <mach/vm_param.h>
71 #include <pthread/priority_private.h>
72 #include <sys/fcntl.h> /* FREAD, FWRITE */
73 #endif /* XNU_KERNEL_PRIVATE */
74
75 /*
76 * Filter types
77 */
78 /* Additional filter types in event.h */
/*
 * Private filter identifiers. All values defined here are negative; the
 * numbers -12 and -15 are not assigned in this header.
 * NOTE(review): presumably -12 and -15 are assigned in <sys/event.h> - confirm.
 */
#define EVFILT_UNUSED_11 (-11) /* (-11) unused */
#define EVFILT_SOCK (-13) /* Socket events */
#define EVFILT_MEMORYSTATUS (-14) /* Memorystatus events */
#define EVFILT_NW_CHANNEL (-16) /* Skywalk channel events */
#define EVFILT_WORKLOOP (-17) /* Workloop events */
#define EVFILT_EXCLAVES_NOTIFICATION (-18) /* Exclave notification */
/*
 * If additional filter types are added, make sure to update
 * EVFILT_SYSCOUNT in event.h!!!
 */
88
89 #ifdef KERNEL_PRIVATE
90
/*
 * Fixed-layout kevent records in a 64-bit and a 32-bit flavor.
 * Both are declared with a maximum member alignment of 4 bytes
 * (#pragma pack(4)); the default packing is restored afterwards.
 * NOTE(review): presumably these mirror the user-space struct kevent layout
 * for 64-bit and 32-bit processes respectively - confirm against the callers.
 */
#pragma pack(4)

/* Flavor with 64-bit ident and data; udata is a user_addr_t. */
struct user64_kevent {
	uint64_t ident; /* identifier for this event */
	int16_t filter; /* filter for event */
	uint16_t flags; /* general flags */
	uint32_t fflags; /* filter-specific flags */
	int64_t data; /* filter-specific data */
	user_addr_t udata; /* opaque user data identifier */
};

/* Flavor with 32-bit ident and data; udata is a user32_addr_t. */
struct user32_kevent {
	uint32_t ident; /* identifier for this event */
	int16_t filter; /* filter for event */
	uint16_t flags; /* general flags */
	uint32_t fflags; /* filter-specific flags */
	int32_t data; /* filter-specific data */
	user32_addr_t udata; /* opaque user data identifier */
};

#pragma pack()
112
113 #endif /* KERNEL_PRIVATE */
114
/*
 * Extended kevent record: adds a QoS value, an extra flags word (xflags) and
 * four 64-bit filter-specific extension slots.
 *
 * Every member is a fixed-width integer and falls on a naturally aligned
 * offset, so the structure is 72 bytes with no implicit padding.
 */
struct kevent_qos_s {
	uint64_t ident; /* identifier for this event */
	int16_t filter; /* filter for event */
	uint16_t flags; /* general flags */
	int32_t qos; /* quality of service */
	uint64_t udata; /* opaque user data identifier */
	uint32_t fflags; /* filter-specific flags */
	uint32_t xflags; /* extra filter-specific flags */
	int64_t data; /* filter-specific data */
	uint64_t ext[4]; /* filter-specific extensions */
};

/*
 * Type definition for names/ids of dynamically allocated kqueues.
 */
typedef uint64_t kqueue_id_t;

/*
 * Rather than provide an EV_SET_QOS initializer macro for struct kevent_qos_s,
 * we encourage use of named (designated) field initialization instead.
 */
137
/*
 * kevent call flags that are not guarded by any kernel-only #ifdef.
 *
 * The "//" lines record bit values that are taken by the kernel-only flags of
 * the same name (defined under XNU_KERNEL_PRIVATE further down) or by a
 * retired flag, so that the values are not reused here.
 */
// was KEVENT_FLAG_STACK_EVENTS 0x000004
#define KEVENT_FLAG_STACK_DATA 0x000008 /* output data allocated as stack (grows down) */
// KEVENT_FLAG_POLL 0x000010
#define KEVENT_FLAG_WORKQ 0x000020 /* interact with the default workq kq */
// KEVENT_FLAG_LEGACY32 0x000040
// KEVENT_FLAG_LEGACY64 0x000080
// KEVENT_FLAG_PROC64 0x000100
#define KEVENT_FLAG_WORKQ_MANAGER 0x000200 /* obsolete */
#define KEVENT_FLAG_WORKLOOP 0x000400 /* interact with the specified workloop kq */
#define KEVENT_FLAG_PARKING 0x000800 /* workq thread is parking */
// KEVENT_FLAG_KERNEL 0x001000
// KEVENT_FLAG_DYNAMIC_KQUEUE 0x002000
// KEVENT_FLAG_NEEDS_END_PROCESSING 0x004000
#define KEVENT_FLAG_WORKLOOP_SERVICER_ATTACH 0x008000 /* obsolete */
#define KEVENT_FLAG_WORKLOOP_SERVICER_DETACH 0x010000 /* obsolete */
#define KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST 0x020000 /* kq lookup by id must exist */
#define KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST 0x040000 /* kq lookup by id must not exist */
#define KEVENT_FLAG_WORKLOOP_NO_WQ_THREAD 0x080000 /* obsolete */
156
157 #ifdef XNU_KERNEL_PRIVATE
158
/*
 * Kernel-only kevent call flags. Their values fill the gaps left in the
 * KEVENT_FLAG_* list above.
 */
#define KEVENT_FLAG_POLL 0x0010 /* Call is for poll() */
#define KEVENT_FLAG_LEGACY32 0x0040 /* event data in legacy 32-bit format */
#define KEVENT_FLAG_LEGACY64 0x0080 /* event data in legacy 64-bit format */
#define KEVENT_FLAG_PROC64 0x0100 /* proc is 64bits */
#define KEVENT_FLAG_KERNEL 0x1000 /* caller is in-kernel */
#define KEVENT_FLAG_DYNAMIC_KQUEUE 0x2000 /* kqueue is dynamically allocated */
#define KEVENT_FLAG_NEEDS_END_PROCESSING 0x4000 /* end processing required before returning */

/*
 * Workloop flag plus the two "lookup by id" flags (0x060400).
 * NOTE(review): presumably the flags accepted from user space by the
 * id-based kevent entry point - confirm with the users of this mask.
 */
#define KEVENT_ID_FLAG_USER (KEVENT_FLAG_WORKLOOP | \
	KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST | KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST)

/*
 * Superset of KEVENT_ID_FLAG_USER. KEVENT_FLAG_IMMEDIATE and
 * KEVENT_FLAG_ERROR_EVENTS are not defined in this header.
 * NOTE(review): presumably the complete set of flags user space may pass -
 * confirm with the users of this mask.
 */
#define KEVENT_FLAG_USER (KEVENT_FLAG_IMMEDIATE | KEVENT_FLAG_ERROR_EVENTS | \
	KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_WORKQ | KEVENT_FLAG_WORKLOOP | \
	KEVENT_FLAG_DYNAMIC_KQ_MUST_EXIST | KEVENT_FLAG_DYNAMIC_KQ_MUST_NOT_EXIST)
173
/*
 * Since some filter ops are not part of the standard sysfilt_ops, we use
 * kn_filtid starting from EVFILT_SYSCOUNT to identify these cases. This is to
 * let kn_fops() get the correct fops for all cases.
 *
 * The ids are consecutive offsets from EVFILT_SYSCOUNT. EVFILTID_MAX is one
 * past the last id (EVFILTID_DETACHED), so it must be bumped whenever an id
 * is appended.
 */
#define EVFILTID_KQREAD (EVFILT_SYSCOUNT)
#define EVFILTID_PIPE_N (EVFILT_SYSCOUNT + 1)
#define EVFILTID_PIPE_R (EVFILT_SYSCOUNT + 2)
#define EVFILTID_PIPE_W (EVFILT_SYSCOUNT + 3)
#define EVFILTID_PTSD (EVFILT_SYSCOUNT + 4)
#define EVFILTID_SOREAD (EVFILT_SYSCOUNT + 5)
#define EVFILTID_SOWRITE (EVFILT_SYSCOUNT + 6)
#define EVFILTID_SCK (EVFILT_SYSCOUNT + 7)
#define EVFILTID_SOEXCEPT (EVFILT_SYSCOUNT + 8)
#define EVFILTID_SPEC (EVFILT_SYSCOUNT + 9)
#define EVFILTID_BPFREAD (EVFILT_SYSCOUNT + 10)
#define EVFILTID_NECP_FD (EVFILT_SYSCOUNT + 11)
#define EVFILTID_SKYWALK_CHANNEL_W (EVFILT_SYSCOUNT + 12)
#define EVFILTID_SKYWALK_CHANNEL_R (EVFILT_SYSCOUNT + 13)
#define EVFILTID_SKYWALK_CHANNEL_E (EVFILT_SYSCOUNT + 14)
#define EVFILTID_FSEVENT (EVFILT_SYSCOUNT + 15)
#define EVFILTID_VN (EVFILT_SYSCOUNT + 16)
#define EVFILTID_TTY (EVFILT_SYSCOUNT + 17)
#define EVFILTID_PTMX (EVFILT_SYSCOUNT + 18)
#define EVFILTID_MACH_PORT (EVFILT_SYSCOUNT + 19)
#define EVFILTID_MACH_PORT_SET (EVFILT_SYSCOUNT + 20)

/* NOTE(review): presumably the id given to knotes whose filter has been detached - confirm. */
#define EVFILTID_DETACHED (EVFILT_SYSCOUNT + 21)
#define EVFILTID_MAX (EVFILT_SYSCOUNT + 22)
203
204 #endif /* defined(XNU_KERNEL_PRIVATE) */
205
/*
 * Defined as the constant 0, not as an initializer macro: an earlier comment
 * in this header states that no EV_SET_QOS initializer is provided.
 * NOTE(review): presumably kept so that code referring to the name keeps
 * building - confirm before removing.
 */
#define EV_SET_QOS 0
207
208 /*
209 * data/hint fflags for EVFILT_WORKLOOP, shared with userspace
210 *
211 * The ident for thread requests should be the dynamic ID of the workloop
212 * The ident for each sync waiter must be unique to that waiter [for this workloop]
213 *
214 *
215 * Commands:
216 *
217 * @const NOTE_WL_THREAD_REQUEST [in/out]
218 * The kevent represents asynchronous userspace work and its associated QoS.
219 * There can only be a single knote with this flag set per workloop.
220 *
221 * @const NOTE_WL_SYNC_WAIT [in/out]
222 * This bit is set when the caller is waiting to become the owner of a workloop.
223 * If the NOTE_WL_SYNC_WAKE bit is already set then the caller is not blocked,
224 * else it blocks until it is set.
225 *
226 * The QoS field of the knote is used to push on other owners or servicers.
227 *
228 * @const NOTE_WL_SYNC_WAKE [in/out]
229 * Marks the waiter knote as being eligible to become an owner
230 * This bit can only be set once, trying it again will fail with EALREADY.
231 *
232 * @const NOTE_WL_SYNC_IPC [in/out]
233 * The knote is a sync IPC redirected turnstile push.
234 *
235 * Flags/Modifiers:
236 *
237 * @const NOTE_WL_UPDATE_QOS [in] (only NOTE_WL_THREAD_REQUEST)
238 * For successful updates (EV_ADD only), learn the new userspace async QoS from
239 * the kevent qos field.
240 *
241 * @const NOTE_WL_END_OWNERSHIP [in]
242 * If the update is successful (including deletions) or returns ESTALE, and
243 * the caller thread or the "suspended" thread is currently owning the workloop,
244 * then ownership is forgotten.
245 *
246 * @const NOTE_WL_DISCOVER_OWNER [in]
247 * If the update is successful (including deletions), learn the owner identity
248 * from the loaded value during debounce. This requires an address to have been
249 * filled in the EV_EXTIDX_WL_ADDR ext field, but doesn't require a mask to have
250 * been set in the EV_EXTIDX_WL_MASK.
251 *
252 * @const NOTE_WL_IGNORE_ESTALE [in]
253 * If the operation would fail with ESTALE, mask the error and pretend the
254 * update was successful. However the operation itself didn't happen, meaning
255 * that:
256 * - attaching a new knote will not happen
257 * - dropping an existing knote will not happen
258 * - NOTE_WL_UPDATE_QOS or NOTE_WL_DISCOVER_OWNER will have no effect
259 *
260 * This modifier doesn't affect NOTE_WL_END_OWNERSHIP.
261 */
/* Commands */
#define NOTE_WL_THREAD_REQUEST 0x00000001
#define NOTE_WL_SYNC_WAIT 0x00000004
#define NOTE_WL_SYNC_WAKE 0x00000008
#define NOTE_WL_SYNC_IPC 0x80000000
/*
 * Covers the four command bits above plus bit 0x00000002, which has no named
 * command in this header.
 */
#define NOTE_WL_COMMANDS_MASK 0x8000000f /* Mask of all the [in] commands above */

/* Flags/Modifiers */
#define NOTE_WL_UPDATE_QOS 0x00000010
#define NOTE_WL_END_OWNERSHIP 0x00000020
#define NOTE_WL_DISCOVER_OWNER 0x00000080
#define NOTE_WL_IGNORE_ESTALE 0x00000100
/*
 * Covers the four update bits above plus bit 0x00000040, which has no named
 * flag in this header.
 */
#define NOTE_WL_UPDATES_MASK 0x000001f0 /* Mask of all the [in] updates above */

/* Expands to 0, so OR-ing it into fflags sets no bit. */
#define NOTE_WL_UPDATE_OWNER 0 /* ... compatibility define ... */

/*
 * EVFILT_WORKLOOP ext[] array indexes/meanings.
 *
 * The four indexes cover the whole ext[4] array of struct kevent_qos_s.
 */
#define EV_EXTIDX_WL_LANE 0 /* lane identifier [in: sync waiter]
	                     * [out: thread request] */
#define EV_EXTIDX_WL_ADDR 1 /* debounce address [in: NULL==no debounce] */
#define EV_EXTIDX_WL_MASK 2 /* debounce mask [in] */
#define EV_EXTIDX_WL_VALUE 3 /* debounce value [in: not current->ESTALE]
	                      * [out: new/debounce value] */
285
/*
 * Jetsam detail bits, meaningful only when NOTE_EXIT_MEMORY is present.
 *
 * Each define below names one jetsam condition. NOTE_EXIT_MEMORY_DETAIL_MASK
 * is exactly the union of the seven condition bits (the top seven bits of a
 * 32-bit value).
 */
#define NOTE_EXIT_MEMORY_DETAIL_MASK    0xfe000000

/* lowest jetsam priority proc killed due to vm page shortage */
#define NOTE_EXIT_MEMORY_VMPAGESHORTAGE 0x80000000
/* lowest jetsam priority proc killed due to vm thrashing */
#define NOTE_EXIT_MEMORY_VMTHRASHING    0x40000000
/* process reached its high water mark */
#define NOTE_EXIT_MEMORY_HIWAT          0x20000000
/* special pid kill requested */
#define NOTE_EXIT_MEMORY_PID            0x10000000
/* idle process cleaned up */
#define NOTE_EXIT_MEMORY_IDLE           0x08000000
/* virtual node kill */
#define NOTE_EXIT_MEMORY_VNODE          0x04000000
/* lowest jetsam priority proc killed due to filecache thrashing */
#define NOTE_EXIT_MEMORY_FCTHRASHING    0x02000000
297
/*
 * data/hint fflags for EVFILT_MEMORYSTATUS, shared with userspace.
 *
 * The shared flags occupy bits 0x00000001-0x00000020 plus the four top bits
 * (NOTE_MEMORYSTATUS_MSL_STATUS).
 */
#define NOTE_MEMORYSTATUS_PRESSURE_NORMAL 0x00000001 /* system memory pressure has returned to normal */
#define NOTE_MEMORYSTATUS_PRESSURE_WARN 0x00000002 /* system memory pressure has changed to the warning state */
#define NOTE_MEMORYSTATUS_PRESSURE_CRITICAL 0x00000004 /* system memory pressure has changed to the critical state */
#define NOTE_MEMORYSTATUS_LOW_SWAP 0x00000008 /* system is in a low-swap state */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN 0x00000010 /* process memory limit has hit a warning state */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL 0x00000020 /* process memory limit has hit a critical state - soft limit */
#define NOTE_MEMORYSTATUS_MSL_STATUS 0xf0000000 /* bits used to request change to process MSL status */

#ifdef KERNEL_PRIVATE
/*
 * data/hint fflags for EVFILT_MEMORYSTATUS, but not shared with userspace.
 *
 * These occupy bits 0x00000040-0x00000400, directly above the shared flags.
 */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE 0x00000040 /* Used to restrict sending a warn event only once, per active limit, soft limits only */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE 0x00000080 /* Used to restrict sending a warn event only once, per inactive limit, soft limit only */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE 0x00000100 /* Used to restrict sending a critical event only once per active limit, soft limit only */
#define NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE 0x00000200 /* Used to restrict sending a critical event only once per inactive limit, soft limit only */
#define NOTE_MEMORYSTATUS_JETSAM_FG_BAND 0x00000400 /* jetsam is approaching foreground band */

/*
 * Use this mask to protect the kernel private flags.
 *
 * It is the union of the userspace-shared flags only (0xf000003f); none of
 * the kernel private bits above are part of it.
 */
#define EVFILT_MEMORYSTATUS_ALL_MASK \
	(NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP | \
	NOTE_MEMORYSTATUS_PROC_LIMIT_WARN | NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL | NOTE_MEMORYSTATUS_MSL_STATUS)

#endif /* KERNEL_PRIVATE */
327
/*
 * VM pressure levels. The numeric value of every level is spelled out
 * explicitly so that it cannot shift if the list is edited.
 */
enum vm_pressure_level {
	kVMPressureNormal           = 0,
	kVMPressureWarning          = 1,
	kVMPressureUrgent           = 2,
	kVMPressureCritical         = 3,
	kVMPressureForegroundJetsam = 4, /* Jetsam is approaching the Foreground bands */
	kVMPressureBackgroundJetsam = 5, /* Jetsam is approaching the Background bands */
};
typedef enum vm_pressure_level vm_pressure_level_t;

/* Legacy name, kept as an alias of kVMPressureForegroundJetsam. */
#define kVMPressureJetsam kVMPressureForegroundJetsam
341
/*
 * data/hint fflags for EVFILT_SOCK, shared with userspace.
 *
 * The sixteen flags occupy bits 0x0001 through 0x8000 with no gaps.
 */
#define NOTE_CONNRESET 0x00000001 /* Received RST */
#define NOTE_READCLOSED 0x00000002 /* Read side is shutdown */
#define NOTE_WRITECLOSED 0x00000004 /* Write side is shutdown */
#define NOTE_TIMEOUT 0x00000008 /* timeout: rexmt, keep-alive or persist */
#define NOTE_NOSRCADDR 0x00000010 /* source address not available */
#define NOTE_IFDENIED 0x00000020 /* interface denied connection */
#define NOTE_SUSPEND 0x00000040 /* output queue suspended */
#define NOTE_RESUME 0x00000080 /* output queue resumed */
#define NOTE_KEEPALIVE 0x00000100 /* TCP Keepalive received */
#define NOTE_ADAPTIVE_WTIMO 0x00000200 /* TCP adaptive write timeout */
#define NOTE_ADAPTIVE_RTIMO 0x00000400 /* TCP adaptive read timeout */
#define NOTE_CONNECTED 0x00000800 /* socket is connected */
#define NOTE_DISCONNECTED 0x00001000 /* socket is disconnected */
#define NOTE_CONNINFO_UPDATED 0x00002000 /* connection info was updated */
#define NOTE_NOTIFY_ACK 0x00004000 /* notify acknowledgement */
#define NOTE_WAKE_PKT 0x00008000 /* received wake packet */

/*
 * Subset of the flags above (value 0x18c6).
 * NOTE(review): going by the name, presumably the events reported as level
 * triggered rather than edge triggered - confirm in the EVFILT_SOCK filter.
 */
#define EVFILT_SOCK_LEVEL_TRIGGER_MASK \
	(NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_SUSPEND | NOTE_RESUME | \
	NOTE_CONNECTED | NOTE_DISCONNECTED)

/* Union of all sixteen EVFILT_SOCK flags above (value 0xffff). */
#define EVFILT_SOCK_ALL_MASK \
	(NOTE_CONNRESET | NOTE_READCLOSED | NOTE_WRITECLOSED | NOTE_TIMEOUT | \
	NOTE_NOSRCADDR | NOTE_IFDENIED | NOTE_SUSPEND | NOTE_RESUME | \
	NOTE_KEEPALIVE | NOTE_ADAPTIVE_WTIMO | NOTE_ADAPTIVE_RTIMO | \
	NOTE_CONNECTED | NOTE_DISCONNECTED | NOTE_CONNINFO_UPDATED | \
	NOTE_NOTIFY_ACK | NOTE_WAKE_PKT)
373
/*
 * data/hint fflags for EVFILT_NW_CHANNEL, shared with userspace.
 */
#define NOTE_FLOW_ADV_UPDATE 0x00000001 /* flow advisory update */
#define NOTE_CHANNEL_EVENT 0x00000002 /* generic channel event */
#define NOTE_IF_ADV_UPD 0x00000004 /* Interface advisory update */

/* Union of the three EVFILT_NW_CHANNEL flags above (value 0x7). */
#define EVFILT_NW_CHANNEL_ALL_MASK \
	(NOTE_FLOW_ADV_UPDATE | NOTE_CHANNEL_EVENT | NOTE_IF_ADV_UPD)
383
384 #ifdef KERNEL_PRIVATE
385
386 #ifdef XNU_KERNEL_PRIVATE
/* Head types for lists of struct knote, declared via the <sys/queue.h> macros. */
LIST_HEAD(knote_list, knote);
TAILQ_HEAD(kqtailq, knote); /* a list of "queued" events */

/* index into various kq queues */
typedef uint8_t kq_index_t;
392
/*
 * Status bits of a knote.
 *
 * Only the low 12 bits are used, which matches the 12-bit kn_status
 * bitfield in struct knote. Bit 0x040 is retired and currently unassigned.
 *
 * lskq(1) knows about this type
 */
__options_decl(kn_status_t, uint16_t /* 12 bits really */, {
	KN_ACTIVE = 0x001, /* event has been triggered */
	KN_QUEUED = 0x002, /* event is on queue */
	KN_DISABLED = 0x004, /* event is disabled */
	KN_DROPPING = 0x008, /* knote is being dropped */
	KN_LOCKED = 0x010, /* knote is locked (kq_knlocks) */
	KN_POSTING = 0x020, /* f_event() in flight */
	// was KN_STAYACTIVE = 0x040,
	KN_DEFERDELETE = 0x080, /* defer delete until re-enabled */
	KN_MERGE_QOS = 0x100, /* f_event() / f_* ran concurrently and overrides must merge */
	KN_REQVANISH = 0x200, /* requested EV_VANISH */
	KN_VANISHED = 0x400, /* has vanished */
	KN_SUPPRESSED = 0x800, /* event is suppressed during delivery */
});
408
#if CONFIG_EXCLAVES
/* forward declaration of exclaves_resource */
struct exclaves_resource;
#endif /* CONFIG_EXCLAVES */

/*
 * Packing parameters (bits / shift / base) for the kqueue pointer stored in
 * struct knote::kn_kq_packed and unpacked by knote_get_kq().
 *
 * On LP64 the pointer is kept in 42 bits; otherwise the full 32 bits are
 * used. Shift and base are 0 in both configurations.
 */
#if __LP64__
#define KNOTE_KQ_PACKED_BITS 42
#define KNOTE_KQ_PACKED_SHIFT 0
#define KNOTE_KQ_PACKED_BASE 0
#else
#define KNOTE_KQ_PACKED_BITS 32
#define KNOTE_KQ_PACKED_SHIFT 0
#define KNOTE_KQ_PACKED_BASE 0
#endif

/* Build-time check that the parameters above do not select base-relative packing. */
_Static_assert(!VM_PACKING_IS_BASE_RELATIVE(KNOTE_KQ_PACKED),
    "Make sure the knote pointer packing is based on arithmetic shifts");
426
struct kqueue;

/*
 * Kernel-side record for one kevent registration: list linkage, status
 * bits, the packed pointer to its kqueue, per-filter data and the saved
 * kevent (kn_kevent).
 */
struct knote {
	TAILQ_ENTRY(knote) kn_tqe; /* linkage for tail queue */
	SLIST_ENTRY(knote) kn_link; /* linkage for fd search list */
	SLIST_ENTRY(knote) kn_selnext; /* klist element chain */
/*
 * Sentinel value (all bits set) stored in kn_selnext.sle_next;
 * KNOTE_IS_AUTODETACHED() tests a knote for it.
 */
#define KNOTE_AUTODETACHED ((struct knote *) -1)
#define KNOTE_IS_AUTODETACHED(kn) ((kn)->kn_selnext.sle_next == KNOTE_AUTODETACHED)

	/*
	 * Bitfields: 12 status bits + 10 bits of qos/flag fields, followed on
	 * LP64 by the KNOTE_KQ_PACKED_BITS (42) bits of the packed kqueue
	 * pointer, i.e. 64 bits in total.
	 */
	kn_status_t kn_status : 12; /* kn_status_t flags */
	uintptr_t
	    kn_qos_index:4, /* in-use qos index */
	    kn_qos_override:3, /* qos override index */
	    kn_is_fd:1, /* knote is an fd */
	    kn_vnode_kqok:1, /* NOTE(review): undocumented; presumably vnode filter state - confirm */
	    kn_vnode_use_ofst:1; /* NOTE(review): undocumented; presumably vnode filter state - confirm */
#if __LP64__
	uintptr_t kn_kq_packed : KNOTE_KQ_PACKED_BITS; /* packed kqueue pointer, see knote_get_kq() */
#else
	uintptr_t kn_kq_packed; /* packed kqueue pointer, see knote_get_kq() */
#endif

	/* per filter stash of data (pointer, uint32_t or uint64_t) */
	union {
		uintptr_t kn_hook; /* Manually PAC-ed, see knote_kn_hook_get_raw() */
		uint32_t kn_hook32;
	};

	/* per filter pointer to the resource being watched */
	union {
		struct fileproc *XNU_PTRAUTH_SIGNED_PTR("knote.fp") kn_fp;
		struct proc *XNU_PTRAUTH_SIGNED_PTR("knote.proc") kn_proc;
		struct ipc_port *XNU_PTRAUTH_SIGNED_PTR("knote.ipc_port") kn_ipc_port;
		struct ipc_pset *XNU_PTRAUTH_SIGNED_PTR("knote.ipc_pset") kn_ipc_pset;
		struct thread_call *XNU_PTRAUTH_SIGNED_PTR("knote.thcall") kn_thcall;
		struct thread *XNU_PTRAUTH_SIGNED_PTR("knote.thread") kn_thread;
#if CONFIG_EXCLAVES
		struct exclaves_resource *XNU_PTRAUTH_SIGNED_PTR("knote.exclaves_resource") kn_exclaves_resource;
#endif /* CONFIG_EXCLAVES */
	};

	/*
	 * Mimic kevent_qos so that knote_fill_kevent code is not horrid,
	 * but with subtleties:
	 *
	 * - kevent_qos_s::filter is 16bits where ours is 8, and we use the top
	 *   bits to store the real specialized filter.
	 *   knote_fill_kevent* will always force the top bits to 0xff.
	 *
	 * - kevent_qos_s::xflags is not kept, kn_sfflags takes its place,
	 *   knote_fill_kevent* will set xflags to 0.
	 *
	 * - kevent_qos_s::data is saved as kn_sdata and filters are encouraged
	 *   to use knote_fill_kevent, knote_fill_kevent_with_sdata will copy
	 *   kn_sdata as the output value.
	 *
	 * knote_fill_kevent_with_sdata() programmatically asserts
	 * these aliasings are respected.
	 */
	struct kevent_internal_s {
		uint64_t kei_ident; /* identifier for this event */
		/*
		 * kei_filter and kei_filtid together take the place of the 16-bit
		 * kevent_qos_s::filter; their order depends on the byte order so
		 * that kei_filter is always the low byte.
		 */
#ifdef __LITTLE_ENDIAN__
		int8_t kei_filter; /* filter for event */
		uint8_t kei_filtid; /* actual filter for event */
#else
		uint8_t kei_filtid; /* actual filter for event */
		int8_t kei_filter; /* filter for event */
#endif
		uint16_t kei_flags; /* general flags */
		int32_t kei_qos; /* quality of service */
		uint64_t kei_udata; /* opaque user data identifier */
		uint32_t kei_fflags; /* filter-specific flags */
		uint32_t kei_sfflags; /* knote: saved fflags */
		int64_t kei_sdata; /* knote: filter-specific saved data */
		uint64_t kei_ext[4]; /* filter-specific extensions */
	} kn_kevent;

/* Shorthand names for the members of kn_kevent. */
#define kn_id kn_kevent.kei_ident
#define kn_filtid kn_kevent.kei_filtid
#define kn_filter kn_kevent.kei_filter
#define kn_flags kn_kevent.kei_flags
#define kn_qos kn_kevent.kei_qos
#define kn_udata kn_kevent.kei_udata
#define kn_fflags kn_kevent.kei_fflags
#define kn_sfflags kn_kevent.kei_sfflags
#define kn_sdata kn_kevent.kei_sdata
#define kn_ext kn_kevent.kei_ext
};
514
515 static inline struct kqueue *
knote_get_kq(struct knote * kn)516 knote_get_kq(struct knote *kn)
517 {
518 vm_offset_t ptr = VM_UNPACK_POINTER(kn->kn_kq_packed, KNOTE_KQ_PACKED);
519 return __unsafe_forge_single(struct kqueue *, ptr);
520 }
521
522 static inline int
knote_get_seltype(struct knote * kn)523 knote_get_seltype(struct knote *kn)
524 {
525 switch (kn->kn_filter) {
526 case EVFILT_READ:
527 return FREAD;
528 case EVFILT_WRITE:
529 return FWRITE;
530 default:
531 panic("%s(%p): invalid filter %d\n",
532 __func__, kn, kn->kn_filter);
533 return 0;
534 }
535 }
536
/*
 * Context carried through a kevent call: location and size of the optional
 * extra data area, the wait deadline, the fileproc/fd to drop, and the
 * parameters of the event list being processed.
 */
struct kevent_ctx_s {
	uint64_t kec_data_avail; /* address of remaining data size */
	/* kec_data_out and kec_poll_fds share storage; only one is meaningful per call */
	union {
		user_addr_t kec_data_out; /* extra data pointer */
		struct pollfd *kec_poll_fds; /* poll fds */
	};
	user_size_t kec_data_size; /* total extra data size */
	user_size_t kec_data_resid; /* residual extra data size */
	uint64_t kec_deadline; /* wait deadline unless KEVENT_FLAG_IMMEDIATE */
	struct fileproc *kec_fp; /* fileproc to pass to fp_drop or NULL */
	int kec_fd; /* fd to pass to fp_drop or -1 */

	/* the fields below are only set during process / scan */
	int kec_process_nevents; /* user-level event count */
	int kec_process_noutputs; /* number of events output */
	unsigned int kec_process_flags; /* kevent flags, only set for process */
	user_addr_t kec_process_eventlist; /* user-level event list address */
};
/* Note: kevent_ctx_t is a pointer type. */
typedef struct kevent_ctx_s *kevent_ctx_t;

/*
 * Return the kevent context associated with the given thread.
 * NOTE(review): presumably the context lives in per-thread storage and is
 * never NULL - confirm in the implementation.
 */
kevent_ctx_t
kevent_get_context(thread_t thread);
559
560 /*
561 * Filter operators
562 *
563 * These routines, provided by each filter, are called to attach, detach, deliver events,
564 * change/update filter registration and process/deliver events:
565 *
566 * - the f_attach, f_touch, f_process and f_detach callbacks are always
567 * serialized with respect to each other for the same knote.
568 *
 * - the f_event routine is called with a use-count taken on the knote to
 * prolong its lifetime and protect against drop, but is not otherwise
 * serialized with other routine calls.
572 *
573 * - the f_detach routine is always called last, and is serialized with all
574 * other callbacks, including f_event calls.
575 *
576 *
577 * Here are more details:
578 *
579 * f_isfd -
580 * identifies if the "ident" field in the kevent structure is a file-descriptor.
581 *
582 * If so, the knote is associated with the file descriptor prior to attach and
583 * auto-removed when the file descriptor is closed (this latter behavior may change
584 * for EV_DISPATCH2 kevent types to allow delivery of events identifying unintended
585 * closes).
586 *
587 * Otherwise the knote is hashed by the ident and has no auto-close behavior.
588 *
589 * f_adjusts_qos -
590 * identifies if the filter can adjust its QoS during its lifetime.
591 *
592 * Filters using this facility should request the new overrides they want
593 * using the appropriate FILTER_{RESET,ADJUST}_EVENT_QOS extended codes.
594 *
595 * Currently, EVFILT_MACHPORT is the only filter using this facility.
596 *
597 * f_extended_codes -
598 * identifies if the filter returns extended codes from its routines
599 * (see FILTER_ACTIVE, ...) or 0 / 1 values.
600 *
601 * f_attach -
602 * called to attach the knote to the underlying object that will be delivering events
603 * through it when EV_ADD is supplied and no existing matching event is found
604 *
605 * provided a knote that is pre-attached to the fd or hashed (see above) but is
606 * specially marked to avoid concurrent access until the attach is complete. The
607 * kevent structure embedded in this knote has been filled in with a sanitized
608 * version of the user-supplied kevent data. However, the user-supplied filter-specific
609 * flags (fflags) and data fields have been moved into the knote's kn_sfflags and kn_sdata
 * fields respectively. These are usually interpreted as a set of "interest" flags and
611 * data by each filter - to be matched against delivered events.
612 *
 * The attach operator indicates errors by setting the EV_ERROR flag in the flags field
614 * embedded in the knote's kevent structure - with the specific error indicated in the
615 * corresponding data field.
616 *
 * The return value indicates if the knote should already be considered "activated" at
 * the time of attach (one or more of the interest events has already occurred).
619 *
620 * f_detach -
621 * called to disassociate the knote from the underlying object delivering events
622 * the filter should not attempt to deliver events through this knote after this
623 * operation returns control to the kq system.
624 *
625 * f_event -
626 * if the knote() function (or KNOTE() macro) is called against a list of knotes,
627 * this operator will be called on each knote in the list.
628 *
 * The "hint" parameter is completely filter-specific, but usually indicates an
 * event or set of events that have occurred against the source object associated
 * with the list.
632 *
 * The return value indicates if the knote should be considered "activated" as a
 * result of this event (one or more of the interest events has occurred).
635 *
636 * f_process -
637 * called when attempting to deliver triggered events to user-space.
638 *
639 * If the knote was previously activated, this operator will be called when a
640 * thread is trying to deliver events to user-space. The filter gets one last
641 * chance to determine if the event/events are still interesting for this knote
642 * (are the conditions still right to deliver an event). If so, the filter
643 * fills in the output kevent structure with the information to be delivered.
644 *
645 * The input context/data parameter is used during event delivery. Some
646 * filters allow additional data delivery as part of event delivery. This
647 * context field indicates if space was made available for these additional
648 * items and how that space is to be allocated/carved-out.
649 *
650 * The filter may set EV_CLEAR or EV_ONESHOT in the output flags field to indicate
651 * special post-delivery dispositions for the knote.
652 *
653 * EV_CLEAR - indicates that all matching events have been delivered. Even
654 * though there were events to deliver now, there will not be any
655 * more until some additional events are delivered to the knote
656 * via the f_event operator, or the interest set is changed via
657 * the f_touch operator. The knote can remain deactivated after
658 * processing this event delivery.
659 *
660 * EV_ONESHOT - indicates that this is the last event to be delivered via
661 * this knote. It will automatically be deleted upon delivery
662 * (or if in dispatch-mode, upon re-enablement after this delivery).
663 *
664 * The return value indicates if the knote has delivered an output event.
665 * Unless one of the special output flags was set in the output kevent, a non-
666 * zero return value ALSO indicates that the knote should be re-activated
667 * for future event processing (in case it delivers level-based or a multi-edge
668 * type events like message queues that already exist).
669 *
670 * NOTE: In the future, the boolean may change to an enum that allows more
671 * explicit indication of just delivering a current event vs delivering
672 * an event with more events still pending.
673 *
674 * f_touch -
 * called to update the knote with new state from the user during
 * EV_ADD/EV_ENABLE/EV_DISABLE on an already-attached knote.
677 *
678 * f_touch should copy relevant new data from the kevent into the knote.
679 *
680 * operator must lock against concurrent f_event operations.
681 *
682 * A return value of 1 indicates that the knote should now be considered
683 * 'activated'.
684 *
685 * f_touch can set EV_ERROR with specific error in the data field to
686 * return an error to the client. You should return 1 to indicate that
687 * the kevent needs to be activated and processed.
688 *
689 * f_allow_drop -
690 *
691 * [OPTIONAL] If this function is non-null, then it indicates that the
692 * filter wants to validate EV_DELETE events. This is necessary if
693 * a particular filter needs to synchronize knote deletion with its own
694 * filter lock.
695 *
 * When true is returned, the EV_DELETE is allowed and can proceed.
697 *
698 * If false is returned, the EV_DELETE doesn't proceed, and the passed in
699 * kevent is used for the copyout to userspace.
700 *
701 * Currently, EVFILT_WORKLOOP is the only filter using this facility.
702 *
703 * f_post_register_wait -
704 * [OPTIONAL] called when attach or touch return the FILTER_REGISTER_WAIT
705 * extended code bit. It is possible to use this facility when the last
706 * register command wants to wait.
707 *
708 * Currently, EVFILT_WORKLOOP is the only filter using this facility.
709 *
710 * f_sanitized_copyout -
711 * [OPTIONAL] If this function is non-null, then it should be used so
712 * that the filter can provide a sanitized copy of the current contents
713 * of a knote to userspace. This prevents leaking of any sensitive
714 * information like kernel pointers which might be stashed in filter
715 * specific data.
716 *
717 * Currently, EVFILT_MACHPORT uses this facility.
718 */
719
/*
 * Forward declarations of types that are defined elsewhere.
 * They are intentionally left incomplete here.
 */
struct _kevent_register;
struct knote_lock_ctx;
struct proc;
struct thread_group;
struct uthread;
struct waitq;
726
/*
 * struct filterops - callback table describing one kevent filter.
 *
 * The three booleans are properties of the filter as a whole; the function
 * pointers are the hooks invoked on its knotes.  The per-hook contract is
 * spelled out in the block comment that precedes this structure.
 *
 * Member order is kept exactly as-is: it determines the structure layout.
 */
struct filterops {
	/* true if ident == filedescriptor */
	bool    f_isfd;
	/* true if the filter can override the knote */
	bool    f_adjusts_qos;
	/*
	 * hooks return extended codes (FILTER_*); when false, filter_call()
	 * reduces a hook's return value to 0 or 1
	 */
	bool    f_extended_codes;

	/* core hooks */
	int     (*f_attach)(struct knote *kn, struct kevent_qos_s *kev);
	void    (*f_detach)(struct knote *kn);
	int     (*f_event)(struct knote *kn, long hint);
	/* copies new user-supplied state from `kev` into an attached knote */
	int     (*f_touch)(struct knote *kn, struct kevent_qos_s *kev);
	int     (*f_process)(struct knote *kn, struct kevent_qos_s *kev);

	/* optional & advanced: each of these may be left NULL */

	/* validates EV_DELETE; returning false stops the delete */
	bool    (*f_allow_drop)(struct knote *kn, struct kevent_qos_s *kev);
	/* called when attach or touch returned FILTER_REGISTER_WAIT */
	void    (*f_post_register_wait)(struct uthread *uth, struct knote *kn,
	    struct _kevent_register *ss_kr);
	/* provides a sanitized copy of the knote contents for userspace */
	void    (*f_sanitized_copyout)(struct knote *kn, struct kevent_qos_s *kev);
};
744
745 /*
746 * Extended codes returned by filter routines when f_extended_codes is set.
747 *
748 * FILTER_ACTIVE
749 * The filter is active and a call to f_process() may return an event.
750 *
751 * For f_process() the meaning is slightly different: the knote will be
752 * activated again as long as f_process returns FILTER_ACTIVE, unless
753 * EV_CLEAR is set, which requires a new f_event to reactivate the knote.
754 *
755 * Valid: f_attach, f_event, f_touch, f_process
756 * Implicit: -
757 * Ignored: -
758 *
759 * FILTER_REGISTER_WAIT
760 * The filter wants its f_post_register_wait() to be called.
761 *
762 * Note: It is only valid to ask for this behavior for a workloop kqueue,
763 * and is really only meant to be used by EVFILT_WORKLOOP.
764 *
765 * Valid: f_attach, f_touch
766 * Implicit: -
767 * Ignored: f_event, f_process
768 *
769 * FILTER_UPDATE_REQ_QOS
770 * The filter wants the passed in QoS to be updated as the new intrinsic qos
771 * for this knote. If the kevent `qos` field is 0, no update is performed.
772 *
773 * This also will reset the event QoS, so FILTER_ADJUST_EVENT_QOS() must
774 * also be used if an override should be maintained.
775 *
776 * Note: when this is used in f_touch, the incoming qos validation
777 * is the responsibility of the filter.
778 *
779 * Valid: f_touch
780 * Implicit: f_attach
781 * Ignored: f_event, f_process
782 *
783 * FILTER_RESET_EVENT_QOS
784 * FILTER_ADJUST_EVENT_QOS(qos)
785 * The filter wants the QoS of the next event delivery to be overridden
786 * at the specified QoS. This allows for the next event QoS to be elevated
787 * from the knote requested qos (See FILTER_UPDATE_REQ_QOS).
788 *
789 * Event QoS Overrides are reset when a particular knote is no longer
790 * active. Hence this is ignored if FILTER_ACTIVE isn't also returned.
791 *
792 * Races between an f_event() and any other f_* routine asking for
793 * a specific QoS override are handled generically and the filters do not
794 * have to worry about them.
795 *
796 * To use this facility, filters MUST set their f_adjusts_qos bit to true.
797 *
798 * It is expected that filters will return the new QoS they expect to be
799 * applied from any f_* callback except for f_process() where no specific
800 * information should be provided. Filters should not try to hide no-ops,
801 * kevent will already optimize these away.
802 *
803 * Valid: f_touch, f_attach, f_event, f_process
804 * Implicit: -
805 * Ignored: -
806 *
807 * FILTER_THREADREQ_NODEFEER
808 * The filter has moved a turnstile priority push away from the current
809 * thread, preemption has been disabled, and thread requests need to be
810 * committed before preemption is re-enabled.
811 *
812 *
813 * Valid: f_attach, f_touch
814 * Implicit: -
815 * Invalid: f_event, f_process
816 */
/*
 * Extended result codes, laid out as a bit field:
 *
 *   0x001   FILTER_ACTIVE
 *   0x002   FILTER_REGISTER_WAIT
 *   0x004   FILTER_UPDATE_REQ_QOS
 *   0x008   FILTER_ADJUST_EVENT_QOS_BIT    (an event QoS adjustment is present)
 *   0x070   FILTER_ADJUST_EVENT_QOS_MASK   (the QoS value itself)
 *   0x080   FILTER_THREADREQ_NODEFEER
 *   0x100   FILTER_ADJUST_EVENT_IOTIER_BIT
 *
 * FILTER_ADJUST_EVENT_QOS(qos) shifts `qos` into the QoS field and sets
 * FILTER_ADJUST_EVENT_QOS_BIT.  FILTER_RESET_EVENT_QOS is that bit alone,
 * i.e. an adjustment whose QoS field is 0.
 *
 * FILTER_GET_EVENT_QOS(result) extracts the QoS field again.  Its argument
 * is parenthesized so that compound expressions such as
 * FILTER_GET_EVENT_QOS(a | b) shift the whole value rather than only `b`.
 *
 * NOTE(review): the extraction masks with THREAD_QOS_LAST (defined
 * elsewhere) rather than with FILTER_ADJUST_EVENT_QOS_MASK; the two only
 * agree if THREAD_QOS_LAST is 0x7 -- confirm.
 */
#define FILTER_ACTIVE                       0x00000001
#define FILTER_REGISTER_WAIT                0x00000002
#define FILTER_UPDATE_REQ_QOS               0x00000004
#define FILTER_ADJUST_EVENT_QOS_BIT         0x00000008
#define FILTER_ADJUST_EVENT_QOS_MASK        0x00000070
#define FILTER_ADJUST_EVENT_QOS_SHIFT       4
#define FILTER_ADJUST_EVENT_QOS(qos) \
	(((qos) << FILTER_ADJUST_EVENT_QOS_SHIFT) | FILTER_ADJUST_EVENT_QOS_BIT)
#define FILTER_GET_EVENT_QOS(result) \
	(((result) >> FILTER_ADJUST_EVENT_QOS_SHIFT) & THREAD_QOS_LAST)
#define FILTER_RESET_EVENT_QOS              FILTER_ADJUST_EVENT_QOS_BIT
#define FILTER_THREADREQ_NODEFEER           0x00000080
#define FILTER_ADJUST_EVENT_IOTIER_BIT      0x00000100
830
/*
 * filter_call(_ops, call) - invoke hook `call` (written as
 * `f_xxx(args...)`) through the filterops pointer `_ops`.
 *
 * If the filter sets f_extended_codes, the hook's return value is passed
 * through untouched.  Otherwise it is reduced to 0 or 1.
 *
 * `_ops` is evaluated twice; the hook itself is invoked exactly once.
 */
#define filter_call(_ops, call) \
	((_ops)->f_extended_codes ? ((_ops)->call) : (((_ops)->call) != 0))
833
/* struct klist: singly-linked list head whose elements are struct knote. */
SLIST_HEAD(klist, knote);

/* Initialization entry points; both are defined elsewhere. */
void knote_init(void);
void klist_init(struct klist *list);
837
/*
 * Thin macro wrappers around the klist entry points.
 * KNOTE() always passes `false` as knote()'s autodetach argument.
 */
#define KNOTE(list, hint)       knote((list), (hint), false)
#define KNOTE_ATTACH(list, kn)  knote_attach((list), (kn))
#define KNOTE_DETACH(list, kn)  knote_detach((list), (kn))
841
/*
 * klist entry points (defined elsewhere).
 * NOTE(review): semantics are not visible from this header; judging by the
 * names, knote() posts `hint` to the knotes on `list` and attach/detach
 * add/remove a knote -- confirm against the implementation.
 */
void knote(struct klist *list, long hint, bool autodetach);
int  knote_attach(struct klist *list, struct knote *kn);
int  knote_detach(struct klist *list, struct knote *kn);
void knote_vanish(struct klist *list, bool make_active);
846
847 extern void knote_set_error(struct knote *kn, int error);
848 extern int64_t knote_low_watermark(const struct knote *kn) __pure2;
849 extern void knote_fill_kevent_with_sdata(struct knote *kn, struct kevent_qos_s *kev);
850 extern void knote_fill_kevent(struct knote *kn, struct kevent_qos_s *kev, int64_t data);
851
/* Raw accessors for a knote's kn_hook pointer (defined elsewhere). */
void *knote_kn_hook_get_raw(struct knote *kn);

/*
 * The setter must be called after having specified the filtid + filter in
 * the knote.
 */
void knote_kn_hook_set_raw(struct knote *kn, void *kn_hook);
855
/*
 * Defined elsewhere.  knote_fops() returns a read-only filterops pointer
 * for the given knote.
 */
void knote_fdclose(struct proc *p, int fd);
const struct filterops *knote_fops(struct knote *kn);
858
/* kqueue turnstile accessors (defined elsewhere). */
struct turnstile *kqueue_turnstile(struct kqueue *);
struct turnstile *kqueue_alloc_turnstile(struct kqueue *);

/* kqueue I/O tier override accessors; the override is an 8-bit value. */
void    kqueue_set_iotier_override(struct kqueue *kqu,
    uint8_t iotier_override);
uint8_t kqueue_get_iotier_override(struct kqueue *kqu);
863
/* Defined elsewhere; `proc` is passed as an untyped pointer. */
int kevent_proc_copy_uptrs(void *proc, uint64_t *buf, uint32_t bufsize);

/* Thread-group pre-adoption hooks, only declared with CONFIG_PREADOPT_TG. */
#if CONFIG_PREADOPT_TG
void kqueue_set_preadopted_thread_group(struct kqueue *kq,
    struct thread_group *tg, thread_qos_t qos);
bool kqueue_process_preadopt_thread_group(thread_t t, struct kqueue *kq,
    struct thread_group *tg);
#endif
869
870 int kevent_copyout_proc_dynkqids(void *proc, user_addr_t ubuf,
871 uint32_t ubufsize, int32_t *nkqueues_out);
872 int kevent_copyout_dynkqinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
873 uint32_t ubufsize, int32_t *size_out);
874 int kevent_copyout_dynkqextinfo(void *proc, kqueue_id_t kq_id, user_addr_t ubuf,
875 uint32_t ubufsize, int32_t *nknotes_out);
876
/* Sync-IPC attach/detach pair, defined elsewhere. */
int  filt_wlattach_sync_ipc(struct knote *kn);
void filt_wldetach_sync_ipc(struct knote *kn);
879
880 extern int kevent_workq_internal(struct proc *p,
881 user_addr_t changelist, int nchanges,
882 user_addr_t eventlist, int nevents,
883 user_addr_t data_out, user_size_t *data_available,
884 unsigned int flags, int32_t *retval);
885
886 #elif defined(KERNEL_PRIVATE) /* !XNU_KERNEL_PRIVATE: kexts still need a klist structure definition */
887
/*
 * Minimal view for this configuration: only opaque forward declarations
 * plus the klist head type (a singly-linked list of struct knote).
 */
struct knote;
struct proc;
SLIST_HEAD(klist, knote);
891
892 #endif /* !XNU_KERNEL_PRIVATE && KERNEL_PRIVATE */
893
894 #else /* KERNEL_PRIVATE */
895
896 __BEGIN_DECLS
897 int kevent_qos(int kq,
898 const struct kevent_qos_s *changelist, int nchanges,
899 struct kevent_qos_s *eventlist, int nevents,
900 void *data_out, size_t *data_available,
901 unsigned int flags);
902
903 int kevent_id(kqueue_id_t id,
904 const struct kevent_qos_s *changelist, int nchanges,
905 struct kevent_qos_s *eventlist, int nevents,
906 void *data_out, size_t *data_available,
907 unsigned int flags);
908
909 __END_DECLS
910
911
912 #endif /* KERNEL_PRIVATE */
913
/*
 * Flags for pending events notified by kernel via return-to-kernel ast.
 * The two values are distinct single bits.
 */
#define R2K_WORKLOOP_PENDING_EVENTS     0x1
#define R2K_WORKQ_PENDING_EVENTS        0x2
917
/*
 * Flags for notifying what to do when there is a workqueue quantum expiry.
 * The two values are distinct single bits.
 */
#define PTHREAD_WQ_QUANTUM_EXPIRY_NARROW    0x1
#define PTHREAD_WQ_QUANTUM_EXPIRY_SHUFFLE   0x2
921
922 #endif /* !_SYS_EVENT_PRIVATE_H_ */
923