xref: /xnu-8796.121.2/bsd/skywalk/channel/os_channel_private.h (revision c54f35ca767986246321eb901baf8f5ff7923f6a)
1 /*
2  * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31  * All rights reserved.
32  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  *   1. Redistributions of source code must retain the above copyright
38  *      notice, this list of conditions and the following disclaimer.
39  *   2. Redistributions in binary form must reproduce the above copyright
40  *      notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53  * SUCH DAMAGE.
54  */
55 
56 #ifndef _SKYWALK_OS_CHANNEL_PRIVATE_H_
57 #define _SKYWALK_OS_CHANNEL_PRIVATE_H_
58 
59 #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
60 #include <sys/time.h>
61 #include <sys/signal.h>
62 #include <sys/guarded.h>
63 #include <sys/utsname.h>
64 #include <skywalk/os_channel.h>
65 #include <skywalk/os_stats_private.h>
66 
67 /* BEGIN CSTYLED */
68 /*
69  * The userspace data structures used by Skywalk are shown below.
70  *
71  * The kernel allocates the regions for the various object types,
72  * and maps them to the userspace task in a contiguous span, one
73  * after another.
74  *
75  * Each channel file descriptor comes with its own memory map,
76  * and the layout of the rest of the objects is described in the
77  * __user_channel_schema structure associated with the channel.
78  * This schema structure is mapped read-only in the task.
79  *
80  *     +=======================+
81  *     | __user_channel_schema | (1 per channel fd)
82  *     +=======================+
83  *     |     csm_ver           |
84  *     |     csm_flags         |
85  *     |-----------------------|
86  *     |     csm_tx_rings      |
87  *     |     csm_rx_rings      |
88  *     | csm_allocator_rings   |
89  *     |    csm_event_rings    |
90  *     |-----------------------|
91  *     |     csm_stats_ofs     | <<---+
92  *     |-----------------------|      |
93  *     |     csm_flowadv_max   |      |
94  *     |     csm_flowadv_ofs   | <<---+ relative to base of memory map
95  *     |-----------------------|      |
96  *     | csm_md_redzone_cookie |      |
97  *     |     csm_md_type       |      |
98  *     |     csm_md_subtype    |      |
99  *     |-----------------------|      |
100  *     |     csm_stats_ofs     | <<---+
101  *     |     csm_stats_type    |      |
102  *     |-----------------------|      |
103  *     |     csm_nexusadv_ofs  | <<---+
104  *     |-----------------------|
105  *     |     csm_kern_name     |
106  *     |     csm_kern_uuid     |
107  *     |-----------------------|
108  *     | TX  csm_ring_ofs[0]   | <<---+
109  *     | TX  csm_sd_ofs[0]     |      |
110  *     :        ...            :      |
111  *     | TX  csm_ring_ofs[t]   |      |
112  *     | TX  csm_sd_ofs[t]     |      |
113  *     |-----------------------|      |
114  *     | RX  csm_ring_ofs[0]   | <<---+ these offsets are relative
115  *     | RX  csm_sd_ofs[0]     |      | to each schema structure
116  *     :        ...            :      |
117  *     | RX  csm_ring_ofs[t]   |      |
118  *     | RX  csm_sd_ofs[t]     |      |
119  *     |-----------------------|      |
120  *     | A   csm_ring_ofs[0]   |      |
121  *     | A   csm_sd_ofs[0]     |      |
122  *     :        ...            :      |
123  *     | A   csm_ring_ofs[t]   | <<---+
124  *     | A   csm_sd_ofs[t]     |      |
125  *     |-----------------------|      |
126  *     | F   csm_ring_ofs[0]   |      |
127  *     | F   csm_sd_ofs[0]     |      |
128  *     :        ...            :      |
129  *     | F   csm_ring_ofs[t]   | <<---+
130  *     | F   csm_sd_ofs[t]     |      |
131  *     |-----------------------|      |
132  *     | EV  csm_ring_ofs[0]   | <<---+
133  *     | EV  csm_sd_ofs[0]     |
134  *     +-----------------------+
135  *         (variable length)
136  *
137  * On nexus adapters that support statistics or flow advisory, the
138  * csm_stats_ofs or csm_flowadv_ofs would be non-zero, and their values
139  * represent the offset to the respective objects from the base of the
140  * memory map.  This is because those regions are shared amongst all
141  * channels opened to the adapter associated with the nexus port.
142  *
143  * Other regions, such as rings and slot descriptors, are unique to the
144  * channel itself.  They are always present, and their values indicated
145  * by csm_{ring,sd}_ofs represent the offset to the respective objects
146  * from the schema pointer (not from base of memory map.)  This is done
147  * to support channels bound to any of the adapter's ring-pairs.
148  *
149  * See notes below on CSM_CURRENT_VERSION.
150  */
151 /* END CSTYLED */
#define CHANNEL_SCHEMA_KERN_NAME        _SYS_NAMELEN
/*
 * Channel schema (one per channel fd), mapped read-only into the task.
 * Describes the layout of the channel's rings and shared regions; see
 * the diagram above.  This layout is shared between the kernel and
 * userspace and is therefore ABI: any modification must be accompanied
 * by a bump of CSM_CURRENT_VERSION below.
 */
struct __user_channel_schema {
	/*
	 * Schema properties, kernel version string and kernel
	 * executable UUID (for debugging).  These 4 fields
	 * must be at the beginning of the structure.
	 */
	const uint32_t  csm_ver;                /* schema layout version */
	const volatile uint32_t csm_flags;      /* CSM_* flags */
	const char      csm_kern_name[CHANNEL_SCHEMA_KERN_NAME];
	const uuid_t    csm_kern_uuid;

	/*
	 * The rest of the fields may be rearranged as needed, with
	 * the expectation that CSM_CURRENT_VERSION be bumped up on
	 * each modification.
	 */

	/*
	 * The number of packet rings available for this channel.
	 */
	const uint32_t  csm_tx_rings;   /* # of tx rings */
	const uint32_t  csm_rx_rings;   /* # of rx rings */

	/*
	 * The number of allocator ring pair available for this channel.
	 * If the channel supports user packet pool then 1 pair of
	 * alloc/free ring per channel are used to manage the packet
	 * allocation from userspace.
	 * If the channel supports multi-buflet packet then an additional pair
	 * of alloc/free ring is used to manage the buffer (buflet) allocation
	 * from userspace.
	 */
	const uint32_t  csm_allocator_ring_pairs;

	/*
	 * number of event rings for this channel.
	 */
	const uint32_t  csm_num_event_rings;

	/*
	 * Flow advisory region offset; this field will be 0 if the
	 * nexus isn't capable of flow advisory scheme.  Otherwise,
	 * it points to a table of flow advisory entries, and the
	 * total number of entries is indicated by csm_flowadv_max.
	 * Note: this offset is relative to the base of the memory
	 * map, since the region is shared by all channels opened to
	 * the adapter (see block comment above).
	 */
	const uint32_t          csm_flowadv_max;
	const mach_vm_offset_t  csm_flowadv_ofs
	__attribute__((aligned(sizeof(uint64_t))));

	/*
	 * Metadata region redzone, type and sub-type.
	 */
	const uint64_t  csm_md_redzone_cookie   /* metadata redzone cookie */
	__attribute__((aligned(sizeof(uint64_t))));
	const nexus_meta_type_t csm_md_type;    /* metadata type */
	const nexus_meta_subtype_t csm_md_subtype; /* metadata subtype */

	/*
	 * Statistics region offset; each nexus is free to use this
	 * region and break it up into multiple smaller regions if
	 * needed.  The definition and interpretation of the contents
	 * is left to the nexus.  The value of this field will be 0
	 * if the nexus doesn't facilitate shareable statistics.
	 * This offset is also relative to the base of the memory map.
	 */
	const mach_vm_offset_t  csm_stats_ofs
	__attribute__((aligned(sizeof(uint64_t))));
	const nexus_stats_type_t csm_stats_type;

	/*
	 * Nexus advisory region offset; this field will be 0 if the
	 * nexus isn't providing any nexus-wide advisories.  Otherwise,
	 * it points to the nexus advisory structure.
	 */
	const mach_vm_offset_t csm_nexusadv_ofs
	__attribute__((aligned(sizeof(uint64_t))));

	/*
	 * The following array contains the offset of each channel ring
	 * from the beginning of this structure, as well as the ring's
	 * slot descriptor, in the following order:
	 *
	 * tx rings (csm_tx_rings entries)
	 * rx rings (csm_rx_rings entries)
	 * allocator rings (either 2 or 4 or none) (optional)
	 * event rings (optional)
	 *
	 * NOTE(review): the original text described the counts as
	 * "csm_tx_rings-csm_htx_rings" / "csm_rx_rings-csm_hrx_rings";
	 * no csm_h{t,r}x_rings fields exist in this schema version --
	 * confirm against the kernel-side schema setup code.
	 *
	 * The area is filled up by the kernel, and then only read
	 * by userspace code.
	 */
	struct {
		const mach_vm_offset_t  ring_off; /* __user_channel_ring */
		const mach_vm_offset_t  sd_off;   /* __slot_desc */
	} csm_ring_ofs[0] __attribute__((aligned(sizeof(uint64_t))));
};
247 
/*
 * Schema layout version.  Make sure to bump this up each time
 * struct __user_channel_schema layout is modified.  This helps
 * to ensure that both kernel and libsystem_kernel are in sync,
 * as otherwise we'd assert due to version mismatch.
 */
#define CSM_CURRENT_VERSION     15

/* valid values for csm_flags */
#define CSM_PRIV_MEM    0x1             /* private memory region */
#define CSM_ACTIVE      (1U << 31)      /* channel is active */

/* bit names for %b-style formatting of csm_flags */
#define CSM_BITS        "\020\01PRIV_MEM\040ACTIVE"

/*
 * The size of __user_channel_schema structure for n total rings,
 * i.e. the offset just past the n-th csm_ring_ofs[] entry.
 */
#define CHANNEL_SCHEMA_SIZE(n) \
	__builtin_offsetof(struct __user_channel_schema, csm_ring_ofs[(n)])
265 
/*
 * Some fields should be cache-aligned to reduce contention.
 * The alignment is architecture and OS dependent; we use an
 * estimate that should cover most architectures.
 */
#define CHANNEL_CACHE_ALIGN_MAX 128     /* max cache line size */

/*
 * Ring kind.  Values mirror the kernel-side NR_* ring types.
 */
#define CR_KIND_RX              0       /* same as NR_RX */
#define CR_KIND_TX              1       /* same as NR_TX */
#define CR_KIND_ALLOC           2       /* same as NR_A */
#define CR_KIND_FREE            3       /* same as NR_F */
#define CR_KIND_EVENT           4       /* same as NR_EV */

/* index of a slot within a ring */
typedef uint32_t slot_idx_t;

/* index of an object (e.g. a metadata) within a region */
typedef uint32_t obj_idx_t;
#define OBJ_IDX_NONE    ((obj_idx_t)-1) /* sentinel: no object */
286 
/*
 * This structure contains per-slot properties for userspace.  If the flag
 * SD_IDX_VALID is set, the descriptor contains the index of the metadata
 * attached to the slot.
 *
 * TODO: [email protected] -- this will be made read-write for user pool.
 * TODO: [email protected] -- Should we make use of RX/TX
 * preparation/writeback descriptors (in a union) for sd_len?
 */
struct __user_slot_desc {
	obj_idx_t       sd_md_idx;      /* metadata index */
	uint16_t        sd_flags;       /* slot flags (SD_*) */
	/*
	 * XXX: sd_len is currently used only for the purpose of accounting
	 * for the number of bytes pending to be read by the user channel.
	 * Currently the maximum size of a packet being transported on user
	 * channel is <= UINT16_MAX, so sd_len being uint16_t is fine, but
	 * this needs to be changed if we want to go beyond UINT16_MAX.
	 */
	uint16_t        sd_len;         /* slot len */
};
308 
/* valid values for sd_flags */
#define SD_IDX_VALID    0x1             /* slot has metadata attached */
#ifdef KERNEL
#define SD_LEN_VALID    0x2             /* slot has packet length recorded */
#define SD_KERNEL_ONLY  (1 << 15)       /* kernel only; no user counterpart */

/* flags that are visible to userspace */
#define SD_FLAGS_USER   (SD_IDX_VALID)
/* invariant flags we want to keep */
#define SD_SAVE_MASK    (SD_KERNEL_ONLY)
#endif /* KERNEL */
/*
 * SD_VALID_METADATA() returns TRUE if the slot has an attached metadata
 */
#define SD_VALID_METADATA(_sd)                                          \
	(!!((_sd)->sd_flags & SD_IDX_VALID))
324 
/*
 * Slot descriptor.
 *
 * The union pads the user-visible descriptor out to 64 bits;
 * _sd_private presumably gives the kernel an opaque same-sized
 * overlay -- confirm against the kernel-side slot descriptor code.
 */
struct __slot_desc {
	union {
		struct __user_slot_desc _sd_user;
		uint64_t                _sd_private[1];
	};
};

#define SLOT_DESC_SZ            (sizeof (struct __slot_desc))
/* access the user view of a slot descriptor */
#define SLOT_DESC_USD(_sdp)     (&(_sdp)->_sd_user)
337 
338 /*
339  * Ring.
340  *
341  * Channel representation of a TX or RX ring (also known as "queue").
342  * This is a queue implemented as a fixed-size circular array.
343  * At the software level the important fields are: head, cur, tail.
344  *
345  * The __user_channel_ring, and all slots and buffers in the range
346  * [head .. tail-1] are owned by the user program; the kernel only
347  * accesses them during a channel system call and in the user thread
348  * context.
349  */
350 struct __user_channel_ring {
351 	/*
352 	 * In TX rings:
353 	 *
354 	 *   head	first slot available for transmission;
355 	 *   tail	(readonly) first slot reserved to the kernel
356 	 *   khead	(readonly) kernel's view of next slot to send
357 	 *		since last sync.
358 	 *
359 	 * [head .. tail-1] can be used for new packets to send;
360 	 *
361 	 * 'head' must be incremented as slots are filled with new packets to
362 	 * be sent;
363 	 *
364 	 * In RX rings:
365 	 *
366 	 *   head	first valid received packet;
367 	 *   tail	(readonly) first slot reserved to the kernel
368 	 *   khead	(readonly) kernel's view of next slot to reclaim
369 	 *		since last sync.
370 	 *
371 	 * [head .. tail-1] contain received packets;
372 	 *
373 	 * 'head' must be incremented as slots are consumed and can be returned
374 	 * to the kernel;
375 	 *
376 	 */
377 	volatile slot_idx_t     ring_head;      /* (u) first user slot */
378 	const volatile slot_idx_t ring_tail;    /* (k) first kernel slot */
379 	const volatile slot_idx_t ring_khead;   /* (k) next to send/reclaim */
380 
381 	const uint32_t  ring_num_slots; /* # of slots in the ring */
382 	const uint32_t  ring_def_buf_size;  /* size of each default buffer */
383 	const uint32_t  ring_large_buf_size;  /* size of each large buffer */
384 	const uint16_t  ring_md_size;   /* size of each metadata */
385 	const uint16_t  ring_bft_size;  /* size of each buflet metadata */
386 	const uint16_t  ring_id;        /* unused */
387 	const uint16_t  ring_kind;      /* kind of ring (tx or rx) */
388 
389 	/*
390 	 * Base addresses of {buf, metadata, slot descriptor} regions
391 	 * from this ring descriptor.  This facilitates computing the
392 	 * addresses of those regions in the task's mapped memory.
393 	 */
394 	/* base address of default buffer region */
395 	const mach_vm_offset_t  ring_def_buf_base
396 	__attribute((aligned(sizeof(uint64_t))));
397 	/* base address of large buffer region */
398 	const mach_vm_offset_t  ring_large_buf_base
399 	__attribute((aligned(sizeof(uint64_t))));
400 	const mach_vm_offset_t  ring_md_base    /* base of metadata region */
401 	__attribute((aligned(sizeof(uint64_t))));
402 	const mach_vm_offset_t  ring_sd_base    /* base of slot desc region */
403 	__attribute((aligned(sizeof(uint64_t))));
404 	/*
405 	 * base of buflet metadata region
406 	 * value of 0 means that external buflet metadata is not present.
407 	 */
408 	const mach_vm_offset_t  ring_bft_base
409 	__attribute((aligned(sizeof(uint64_t))));
410 
411 	const volatile uint64_t ring_sync_time /* (k) time of last sync */
412 	__attribute((aligned(sizeof(uint64_t))));
413 	const volatile uint64_t ring_notify_time /* (k) time of last notify */
414 	__attribute((aligned(sizeof(uint64_t))));
415 	/* current working set for the packet allocator ring */
416 	const volatile uint32_t ring_alloc_ws;
417 	/* current working set for the buflet allocator ring */
418 	const volatile uint32_t ring_alloc_buf_ws;
419 };
420 
/*
 * TRUE when the ring holds no slots for the user, i.e. the user-owned
 * range [head .. tail-1] is empty.  (The original comment said "check
 * if space is available", which is the opposite of what head == tail
 * indicates.)
 */
#define CHANNEL_RING_EMPTY(_ring) ((_ring)->ring_head == (_ring)->ring_tail)
423 
/*
 * Flow advisory.
 *
 * Each flow that is registered with the nexus capable of supporting
 * flow advisory is given an entry.  Each entry resides in the flow
 * advisory table that is mapped to the task.
 * fae_id:  is the flow identifier used by libnetcore to identify a flow.
 *          This identifier is passed as a metadata on all packets
 *          generated by the user space stack. This is the flow_id parameter
 *          which should be used while checking if a flow is
 *          admissible using the API os_channel_flow_admissible().
 * fae_flowid: is a globally unique flow identifier generated by the
 *             flowswitch for each flow. Flowswitch stamps every TX packet
 *             with this identifier. This is the flow identifier which would
 *             be visible to the AQM logic and the driver. The flow advisory
 *             mechanism in kernel uses this fae_flowid to identify the flow
 *             entry in the flow advisory table.  (The original text said
 *             "fae_id" here; the kernel-side fae_token alias below maps to
 *             fae_flowid.)
 */
struct __flowadv_entry {
	union {
		uint64_t        fae_id_64[2];   /* fae_id as 64-bit words */
		uint32_t        fae_id_32[4];   /* fae_id as 32-bit words */
		uuid_t          fae_id; /* flow ID from userspace stack */
	};
	volatile uint32_t       fae_flags;  /* flags FLOWADVF_* */
	/* flow ID generated by flowswitch */
	uint32_t                fae_flowid;
#ifdef KERNEL
#define fae_token               fae_flowid
#endif /* KERNEL */
} __attribute__((aligned(sizeof(uint64_t))));

/* valid values for fae_flags */
#define FLOWADVF_VALID          0x1     /* flow is valid */
#define FLOWADVF_SUSPENDED      0x2     /* flow is suspended */
458 
/* channel event threshold (used for the TX/RX low-watermarks below) */
struct ch_ev_thresh {
	channel_threshold_unit_t cet_unit;      /* unit for cet_value */
	uint32_t                cet_value;      /* threshold value */
};
464 
/*
 * Channel information, filled in by the kernel via __channel_get_info();
 * a copy is kept in the userspace channel descriptor (chd_info).
 */
struct ch_info {
	union {
		uint64_t  cinfo_ch_id_64[2];    /* channel UUID, 64-bit words */
		uint32_t  cinfo_ch_id_32[4];    /* channel UUID, 32-bit words */
		uuid_t    cinfo_ch_id;          /* Channel UUID */
	};
#ifdef KERNEL
#define cinfo_ch_token  cinfo_ch_id_32[0]
#endif /* KERNEL */
	uint32_t          cinfo_ch_mode;        /* CHMODE_* flags */
	ring_id_t         cinfo_ch_ring_id;     /* Channel ring (or any) */
	struct nxprov_params cinfo_nxprov_params; /* Nexus provider params */
	uuid_t            cinfo_nx_uuid;        /* Nexus instance UUID */
	nexus_port_t      cinfo_nx_port;        /* Nexus instance port */
	uint32_t          cinfo_num_bufs;       /* # buffers in user pool */
	mach_vm_size_t    cinfo_mem_map_size;   /* size of VM map */
	mach_vm_address_t cinfo_mem_base;       /* VM mapping for task */
	mach_vm_offset_t  cinfo_schema_offset;  /* offset in VM map */
	ring_id_t         cinfo_first_tx_ring;  /* first TX ring ID */
	ring_id_t         cinfo_last_tx_ring;   /* last TX ring ID */
	ring_id_t         cinfo_first_rx_ring;  /* first RX ring ID */
	ring_id_t         cinfo_last_rx_ring;   /* last RX ring ID */
	struct ch_ev_thresh cinfo_tx_lowat;     /* TX low-watermark */
	struct ch_ev_thresh cinfo_rx_lowat;     /* RX low-watermark */
} __attribute__((aligned(sizeof(uint64_t))));
493 
494 #include <skywalk/os_nexus_private.h>
495 
#define CHANNEL_INIT_VERSION_1          1
#define CHANNEL_INIT_CURRENT_VERSION    CHANNEL_INIT_VERSION_1

/*
 * Channel init parameters, passed to __channel_open().  All fields are
 * inputs except ci_guard, which is returned by the kernel.
 */
struct ch_init {
	uint32_t        ci_version;     /* in: CHANNEL_INIT_CURRENT_VERSION */
	uint32_t        ci_ch_mode;     /* in: CHMODE_* flags */
	ring_id_t       ci_ch_ring_id;  /* in: Channel ring */
	nexus_port_t    ci_nx_port;     /* in: Nexus instance port */
	uuid_t          ci_nx_uuid;     /* in: Nexus instance UUID */
	user_addr_t     ci_key;         /* in: key blob */
	uint32_t        ci_key_len;     /* in: key length */
	uint32_t        __ci_align;     /* reserved (keeps 64-bit alignment) */
	struct ch_ev_thresh ci_tx_lowat; /* in: TX low-watermark */
	struct ch_ev_thresh ci_rx_lowat; /* in: RX low-watermark */
	guardid_t       ci_guard;       /* out: guard ID */
};
515 
/* channel mode flags (ci_ch_mode / cinfo_ch_mode) */
#define CHMODE_MONITOR_TX               0x00000001
#define CHMODE_MONITOR_RX               0x00000002
#define CHMODE_MONITOR_NO_COPY          0x00000004     /* only if mon tx/rx is set */
#define CHMODE_USER_PACKET_POOL         0x00000008
#define CHMODE_DEFUNCT_OK               0x00000010
#define CHMODE_FILTER                   0x00000020     /* packet filter channel */
#define CHMODE_EVENT_RING               0x00000040
#define CHMODE_LOW_LATENCY              0x00000080
/* NOTE(review): 0x00000100 is unassigned (CHMODE_BITS also skips \011) */
#define CHMODE_EXCLUSIVE                0x00000200
#define CHMODE_MONITOR                  \
	(CHMODE_MONITOR_TX | CHMODE_MONITOR_RX)
#ifdef KERNEL
/* mask off userland-settable bits */
#define CHMODE_MASK                                     \
	(CHMODE_MONITOR | CHMODE_MONITOR_NO_COPY |      \
	CHMODE_USER_PACKET_POOL | CHMODE_FILTER  |      \
	CHMODE_DEFUNCT_OK | CHMODE_EVENT_RING | CHMODE_EXCLUSIVE | \
	CHMODE_LOW_LATENCY)
#define CHMODE_KERNEL                   0x00001000  /* special, in-kernel */
#define CHMODE_NO_NXREF                 0x00002000  /* does not hold nx refcnt */
#define CHMODE_CONFIG                   0x00004000  /* provider config mode */
#define CHMODE_HOST                     0x00008000  /* to host (kernel) stack */

/* bit names for %b-style formatting of the channel mode */
#define CHMODE_BITS                                                       \
	"\020\01MON_TX\02MON_RX\03NO_COPY\04USER_PKT_POOL"                \
	"\05DEFUNCT_OK\06FILTER\07EVENT_RING\010LOW_LATENCY\012EXCLUSIVE" \
	"\015KERNEL\016NO_NXREF\017CONFIG\020HOST"
#endif /* KERNEL */

/*
 * Channel options (used with __channel_get_opt()/__channel_set_opt()).
 */
#define CHOPT_TX_LOWAT_THRESH   1  /* (get/set) ch_ev_thresh */
#define CHOPT_RX_LOWAT_THRESH   2  /* (get/set) ch_ev_thresh */
#define CHOPT_IF_ADV_CONF       3  /* (set) enable/disable interface advisory events on the channel */
551 
552 #ifndef KERNEL
/*
 * Channel ring descriptor (userspace-only).  One per ring in the
 * channel; caches addresses derived from the shared memory map so
 * per-slot operations need not recompute them.
 */
struct channel_ring_desc {
	const struct channel    *chrd_channel;   /* owning channel */
	const volatile uint32_t *chrd_csm_flags; /* -> schema csm_flags */
	const struct __user_channel_ring *chrd_ring; /* shared ring */

	/*
	 * Array of __slot_desc each representing slot-specific data.
	 * There is exactly one descriptor for each slot in the ring.
	 */
	struct __slot_desc *chrd_slot_desc;

	/* local per-ring copies for easy access */
	const nexus_meta_type_t chrd_md_type;
	const nexus_meta_subtype_t chrd_md_subtype;
	const mach_vm_address_t chrd_shmem_base_addr;
	const mach_vm_address_t chrd_def_buf_base_addr;
	const mach_vm_address_t chrd_large_buf_base_addr;
	const mach_vm_address_t chrd_md_base_addr;
	const mach_vm_address_t chrd_sd_base_addr;
	const mach_vm_address_t chrd_bft_base_addr;
	const uint32_t          chrd_max_bufs; /* max length of buflet chain */
} __attribute__((aligned(sizeof(uint64_t))));
578 
/*
 * Channel descriptor (userspace-only).  Allocated with CHD_SIZE(n)
 * bytes so that chd_rings[] holds one entry per channel ring.
 */
struct channel {
	int             chd_fd;         /* channel file descriptor */
	sync_flags_t    chd_sync_flags;
	guardid_t       chd_guard;      /* guard ID (ch_init.ci_guard) */
	struct ch_info  *chd_info;      /* see CHD_INFO()/CHD_INFO_SIZE */

	/* read-only views into the kernel-mapped regions */
	const volatile struct __user_channel_schema *chd_schema;
	const volatile void *chd_nx_stats;
	const volatile struct __flowadv_entry *chd_nx_flowadv;
	const volatile struct __kern_nexus_adv_metadata *chd_nx_adv;

	/* local copies for easy access */
	const nexus_meta_type_t chd_md_type;
	const nexus_meta_subtype_t chd_md_subtype;
	const uint8_t chd_alloc_ring_idx;       /* packet alloc ring index */
	const uint8_t chd_free_ring_idx;        /* packet free ring index */
	const uint8_t chd_buf_alloc_ring_idx;   /* buflet alloc ring index */
	const uint8_t chd_buf_free_ring_idx;    /* buflet free ring index */
#if defined(LIBSYSCALL_INTERFACE)
#define CHD_RING_IDX_NONE    (uint8_t)-1    /* no such ring */
#endif /* LIBSYSCALL_INTERFACE */

	/*
	 * Per-ring descriptor, aligned at max cache line boundary
	 * (NOTE(review): the attribute below only requests 64-bit
	 * alignment, not CHANNEL_CACHE_ALIGN_MAX -- confirm intent.)
	 */
	struct channel_ring_desc        chd_rings[0]
	__attribute__((aligned(sizeof(uint64_t))));
};
609 
/*
 * Size of a struct channel carrying n ring descriptors, i.e. the
 * offset just past chd_rings[n].  Uses __builtin_offsetof (as
 * CHANNEL_SCHEMA_SIZE above does) instead of the null-pointer
 * dereference idiom, which is technically undefined behavior in C.
 */
#define CHD_SIZE(n) \
	__builtin_offsetof(struct channel, chd_rings[(n)])
612 
#define CHD_INFO_SIZE           (sizeof (struct ch_info))
#define CHD_INFO(_chd)          ((_chd)->chd_info)
#define CHD_PARAMS(_chd)        (&CHD_INFO(_chd)->cinfo_nxprov_params)
/* cast away const/volatile to obtain writable views of the mapped regions */
#define CHD_SCHEMA(_chd)        \
	(__DECONST(struct __user_channel_schema *, (_chd)->chd_schema))
#define CHD_NX_STATS(_chd)      \
	(__DECONST(void *, (_chd)->chd_nx_stats))
#define CHD_NX_FLOWADV(_chd)    \
	(__DECONST(struct __flowadv_entry *, (_chd)->chd_nx_flowadv))
#define CHD_NX_ADV_MD(_chd)    __DECONST(struct __kern_nexus_adv_metadata *, \
    ((_chd)->chd_nx_adv))
/* the nexus advisory payload immediately follows its metadata header */
#define CHD_NX_ADV_NETIF(_adv_md)    \
    (struct netif_nexus_advisory *)(void *)(_adv_md + 1)
#define CHD_NX_ADV_FSW(_adv_md)    (struct sk_nexusadv *)(void *)(_adv_md + 1)
627 
/*
 * Channel attributes: the tunable channel and nexus parameters
 * (userspace-only staging structure).
 */
struct channel_attr {
	uint32_t        cha_tx_rings;           /* # of TX rings */
	uint32_t        cha_rx_rings;           /* # of RX rings */
	uint32_t        cha_tx_slots;           /* # of slots per TX ring */
	uint32_t        cha_rx_slots;           /* # of slots per RX ring */
	uint32_t        cha_buf_size;           /* buffer size */
	uint32_t        cha_meta_size;          /* metadata size */
	uint32_t        cha_stats_size;         /* statistics region size */
	uint32_t        cha_exclusive;          /* cf. CHMODE_EXCLUSIVE */
	uint32_t        cha_monitor;            /* cf. CHMODE_MONITOR* */
	uint32_t        cha_key_len;            /* key blob length */
	void            *cha_key;               /* key blob */
	struct ch_ev_thresh cha_tx_lowat;       /* TX low-watermark */
	struct ch_ev_thresh cha_rx_lowat;       /* RX low-watermark */
	uint32_t        cha_nexus_type;
	uint32_t        cha_nexus_extensions;
	uint32_t        cha_nexus_mhints;
	uint32_t        cha_nexus_ifindex;
	uint32_t        cha_flowadv_max;        /* cf. csm_flowadv_max */
	nexus_meta_type_t cha_nexus_meta_type;
	nexus_meta_subtype_t cha_nexus_meta_subtype;
	uint32_t        cha_nexus_checksum_offload;
	uint32_t        cha_user_packet_pool;   /* cf. CHMODE_USER_PACKET_POOL */
	uint32_t        cha_nexusadv_size;      /* nexus advisory region size */
	uint32_t        cha_nexus_defunct_ok;   /* cf. CHMODE_DEFUNCT_OK */
	uint32_t        cha_filter;             /* cf. CHMODE_FILTER */
	uint32_t        cha_enable_event_ring;  /* cf. CHMODE_EVENT_RING */
	uint32_t        cha_max_frags;          /* max buflets per packet */
	uint32_t        cha_num_buffers;        /* # of buffers */
	uint32_t        cha_low_latency;        /* cf. CHMODE_LOW_LATENCY */
	uint32_t        cha_large_buf_size;     /* large buffer size */
};
663 
664 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
665 __BEGIN_DECLS
666 extern int __channel_open(struct ch_init *init, const uint32_t init_len);
667 extern int __channel_get_info(int c, struct ch_info *cinfo,
668     const uint32_t cinfolen);
669 extern int __channel_sync(int c, const int mode, const sync_flags_t flags);
670 extern int __channel_get_opt(int c, const uint32_t opt, void *aoptval,
671     uint32_t *aoptlen);
672 extern int __channel_set_opt(int c, const uint32_t opt, const void *aoptval,
673     const uint32_t optlen);
674 __END_DECLS
675 #endif  /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
676 #endif /* !KERNEL */
677 #endif /* PRIVATE || BSD_KERNEL_PRIVATE */
678 #endif /* !_SKYWALK_OS_CHANNEL_PRIVATE_H_ */
679