xref: /xnu-11215.41.3/bsd/skywalk/channel/os_channel_private.h (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31  * All rights reserved.
32  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  *   1. Redistributions of source code must retain the above copyright
38  *      notice, this list of conditions and the following disclaimer.
39  *   2. Redistributions in binary form must reproduce the above copyright
40  *      notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53  * SUCH DAMAGE.
54  */
55 
56 #ifndef _SKYWALK_OS_CHANNEL_PRIVATE_H_
57 #define _SKYWALK_OS_CHANNEL_PRIVATE_H_
58 
59 #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
60 #include <sys/time.h>
61 #include <sys/signal.h>
62 #include <sys/guarded.h>
63 #include <sys/utsname.h>
64 #include <skywalk/os_channel.h>
65 #include <skywalk/os_stats_private.h>
66 
67 /* BEGIN CSTYLED */
68 /*
69  * The userspace data structures used by Skywalk are shown below.
70  *
71  * The kernel allocates the regions for the various object types,
72  * and maps them to the userspace task in a contiguous span, one
73  * after another.
74  *
75  * Each channel file descriptor comes with its own memory map,
76  * and the layout of the rest of the objects is described in the
77  * __user_channel_schema structure associated with the channel.
78  * This schema structure is mapped read-only in the task.
79  *
80  *     +=======================+
81  *     | __user_channel_schema | (1 per channel fd)
82  *     +=======================+
83  *     |     csm_ver           |
84  *     |     csm_flags         |
85  *     |-----------------------|
86  *     |     csm_tx_rings      |
87  *     |     csm_rx_rings      |
88  *     | csm_allocator_rings   |
89  *     |    csm_event_rings    |
90  *     |-----------------------|
 *     |     csm_flowadv_max   |
94  *     |     csm_flowadv_ofs   | <<---+ relative to base of memory map
95  *     |-----------------------|      |
96  *     | csm_md_redzone_cookie |      |
97  *     |     csm_md_type       |      |
98  *     |     csm_md_subtype    |      |
99  *     |-----------------------|      |
100  *     |     csm_stats_ofs     | <<---+
101  *     |     csm_stats_type    |      |
102  *     |-----------------------|      |
103  *     |     csm_nexusadv_ofs  | <<---+
104  *     |-----------------------|
105  *     |     csm_kern_name     |
106  *     |     csm_kern_uuid     |
107  *     |-----------------------|
108  *     | TX  csm_ring_ofs[0]   | <<---+
109  *     | TX  csm_sd_ofs[0]     |      |
110  *     :        ...            :      |
111  *     | TX  csm_ring_ofs[t]   |      |
112  *     | TX  csm_sd_ofs[t]     |      |
113  *     |-----------------------|      |
114  *     | RX  csm_ring_ofs[0]   | <<---+ these offsets are relative
115  *     | RX  csm_sd_ofs[0]     |      | to each schema structure
116  *     :        ...            :      |
117  *     | RX  csm_ring_ofs[t]   |      |
118  *     | RX  csm_sd_ofs[t]     |      |
119  *     |-----------------------|      |
120  *     | A   csm_ring_ofs[0]   |      |
121  *     | A   csm_sd_ofs[0]     |      |
122  *     :        ...            :      |
123  *     | A   csm_ring_ofs[t]   | <<---+
124  *     | A   csm_sd_ofs[t]     |      |
125  *     |-----------------------|      |
126  *     | F   csm_ring_ofs[0]   |      |
127  *     | F   csm_sd_ofs[0]     |      |
128  *     :        ...            :      |
129  *     | F   csm_ring_ofs[t]   | <<---+
130  *     | F   csm_sd_ofs[t]     |      |
131  *     |-----------------------|      |
132  *     | EV  csm_ring_ofs[0]   | <<---+
133  *     | EV  csm_sd_ofs[0]     |
134  *     +-----------------------+
135  *         (variable length)
136  *
137  * On nexus adapters that support statistics or flow advisory, the
138  * csm_stats_ofs or csm_flowadv_ofs would be non-zero, and their values
139  * represent the offset to the respective objects from the base of the
140  * memory map.  This is because those regions are shared amongst all
141  * channels opened to the adapter associated with the nexus port.
142  *
143  * Other regions, such as rings and slot descriptors, are unique to the
144  * channel itself.  They are always present, and their values indicated
145  * by csm_{ring,sd}_ofs represent the offset to the respective objects
146  * from the schema pointer (not from base of memory map.)  This is done
147  * to support channels bound to any of the adapter's ring-pairs.
148  *
149  * See notes below on CSM_CURRENT_VERSION.
150  */
151 /* END CSTYLED */
#define CHANNEL_SCHEMA_KERN_NAME        _SYS_NAMELEN
struct __user_channel_schema {
	/*
	 * Schema properties, kernel version string and kernel
	 * executable UUID (for debugging).  These 4 fields
	 * must be at the beginning of the structure.
	 */
	const uint32_t  csm_ver;                /* schema layout version */
	const volatile uint32_t csm_flags;      /* CSM_* flags */
	char      csm_kern_name[CHANNEL_SCHEMA_KERN_NAME];
	uuid_t    csm_kern_uuid;

	/*
	 * The rest of the fields may be rearranged as needed, with
	 * the expectation that CSM_CURRENT_VERSION be bumped up on
	 * each modification.
	 */

	/*
	 * The number of packet rings available for this channel.
	 */
	const uint32_t  csm_tx_rings;   /* # of tx rings */
	const uint32_t  csm_rx_rings;   /* # of rx rings */

	/*
	 * The number of allocator ring pair available for this channel.
	 * If the channel supports user packet pool then 1 pair of
	 * alloc/free ring per channel are used to manage the packet
	 * allocation from userspace.
	 * If the channel supports multi-buflet packet then an additional pair
	 * of alloc/free ring is used to manage the buffer (buflet) allocation
	 * from userspace.
	 */
	const uint32_t  csm_allocator_ring_pairs;

	/*
	 * number of event rings for this channel.
	 */
	const uint32_t  csm_num_event_rings;
	/* # of large-buffer allocator rings (cf. CR_KIND_LARGE_BUF_ALLOC) */
	const uint32_t  csm_large_buf_alloc_rings;

	/*
	 * Flow advisory region offset; this field will be 0 if the
	 * nexus isn't capable of flow advisory scheme.  Otherwise,
	 * it points to a table of flow advisory entries, and the
	 * total number of entries is indicated by csm_flowadv_max.
	 * Offset is relative to the base of the memory map.
	 */
	const uint32_t          csm_flowadv_max;
	const mach_vm_offset_t  csm_flowadv_ofs
	__attribute__((aligned(sizeof(uint64_t))));

	/*
	 * Metadata region redzone, type and sub-type.
	 */
	const uint64_t  csm_md_redzone_cookie   /* metadata redzone cookie */
	__attribute__((aligned(sizeof(uint64_t))));
	const nexus_meta_type_t csm_md_type;    /* metadata type */
	const nexus_meta_subtype_t csm_md_subtype; /* metadata subtype */

	/*
	 * Statistics region offset; each nexus is free to use this
	 * region and break it up into multiple smaller regions if
	 * needed.  The definition and interpretation of the contents
	 * is left to the nexus.  The value of this field will be 0
	 * if the nexus doesn't facilitate shareable statistics.
	 */
	const mach_vm_offset_t  csm_stats_ofs
	__attribute__((aligned(sizeof(uint64_t))));
	const nexus_stats_type_t csm_stats_type;

	/*
	 * Nexus advisory region offset; this field will be 0 if the
	 * nexus isn't providing any nexus-wide advisories.  Otherwise,
	 * it points to the nexus advisory structure.
	 */
	const mach_vm_offset_t csm_nexusadv_ofs
	__attribute__((aligned(sizeof(uint64_t))));

	/*
	 * The following array contains the offset of each channel ring
	 * from the beginning of this structure, as well as the ring's
	 * slot descriptor, in the following order:
	 *
	 * tx rings (csm_tx_rings)
	 * rx rings (csm_rx_rings)
	 * allocator rings (either 2 or 4 or none) (optional)
	 * event rings (optional)
	 *
	 * The area is filled up by the kernel, and then only read
	 * by userspace code.
	 */
	struct {
		const mach_vm_offset_t  ring_off; /* __user_channel_ring */
		const mach_vm_offset_t  sd_off;   /* __slot_desc */
	} csm_ring_ofs[__counted_by(csm_tx_rings + csm_rx_rings +
	csm_allocator_ring_pairs + csm_num_event_rings + csm_large_buf_alloc_rings)]
	__attribute__((aligned(sizeof(uint64_t))));
};
250 
/*
 * Schema layout version.  Make sure to bump this up each time
 * struct __user_channel_schema layout is modified.  This helps
 * to ensure that both kernel and libsystem_kernel are in sync,
 * as otherwise we'd assert due to version mismatch.
 */
#define CSM_CURRENT_VERSION     18

/* valid values for csm_flags */
#define CSM_PRIV_MEM    0x1             /* private memory region */
#define CSM_ACTIVE      (1U << 31)      /* channel is active */

/* %b-style bit description string for csm_flags (octal bit positions) */
#define CSM_BITS        "\020\01PRIV_MEM\040ACTIVE"

/* the size of __user_channel_schema structure for n total rings */
#define CHANNEL_SCHEMA_SIZE(n) \
	__builtin_offsetof(struct __user_channel_schema, csm_ring_ofs[(n)])
268 
/*
 * Some fields should be cache-aligned to reduce contention.
 * The alignment is architecture and OS dependent; we use an
 * estimate that should cover most architectures.
 */
#define CHANNEL_CACHE_ALIGN_MAX 128     /* max cache line size */

/*
 * Ring kind.
 */
#define CR_KIND_RX              0       /* same as NR_RX */
#define CR_KIND_TX              1       /* same as NR_TX */
#define CR_KIND_ALLOC           2       /* same as NR_A */
#define CR_KIND_FREE            3       /* same as NR_F */
#define CR_KIND_EVENT           4       /* same as NR_EV */
#define CR_KIND_LARGE_BUF_ALLOC 5       /* same as NR_LBA */

/* index of a slot within a ring */
typedef uint32_t slot_idx_t;

/* index of an object (e.g. a metadata) within a region */
typedef uint32_t obj_idx_t;
#define OBJ_IDX_NONE    ((obj_idx_t)-1) /* sentinel: no object attached */
290 
/*
 * This structure contains per-slot properties for userspace.  If the flag
 * SD_IDX_VALID is set, the descriptor contains the index of the metadata
 * attached to the slot.
 *
 * TODO: [email protected] -- this will be made read-write for user pool.
 * TODO: [email protected] -- Should we make use of RX/TX
 * preparation/writeback descriptors (in a union) for sd_len?
 */
struct __user_slot_desc {
	obj_idx_t       sd_md_idx;      /* metadata index */
	uint16_t        sd_flags;       /* slot flags */
	/*
	 * XXX: sd_len is currently used only for the purpose of accounting
	 * for the number of bytes pending to be read by the user channel.
	 * Currently the maximum size of a packet being transported on user
	 * channel is <= UINT16_MAX, so sd_len being uint16_t is fine, but
	 * this needs to be changed if we want to go beyond UINT16_MAX.
	 */
	uint16_t        sd_len;         /* slot len */
};
312 
/* valid values for sd_flags */
#define SD_IDX_VALID    0x1             /* slot has metadata attached */
#ifdef KERNEL
#define SD_LEN_VALID    0x2             /* slot has packet length recorded */
#define SD_KERNEL_ONLY  (1 << 15)       /* kernel only; no user counterpart */

#define SD_FLAGS_USER   (SD_IDX_VALID)
/* invariant flags we want to keep */
#define SD_SAVE_MASK    (SD_KERNEL_ONLY)
#endif /* KERNEL */
/*
 * SD_VALID_METADATA() evaluates to 1 when the slot descriptor has an
 * attached metadata (SD_IDX_VALID set in sd_flags), 0 otherwise.
 */
#define SD_VALID_METADATA(_sd)                                          \
	((((_sd)->sd_flags) & SD_IDX_VALID) != 0)
328 
/*
 * Slot descriptor.
 *
 * Union wrapper around the user-visible descriptor; _sd_private reserves
 * 64 bits of storage so the descriptor's size and alignment stay fixed
 * independent of the user view.
 */
struct __slot_desc {
	union {
		struct __user_slot_desc _sd_user;
		uint64_t                _sd_private[1];
	};
};

#define SLOT_DESC_SZ            (sizeof (struct __slot_desc))
/* access the user view of a slot descriptor */
#define SLOT_DESC_USD(_sdp)     (&(_sdp)->_sd_user)
341 
342 /*
343  * Ring.
344  *
345  * Channel representation of a TX or RX ring (also known as "queue").
346  * This is a queue implemented as a fixed-size circular array.
347  * At the software level the important fields are: head, cur, tail.
348  *
349  * The __user_channel_ring, and all slots and buffers in the range
350  * [head .. tail-1] are owned by the user program; the kernel only
351  * accesses them during a channel system call and in the user thread
352  * context.
353  */
struct __user_channel_ring {
	/*
	 * In TX rings:
	 *
	 *   head	first slot available for transmission;
	 *   tail	(readonly) first slot reserved to the kernel
	 *   khead	(readonly) kernel's view of next slot to send
	 *		since last sync.
	 *
	 * [head .. tail-1] can be used for new packets to send;
	 *
	 * 'head' must be incremented as slots are filled with new packets to
	 * be sent;
	 *
	 * In RX rings:
	 *
	 *   head	first valid received packet;
	 *   tail	(readonly) first slot reserved to the kernel
	 *   khead	(readonly) kernel's view of next slot to reclaim
	 *		since last sync.
	 *
	 * [head .. tail-1] contain received packets;
	 *
	 * 'head' must be incremented as slots are consumed and can be returned
	 * to the kernel;
	 *
	 */
	volatile slot_idx_t     ring_head;      /* (u) first user slot */
	const volatile slot_idx_t ring_tail;    /* (k) first kernel slot */
	const volatile slot_idx_t ring_khead;   /* (k) next to send/reclaim */

	const uint32_t  ring_num_slots; /* # of slots in the ring */
	const uint32_t  ring_def_buf_size;  /* size of each default buffer */
	const uint32_t  ring_large_buf_size;  /* size of each large buffer */
	const uint16_t  ring_md_size;   /* size of each metadata */
	const uint16_t  ring_bft_size;  /* size of each buflet metadata */
	const uint16_t  ring_id;        /* unused */
	const uint16_t  ring_kind;      /* kind of ring (CR_KIND_*) */

	/*
	 * Base addresses of {buf, metadata, slot descriptor} regions
	 * from this ring descriptor.  This facilitates computing the
	 * addresses of those regions in the task's mapped memory.
	 */
	/* base address of default buffer region */
	const mach_vm_offset_t  ring_def_buf_base
	__attribute((aligned(sizeof(uint64_t))));
	/* base address of large buffer region */
	const mach_vm_offset_t  ring_large_buf_base
	__attribute((aligned(sizeof(uint64_t))));
	const mach_vm_offset_t  ring_md_base    /* base of metadata region */
	__attribute((aligned(sizeof(uint64_t))));
	const mach_vm_offset_t  ring_sd_base    /* base of slot desc region */
	__attribute((aligned(sizeof(uint64_t))));
	/*
	 * base of buflet metadata region
	 * value of 0 means that external buflet metadata is not present.
	 */
	const mach_vm_offset_t  ring_bft_base
	__attribute((aligned(sizeof(uint64_t))));

	/* NOTE(review): timestamp units are kernel-defined (presumably
	 * mach_absolute_time) -- confirm before interpreting. */
	const volatile uint64_t ring_sync_time /* (k) time of last sync */
	__attribute((aligned(sizeof(uint64_t))));
	const volatile uint64_t ring_notify_time /* (k) time of last notify */
	__attribute((aligned(sizeof(uint64_t))));
	/* current working set for the packet allocator ring */
	const volatile uint32_t ring_alloc_ws;
	/* current working set for the buflet allocator ring */
	const volatile uint32_t ring_alloc_buf_ws;
};
424 
/* true when the ring has no available slots (head has caught up to tail) */
426 #define CHANNEL_RING_EMPTY(_ring) ((_ring)->ring_head == (_ring)->ring_tail)
427 
428 /*
429  * Flow advisory.
430  *
431  * Each flow that is registered with the nexus capable of supporting
432  * flow advisory is given an entry.  Each entry resides in the flow
433  * advisory table that is mapped to the task.
434  * fae_id:  is the flow identifier used by libnetcore to identify a flow.
435  *          This identifier is passed as a metadata on all packets
436  *          generated by the user space stack. This is the flow_id parameter
437  *          which should be used while checking if a flow is
438  *          admissible using the API os_channel_flow_admissible().
439  * fae_flowid: is a globally unique flow identifier generated by the
440  *             flowswitch for each flow. Flowswitch stamps every TX packet
441  *             with this identifier. This is the flow identifier which would
442  *             be visible to the AQM logic and the driver. The flow advisory
 *             mechanism in kernel uses this fae_flowid to identify the flow
444  *             in the flow advisory table.
445  */
struct __flowadv_entry {
	union {
		uint64_t        fae_id_64[2];   /* raw 64-bit view of the UUID */
		uint32_t        fae_id_32[4];   /* raw 32-bit view of the UUID */
		uuid_t          fae_id; /* flow ID from userspace stack */
	};
	/* NOTE(review): "ce" presumably counts ECN congestion-experienced
	 * marks -- confirm against the flowswitch sources. */
	volatile uint32_t       fae_ce_cnt;
	volatile uint32_t       fae_pkt_cnt;    /* packet count */
	volatile uint32_t       fae_flags;  /* flags FLOWADVF_* */
	/* flow ID generated by flowswitch */
	uint32_t                fae_flowid;
#ifdef KERNEL
#define fae_token               fae_flowid
#endif /* KERNEL */
} __attribute__((aligned(sizeof(uint64_t))));
461 
#define FLOWADVF_VALID          0x1     /* flow is valid */
#define FLOWADVF_SUSPENDED      0x2     /* flow is suspended */

/* channel event threshold (see CHOPT_TX_LOWAT_THRESH and friends) */
struct ch_ev_thresh {
	channel_threshold_unit_t cet_unit;      /* unit of cet_value */
	uint32_t                cet_value;      /* threshold value */
};
470 
/*
 * Channel information.
 *
 * Snapshot of a channel's configuration and memory-map geometry;
 * filled in by the kernel and retrieved via __channel_get_info().
 */
struct ch_info {
	union {
		uint64_t  cinfo_ch_id_64[2];    /* raw 64-bit view of the UUID */
		uint32_t  cinfo_ch_id_32[4];    /* raw 32-bit view of the UUID */
		uuid_t    cinfo_ch_id;          /* Channel UUID */
	};
#ifdef KERNEL
#define cinfo_ch_token  cinfo_ch_id_32[0]
#endif /* KERNEL */
	uint32_t          cinfo_ch_mode;        /* CHMODE_* flags */
	ring_id_t         cinfo_ch_ring_id;     /* Channel ring (or any) */
	struct nxprov_params cinfo_nxprov_params; /* Nexus provider params */
	uuid_t            cinfo_nx_uuid;        /* Nexus instance UUID */
	nexus_port_t      cinfo_nx_port;        /* Nexus instance port */
	uint32_t          cinfo_num_bufs;       /* # buffers in user pool */
	mach_vm_size_t    cinfo_mem_map_size;   /* size of VM map */
	mach_vm_address_t cinfo_mem_base;       /* VM mapping for task */
	mach_vm_offset_t  cinfo_schema_offset;  /* offset in VM map */
	ring_id_t         cinfo_first_tx_ring;  /* first TX ring ID */
	ring_id_t         cinfo_last_tx_ring;   /* last TX ring ID */
	ring_id_t         cinfo_first_rx_ring;  /* first RX ring ID */
	ring_id_t         cinfo_last_rx_ring;   /* last RX ring ID */
	struct ch_ev_thresh cinfo_tx_lowat;     /* TX low-watermark */
	struct ch_ev_thresh cinfo_rx_lowat;     /* RX low-watermark */
} __attribute__((aligned(sizeof(uint64_t))));
499 
500 #include <skywalk/os_nexus_private.h>
501 
#define CHANNEL_INIT_VERSION_1          1
#define CHANNEL_INIT_CURRENT_VERSION    CHANNEL_INIT_VERSION_1

/*
 * Channel init parameters.
 *
 * Passed by userspace to __channel_open(); all fields are inputs
 * except ci_guard, which the kernel fills with the guard ID for
 * the returned descriptor.
 */
struct ch_init {
	uint32_t        ci_version;     /* in: CHANNEL_INIT_CURRENT_VERSION */
	uint32_t        ci_ch_mode;     /* in: CHMODE_* flags */
	ring_id_t       ci_ch_ring_id;  /* in: Channel ring */
	nexus_port_t    ci_nx_port;     /* in: Nexus instance port */
	uuid_t          ci_nx_uuid;     /* in: Nexus instance UUID */
	user_addr_t     ci_key;         /* in: key blob */
	uint32_t        ci_key_len;     /* in: key length */
	uint32_t        __ci_align;     /* reserved (padding) */
	struct ch_ev_thresh ci_tx_lowat; /* in: TX low-watermark */
	struct ch_ev_thresh ci_rx_lowat; /* in: RX low-watermark */
	guardid_t       ci_guard;       /* out: guard ID */
};
521 
#define CHMODE_MONITOR_TX               0x00000001
#define CHMODE_MONITOR_RX               0x00000002
#define CHMODE_MONITOR_NO_COPY          0x00000004     /* only if mon tx/rx is set */
#define CHMODE_USER_PACKET_POOL         0x00000008
#define CHMODE_DEFUNCT_OK               0x00000010
#define CHMODE_FILTER                   0x00000020     /* packet filter channel */
#define CHMODE_EVENT_RING               0x00000040
#define CHMODE_LOW_LATENCY              0x00000080
/* NOTE(review): bit 0x00000100 is skipped (also absent from CHMODE_BITS) */
#define CHMODE_EXCLUSIVE                0x00000200
#define CHMODE_MONITOR                  \
	(CHMODE_MONITOR_TX | CHMODE_MONITOR_RX)
#ifdef KERNEL
/* mask off userland-settable bits */
#define CHMODE_MASK                                     \
	(CHMODE_MONITOR | CHMODE_MONITOR_NO_COPY |      \
	CHMODE_USER_PACKET_POOL | CHMODE_FILTER  |      \
	CHMODE_DEFUNCT_OK | CHMODE_EVENT_RING | CHMODE_EXCLUSIVE | \
	CHMODE_LOW_LATENCY)
#define CHMODE_KERNEL                   0x00001000  /* special, in-kernel */
#define CHMODE_NO_NXREF                 0x00002000  /* does not hold nx refcnt */
#define CHMODE_CONFIG                   0x00004000  /* provider config mode */
#define CHMODE_HOST                     0x00008000  /* to host (kernel) stack */

/* %b-style bit description string for ch_mode (octal bit positions) */
#define CHMODE_BITS                                                       \
	"\020\01MON_TX\02MON_RX\03NO_COPY\04USER_PKT_POOL"                \
	"\05DEFUNCT_OK\06FILTER\07EVENT_RING\010LOW_LATENCY\012EXCLUSIVE" \
	"\015KERNEL\016NO_NXREF\017CONFIG\020HOST"
#endif /* KERNEL */
550 
/*
 * Channel options, manipulated via __channel_get_opt()/__channel_set_opt().
 */
#define CHOPT_TX_LOWAT_THRESH   1  /* (get/set) ch_ev_thresh */
#define CHOPT_RX_LOWAT_THRESH   2  /* (get/set) ch_ev_thresh */
#define CHOPT_IF_ADV_CONF       3  /* (set) enable/disable interface advisory events on the channel */
557 
558 #ifndef KERNEL
559 /*
560  * Channel ring descriptor.
561  */
struct channel_ring_desc {
	const struct channel    *chrd_channel;   /* owning channel */
	const volatile uint32_t *chrd_csm_flags; /* -> schema csm_flags */
	const struct __user_channel_ring *chrd_ring; /* shared ring mapping */

	/*
	 * Array of __slot_desc each representing slot-specific data.
	 * There is exactly one descriptor for each slot in the ring.
	 */
	struct __slot_desc *chrd_slot_desc;

	/* local per-ring copies for easy access */
	const nexus_meta_type_t chrd_md_type;
	const nexus_meta_subtype_t chrd_md_subtype;
	const mach_vm_address_t chrd_shmem_base_addr;
	const mach_vm_address_t chrd_def_buf_base_addr;
	const mach_vm_address_t chrd_large_buf_base_addr;
	const mach_vm_address_t chrd_md_base_addr;
	const mach_vm_address_t chrd_sd_base_addr;
	const mach_vm_address_t chrd_bft_base_addr;
	const uint32_t          chrd_max_bufs; /* max length of buflet chain */
} __attribute__((aligned(sizeof(uint64_t))));
584 
585 /*
586  * Channel descriptor.
587  */
struct channel {
	int             chd_fd;         /* channel file descriptor */
	sync_flags_t    chd_sync_flags;
	guardid_t       chd_guard;      /* guard ID (see ch_init.ci_guard) */
	struct ch_info  *chd_info;      /* channel info snapshot */

	/* pointers into the kernel-shared memory map */
	const volatile struct __user_channel_schema *chd_schema;
	const volatile void *chd_nx_stats;
	const volatile struct __flowadv_entry *chd_nx_flowadv;
	const volatile struct __kern_nexus_adv_metadata *chd_nx_adv;

	/* local copies for easy access */
	const nexus_meta_type_t chd_md_type;
	const nexus_meta_subtype_t chd_md_subtype;
	/* presumably indices into chd_rings[] for the special rings;
	 * CHD_RING_IDX_NONE marks "no such ring" -- confirm with callers */
	const uint8_t chd_alloc_ring_idx;
	const uint8_t chd_free_ring_idx;
	const uint8_t chd_buf_alloc_ring_idx;
	const uint8_t chd_buf_free_ring_idx;
	const uint8_t chd_large_buf_alloc_ring_idx;
#if defined(LIBSYSCALL_INTERFACE)
#define CHD_RING_IDX_NONE    (uint8_t)-1
#endif /* LIBSYSCALL_INTERFACE */

	/*
	 * Per-ring descriptors (variable-length tail; see CHD_SIZE()).
	 * NOTE(review): the old comment claimed "max cache line boundary",
	 * but the attribute below only requests 8-byte alignment.
	 */
	struct channel_ring_desc        chd_rings[0]
	__attribute__((aligned(sizeof(uint64_t))));
};
616 
/*
 * Size in bytes of a struct channel carrying n ring descriptors, i.e.
 * the offset of element n of the trailing chd_rings[] array.  Use
 * __builtin_offsetof -- consistent with CHANNEL_SCHEMA_SIZE() above --
 * instead of the null-pointer-dereference offsetof idiom, which is
 * undefined behavior.
 */
#define CHD_SIZE(n) \
	__builtin_offsetof(struct channel, chd_rings[(n)])

#define CHD_INFO_SIZE           (sizeof (struct ch_info))
#define CHD_INFO(_chd)          ((_chd)->chd_info)
#define CHD_PARAMS(_chd)        (&CHD_INFO(_chd)->cinfo_nxprov_params)
/* strip const/volatile qualifiers off the shared-memory pointers */
#define CHD_SCHEMA(_chd)        \
	(__DECONST(struct __user_channel_schema *, (_chd)->chd_schema))
#define CHD_NX_STATS(_chd)      \
	(__DECONST(void *, (_chd)->chd_nx_stats))
#define CHD_NX_FLOWADV(_chd)    \
	(__DECONST(struct __flowadv_entry *, (_chd)->chd_nx_flowadv))
#define CHD_NX_ADV_MD(_chd)    __DECONST(struct __kern_nexus_adv_metadata *, \
    ((_chd)->chd_nx_adv))
/* the nexus advisory payload immediately follows its metadata header */
#define CHD_NX_ADV_NETIF(_adv_md)    \
    (struct netif_nexus_advisory *)(void *)(_adv_md + 1)
#define CHD_NX_ADV_FSW(_adv_md)    (struct sk_nexusadv *)(void *)(_adv_md + 1)
634 
/*
 * Channel attributes.
 *
 * Userspace-side bag of knobs used when configuring/opening a channel;
 * several fields mirror CHMODE_* flags or nexus provider parameters.
 */
struct channel_attr {
	uint32_t        cha_tx_rings;           /* # of TX rings */
	uint32_t        cha_rx_rings;           /* # of RX rings */
	uint32_t        cha_tx_slots;           /* # of slots per TX ring */
	uint32_t        cha_rx_slots;           /* # of slots per RX ring */
	uint32_t        cha_buf_size;           /* buffer size */
	uint32_t        cha_meta_size;          /* metadata size */
	uint32_t        cha_stats_size;         /* statistics region size */
	uint32_t        cha_exclusive;          /* cf. CHMODE_EXCLUSIVE */
	uint32_t        cha_monitor;            /* cf. CHMODE_MONITOR_* */
	uint32_t        cha_key_len;            /* length of cha_key blob */
	void            *cha_key;               /* key blob */
	struct ch_ev_thresh cha_tx_lowat;       /* TX low-watermark */
	struct ch_ev_thresh cha_rx_lowat;       /* RX low-watermark */
	uint32_t        cha_nexus_type;
	uint32_t        cha_nexus_extensions;
	/* NOTE(review): presumably memory-usage hints -- confirm */
	uint32_t        cha_nexus_mhints;
	uint32_t        cha_nexus_ifindex;      /* interface index */
	uint32_t        cha_flowadv_max;        /* max # flow advisory entries */
	nexus_meta_type_t cha_nexus_meta_type;
	nexus_meta_subtype_t cha_nexus_meta_subtype;
	uint32_t        cha_nexus_checksum_offload;
	uint32_t        cha_user_packet_pool;   /* cf. CHMODE_USER_PACKET_POOL */
	uint32_t        cha_nexusadv_size;      /* nexus advisory region size */
	uint32_t        cha_nexus_defunct_ok;   /* cf. CHMODE_DEFUNCT_OK */
	uint32_t        cha_filter;             /* cf. CHMODE_FILTER */
	uint32_t        cha_enable_event_ring;  /* cf. CHMODE_EVENT_RING */
	uint32_t        cha_max_frags;          /* max length of buflet chain */
	uint32_t        cha_num_buffers;
	uint32_t        cha_low_latency;        /* cf. CHMODE_LOW_LATENCY */
	uint32_t        cha_large_buf_size;     /* large buffer size */
};
670 
#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
__BEGIN_DECLS
/*
 * NOTE(review): these appear to be the raw userspace syscall stubs for
 * channels (cf. the libsystem_kernel note at CSM_CURRENT_VERSION); "c"
 * is presumably the channel fd returned by __channel_open() -- confirm.
 */
extern int __channel_open(struct ch_init *init, const uint32_t init_len);
extern int __channel_get_info(int c, struct ch_info *cinfo,
    const uint32_t cinfolen);
extern int __channel_sync(int c, const int mode, const sync_flags_t flags);
extern int __channel_get_opt(int c, const uint32_t opt, void *aoptval,
    uint32_t *aoptlen);
extern int __channel_set_opt(int c, const uint32_t opt, const void *aoptval,
    const uint32_t optlen);
__END_DECLS
#endif  /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
683 #endif /* !KERNEL */
684 #endif /* PRIVATE || BSD_KERNEL_PRIVATE */
685 #endif /* !_SKYWALK_OS_CHANNEL_PRIVATE_H_ */
686