xref: /xnu-12377.81.4/bsd/skywalk/channel/os_channel_private.h (revision 043036a2b3718f7f0be807e2870f8f47d3fa0796)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31  * All rights reserved.
32  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  *   1. Redistributions of source code must retain the above copyright
38  *      notice, this list of conditions and the following disclaimer.
39  *   2. Redistributions in binary form must reproduce the above copyright
40  *      notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53  * SUCH DAMAGE.
54  */
55 
56 #ifndef _SKYWALK_OS_CHANNEL_PRIVATE_H_
57 #define _SKYWALK_OS_CHANNEL_PRIVATE_H_
58 
59 #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
60 #include <sys/time.h>
61 #include <sys/signal.h>
62 #include <sys/guarded.h>
63 #include <sys/utsname.h>
64 #include <skywalk/os_channel.h>
65 #include <skywalk/os_stats_private.h>
66 
67 /* BEGIN CSTYLED */
68 /*
69  * The userspace data structures used by Skywalk are shown below.
70  *
71  * The kernel allocates the regions for the various object types,
72  * and maps them to the userspace task in a contiguous span, one
73  * after another.
74  *
75  * Each channel file descriptor comes with its own memory map,
76  * and the layout of the rest of the objects is described in the
77  * __user_channel_schema structure associated with the channel.
78  * This schema structure is mapped read-only in the task.
79  *
80  *     +=======================+
81  *     | __user_channel_schema | (1 per channel fd)
82  *     +=======================+
83  *     |     csm_ver           |
84  *     |     csm_flags         |
85  *     |-----------------------|
86  *     |     csm_tx_rings      |
87  *     |     csm_rx_rings      |
88  *     | csm_allocator_rings   |
89  *     |    csm_event_rings    |
90  *     |-----------------------|
 93  *     |     csm_flowadv_max   |
 94  *     |     csm_flowadv_ofs   | <<---+ relative to base of memory map
95  *     |-----------------------|      |
96  *     | csm_md_redzone_cookie |      |
97  *     |     csm_md_type       |      |
98  *     |     csm_md_subtype    |      |
99  *     |-----------------------|      |
100  *     |     csm_stats_ofs     | <<---+
101  *     |     csm_stats_type    |      |
102  *     |-----------------------|      |
103  *     |     csm_nexusadv_ofs  | <<---+
104  *     |-----------------------|
105  *     |     csm_kern_name     |
106  *     |     csm_kern_uuid     |
107  *     |-----------------------|
108  *     | TX  csm_ring_ofs[0]   | <<---+
109  *     | TX  csm_sd_ofs[0]     |      |
110  *     :        ...            :      |
111  *     | TX  csm_ring_ofs[t]   |      |
112  *     | TX  csm_sd_ofs[t]     |      |
113  *     |-----------------------|      |
114  *     | RX  csm_ring_ofs[0]   | <<---+ these offsets are relative
115  *     | RX  csm_sd_ofs[0]     |      | to each schema structure
116  *     :        ...            :      |
117  *     | RX  csm_ring_ofs[t]   |      |
118  *     | RX  csm_sd_ofs[t]     |      |
119  *     |-----------------------|      |
120  *     | A   csm_ring_ofs[0]   |      |
121  *     | A   csm_sd_ofs[0]     |      |
122  *     :        ...            :      |
123  *     | A   csm_ring_ofs[t]   | <<---+
124  *     | A   csm_sd_ofs[t]     |      |
125  *     |-----------------------|      |
126  *     | F   csm_ring_ofs[0]   |      |
127  *     | F   csm_sd_ofs[0]     |      |
128  *     :        ...            :      |
129  *     | F   csm_ring_ofs[t]   | <<---+
130  *     | F   csm_sd_ofs[t]     |      |
131  *     |-----------------------|      |
132  *     | EV  csm_ring_ofs[0]   | <<---+
133  *     | EV  csm_sd_ofs[0]     |
134  *     +-----------------------+
135  *         (variable length)
136  *
137  * On nexus adapters that support statistics or flow advisory, the
138  * csm_stats_ofs or csm_flowadv_ofs would be non-zero, and their values
139  * represent the offset to the respective objects from the base of the
140  * memory map.  This is because those regions are shared amongst all
141  * channels opened to the adapter associated with the nexus port.
142  *
143  * Other regions, such as rings and slot descriptors, are unique to the
144  * channel itself.  They are always present, and their values indicated
145  * by csm_{ring,sd}_ofs represent the offset to the respective objects
146  * from the schema pointer (not from base of memory map.)  This is done
147  * to support channels bound to any of the adapter's ring-pairs.
148  *
149  * See notes below on CSM_CURRENT_VERSION.
150  */
151 /* END CSTYLED */
#define CHANNEL_SCHEMA_KERN_NAME        _SYS_NAMELEN
struct __user_channel_schema {
	/*
	 * Schema properties, kernel version string and kernel
	 * executable UUID (for debugging).  These 4 fields
	 * must be at the beginning of the structure.
	 */
	const uint32_t  csm_ver;                /* schema layout version */
	const volatile uint32_t csm_flags;      /* CSM_* flags */
	char      csm_kern_name[CHANNEL_SCHEMA_KERN_NAME]; /* kernel version */
	uuid_t    csm_kern_uuid;                /* kernel executable UUID */

	/* Number of UPP (user packet pool) buffers in use and max */
	volatile uint64_t csm_upp_buf_inuse;
	volatile uint64_t csm_upp_buf_total;

	/*
	 * The rest of the fields may be rearranged as needed, with
	 * the expectation that CSM_CURRENT_VERSION be bumped up on
	 * each modification.
	 */

	/*
	 * The number of packet rings available for this channel.
	 */
	uint32_t  csm_tx_rings;   /* # of tx rings */
	uint32_t  csm_rx_rings;   /* # of rx rings */

	/*
	 * The number of allocator ring pairs available for this channel.
	 * If the channel supports user packet pool then 1 pair of
	 * alloc/free rings per channel is used to manage the packet
	 * allocation from userspace.
	 * If the channel supports multi-buflet packets then an additional
	 * pair of alloc/free rings is used to manage the buffer (buflet)
	 * allocation from userspace.
	 */
	uint32_t  csm_allocator_ring_pairs;

	/*
	 * number of event rings for this channel.
	 */
	uint32_t  csm_num_event_rings;
	/* # of rings used for allocating large buffers from userspace */
	uint32_t  csm_large_buf_alloc_rings;

	/*
	 * Flow advisory region offset; this field will be 0 if the
	 * nexus isn't capable of flow advisory scheme.  Otherwise,
	 * it points to a table of flow advisory entries, and the
	 * total number of entries is indicated by csm_flowadv_max.
	 * The offset is relative to the base of the memory map.
	 */
	const uint32_t          csm_flowadv_max;
	const mach_vm_offset_t  csm_flowadv_ofs
	__attribute__((aligned(sizeof(uint64_t))));

	/*
	 * Metadata region redzone, type and sub-type.
	 */
	const uint64_t  csm_md_redzone_cookie   /* metadata redzone cookie */
	__attribute__((aligned(sizeof(uint64_t))));
	const nexus_meta_type_t csm_md_type;    /* metadata type */
	const nexus_meta_subtype_t csm_md_subtype; /* metadata subtype */

	/*
	 * Statistics region offset; each nexus is free to use this
	 * region and break it up into multiple smaller regions if
	 * needed.  The definition and interpretation of the contents
	 * is left to the nexus.  The value of this field will be 0
	 * if the nexus doesn't facilitate shareable statistics.
	 * The offset is relative to the base of the memory map.
	 */
	const mach_vm_offset_t  csm_stats_ofs
	__attribute__((aligned(sizeof(uint64_t))));
	const nexus_stats_type_t csm_stats_type;

	/*
	 * Nexus advisory region offset; this field will be 0 if the
	 * nexus isn't providing any nexus-wide advisories.  Otherwise,
	 * it points to the nexus advisory structure.
	 */
	const mach_vm_offset_t csm_nexusadv_ofs
	__attribute__((aligned(sizeof(uint64_t))));

	/*
	 * The following array contains the offset of each channel ring
	 * from the beginning of this structure, as well as the ring's
	 * slot descriptor, in the following order:
	 *
	 * tx rings (csm_tx_rings entries)
	 * rx rings (csm_rx_rings entries)
	 * allocator rings (either 2 or 4 or none) (optional)
	 * event rings (optional)
	 * large buffer allocator rings (optional)
	 *
	 * The area is filled up by the kernel, and then only read
	 * by userspace code.
	 */
	struct {
		const mach_vm_offset_t  ring_off; /* __user_channel_ring */
		const mach_vm_offset_t  sd_off;   /* __slot_desc */
	} csm_ring_ofs[__counted_by(csm_tx_rings + csm_rx_rings +
	csm_allocator_ring_pairs + csm_num_event_rings + csm_large_buf_alloc_rings)]
	__attribute__((aligned(sizeof(uint64_t))));
};
254 
/*
 * Schema layout version.  Make sure to bump this up each time
 * struct __user_channel_schema layout is modified.  This helps
 * to ensure that both kernel and libsystem_kernel are in sync,
 * as otherwise we'd assert due to version mismatch.
 */
#define CSM_CURRENT_VERSION     19

/* valid values for csm_flags */
#define CSM_PRIV_MEM    0x1             /* private memory region */
#define CSM_ACTIVE      (1U << 31)      /* channel is active */

/* csm_flags bit names, %b-style (leading "\020" selects hex base) */
#define CSM_BITS        "\020\01PRIV_MEM\040ACTIVE"

/* the size of __user_channel_schema structure for n total rings */
#define CHANNEL_SCHEMA_SIZE(n) \
	__builtin_offsetof(struct __user_channel_schema, csm_ring_ofs[(n)])

/*
 * Some fields should be cache-aligned to reduce contention.
 * The alignment is architecture and OS dependent; we use an
 * estimate that should cover most architectures.
 */
#define CHANNEL_CACHE_ALIGN_MAX 128     /* max cache line size */

/*
 * Ring kind (value of ring_kind in struct __user_channel_ring).
 */
#define CR_KIND_RX              0       /* same as NR_RX */
#define CR_KIND_TX              1       /* same as NR_TX */
#define CR_KIND_ALLOC           2       /* same as NR_A */
#define CR_KIND_FREE            3       /* same as NR_F */
#define CR_KIND_EVENT           4       /* same as NR_EV */
#define CR_KIND_LARGE_BUF_ALLOC 5       /* same as NR_LBA */

/* index of a slot within a ring */
typedef uint32_t slot_idx_t;

/* index of an object (e.g. metadata) within its region */
typedef uint32_t obj_idx_t;
#define OBJ_IDX_NONE    ((obj_idx_t)-1) /* sentinel: no object */
294 
/*
 * This structure contains per-slot properties for userspace.  If the flag
 * SD_IDX_VALID is set, the descriptor contains the index of the metadata
 * attached to the slot.
 *
 * TODO: [email protected] -- this will be made read-write for user pool.
 * TODO: [email protected] -- Should we make use of RX/TX
 * preparation/writeback descriptors (in a union) for sd_len?
 */
struct __user_slot_desc {
	obj_idx_t       sd_md_idx;      /* metadata index; OBJ_IDX_NONE if none */
	uint16_t        sd_flags;       /* slot flags (SD_*) */
	/*
	 * XXX: sd_len is currently used only for the purpose of accounting
	 * for the number of bytes pending to be read by the user channel.
	 * Currently the maximum size of a packet being transported on user
	 * channel is <= UINT16_MAX, so sd_len being uint16_t is fine, but
	 * this needs to be changed if we want to go beyond UINT16_MAX.
	 */
	uint16_t        sd_len;         /* slot len */
};
316 
/* valid values for sd_flags */
#define SD_IDX_VALID    0x1             /* slot has metadata attached */
#ifdef KERNEL
#define SD_LEN_VALID    0x2             /* slot has packet length recorded */
#define SD_KERNEL_ONLY  (1 << 15)       /* kernel only; no user counterpart */

/* subset of flags that is exposed to userspace */
#define SD_FLAGS_USER   (SD_IDX_VALID)
/* invariant flags we want to keep */
#define SD_SAVE_MASK    (SD_KERNEL_ONLY)
#endif /* KERNEL */
/*
 * SD_VALID_METADATA() returns TRUE if the slot has an attached metadata
 */
#define SD_VALID_METADATA(_sd)                                          \
	(!!((_sd)->sd_flags & SD_IDX_VALID))
332 
/*
 * Slot descriptor.
 *
 * The union pads the user-visible descriptor to a 64-bit quantity;
 * _sd_private is presumably the kernel-side overlay — confirm in
 * the kernel channel code.
 */
struct __slot_desc {
	union {
		struct __user_slot_desc _sd_user;
		uint64_t                _sd_private[1];
	};
};

#define SLOT_DESC_SZ            (sizeof (struct __slot_desc))
#define SLOT_DESC_USD(_sdp)     (&(_sdp)->_sd_user) /* user view of a slot */
345 
/*
 * Ring.
 *
 * Channel representation of a TX or RX ring (also known as "queue").
 * This is a queue implemented as a fixed-size circular array.
 * At the software level the important fields are: head, cur, tail.
 *
 * The __user_channel_ring, and all slots and buffers in the range
 * [head .. tail-1] are owned by the user program; the kernel only
 * accesses them during a channel system call and in the user thread
 * context.
 */
struct __user_channel_ring {
	/*
	 * In TX rings:
	 *
	 *   head	first slot available for transmission;
	 *   tail	(readonly) first slot reserved to the kernel
	 *   khead	(readonly) kernel's view of next slot to send
	 *		since last sync.
	 *
	 * [head .. tail-1] can be used for new packets to send;
	 *
	 * 'head' must be incremented as slots are filled with new packets to
	 * be sent;
	 *
	 * In RX rings:
	 *
	 *   head	first valid received packet;
	 *   tail	(readonly) first slot reserved to the kernel
	 *   khead	(readonly) kernel's view of next slot to reclaim
	 *		since last sync.
	 *
	 * [head .. tail-1] contain received packets;
	 *
	 * 'head' must be incremented as slots are consumed and can be returned
	 * to the kernel;
	 *
	 */
	volatile slot_idx_t     ring_head;      /* (u) first user slot */
	const volatile slot_idx_t ring_tail;    /* (k) first kernel slot */
	const volatile slot_idx_t ring_khead;   /* (k) next to send/reclaim */

	const uint32_t  ring_num_slots; /* # of slots in the ring */
	const uint32_t  ring_def_buf_size;  /* size of each default buffer */
	const uint32_t  ring_large_buf_size;  /* size of each large buffer */
	const uint16_t  ring_md_size;   /* size of each metadata */
	const uint16_t  ring_bft_size;  /* size of each buflet metadata */
	const uint16_t  ring_id;        /* unused */
	const uint16_t  ring_kind;      /* kind of ring (CR_KIND_*) */

	/*
	 * Base addresses of {buf, metadata, slot descriptor} regions
	 * from this ring descriptor.  This facilitates computing the
	 * addresses of those regions in the task's mapped memory.
	 */
	/* base address of default buffer region */
	const mach_vm_offset_t  ring_def_buf_base
	__attribute((aligned(sizeof(uint64_t))));
	/* base address of large buffer region */
	const mach_vm_offset_t  ring_large_buf_base
	__attribute((aligned(sizeof(uint64_t))));
	const mach_vm_offset_t  ring_md_base    /* base of metadata region */
	__attribute((aligned(sizeof(uint64_t))));
	const mach_vm_offset_t  ring_sd_base    /* base of slot desc region */
	__attribute((aligned(sizeof(uint64_t))));
	/*
	 * base of buflet metadata region
	 * value of 0 means that external buflet metadata is not present.
	 */
	const mach_vm_offset_t  ring_bft_base
	__attribute((aligned(sizeof(uint64_t))));

	const volatile uint64_t ring_sync_time /* (k) time of last sync */
	__attribute((aligned(sizeof(uint64_t))));
	const volatile uint64_t ring_notify_time /* (k) time of last notify */
	__attribute((aligned(sizeof(uint64_t))));
	/* current working set for the packet allocator ring */
	const volatile uint32_t ring_alloc_ws;
	/* current working set for the buflet allocator ring */
	const volatile uint32_t ring_alloc_buf_ws;
};

/*
 * TRUE when the ring has no slots available to the user, i.e. the
 * user-owned range [head .. tail-1] is empty (head == tail).
 */
#define CHANNEL_RING_EMPTY(_ring) ((_ring)->ring_head == (_ring)->ring_tail)
431 
/*
 * Flow advisory.
 *
 * Each flow that is registered with the nexus capable of supporting
 * flow advisory is given an entry.  Each entry resides in the flow
 * advisory table that is mapped to the task.
 * fae_id:  is the flow identifier used by libnetcore to identify a flow.
 *          This identifier is passed as a metadata on all packets
 *          generated by the user space stack. This is the flow_id parameter
 *          which should be used while checking if a flow is
 *          admissible using the API os_channel_flow_admissible().
 * fae_flowid: is a globally unique flow identifier generated by the
 *             flowswitch for each flow. Flowswitch stamps every TX packet
 *             with this identifier. This is the flow identifier which would
 *             be visible to the AQM logic and the driver. The flow advisory
 *             mechanism in kernel uses this fae_flowid to identify the flow
 *             entry in the flow advisory table.
 */
struct __flowadv_entry {
	union {
		uint64_t        fae_id_64[2];   /* fae_id as 64-bit words */
		uint32_t        fae_id_32[4];   /* fae_id as 32-bit words */
		uuid_t          fae_id; /* flow ID from userspace stack */
	};
	volatile uint32_t       fae_congestion_cnt;
	volatile uint32_t       fae_pkt_cnt;
	volatile uint32_t       fae_flags;  /* flags FLOWADVF_* */
	/* flow ID generated by flowswitch */
	uint32_t                fae_flowid;
#ifdef KERNEL
#define fae_token               fae_flowid
#endif /* KERNEL */
} __attribute__((aligned(sizeof(uint64_t))));
465 
/* valid values for fae_flags */
#define FLOWADVF_VALID          0x1     /* flow is valid */
#define FLOWADVF_SUSPENDED      0x2     /* flow is suspended */

/* channel event threshold */
struct ch_ev_thresh {
	channel_threshold_unit_t cet_unit;      /* unit of cet_value */
	uint32_t                cet_value;      /* threshold value */
};
474 
/*
 * Channel information.
 *
 * Snapshot of a channel's configuration and memory layout, filled in
 * by the kernel and retrieved via __channel_get_info().
 */
struct ch_info {
	union {
		uint64_t  cinfo_ch_id_64[2];    /* UUID as 64-bit words */
		uint32_t  cinfo_ch_id_32[4];    /* UUID as 32-bit words */
		uuid_t    cinfo_ch_id;          /* Channel UUID */
	};
#ifdef KERNEL
#define cinfo_ch_token  cinfo_ch_id_32[0]
#endif /* KERNEL */
	uint32_t          cinfo_ch_mode;        /* CHMODE_* flags */
	ring_id_t         cinfo_ch_ring_id;     /* Channel ring (or any) */
	struct nxprov_params cinfo_nxprov_params; /* Nexus provider params */
	uuid_t            cinfo_nx_uuid;        /* Nexus instance UUID */
	nexus_port_t      cinfo_nx_port;        /* Nexus instance port */
	uint32_t          cinfo_num_bufs;       /* # buffers in user pool */
	mach_vm_size_t    cinfo_mem_map_size;   /* size of VM map */
	mach_vm_address_t cinfo_mem_base;       /* VM mapping for task */
	mach_vm_offset_t  cinfo_schema_offset;  /* offset in VM map */
	ring_id_t         cinfo_first_tx_ring;  /* first TX ring ID */
	ring_id_t         cinfo_last_tx_ring;   /* last TX ring ID */
	ring_id_t         cinfo_first_rx_ring;  /* first RX ring ID */
	ring_id_t         cinfo_last_rx_ring;   /* last RX ring ID */
	struct ch_ev_thresh cinfo_tx_lowat;     /* TX low-watermark */
	struct ch_ev_thresh cinfo_rx_lowat;     /* RX low-watermark */
} __attribute__((aligned(sizeof(uint64_t))));
503 
504 #include <skywalk/os_nexus_private.h>
505 
#define CHANNEL_INIT_VERSION_1          1
#define CHANNEL_INIT_CURRENT_VERSION    CHANNEL_INIT_VERSION_1

/*
 * Channel init parameters.
 *
 * Passed by userspace to __channel_open(); per the in/out markers,
 * ci_guard is the only field written back by the kernel.
 */
struct ch_init {
	uint32_t        ci_version;     /* in: CHANNEL_INIT_CURRENT_VERSION */
	uint32_t        ci_ch_mode;     /* in: CHMODE_* flags */
	ring_id_t       ci_ch_ring_id;  /* in: Channel ring */
	nexus_port_t    ci_nx_port;     /* in: Nexus instance port */
	uuid_t          ci_nx_uuid;     /* in: Nexus instance UUID */
	user_addr_t     ci_key;         /* in: key blob */
	uint32_t        ci_key_len;     /* in: key length */
	uint32_t        __ci_align;     /* reserved (alignment padding) */
	struct ch_ev_thresh ci_tx_lowat; /* in: TX low-watermark */
	struct ch_ev_thresh ci_rx_lowat; /* in: RX low-watermark */
	guardid_t       ci_guard;       /* out: guard ID */
};
525 
/* valid values for ci_ch_mode / cinfo_ch_mode */
#define CHMODE_UNUSED_1                 0x00000001
#define CHMODE_UNUSED_2                 0x00000002
#define CHMODE_UNUSED_3                 0x00000004
#define CHMODE_USER_PACKET_POOL         0x00000008
#define CHMODE_DEFUNCT_OK               0x00000010
#define CHMODE_FILTER                   0x00000020     /* packet filter channel */
#define CHMODE_EVENT_RING               0x00000040
#define CHMODE_LOW_LATENCY              0x00000080
#define CHMODE_EXCLUSIVE                0x00000200
#ifdef KERNEL
/* mask off userland-settable bits */
#define CHMODE_MASK                                     \
	(CHMODE_USER_PACKET_POOL | CHMODE_FILTER  |      \
	CHMODE_DEFUNCT_OK | CHMODE_EVENT_RING | CHMODE_EXCLUSIVE | \
	CHMODE_LOW_LATENCY)
#define CHMODE_KERNEL                   0x00001000  /* special, in-kernel */
#define CHMODE_NO_NXREF                 0x00002000  /* does not hold nx refcnt */
#define CHMODE_CONFIG                   0x00004000  /* provider config mode */
#define CHMODE_HOST                     0x00008000  /* to host (kernel) stack */

/*
 * %b-style bit names; NOTE(review): bits 1-3 still carry MON_TX/MON_RX/
 * NO_COPY names although the corresponding CHMODE_* bits are now unused.
 */
#define CHMODE_BITS                                                       \
	"\020\01MON_TX\02MON_RX\03NO_COPY\04USER_PKT_POOL"                \
	"\05DEFUNCT_OK\06FILTER\07EVENT_RING\010LOW_LATENCY\012EXCLUSIVE" \
	"\015KERNEL\016NO_NXREF\017CONFIG\020HOST"
#endif /* KERNEL */

/*
 * Channel options (for __channel_get_opt/__channel_set_opt).
 */
#define CHOPT_TX_LOWAT_THRESH   1  /* (get/set) ch_ev_thresh */
#define CHOPT_RX_LOWAT_THRESH   2  /* (get/set) ch_ev_thresh */
#define CHOPT_IF_ADV_CONF       3  /* (set) enable/disable interface advisory events on the channel */
558 
559 #ifndef KERNEL
/*
 * Channel ring descriptor.
 *
 * Userspace-side state for one ring of an open channel; holds local
 * copies of the mapped base addresses so per-slot address computation
 * does not have to go back to the shared schema.
 */
struct channel_ring_desc {
	const struct channel    *chrd_channel;   /* owning channel */
	const volatile uint32_t *chrd_csm_flags; /* -> schema csm_flags */
	const struct __user_channel_ring *chrd_ring; /* shared ring state */

	/*
	 * Array of __slot_desc each representing slot-specific data.
	 * There is exactly one descriptor for each slot in the ring.
	 */
	struct __slot_desc *chrd_slot_desc;

	/* local per-ring copies for easy access */
	const nexus_meta_type_t chrd_md_type;
	const nexus_meta_subtype_t chrd_md_subtype;
	const mach_vm_address_t chrd_shmem_base_addr;
	const mach_vm_address_t chrd_def_buf_base_addr;
	const mach_vm_address_t chrd_large_buf_base_addr;
	const mach_vm_address_t chrd_md_base_addr;
	const mach_vm_address_t chrd_sd_base_addr;
	const mach_vm_address_t chrd_bft_base_addr;
	const uint32_t          chrd_max_bufs; /* max length of buflet chain */
} __attribute__((aligned(sizeof(uint64_t))));
585 
/*
 * Channel descriptor.
 *
 * Userspace handle for an open channel fd; chd_rings[] is a variable
 * length trailer with one entry per ring (see CHD_SIZE()).
 */
struct channel {
	int             chd_fd;         /* channel file descriptor */
	sync_flags_t    chd_sync_flags;
	guardid_t       chd_guard;      /* guard ID (cf. ci_guard) */
	struct ch_info  *chd_info;      /* channel info from kernel */

	/* regions shared with the kernel (read-only to userspace) */
	const volatile struct __user_channel_schema *chd_schema;
	const volatile void *chd_nx_stats;
	const volatile struct __flowadv_entry *chd_nx_flowadv;
	const volatile struct __kern_nexus_adv_metadata *chd_nx_adv;

	/* local copies for easy access */
	const nexus_meta_type_t chd_md_type;
	const nexus_meta_subtype_t chd_md_subtype;
	const uint8_t chd_alloc_ring_idx;       /* packet alloc ring */
	const uint8_t chd_free_ring_idx;        /* packet free ring */
	const uint8_t chd_buf_alloc_ring_idx;   /* buflet alloc ring */
	const uint8_t chd_buf_free_ring_idx;    /* buflet free ring */
	const uint8_t chd_large_buf_alloc_ring_idx; /* large buf alloc ring */
#if defined(LIBSYSCALL_INTERFACE)
#define CHD_RING_IDX_NONE    (uint8_t)-1        /* no such ring */
#endif /* LIBSYSCALL_INTERFACE */

	/*
	 * Per-ring descriptor, aligned at max cache line boundary
	 */
	struct channel_ring_desc        chd_rings[0]
	__attribute__((aligned(sizeof(uint64_t))));
};
617 
/*
 * Size in bytes of a struct channel carrying n ring descriptors.
 * Uses __builtin_offsetof — consistent with CHANNEL_SCHEMA_SIZE()
 * above — instead of the null-pointer-dereference offsetof idiom,
 * which is undefined behavior.  The value is unchanged.
 */
#define CHD_SIZE(n) \
	(__builtin_offsetof(struct channel, chd_rings[(n)]))

#define CHD_INFO_SIZE           (sizeof (struct ch_info))
#define CHD_INFO(_chd)          ((_chd)->chd_info)
#define CHD_PARAMS(_chd)        (&CHD_INFO(_chd)->cinfo_nxprov_params)
/* strip the const/volatile qualifiers from the shared-region pointers */
#define CHD_SCHEMA(_chd)        \
	(__DECONST(struct __user_channel_schema *, (_chd)->chd_schema))
#define CHD_NX_STATS(_chd)      \
	(__DECONST(void *, (_chd)->chd_nx_stats))
#define CHD_NX_FLOWADV(_chd)    \
	(__DECONST(struct __flowadv_entry *, (_chd)->chd_nx_flowadv))
#define CHD_NX_ADV_MD(_chd)    __DECONST(struct __kern_nexus_adv_metadata *, \
    ((_chd)->chd_nx_adv))
/* the advisory payload immediately follows the metadata header */
#define CHD_NX_ADV_NETIF(_adv_md)    \
    (struct netif_nexus_advisory *)(void *)(_adv_md + 1)
#define CHD_NX_ADV_FSW(_adv_md)    (struct sk_nexusadv *)(void *)(_adv_md + 1)
635 
/*
 * Channel attributes.
 *
 * Parameter block for querying/configuring a channel before open.
 */
struct channel_attr {
	uint32_t        cha_tx_rings;           /* # of TX rings */
	uint32_t        cha_rx_rings;           /* # of RX rings */
	uint32_t        cha_tx_slots;           /* # of slots per TX ring */
	uint32_t        cha_rx_slots;           /* # of slots per RX ring */
	uint32_t        cha_buf_size;           /* buffer size */
	uint32_t        cha_meta_size;          /* metadata size */
	uint32_t        cha_stats_size;         /* statistics region size */
	uint32_t        cha_exclusive;
	uint32_t        cha_key_len;            /* key blob length */
	void            *cha_key;               /* key blob */
	struct ch_ev_thresh cha_tx_lowat;       /* TX low-watermark */
	struct ch_ev_thresh cha_rx_lowat;       /* RX low-watermark */
	uint32_t        cha_nexus_type;
	uint32_t        cha_nexus_extensions;
	uint32_t        cha_nexus_mhints;
	uint32_t        cha_nexus_ifindex;
	uint32_t        cha_flowadv_max;        /* max flow advisory entries */
	nexus_meta_type_t cha_nexus_meta_type;
	nexus_meta_subtype_t cha_nexus_meta_subtype;
	uint32_t        cha_nexus_checksum_offload;
	uint32_t        cha_user_packet_pool;
	uint32_t        cha_nexusadv_size;      /* nexus advisory region size */
	uint32_t        cha_nexus_defunct_ok;
	uint32_t        cha_filter;
	uint32_t        cha_enable_event_ring;
	uint32_t        cha_max_frags;
	uint32_t        cha_num_buffers;
	uint32_t        cha_low_latency;
};
670 
#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
__BEGIN_DECLS
/*
 * Raw channel syscall stubs (userspace only).
 * NOTE(review): __channel_open presumably returns the new channel fd
 * and the others 0/-1 with errno, per the usual syscall convention —
 * confirm against the Libsyscall wrappers.
 */
extern int __channel_open(struct ch_init *init, const uint32_t init_len);
extern int __channel_get_info(int c, struct ch_info *cinfo,
    const uint32_t cinfolen);
extern int __channel_sync(int c, const int mode, const sync_flags_t flags);
extern int __channel_get_opt(int c, const uint32_t opt, void *aoptval,
    uint32_t *aoptlen);
extern int __channel_set_opt(int c, const uint32_t opt, const void *aoptval,
    const uint32_t optlen);
__END_DECLS
#endif  /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
683 #endif /* !KERNEL */
684 #endif /* PRIVATE || BSD_KERNEL_PRIVATE */
685 #endif /* !_SKYWALK_OS_CHANNEL_PRIVATE_H_ */
686