xref: /xnu-10002.81.5/bsd/skywalk/channel/os_channel_private.h (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2015-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri.
31  * All rights reserved.
32  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  *   1. Redistributions of source code must retain the above copyright
38  *      notice, this list of conditions and the following disclaimer.
39  *   2. Redistributions in binary form must reproduce the above copyright
40  *      notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53  * SUCH DAMAGE.
54  */
55 
56 #ifndef _SKYWALK_OS_CHANNEL_PRIVATE_H_
57 #define _SKYWALK_OS_CHANNEL_PRIVATE_H_
58 
59 #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
60 #include <sys/time.h>
61 #include <sys/signal.h>
62 #include <sys/guarded.h>
63 #include <sys/utsname.h>
64 #include <skywalk/os_channel.h>
65 #include <skywalk/os_stats_private.h>
66 
67 /* BEGIN CSTYLED */
68 /*
69  * The userspace data structures used by Skywalk are shown below.
70  *
71  * The kernel allocates the regions for the various object types,
72  * and maps them to the userspace task in a contiguous span, one
73  * after another.
74  *
75  * Each channel file descriptor comes with its own memory map,
76  * and the layout of the rest of the objects is described in the
77  * __user_channel_schema structure associated with the channel.
78  * This schema structure is mapped read-only in the task.
79  *
80  *     +=======================+
81  *     | __user_channel_schema | (1 per channel fd)
82  *     +=======================+
83  *     |     csm_ver           |
84  *     |     csm_flags         |
85  *     |-----------------------|
86  *     |     csm_tx_rings      |
87  *     |     csm_rx_rings      |
88  *     | csm_allocator_rings   |
89  *     |    csm_event_rings    |
90  *     |-----------------------|
91  *     |     csm_stats_ofs     | <<---+
92  *     |-----------------------|      |
93  *     |     csm_flowadv_max   |      |
94  *     |     csm_flowadv_ofs   | <<---+ relative to base of memory map
95  *     |-----------------------|      |
96  *     | csm_md_redzone_cookie |      |
97  *     |     csm_md_type       |      |
98  *     |     csm_md_subtype    |      |
99  *     |-----------------------|      |
100  *     |     csm_stats_ofs     | <<---+
101  *     |     csm_stats_type    |      |
102  *     |-----------------------|      |
103  *     |     csm_nexusadv_ofs  | <<---+
104  *     |-----------------------|
105  *     |     csm_kern_name     |
106  *     |     csm_kern_uuid     |
107  *     |-----------------------|
108  *     | TX  csm_ring_ofs[0]   | <<---+
109  *     | TX  csm_sd_ofs[0]     |      |
110  *     :        ...            :      |
111  *     | TX  csm_ring_ofs[t]   |      |
112  *     | TX  csm_sd_ofs[t]     |      |
113  *     |-----------------------|      |
114  *     | RX  csm_ring_ofs[0]   | <<---+ these offsets are relative
115  *     | RX  csm_sd_ofs[0]     |      | to each schema structure
116  *     :        ...            :      |
117  *     | RX  csm_ring_ofs[t]   |      |
118  *     | RX  csm_sd_ofs[t]     |      |
119  *     |-----------------------|      |
120  *     | A   csm_ring_ofs[0]   |      |
121  *     | A   csm_sd_ofs[0]     |      |
122  *     :        ...            :      |
123  *     | A   csm_ring_ofs[t]   | <<---+
124  *     | A   csm_sd_ofs[t]     |      |
125  *     |-----------------------|      |
126  *     | F   csm_ring_ofs[0]   |      |
127  *     | F   csm_sd_ofs[0]     |      |
128  *     :        ...            :      |
129  *     | F   csm_ring_ofs[t]   | <<---+
130  *     | F   csm_sd_ofs[t]     |      |
131  *     |-----------------------|      |
132  *     | EV  csm_ring_ofs[0]   | <<---+
133  *     | EV  csm_sd_ofs[0]     |
134  *     +-----------------------+
135  *         (variable length)
136  *
137  * On nexus adapters that support statistics or flow advisory, the
138  * csm_stats_ofs or csm_flowadv_ofs would be non-zero, and their values
139  * represent the offset to the respective objects from the base of the
140  * memory map.  This is because those regions are shared amongst all
141  * channels opened to the adapter associated with the nexus port.
142  *
143  * Other regions, such as rings and slot descriptors, are unique to the
144  * channel itself.  They are always present, and their values indicated
145  * by csm_{ring,sd}_ofs represent the offset to the respective objects
146  * from the schema pointer (not from base of memory map.)  This is done
147  * to support channels bound to any of the adapter's ring-pairs.
148  *
149  * See notes below on CSM_CURRENT_VERSION.
150  */
151 /* END CSTYLED */
152 #define CHANNEL_SCHEMA_KERN_NAME        _SYS_NAMELEN
153 struct __user_channel_schema {	/* per-channel schema; mapped read-only in the task */
154 	/*
155 	 * Schema properties, kernel version string and kernel
156 	 * executable UUID (for debugging).  These 4 fields
157 	 * must be at the beginning of the structure.
158 	 */
159 	const uint32_t  csm_ver;                /* schema layout version */
160 	const volatile uint32_t csm_flags;      /* CSM_* flags */
161 	const char      csm_kern_name[CHANNEL_SCHEMA_KERN_NAME]; /* kernel version string */
162 	const uuid_t    csm_kern_uuid;          /* kernel executable UUID */
163 
164 	/*
165 	 * The rest of the fields may be rearranged as needed, with
166 	 * the expectation that CSM_CURRENT_VERSION be bumped up on
167 	 * each modification.
168 	 */
169 
170 	/*
171 	 * The number of packet rings available for this channel.
172 	 */
173 	const uint32_t  csm_tx_rings;   /* # of tx rings */
174 	const uint32_t  csm_rx_rings;   /* # of rx rings */
175 
176 	/*
177 	 * The number of allocator ring pairs available for this channel.
178 	 * If the channel supports user packet pool then 1 pair of
179 	 * alloc/free rings per channel is used to manage the packet
180 	 * allocation from userspace.
181 	 * If the channel supports multi-buflet packet then an additional pair
182 	 * of alloc/free rings is used to manage the buffer (buflet) allocation
183 	 * from userspace.
184 	 */
185 	const uint32_t  csm_allocator_ring_pairs;
186 
187 	/*
188 	 * Number of event rings for this channel.
189 	 */
190 	const uint32_t  csm_num_event_rings;
191 	const uint32_t  csm_large_buf_alloc_rings; /* presumably # of large buffer alloc rings -- TODO confirm */
192 
193 	/*
194 	 * Flow advisory region offset; this field will be 0 if the
195 	 * nexus isn't capable of flow advisory scheme.  Otherwise,
196 	 * it points to a table of flow advisory entries, and the
197 	 * total number of entries is indicated by csm_flowadv_max.
198 	 */
199 	const uint32_t          csm_flowadv_max;
200 	const mach_vm_offset_t  csm_flowadv_ofs
201 	__attribute__((aligned(sizeof(uint64_t))));
202 
203 	/*
204 	 * Metadata region redzone, type and sub-type.
205 	 */
206 	const uint64_t  csm_md_redzone_cookie   /* metadata redzone cookie */
207 	__attribute__((aligned(sizeof(uint64_t))));
208 	const nexus_meta_type_t csm_md_type;    /* metadata type */
209 	const nexus_meta_subtype_t csm_md_subtype; /* metadata subtype */
210 
211 	/*
212 	 * Statistics region offset; each nexus is free to use this
213 	 * region and break it up into multiple smaller regions if
214 	 * needed.  The definition and interpretation of the contents
215 	 * is left to the nexus.  The value of this field will be 0
216 	 * if the nexus doesn't facilitate shareable statistics.
217 	 */
218 	const mach_vm_offset_t  csm_stats_ofs
219 	__attribute__((aligned(sizeof(uint64_t))));
220 	const nexus_stats_type_t csm_stats_type;
221 
222 	/*
223 	 * Nexus advisory region offset; this field will be 0 if the
224 	 * nexus isn't providing any nexus-wide advisories.  Otherwise,
225 	 * it points to the nexus advisory structure.
226 	 */
227 	const mach_vm_offset_t csm_nexusadv_ofs
228 	__attribute__((aligned(sizeof(uint64_t))));
229 
230 	/*
231 	 * The following array contains the offset of each channel ring
232 	 * from the beginning of this structure, as well as the ring's
233 	 * slot descriptor, in the following order:
234 	 *
235 	 * tx rings (csm_tx_rings-csm_htx_rings)
236 	 * rx rings (csm_rx_rings-csm_hrx_rings)
237 	 * allocator rings (either 2 or 4 or none) (optional)
238 	 * event rings (optional)
239 	 * (NOTE(review): csm_htx_rings/csm_hrx_rings are not fields of this struct.)
240 	 * The area is filled up by the kernel, and then only read
241 	 * by userspace code.
242 	 */
243 	struct {
244 		const mach_vm_offset_t  ring_off; /* __user_channel_ring */
245 		const mach_vm_offset_t  sd_off;   /* __slot_desc */
246 	} csm_ring_ofs[0] __attribute__((aligned(sizeof(uint64_t))));
247 };
248 
249 /*
250  * Schema layout version.  Make sure to bump this up each time
251  * struct __user_channel_schema layout is modified.  This helps
252  * to ensure that both kernel and libsystem_kernel are in sync,
253  * as otherwise we'd assert due to version mismatch.
254  */
255 #define CSM_CURRENT_VERSION     18
256 
257 /* valid values for csm_flags */
258 #define CSM_PRIV_MEM    0x1             /* private memory region */
259 #define CSM_ACTIVE      (1U << 31)      /* channel is active */
260 
261 #define CSM_BITS        "\020\01PRIV_MEM\040ACTIVE" /* flag names keyed by 1-based bit number in octal: \01 = 0x1, \040 = bit 32 = (1U << 31) */
262 
263 /* size of __user_channel_schema with n csm_ring_ofs[] entries (n total rings) */
264 #define CHANNEL_SCHEMA_SIZE(n) \
265 	__builtin_offsetof(struct __user_channel_schema, csm_ring_ofs[(n)])
266 
267 /*
268  * Some fields should be cache-aligned to reduce contention.
269  * The alignment is architecture and OS dependent; we use an
270  * estimate that should cover most architectures.
271  */
272 #define CHANNEL_CACHE_ALIGN_MAX 128     /* max cache line size */
273 
274 /*
275  * Ring kind.
276  */
277 #define CR_KIND_RX              0       /* same as NR_RX */
278 #define CR_KIND_TX              1       /* same as NR_TX */
279 #define CR_KIND_ALLOC           2       /* same as NR_A */
280 #define CR_KIND_FREE            3       /* same as NR_F */
281 #define CR_KIND_EVENT           4       /* same as NR_EV */
282 #define CR_KIND_LARGE_BUF_ALLOC 5       /* same as NR_LBA */
283 
284 typedef uint32_t slot_idx_t;
285 
286 typedef uint32_t obj_idx_t;
287 #define OBJ_IDX_NONE    ((obj_idx_t)-1)
288 
289 /*
290  * This structure contains per-slot properties for userspace.  If the flag
291  * SD_IDX_VALID is set, the descriptor contains the index of the metadata
292  * attached to the slot.
293  *
294  * TODO: [email protected] -- this will be made read-write for user pool.
295  * TODO: [email protected] -- Should we make use of RX/TX
296  * preparation/writeback descriptors (in a union) for sd_len?
297  */
298 struct __user_slot_desc {
299 	obj_idx_t       sd_md_idx;      /* metadata index (meaningful when SD_IDX_VALID is set) */
300 	uint16_t        sd_flags;       /* slot flags (SD_*) */
301 	/*
302 	 * XXX: sd_len is currently used only for the purpose of accounting
303 	 * for the number of bytes pending to be read by the user channel.
304 	 * Currently the maximum size of a packet being transported on user
305 	 * channel is <= UINT16_MAX, so sd_len being uint16_t is fine, but
306 	 * this needs to be changed if we want to go beyond UINT16_MAX.
307 	 */
308 	uint16_t        sd_len;         /* slot len */
309 };
310 
311 /* valid values for sd_flags */
312 #define SD_IDX_VALID    0x1             /* slot has metadata attached */
313 #ifdef KERNEL
314 #define SD_LEN_VALID    0x2             /* slot has packet length recorded */
315 #define SD_KERNEL_ONLY  (1 << 15)       /* kernel only; no user counterpart */
316 
317 #define SD_FLAGS_USER   (SD_IDX_VALID)
318 /* invariant flags we want to keep */
319 #define SD_SAVE_MASK    (SD_KERNEL_ONLY)
320 #endif /* KERNEL */
321 /*
322  * SD_VALID_METADATA() returns TRUE if the slot has an attached metadata
323  */
324 #define SD_VALID_METADATA(_sd)                                          \
325 	(!!((_sd)->sd_flags & SD_IDX_VALID))
326 
327 /*
328  * Slot descriptor: the user-visible descriptor overlaid on one uint64_t.
329  */
330 struct __slot_desc {
331 	union {
332 		struct __user_slot_desc _sd_user;       /* user-visible fields */
333 		uint64_t                _sd_private[1]; /* same storage as a single 64-bit word */
334 	};
335 };
336 
337 #define SLOT_DESC_SZ            (sizeof (struct __slot_desc)) /* bytes per slot descriptor */
338 #define SLOT_DESC_USD(_sdp)     (&(_sdp)->_sd_user) /* pointer to the __user_slot_desc view */
339 
340 /*
341  * Ring.
342  *
343  * Channel representation of a TX or RX ring (also known as "queue").
344  * This is a queue implemented as a fixed-size circular array.
345  * At the software level the important fields are: head, cur, tail.
346  *
347  * The __user_channel_ring, and all slots and buffers in the range
348  * [head .. tail-1] are owned by the user program; the kernel only
349  * accesses them during a channel system call and in the user thread
350  * context.
351  */
352 struct __user_channel_ring {
353 	/*
354 	 * In TX rings:
355 	 *
356 	 *   head	first slot available for transmission;
357 	 *   tail	(readonly) first slot reserved to the kernel
358 	 *   khead	(readonly) kernel's view of next slot to send
359 	 *		since last sync.
360 	 *
361 	 * [head .. tail-1] can be used for new packets to send;
362 	 *
363 	 * 'head' must be incremented as slots are filled with new packets to
364 	 * be sent;
365 	 *
366 	 * In RX rings:
367 	 *
368 	 *   head	first valid received packet;
369 	 *   tail	(readonly) first slot reserved to the kernel
370 	 *   khead	(readonly) kernel's view of next slot to reclaim
371 	 *		since last sync.
372 	 *
373 	 * [head .. tail-1] contain received packets;
374 	 *
375 	 * 'head' must be incremented as slots are consumed and can be returned
376 	 * to the kernel;
377 	 * Legend below, presumably: (u) updated by user, (k) by kernel -- TODO confirm.
378 	 */
379 	volatile slot_idx_t     ring_head;      /* (u) first user slot */
380 	const volatile slot_idx_t ring_tail;    /* (k) first kernel slot */
381 	const volatile slot_idx_t ring_khead;   /* (k) next to send/reclaim */
382 
383 	const uint32_t  ring_num_slots; /* # of slots in the ring */
384 	const uint32_t  ring_def_buf_size;  /* size of each default buffer */
385 	const uint32_t  ring_large_buf_size;  /* size of each large buffer */
386 	const uint16_t  ring_md_size;   /* size of each metadata */
387 	const uint16_t  ring_bft_size;  /* size of each buflet metadata */
388 	const uint16_t  ring_id;        /* unused */
389 	const uint16_t  ring_kind;      /* kind of ring (see CR_KIND_*) */
390 
391 	/*
392 	 * Base addresses of {buf, metadata, slot descriptor} regions
393 	 * from this ring descriptor.  This facilitates computing the
394 	 * addresses of those regions in the task's mapped memory.
395 	 */
396 	/* base address of default buffer region */
397 	const mach_vm_offset_t  ring_def_buf_base
398 	__attribute((aligned(sizeof(uint64_t))));
399 	/* base address of large buffer region */
400 	const mach_vm_offset_t  ring_large_buf_base
401 	__attribute((aligned(sizeof(uint64_t))));
402 	const mach_vm_offset_t  ring_md_base    /* base of metadata region */
403 	__attribute((aligned(sizeof(uint64_t))));
404 	const mach_vm_offset_t  ring_sd_base    /* base of slot desc region */
405 	__attribute((aligned(sizeof(uint64_t))));
406 	/*
407 	 * base of buflet metadata region
408 	 * value of 0 means that external buflet metadata is not present.
409 	 */
410 	const mach_vm_offset_t  ring_bft_base
411 	__attribute((aligned(sizeof(uint64_t))));
412 
413 	const volatile uint64_t ring_sync_time /* (k) time of last sync */
414 	__attribute((aligned(sizeof(uint64_t))));
415 	const volatile uint64_t ring_notify_time /* (k) time of last notify */
416 	__attribute((aligned(sizeof(uint64_t))));
417 	/* current working set for the packet allocator ring */
418 	const volatile uint32_t ring_alloc_ws;
419 	/* current working set for the buflet allocator ring */
420 	const volatile uint32_t ring_alloc_buf_ws;
421 };
422 
423 /* true when head == tail, i.e. the user-owned range [head .. tail-1] is empty */
424 #define CHANNEL_RING_EMPTY(_ring) ((_ring)->ring_head == (_ring)->ring_tail)
425 
426 /*
427  * Flow advisory.
428  *
429  * Each flow that is registered with the nexus capable of supporting
430  * flow advisory is given an entry.  Each entry resides in the flow
431  * advisory table that is mapped to the task.
432  * fae_id:  is the flow identifier used by libnetcore to identify a flow.
433  *          This identifier is passed as a metadata on all packets
434  *          generated by the user space stack. This is the flow_id parameter
435  *          which should be used while checking if a flow is
436  *          admissible using the API os_channel_flow_admissible().
437  * fae_flowid: is a globally unique flow identifier generated by the
438  *             flowswitch for each flow. Flowswitch stamps every TX packet
439  *             with this identifier. This is the flow identifier which would
440  *             be visible to the AQM logic and the driver. The flow advisory
441  *             mechanism in kernel uses this fae_id to identify the flow entry
442  *             in the flow advisory table.
443  */
444 struct __flowadv_entry {
445 	union {
446 		uint64_t        fae_id_64[2];   /* fae_id viewed as two 64-bit words */
447 		uint32_t        fae_id_32[4];   /* fae_id viewed as four 32-bit words */
448 		uuid_t          fae_id; /* flow ID from userspace stack */
449 	};
450 	volatile uint32_t       fae_ce_cnt;   /* presumably a CE-marked packet count -- TODO confirm */
451 	volatile uint32_t       fae_pkt_cnt;  /* presumably a packet count -- TODO confirm */
452 	volatile uint32_t       fae_flags;  /* flags FLOWADVF_* */
453 	/* flow ID generated by flowswitch */
454 	uint32_t                fae_flowid;
455 #ifdef KERNEL
456 #define fae_token               fae_flowid /* kernel-only alias for fae_flowid */
457 #endif /* KERNEL */
458 } __attribute__((aligned(sizeof(uint64_t))));
459 
460 #define FLOWADVF_VALID          0x1     /* flow is valid */
461 #define FLOWADVF_SUSPENDED      0x2     /* flow is suspended */
462 
463 /* channel event threshold; used for the TX/RX low-watermarks in ch_info and ch_init */
464 struct ch_ev_thresh {
465 	channel_threshold_unit_t cet_unit;      /* presumably the unit of cet_value -- TODO confirm */
466 	uint32_t                cet_value;      /* threshold value */
467 };
468 
469 /*
470  * Channel information (pointed to by chd_info in struct channel).
471  */
472 struct ch_info {
473 	union {
474 		uint64_t  cinfo_ch_id_64[2];    /* UUID viewed as two 64-bit words */
475 		uint32_t  cinfo_ch_id_32[4];    /* UUID viewed as four 32-bit words */
476 		uuid_t    cinfo_ch_id;          /* Channel UUID */
477 	};
478 #ifdef KERNEL
479 #define cinfo_ch_token  cinfo_ch_id_32[0] /* kernel-only alias: first 32-bit word of the UUID storage */
480 #endif /* KERNEL */
481 	uint32_t          cinfo_ch_mode;        /* CHMODE_* flags */
482 	ring_id_t         cinfo_ch_ring_id;     /* Channel ring (or any) */
483 	struct nxprov_params cinfo_nxprov_params; /* Nexus provider params; see CHD_PARAMS() */
484 	uuid_t            cinfo_nx_uuid;        /* Nexus instance UUID */
485 	nexus_port_t      cinfo_nx_port;        /* Nexus instance port */
486 	uint32_t          cinfo_num_bufs;       /* # buffers in user pool */
487 	mach_vm_size_t    cinfo_mem_map_size;   /* size of VM map */
488 	mach_vm_address_t cinfo_mem_base;       /* VM mapping for task */
489 	mach_vm_offset_t  cinfo_schema_offset;  /* offset in VM map */
490 	ring_id_t         cinfo_first_tx_ring;  /* first TX ring ID */
491 	ring_id_t         cinfo_last_tx_ring;   /* last TX ring ID */
492 	ring_id_t         cinfo_first_rx_ring;  /* first RX ring ID */
493 	ring_id_t         cinfo_last_rx_ring;   /* last RX ring ID */
494 	struct ch_ev_thresh cinfo_tx_lowat;     /* TX low-watermark */
495 	struct ch_ev_thresh cinfo_rx_lowat;     /* RX low-watermark */
496 } __attribute__((aligned(sizeof(uint64_t))));
497 
498 #include <skywalk/os_nexus_private.h>
499 
500 #define CHANNEL_INIT_VERSION_1          1
501 #define CHANNEL_INIT_CURRENT_VERSION    CHANNEL_INIT_VERSION_1 /* value expected in ci_version */
502 
503 /*
504  * Channel init parameters; the type taken by __channel_open() in non-kernel builds.
505  */
506 struct ch_init {
507 	uint32_t        ci_version;     /* in: CHANNEL_INIT_CURRENT_VERSION */
508 	uint32_t        ci_ch_mode;     /* in: CHMODE_* flags */
509 	ring_id_t       ci_ch_ring_id;  /* in: Channel ring */
510 	nexus_port_t    ci_nx_port;     /* in: Nexus instance port */
511 	uuid_t          ci_nx_uuid;     /* in: Nexus instance UUID */
512 	user_addr_t     ci_key;         /* in: key blob */
513 	uint32_t        ci_key_len;     /* in: key length */
514 	uint32_t        __ci_align;     /* reserved */
515 	struct ch_ev_thresh ci_tx_lowat; /* in: TX low-watermark */
516 	struct ch_ev_thresh ci_rx_lowat; /* in: RX low-watermark */
517 	guardid_t       ci_guard;       /* out: guard ID */
518 };
519 
520 #define CHMODE_MONITOR_TX               0x00000001
521 #define CHMODE_MONITOR_RX               0x00000002
522 #define CHMODE_MONITOR_NO_COPY          0x00000004     /* only if mon tx/rx is set */
523 #define CHMODE_USER_PACKET_POOL         0x00000008
524 #define CHMODE_DEFUNCT_OK               0x00000010
525 #define CHMODE_FILTER                   0x00000020     /* packet filter channel */
526 #define CHMODE_EVENT_RING               0x00000040
527 #define CHMODE_LOW_LATENCY              0x00000080
528 #define CHMODE_EXCLUSIVE                0x00000200     /* note: 0x00000100 is not assigned here */
529 #define CHMODE_MONITOR                  \
530 	(CHMODE_MONITOR_TX | CHMODE_MONITOR_RX)
531 #ifdef KERNEL
532 /* mask of userland-settable bits */
533 #define CHMODE_MASK                                     \
534 	(CHMODE_MONITOR | CHMODE_MONITOR_NO_COPY |      \
535 	CHMODE_USER_PACKET_POOL | CHMODE_FILTER  |      \
536 	CHMODE_DEFUNCT_OK | CHMODE_EVENT_RING | CHMODE_EXCLUSIVE | \
537 	CHMODE_LOW_LATENCY)
538 #define CHMODE_KERNEL                   0x00001000  /* special, in-kernel */
539 #define CHMODE_NO_NXREF                 0x00002000  /* does not hold nx refcnt */
540 #define CHMODE_CONFIG                   0x00004000  /* provider config mode */
541 #define CHMODE_HOST                     0x00008000  /* to host (kernel) stack */
542 
543 #define CHMODE_BITS /* flag names keyed by 1-based bit number in octal */ \
544 	"\020\01MON_TX\02MON_RX\03NO_COPY\04USER_PKT_POOL"                \
545 	"\05DEFUNCT_OK\06FILTER\07EVENT_RING\010LOW_LATENCY\012EXCLUSIVE" \
546 	"\015KERNEL\016NO_NXREF\017CONFIG\020HOST"
547 #endif /* KERNEL */
548 
549 /*
550  * Channel options.
551  */
552 #define CHOPT_TX_LOWAT_THRESH   1  /* (get/set) ch_ev_thresh */
553 #define CHOPT_RX_LOWAT_THRESH   2  /* (get/set) ch_ev_thresh */
554 #define CHOPT_IF_ADV_CONF       3  /* (set) enable/disable interface advisory events on the channel */
555 
556 #ifndef KERNEL
557 /*
558  * Channel ring descriptor (non-kernel builds only); element type of chd_rings[].
559  */
560 struct channel_ring_desc {
561 	const struct channel    *chrd_channel;  /* presumably the owning channel -- TODO confirm */
562 	const volatile uint32_t *chrd_csm_flags; /* presumably points at the schema's csm_flags -- TODO confirm */
563 	const struct __user_channel_ring *chrd_ring; /* the ring described by this entry */
564 
565 	/*
566 	 * Array of __slot_desc each representing slot-specific data.
567 	 * There is exactly one descriptor for each slot in the ring.
568 	 */
569 	struct __slot_desc *chrd_slot_desc;
570 
571 	/* local per-ring copies for easy access */
572 	const nexus_meta_type_t chrd_md_type;
573 	const nexus_meta_subtype_t chrd_md_subtype;
574 	const mach_vm_address_t chrd_shmem_base_addr;
575 	const mach_vm_address_t chrd_def_buf_base_addr;
576 	const mach_vm_address_t chrd_large_buf_base_addr;
577 	const mach_vm_address_t chrd_md_base_addr;
578 	const mach_vm_address_t chrd_sd_base_addr;
579 	const mach_vm_address_t chrd_bft_base_addr;
580 	const uint32_t          chrd_max_bufs; /* max length of buflet chain */
581 } __attribute__((aligned(sizeof(uint64_t))));
582 
583 /*
584  * Channel descriptor (non-kernel builds only), followed by its per-ring descriptors.
585  */
586 struct channel {
587 	int             chd_fd;                 /* channel file descriptor */
588 	sync_flags_t    chd_sync_flags;
589 	guardid_t       chd_guard;              /* guard ID (cf. ci_guard in struct ch_init) */
590 	struct ch_info  *chd_info;              /* see CHD_INFO() */
591 
592 	const volatile struct __user_channel_schema *chd_schema; /* see CHD_SCHEMA() */
593 	const volatile void *chd_nx_stats;      /* see CHD_NX_STATS() */
594 	const volatile struct __flowadv_entry *chd_nx_flowadv; /* see CHD_NX_FLOWADV() */
595 	const volatile struct __kern_nexus_adv_metadata *chd_nx_adv; /* see CHD_NX_ADV_MD() */
596 
597 	const nexus_meta_type_t chd_md_type;
598 	const nexus_meta_subtype_t chd_md_subtype;
599 	const uint8_t chd_alloc_ring_idx;
600 	const uint8_t chd_free_ring_idx;
601 	const uint8_t chd_buf_alloc_ring_idx;
602 	const uint8_t chd_buf_free_ring_idx;
603 	const uint8_t chd_large_buf_alloc_ring_idx;
604 #if defined(LIBSYSCALL_INTERFACE)
605 #define CHD_RING_IDX_NONE    ((uint8_t)-1) /* parenthesized like OBJ_IDX_NONE so it is safe in any expression */
606 #endif /* LIBSYSCALL_INTERFACE */
607 
608 	/*
609 	 * Per-ring descriptors; aligned to sizeof (uint64_t), per the attribute below.
610 	 */
611 	struct channel_ring_desc        chd_rings[0]
612 	__attribute__((aligned(sizeof(uint64_t))));
613 };
614 
615 #define CHD_SIZE(n) /* bytes in a struct channel with n chd_rings[] entries */ \
616 	__builtin_offsetof(struct channel, chd_rings[(n)])
617 /* struct channel accessors; every expansion and macro argument is fully parenthesized */
618 #define CHD_INFO_SIZE           (sizeof (struct ch_info))
619 #define CHD_INFO(_chd)          ((_chd)->chd_info)
620 #define CHD_PARAMS(_chd)        (&CHD_INFO(_chd)->cinfo_nxprov_params)
621 #define CHD_SCHEMA(_chd)        \
622 	(__DECONST(struct __user_channel_schema *, (_chd)->chd_schema))
623 #define CHD_NX_STATS(_chd)      \
624 	(__DECONST(void *, (_chd)->chd_nx_stats))
625 #define CHD_NX_FLOWADV(_chd)    \
626 	(__DECONST(struct __flowadv_entry *, (_chd)->chd_nx_flowadv))
627 #define CHD_NX_ADV_MD(_chd)     \
628 	(__DECONST(struct __kern_nexus_adv_metadata *, (_chd)->chd_nx_adv))
629 #define CHD_NX_ADV_NETIF(_adv_md) /* address just past *(_adv_md), typed as netif advisory */ \
630 	((struct netif_nexus_advisory *)(void *)((_adv_md) + 1))
631 #define CHD_NX_ADV_FSW(_adv_md) ((struct sk_nexusadv *)(void *)((_adv_md) + 1))
632 
633 /*
634  * Channel attributes (declared for non-kernel builds only).
635  */
636 struct channel_attr {
637 	uint32_t        cha_tx_rings;
638 	uint32_t        cha_rx_rings;
639 	uint32_t        cha_tx_slots;
640 	uint32_t        cha_rx_slots;
641 	uint32_t        cha_buf_size;
642 	uint32_t        cha_meta_size;
643 	uint32_t        cha_stats_size;
644 	uint32_t        cha_exclusive;
645 	uint32_t        cha_monitor;
646 	uint32_t        cha_key_len;    /* presumably the length of cha_key in bytes -- TODO confirm */
647 	void            *cha_key;
648 	struct ch_ev_thresh cha_tx_lowat;
649 	struct ch_ev_thresh cha_rx_lowat;
650 	uint32_t        cha_nexus_type;
651 	uint32_t        cha_nexus_extensions;
652 	uint32_t        cha_nexus_mhints;
653 	uint32_t        cha_nexus_ifindex;
654 	uint32_t        cha_flowadv_max;
655 	nexus_meta_type_t cha_nexus_meta_type;
656 	nexus_meta_subtype_t cha_nexus_meta_subtype;
657 	uint32_t        cha_nexus_checksum_offload;
658 	uint32_t        cha_user_packet_pool;
659 	uint32_t        cha_nexusadv_size;
660 	uint32_t        cha_nexus_defunct_ok;
661 	uint32_t        cha_filter;
662 	uint32_t        cha_enable_event_ring;
663 	uint32_t        cha_max_frags;
664 	uint32_t        cha_num_buffers;
665 	uint32_t        cha_low_latency;
666 	uint32_t        cha_large_buf_size;
667 };
668 
669 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
670 __BEGIN_DECLS /* low-level channel entry points; declared for non-kernel builds only */
671 extern int __channel_open(struct ch_init *init, const uint32_t init_len);
672 extern int __channel_get_info(int c, struct ch_info *cinfo,
673     const uint32_t cinfolen);
674 extern int __channel_sync(int c, const int mode, const sync_flags_t flags);
675 extern int __channel_get_opt(int c, const uint32_t opt, void *aoptval,
676     uint32_t *aoptlen); /* opt: presumably one of CHOPT_* -- TODO confirm */
677 extern int __channel_set_opt(int c, const uint32_t opt, const void *aoptval,
678     const uint32_t optlen); /* opt: presumably one of CHOPT_* -- TODO confirm */
679 __END_DECLS
680 #endif  /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
681 #endif /* !KERNEL */
682 #endif /* PRIVATE || BSD_KERNEL_PRIVATE */
683 #endif /* !_SKYWALK_OS_CHANNEL_PRIVATE_H_ */
684