xref: /xnu-11215.41.3/bsd/skywalk/nexus/os_nexus_private.h (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #ifndef _SKYWALK_OS_NEXUS_PRIVATE_H_
30 #define _SKYWALK_OS_NEXUS_PRIVATE_H_
31 
32 #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
33 #include <stdbool.h>
34 #include <sys/guarded.h>
35 #include <skywalk/os_channel.h>
36 #include <skywalk/os_nexus.h>
37 #include <netinet/in.h>
38 #include <netinet/in_private.h>
39 #include <netinet/tcp.h>
40 #include <netinet/tcp_private.h>
41 #include <net/ethernet.h>
42 
43 /*
44  * Ephemeral port, for NEXUSDOMCAPF_EPHEMERAL capable nexus.
45  */
46 #define NEXUS_PORT_ANY  ((nexus_port_t)-1)
47 #define NEXUS_PORT_MAX  ((nexus_port_t)-1)
48 
49 typedef nexus_port_t nexus_port_size_t;
50 
51 #define NEXUSCTL_INIT_VERSION_1         1
52 #define NEXUSCTL_INIT_CURRENT_VERSION   NEXUSCTL_INIT_VERSION_1
53 
54 /*
55  * Nexus controller init parameters.
56  */
57 struct nxctl_init {
58 	uint32_t        ni_version;     /* in: NEXUSCTL_INIT_CURRENT_VERSION */
59 	uint32_t        __ni_align;     /* reserved */
60 	guardid_t       ni_guard;       /* out: guard ID */
61 };
62 
/*
 * Nexus metadata type.
 *
 * Be mindful that due to the use of tagged pointers for packets, this
 * type gets encoded along with the subtype, with the requirement that the
 * object addresses are aligned on 64-byte boundary at the minimum.  That
 * leaves a total of 4 bits: 2 for type and another 2 for subtype, therefore
 * limiting the maximum enum value to 3.
 */
typedef enum {
	NEXUS_META_TYPE_INVALID = 0,    /* invalid type */
	NEXUS_META_TYPE_QUANTUM,        /* struct __quantum */
	NEXUS_META_TYPE_PACKET,         /* struct __packet */
	NEXUS_META_TYPE_RESERVED,       /* for future */
	/* highest valid value; must stay <= 3 (see above) */
	NEXUS_META_TYPE_MAX = NEXUS_META_TYPE_RESERVED
} nexus_meta_type_t;
79 
/*
 * Nexus metadata subtype.  Values are assigned sequentially from 0 and
 * NEXUS_META_SUBTYPE_MAX aliases the last enumerator (3).
 */
typedef enum {
	NEXUS_META_SUBTYPE_INVALID = 0, /* invalid subtype */
	NEXUS_META_SUBTYPE_PAYLOAD,     /* normal payload mode */
	NEXUS_META_SUBTYPE_RAW,         /* raw (link layer) mode */
	NEXUS_META_SUBTYPE_RESERVED,    /* for future */
	NEXUS_META_SUBTYPE_MAX = NEXUS_META_SUBTYPE_RESERVED
} nexus_meta_subtype_t;
87 
88 /*
89  * Nexus provider parameters.
90  */
91 struct nxprov_params {
92 	nexus_name_t    nxp_name;       /* name */
93 	uint32_t        nxp_namelen;    /* length of name */
94 	nexus_type_t    nxp_type;       /* NEXUS_TYPE_* */
95 	nexus_meta_type_t nxp_md_type;  /* NEXUS_META_TYPE_* */
96 	nexus_meta_subtype_t nxp_md_subtype; /* NEXUS_META_SUBTYPE_* */
97 	uint32_t        nxp_flags;      /* NXPF_* */
98 	uint32_t        nxp_format;     /* provider-defined */
99 	uint32_t        nxp_tx_rings;   /* # of channel transmit rings */
100 	uint32_t        nxp_rx_rings;   /* # of channel receive rings */
101 	uint32_t        nxp_tx_slots;   /* # of slots per channel TX ring */
102 	uint32_t        nxp_rx_slots;   /* # of slots per channel RX ring */
103 	uint32_t        nxp_buf_size;   /* size of each buffer */
104 	uint32_t        nxp_meta_size;  /* size of metadata per slot */
105 	uint32_t        nxp_stats_size; /* size of statistics region */
106 	uint32_t        nxp_pipes;      /* number of pipes */
107 	nexus_extension_t nxp_extensions;  /* extension specific parameter(s) */
108 	uint32_t        nxp_mhints;        /* memory usage hints */
109 	uint32_t        nxp_ifindex;       /* network interface index */
110 	uint32_t        nxp_flowadv_max;   /* max flow advisory entries */
111 	nexus_qmap_type_t nxp_qmap;        /* queue mapping type */
112 	uint32_t        nxp_capabilities;  /* nexus capabilities */
113 	uint32_t        nxp_nexusadv_size; /* nexus advisory region size */
114 	uint32_t        nxp_max_frags;     /* max fragments per packet */
115 	/*
116 	 * reject channel operations if the peer has closed the channel.
117 	 * Only valid for user-pipe nexus.
118 	 */
119 	boolean_t       nxp_reject_on_close;
120 	uint32_t        nxp_large_buf_size;   /* size of large buffer */
121 } __attribute__((aligned(64)));
122 
/* valid values for nxp_flags */
#define NXPF_ANONYMOUS          0x1     /* allow anonymous channel clients */
#define NXPF_USER_CHANNEL       0x2     /* allow user channel open */
#define NXPF_NETIF_LLINK        0x4     /* use netif logical link */
#ifdef KERNEL
/* every NXPF_* bit defined above */
#define NXPF_MASK    (NXPF_ANONYMOUS | NXPF_USER_CHANNEL | NXPF_NETIF_LLINK)
#endif /* KERNEL */

/*
 * Bit-name table for nxp_flags: the first byte (\020) is the output base
 * (16), followed by <1-based bit number><name> entries.  There must be
 * one entry per NXPF_* flag; NETIF_LLINK (bit 3, 0x4) was previously
 * missing.
 */
#define NXPF_BITS               \
	"\020\01ANONYMOUS\02USER_CHANNEL\03NETIF_LLINK"

/* valid values for nxp_capabilities */
#define NXPCAP_CHECKSUM_PARTIAL 0x1     /* partial checksum */
#define NXPCAP_USER_PACKET_POOL 0x2     /* user packet pool */
#define NXPCAP_USER_CHANNEL     0x4     /* allow user channel access */

/* Bit-name table for nxp_capabilities; same encoding as NXPF_BITS. */
#define NXPCAP_BITS             \
	"\020\01CHECKSUM_PARTIAL\02USER_PKT_POOL\03USER_CHANNEL"
141 
142 #define NXPROV_REG_VERSION_1            1
143 #define NXPROV_REG_CURRENT_VERSION      NXPROV_REG_VERSION_1
144 
145 /*
146  * Nexus provider registration parameters.
147  */
148 struct nxprov_reg {
149 	uint32_t        nxpreg_version;         /* NXPROV_REG_CURRENT_VERSION */
150 	uint32_t        nxpreg_requested;       /* customized attributes */
151 	struct nxprov_params nxpreg_params;     /* Nexus provider parameters */
152 };
153 
/*
 * Flags for nxpreg_requested; keep in sync with NXA_REQ_* flags.
 * Note that these are 32-bit, whereas nxa_requested is 64-bit
 * wide; for now this won't matter.
 */
#define NXPREQ_TX_RINGS         (1U << 0)       /* 0x00000001 */
#define NXPREQ_RX_RINGS         (1U << 1)       /* 0x00000002 */
#define NXPREQ_TX_SLOTS         (1U << 2)       /* 0x00000004 */
#define NXPREQ_RX_SLOTS         (1U << 3)       /* 0x00000008 */
#define NXPREQ_BUF_SIZE         (1U << 4)       /* 0x00000010 */
#define NXPREQ_META_SIZE        (1U << 5)       /* 0x00000020 */
#define NXPREQ_STATS_SIZE       (1U << 6)       /* 0x00000040 */
#define NXPREQ_ANONYMOUS        (1U << 7)       /* 0x00000080 */
#define NXPREQ_PIPES            (1U << 8)       /* 0x00000100 */
#define NXPREQ_EXTENSIONS       (1U << 9)       /* 0x00000200 */
#define NXPREQ_MHINTS           (1U << 10)      /* 0x00000400 */
#define NXPREQ_FLOWADV_MAX      (1U << 11)      /* 0x00000800 */
#define NXPREQ_QMAP             (1U << 12)      /* 0x00001000 */
#define NXPREQ_CHECKSUM_OFFLOAD (1U << 13)      /* 0x00002000 */
#define NXPREQ_USER_PACKET_POOL (1U << 14)      /* 0x00004000 */
#define NXPREQ_CAPABILITIES     (1U << 15)      /* 0x00008000 */
#define NXPREQ_NEXUSADV_SIZE    (1U << 16)      /* 0x00010000 */
#define NXPREQ_IFINDEX          (1U << 17)      /* 0x00020000 */
#define NXPREQ_USER_CHANNEL     (1U << 18)      /* 0x00040000 */
#define NXPREQ_MAX_FRAGS        (1U << 19)      /* 0x00080000 */
#define NXPREQ_REJECT_ON_CLOSE  (1U << 20)      /* 0x00100000 */
#define NXPREQ_LARGE_BUF_SIZE   (1U << 21)      /* 0x00200000 */

/*
 * Bit-name table for nxpreg_requested: base byte (\020 == 16) followed by
 * <1-based bit number, in octal><name> entries, one per NXPREQ_* flag in
 * bit order.  A stale EXTRA_BUFS entry used to occupy bit 9 and shifted
 * PIPES and every later name one bit too high; entry N must name the flag
 * defined as (1U << (N - 1)).
 */
#define NXPREQ_BITS                                                     \
	"\020\01TX_RINGS\02RX_RINGS\03TX_SLOTS\04RX_SLOTS\05BUF_SIZE"   \
	"\06META_SIZE\07STATS_SIZE\010ANONYMOUS\011PIPES"               \
	"\012EXTENSIONS\013MHINTS\014FLOWADV_MAX\015QMAP"               \
	"\016CKSUM_OFFLOAD\017USER_PKT_POOL\020CAPABS\021NEXUSADV_SIZE" \
	"\022IFINDEX\023USER_CHANNEL\024MAX_FRAGS\025REJ_CLOSE\026LBUF_SIZE"
188 
189 /*
190  * Nexus provider registration entry.  Also argument for NXOPT_NEXUS_PROV_ENTRY.
191  */
192 struct nxprov_reg_ent {
193 	uuid_t          npre_prov_uuid;         /* Nexus provider UUID */
194 	struct nxprov_params npre_prov_params;  /* Nexus provider parameters */
195 };
196 
/*
 * Nexus options.  "(get)" options retrieve information and "(set)"
 * options apply a change; each option has its own argument structure.
 */
#define NXOPT_NEXUS_PROV_LIST   1       /* (get) list all provider UUIDS */
#define NXOPT_NEXUS_PROV_ENTRY  2       /* (get) get params of a provider */
#define NXOPT_NEXUS_LIST        20      /* (get) list all Nexus instances */
#define NXOPT_NEXUS_BIND        21      /* (set) bind a Nexus port */
#define NXOPT_NEXUS_UNBIND      22      /* (set) unbind a Nexus port */
#define NXOPT_CHANNEL_LIST      30      /* (get) list all Channel instances */
#define NXOPT_NEXUS_CONFIG      40      /* (set) nexus specific config */
207 
208 /*
209  * Argument structure for NXOPT_NEXUS_PROV_LIST.
210  */
211 struct nxprov_list_req {
212 	uint32_t                nrl_num_regs;   /* array count */
213 	uint32_t                __nrl_align;    /* reserved */
214 	user_addr_t             nrl_regs;       /* array of nexus_reg_ent */
215 };
216 
217 /*
218  * Argument structure for NXOPT_NEXUS_LIST.
219  */
220 struct nx_list_req {
221 	uuid_t                  nl_prov_uuid;   /* nexus provider UUID */
222 	uint32_t                nl_num_nx_uuids; /* array count */
223 	uint32_t                __nl_align;     /* reserved */
224 	user_addr_t             nl_nx_uuids;    /* array of nexus UUIDs */
225 };
226 
227 /*
228  * Argument structure for NXOPT_NEXUS_BIND.
229  */
230 struct nx_bind_req {
231 	uuid_t                  nb_nx_uuid;     /* nexus instance UUID */
232 	nexus_port_t            nb_port;        /* nexus instance port */
233 	uint32_t                nb_flags;       /* NBR_* match flags */
234 	uuid_t                  nb_exec_uuid;   /* executable UUID */
235 	user_addr_t             nb_key;         /* key blob */
236 	uint32_t                nb_key_len;     /* key blob length */
237 	pid_t                   nb_pid;         /* client PID */
238 };
239 
/* Match flags for nx_bind_req.nb_flags. */
#define NBR_MATCH_PID           0x1             /* match against PID */
#define NBR_MATCH_EXEC_UUID     0x2             /* match executable's UUID */
#define NBR_MATCH_KEY           0x4             /* match key blob */
#ifdef KERNEL
/* every NBR_MATCH_* bit defined above */
#define NBR_MATCH_MASK          \
	(NBR_MATCH_PID | NBR_MATCH_EXEC_UUID | NBR_MATCH_KEY)
#endif /* KERNEL */
247 
248 /*
249  * Argument structure for NXOPT_NEXUS_UNBIND.
250  */
251 struct nx_unbind_req {
252 	uuid_t                  nu_nx_uuid;     /* nexus instance UUID */
253 	nexus_port_t            nu_port;        /* nexus instance port */
254 };
255 
256 /*
257  * Argument structure for NXOPT_CHANNEL_LIST.
258  */
259 struct ch_list_req {
260 	uuid_t                  cl_nx_uuid;     /* nexus instance UUID */
261 	uint32_t                cl_num_ch_uuids; /* array count */
262 	uint32_t                __cl_align;     /* reserved */
263 	user_addr_t             cl_ch_uuids;    /* array of channel UUIDs */
264 };
265 
/*
 * Skywalk Nexus MIB
 *
 * We use the name MIB to refer to things that we expose to the outside
 * world for management/telemetry purposes.
 *
 * The general rule of thumb for this MIB structure is to keep it simple.
 * Try to avoid variable-length fields and hierarchical representation
 * wherever possible.  A simple retrieval returns either a single object
 * (simple type or fixed-length compound type) or an array of objects of
 * the same type.  This makes parsing the retrieved information a lot
 * easier.
 *
 * For now, we use sysctl as the way the MIB interface is exposed.
 * Additional interfaces could be syscall (e.g. via a nexus controller), etc.
 */

/* MIB types (32-bit); see nexus_mib_filter.nmf_type. */
#define NXMIB_NETIF_STATS       (((uint32_t)1) << 1)
#define NXMIB_FSW_STATS         (((uint32_t)1) << 2)
#define NXMIB_FLOW              (((uint32_t)1) << 3)
#define NXMIB_FLOW_ADV          (((uint32_t)1) << 4)
#define NXMIB_FLOW_OWNER        (((uint32_t)1) << 5)
#define NXMIB_FLOW_ROUTE        (((uint32_t)1) << 6)
#define NXMIB_LLINK_LIST        (((uint32_t)1) << 7)
#define NXMIB_NETIF_QUEUE_STATS (((uint32_t)1) << 8)

#define NXMIB_QUIC_STATS        (((uint32_t)1) << 27)
#define NXMIB_UDP_STATS         (((uint32_t)1) << 28)
#define NXMIB_TCP_STATS         (((uint32_t)1) << 29)
#define NXMIB_IP6_STATS         (((uint32_t)1) << 30)
#define NXMIB_IP_STATS          (((uint32_t)1) << 31)

/* union of the five protocol statistics types above (bits 27..31) */
#define NXMIB_USERSTACK_STATS   (NXMIB_IP_STATS | NXMIB_IP6_STATS \
	                        | NXMIB_TCP_STATS | NXMIB_UDP_STATS \
	                        | NXMIB_QUIC_STATS)

/* Filter field selectors (64-bit); see nexus_mib_filter.nmf_bitmap. */
#define NXMIB_FILTER_NX_UUID    (((uint64_t)1) << 0)
#define NXMIB_FILTER_FLOW_ID    (((uint64_t)1) << 1)
#define NXMIB_FILTER_PID        (((uint64_t)1) << 2)
#define NXMIB_FILTER_INFO_TUPLE (((uint64_t)1) << 3)
304 
305 /*
306  * Nexus MIB filter: used to retrieve only those matching the filter value.
307  */
308 struct nexus_mib_filter {
309 	uint32_t                nmf_type;       /* MIB type */
310 	uint64_t                nmf_bitmap;     /* bitmap of following fields */
311 
312 	uuid_t                  nmf_nx_uuid;    /* nexus instance uuid */
313 	uuid_t                  nmf_flow_id;    /* flow rule id */
314 	pid_t                   nmf_pid;        /* owner pid */
315 	struct info_tuple       nmf_info_tuple; /* flow tuple */
316 };
317 
/*
 * Nexus-specific config commands, carried in nx_cfg_req.nc_cmd.
 * The numeric values are explicit; do not renumber.
 */
typedef enum {
	NXCFG_CMD_ATTACH =      0,      /* attach an object to a nexus */
	NXCFG_CMD_DETACH =      1,      /* detach an object from a nexus */
	NXCFG_CMD_FLOW_ADD =    20,     /* add a flow to a nexus */
	NXCFG_CMD_FLOW_DEL =    21,     /* delete a flow from nexus */
	NXCFG_CMD_FLOW_CONFIG = 22,     /* configure a flow in nexus */
	NXCFG_CMD_NETEM =       30,     /* config packet scheduler */
	NXCFG_CMD_GET_LLINK_INFO = 40,  /* collect llink info */
} nxcfg_cmd_t;
330 
331 #define NX_SPEC_IF_NAMELEN      64
332 
333 /*
334  * Argument struture for NXOPT_NEXUS_CONFIG.
335  */
336 struct nx_cfg_req {
337 	uuid_t                  nc_nx_uuid;     /* nexus instance UUID */
338 	nxcfg_cmd_t             nc_cmd;         /* NXCFG_CMD_* */
339 	uint32_t                nc_req_len;     /* size of request struct */
340 	user_addr_t             nc_req;         /* address of request struct */
341 };
342 
343 /*
344  * Argument structure for NXCFG_CMD_{ATTACH,DETACH}
345  */
346 struct nx_spec_req {
347 	union {
348 		char            nsru_name[NX_SPEC_IF_NAMELEN];
349 		uuid_t          nsru_uuid;
350 #ifdef KERNEL
351 		struct ifnet    *nsru_ifp;
352 #endif /* KERNEL */
353 	} nsr_u __attribute__((aligned(sizeof(uint64_t))));     /* in */
354 	uint32_t                nsr_flags;                      /* in */
355 	uuid_t                  nsr_if_uuid;    /* attach: out, detach: in */
356 };
357 #define nsr_name        nsr_u.nsru_name
358 #define nsr_uuid        nsr_u.nsru_uuid
359 #ifdef KERNEL
360 #define nsr_ifp         nsr_u.nsru_ifp
361 #endif /* KERNEL */
362 
/* Flags for nx_spec_req.nsr_flags. */
#define NXSPECREQ_UUID          0x1     /* nsr_u holds a uuid_t, else ifname */
#define NXSPECREQ_HOST          0x2     /* attach to host port */
#ifdef KERNEL
/* mask off userland-settable bits */
#define NXSPECREQ_MASK          (NXSPECREQ_UUID | NXSPECREQ_HOST)
/* not part of NXSPECREQ_MASK */
#define NXSPECREQ_IFP           0x1000  /* (embryonic) ifnet */
#endif /* KERNEL */
370 
/*
 * Structure for flow demuxing for parent/child flows.
 *
 * fdp_mask and fdp_value are fixed-size buffers of FLOW_DEMUX_MAX_LEN
 * bytes each.
 * NOTE(review): presumably fdp_len bytes at fdp_offset are compared,
 * under fdp_mask, against fdp_value -- confirm against the classifier.
 */
#define FLOW_DEMUX_MAX_LEN      32
struct flow_demux_pattern {
	uint16_t                fdp_offset;
	uint16_t                fdp_len;
	uint8_t                 fdp_mask[FLOW_DEMUX_MAX_LEN];
	uint8_t                 fdp_value[FLOW_DEMUX_MAX_LEN];
};

/* number of pattern slots in nx_flow_req.nfr_flow_demux_patterns */
#define MAX_FLOW_DEMUX_PATTERN  4
383 
384 /*
385  * Argument structure for NXCFG_CMD_FLOW_{BIND,UNBIND}
386  */
387 struct nx_flow_req {
388 	nexus_port_t                    nfr_nx_port;
389 	uint16_t                        nfr_ethertype;
390 	ether_addr_t                    nfr_etheraddr;
391 	union sockaddr_in_4_6           nfr_saddr;
392 	union sockaddr_in_4_6           nfr_daddr;
393 	uint8_t                         nfr_ip_protocol;
394 	uint8_t                         nfr_transport_protocol;
395 	uint16_t                        nfr_flags;
396 	uuid_t                          nfr_flow_uuid;
397 	packet_svc_class_t              nfr_svc_class;
398 	uuid_t                          nfr_euuid;
399 	uint32_t                        nfr_policy_id;
400 	uint32_t                        nfr_skip_policy_id;
401 	pid_t                           nfr_epid;
402 	flowadv_idx_t                   nfr_flowadv_idx;
403 	uuid_t                          nfr_bind_key;
404 	uint64_t                        nfr_qset_id;
405 	uuid_t                          nfr_parent_flow_uuid;
406 	uint8_t                         nfr_flow_demux_count;
407 	struct flow_demux_pattern       nfr_flow_demux_patterns[MAX_FLOW_DEMUX_PATTERN];
408 	// below is reserved kernel-only fields
409 	union {
410 #ifdef KERNEL
411 		struct {
412 			char                    _nfr_kernel_field_start[0];
413 			void                    *nfr_context;
414 			struct proc             *nfr_proc;
415 			struct ifnet            *nfr_ifp;
416 			struct flow_route       *nfr_route;
417 			struct ns_token         *nfr_port_reservation;
418 			struct protons_token    *nfr_proto_reservation;
419 			struct flow_stats       *nfr_flow_stats;
420 			pid_t                   nfr_pid;
421 			uint32_t                nfr_saddr_gencnt;
422 			void                    *nfr_ipsec_reservation;
423 			uint32_t                nfr_inp_flowhash;
424 #if defined(__LP64__)
425 			uint8_t                 _nfr_kernel_pad[4];
426 #else  /* !__LP64__ */
427 			uint8_t                 _nfr_kernel_pad[36];
428 #endif /* !__LP64__ */
429 			char                    _nfr_kernel_field_end[0];
430 		};
431 #endif  /* KERNEL */
432 		struct {
433 			uint8_t                 _nfr_opaque[80];
434 			/* should be at the same offset as _nfr_kernel_field_end above */
435 			char                    _nfr_common_field_end[0];
436 		};
437 	};
438 };
439 
/*
 * valid flags for nfr_flags (a 16-bit field)
 *
 * NOTE(review): 0x1000 is also used by the kernel-only
 * NXFLOWREQF_EXT_PORT_RSV, defined further down in this header; confirm
 * that the two can never be confused.
 */
#define NXFLOWREQF_TRACK          0x0001  /* enable state tracking */
#define NXFLOWREQF_QOS_MARKING    0x0002  /* allow qos marking */
#define NXFLOWREQF_FILTER         0x0004  /* interpose filter */
#define NXFLOWREQF_CUSTOM_ETHER   0x0008  /* custom ethertype */
#define NXFLOWREQF_IPV6_ULA       0x0010  /* ipv6 ula */
#define NXFLOWREQF_LISTENER       0x0020  /* listener */
#define NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION 0x0040  /* override system address selection */
#define NXFLOWREQF_USE_STABLE_ADDRESS     0x0080  /* if override local, use stable address */
#define NXFLOWREQF_FLOWADV        0x0100  /* allocate flow advisory */
#define NXFLOWREQF_ASIS           0x0200  /* create flow as is in nfr */
#define NXFLOWREQF_LOW_LATENCY    0x0400  /* low latency flow */
#define NXFLOWREQF_NOWAKEFROMSLEEP        0x0800  /* Don't wake for traffic to this flow */
/*
 * NOTE(review): the original comment on REUSEPORT duplicated the
 * NOWAKEFROMSLEEP text; presumably this requests port reuse -- confirm.
 */
#define NXFLOWREQF_REUSEPORT      0x1000  /* reuse port (see note) */
#define NXFLOWREQF_PARENT         0x4000  /* Parent flow */

/*
 * Bit-name table for nfr_flags: base byte (\020 == 16) followed by
 * <1-based bit number, in octal><name> entries.
 */
#define NXFLOWREQF_BITS                                                   \
	"\020\01TRACK\02QOS_MARKING\03FILTER\04CUSTOM_ETHER\05IPV6_ULA" \
	"\06LISTENER\07OVERRIDE_ADDRESS_SELECTION\010USE_STABLE_ADDRESS" \
	"\011ALLOC_FLOWADV\012ASIS\013LOW_LATENCY\014NOWAKEUPFROMSLEEP" \
	"\015REUSEPORT\017PARENT"
461 
/*
 * IP address storage for flow keys: 16 bytes viewable as an IPv4
 * address, an IPv6 address, or as 8/16/32/64-bit word arrays.
 */
struct flow_ip_addr {
	union {
		struct in_addr  _v4;
		struct in6_addr _v6;
		uint8_t         _addr8[16];
		uint16_t        _addr16[8];
		uint32_t        _addr32[4];
		uint64_t        _addr64[2];
	};
};
472 
473 struct flow_key {
474 	uint16_t                fk_mask;
475 	uint8_t                 fk_ipver;
476 	uint8_t                 fk_proto;
477 	uint16_t                fk_sport;
478 	uint16_t                fk_dport;
479 	struct flow_ip_addr     fk_src;
480 	struct flow_ip_addr     fk_dst;
481 	uint64_t                fk_pad[1];      /* pad to 48 bytes */
482 } __attribute__((__aligned__(16)));
483 
/* shorthand accessors for the IPv4/IPv6 views of a flow_key's addresses */
#define fk_src4                 fk_src._v4
#define fk_dst4                 fk_dst._v4
#define fk_src6                 fk_src._v6
#define fk_dst6                 fk_dst._v6

#define FLOW_KEY_LEN            sizeof(struct flow_key)
#define FK_HASH_SEED            0xabcd
491 
/* Individual flow_key.fk_mask bits, one per key field. */
#define FKMASK_IPVER            (((uint16_t)1) << 0)
#define FKMASK_PROTO            (((uint16_t)1) << 1)
#define FKMASK_SRC              (((uint16_t)1) << 2)
#define FKMASK_SPORT            (((uint16_t)1) << 3)
#define FKMASK_DST              (((uint16_t)1) << 4)
#define FKMASK_DPORT            (((uint16_t)1) << 5)

/*
 * Composite masks.  Each N-tuple mask extends the previous one:
 *   2TUPLE = proto + sport, 3TUPLE adds ipver + src,
 *   4TUPLE adds dport, 5TUPLE adds dst.
 * The IPFLOW masks carry no ports:
 *   IPFLOW1 = proto, IPFLOW2 adds ipver + src, IPFLOW3 adds dst.
 */
#define FKMASK_2TUPLE           (FKMASK_PROTO | FKMASK_SPORT)
#define FKMASK_3TUPLE           (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_4TUPLE           (FKMASK_3TUPLE | FKMASK_DPORT)
#define FKMASK_5TUPLE           (FKMASK_4TUPLE | FKMASK_DST)
#define FKMASK_IPFLOW1          FKMASK_PROTO
#define FKMASK_IPFLOW2          (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_IPFLOW3          (FKMASK_IPFLOW2 | FKMASK_DST)
/* equals the number of composite masks defined above */
#define FKMASK_IDX_MAX          7
507 
/*
 * Constant flow keys defined elsewhere, one per composite FKMASK_*
 * selector.
 */
extern const struct flow_key fk_mask_2tuple;
extern const struct flow_key fk_mask_3tuple;
extern const struct flow_key fk_mask_4tuple;
extern const struct flow_key fk_mask_5tuple;
extern const struct flow_key fk_mask_ipflow1;
extern const struct flow_key fk_mask_ipflow2;
extern const struct flow_key fk_mask_ipflow3;
515 
/*
 * Zero a flow key via sk_zero_48().  The compile-time assertions pin
 * struct flow_key at exactly 48 bytes, which is what sk_zero_48()'s
 * name implies it clears.  _fk is evaluated once.
 */
#define FLOW_KEY_CLEAR(_fk) do {                                        \
	_CASSERT(FLOW_KEY_LEN == 48);                                   \
	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));              \
	sk_zero_48(_fk);                                                \
} while (0)
521 
522 #ifdef KERNEL
/* mask off userland-settable bits */
#define NXFLOWREQF_MASK \
    (NXFLOWREQF_TRACK | NXFLOWREQF_QOS_MARKING | NXFLOWREQF_FILTER | \
    NXFLOWREQF_CUSTOM_ETHER | NXFLOWREQF_IPV6_ULA | NXFLOWREQF_LISTENER | \
    NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION | NXFLOWREQF_USE_STABLE_ADDRESS | \
    NXFLOWREQF_FLOWADV | NXFLOWREQF_LOW_LATENCY | NXFLOWREQF_NOWAKEFROMSLEEP | \
    NXFLOWREQF_REUSEPORT | NXFLOWREQF_PARENT)

/*
 * Kernel-only nfr_flags bits.
 *
 * NOTE(review): NXFLOWREQF_EXT_PORT_RSV shares the value 0x1000 with
 * NXFLOWREQF_REUSEPORT, which is included in NXFLOWREQF_MASK above.  A
 * request that sets REUSEPORT therefore also tests true for
 * EXT_PORT_RSV.  nfr_flags is 16 bits wide and 0x8000 is the only value
 * not used in this header; confirm whether the overlap is intended
 * before renumbering.
 */
#define NXFLOWREQF_EXT_PORT_RSV   0x1000  /* external port reservation */
#define NXFLOWREQF_EXT_PROTO_RSV  0x2000  /* external proto reservation */
533 
534 static inline void
nx_flow_req_internalize(struct nx_flow_req * req)535 nx_flow_req_internalize(struct nx_flow_req *req)
536 {
537 	_CASSERT(offsetof(struct nx_flow_req, _nfr_kernel_field_end) ==
538 	    offsetof(struct nx_flow_req, _nfr_common_field_end));
539 
540 	/* init kernel only fields */
541 	bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
542 	req->nfr_flags &= NXFLOWREQF_MASK;
543 	req->nfr_context = NULL;
544 	req->nfr_flow_stats = NULL;
545 	req->nfr_port_reservation = NULL;
546 }
547 
548 static inline void
nx_flow_req_externalize(struct nx_flow_req * req)549 nx_flow_req_externalize(struct nx_flow_req *req)
550 {
551 	/* neutralize kernel only fields */
552 	bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
553 	req->nfr_flags &= NXFLOWREQF_MASK;
554 }
555 #endif /* KERNEL */
556 
/*
 * Per-queue-set information; one element of nx_llink_info.nli_qset[].
 */
struct nx_qset_info {
	uint64_t        nqi_id;                 /* queue set ID */
	uint16_t        nqi_flags;              /* queue set flags */
	uint8_t         nqi_num_rx_queues;      /* # of RX queues */
	uint8_t         nqi_num_tx_queues;      /* # of TX queues */
};
563 
564 #define NETIF_LLINK_MAX_QSETS 256
565 struct nx_llink_info {
566 	uuid_t          nli_netif_uuid;            /* nexus netif instance uuid */
567 	uint64_t        nli_link_id;
568 	uint16_t        nli_link_id_internal;
569 	uint8_t         nli_state;
570 	uint8_t         nli_flags;
571 	uint16_t        nli_qset_cnt;
572 	struct nx_qset_info nli_qset[NETIF_LLINK_MAX_QSETS];
573 };
574 
575 #define NETIF_LLINK_INFO_VERSION  0x01
576 struct nx_llink_info_req {
577 	uint16_t        nlir_version;
578 	uint16_t        nlir_llink_cnt;
579 	struct nx_llink_info nlir_llink[__counted_by(nlir_llink_cnt)];
580 };
581 
582 /*
583  * Nexus controller descriptor.
584  */
585 struct nexus_controller {
586 #ifndef KERNEL
587 	int             ncd_fd;
588 	guardid_t       ncd_guard;
589 #else /* KERNEL */
590 	struct nxctl    *ncd_nxctl;
591 #endif /* KERNEL */
592 };
593 
/* For nexus ops without having to create a nexus controller */
#define __OS_NEXUS_SHARED_USER_CONTROLLER_FD (-1)
596 
/*
 * Nexus attributes.
 *
 * Every attribute is stored as a 64-bit value.  nxa_requested is a
 * bitmap of NXA_REQ_* flags.
 */
struct nexus_attr {
	uint64_t        nxa_requested;  /* customized attributes */
	uint64_t        nxa_tx_rings;   /* # of channel transmit rings */
	uint64_t        nxa_rx_rings;   /* # of channel receive rings */
	uint64_t        nxa_tx_slots;   /* # of slots per channel TX ring */
	uint64_t        nxa_rx_slots;   /* # of slots per channel RX ring */
	uint64_t        nxa_buf_size;   /* size of each buffer */
	uint64_t        nxa_meta_size;  /* size of metadata per buffer */
	uint64_t        nxa_stats_size; /* size of statistics region */
	uint64_t        nxa_anonymous;  /* bool: allow anonymous clients */
	uint64_t        nxa_pipes;      /* number of pipes */
	uint64_t        nxa_extensions; /* extension-specific attribute */
	uint64_t        nxa_mhints;     /* memory usage hints */
	uint64_t        nxa_ifindex;    /* network interface index */
	uint64_t        nxa_flowadv_max; /* max flow advisory entries */
	uint64_t        nxa_qmap;       /* queue mapping type */
	uint64_t        nxa_checksum_offload;   /* partial checksum offload */
	uint64_t        nxa_user_packet_pool;   /* user packet pool */
	uint64_t        nxa_nexusadv_size;      /* size of advisory region */
	uint64_t        nxa_user_channel;       /* user channel open allowed */
	uint64_t        nxa_max_frags;  /* max fragments per packet */
	/*
	 * Reject channel operations if the nexus peer has closed the channel.
	 * Valid only for user-pipe nexus.
	 */
	uint64_t        nxa_reject_on_close;
	uint64_t        nxa_large_buf_size;  /* size of large buffer */
};
628 
/*
 * Flags for nxa_requested; keep in sync with NXPREQ_* flags.
 * Note that these are 64-bit, whereas nxpreg_requested is
 * 32-bit wide; for now this won't matter.
 */
#define NXA_REQ_TX_RINGS        (1ULL << 0)     /* 0x0000000000000001 */
#define NXA_REQ_RX_RINGS        (1ULL << 1)     /* 0x0000000000000002 */
#define NXA_REQ_TX_SLOTS        (1ULL << 2)     /* 0x0000000000000004 */
#define NXA_REQ_RX_SLOTS        (1ULL << 3)     /* 0x0000000000000008 */
#define NXA_REQ_BUF_SIZE        (1ULL << 4)     /* 0x0000000000000010 */
#define NXA_REQ_META_SIZE       (1ULL << 5)     /* 0x0000000000000020 */
#define NXA_REQ_STATS_SIZE      (1ULL << 6)     /* 0x0000000000000040 */
#define NXA_REQ_ANONYMOUS       (1ULL << 7)     /* 0x0000000000000080 */
#define NXA_REQ_PIPES           (1ULL << 8)     /* 0x0000000000000100 */
#define NXA_REQ_EXTENSIONS      (1ULL << 9)     /* 0x0000000000000200 */
#define NXA_REQ_MHINTS          (1ULL << 10)    /* 0x0000000000000400 */
#define NXA_REQ_FLOWADV_MAX     (1ULL << 11)    /* 0x0000000000000800 */
#define NXA_REQ_QMAP            (1ULL << 12)    /* 0x0000000000001000 */
#define NXA_REQ_CHECKSUM_OFFLOAD (1ULL << 13)   /* 0x0000000000002000 */
#define NXA_REQ_USER_PACKET_POOL (1ULL << 14)   /* 0x0000000000004000 */
#define NXA_REQ_CAPABILITIES    (1ULL << 15)    /* 0x0000000000008000 */
#define NXA_REQ_NEXUSADV_SIZE   (1ULL << 16)    /* 0x0000000000010000 */
#define NXA_REQ_IFINDEX         (1ULL << 17)    /* 0x0000000000020000 */
#define NXA_REQ_USER_CHANNEL    (1ULL << 18)    /* 0x0000000000040000 */
#define NXA_REQ_MAX_FRAGS       (1ULL << 19)    /* 0x0000000000080000 */
#define NXA_REQ_REJECT_ON_CLOSE (1ULL << 20)    /* 0x0000000000100000 */
#define NXA_REQ_LARGE_BUF_SIZE  (1ULL << 21)    /* 0x0000000000200000 */
656 
657 #ifndef KERNEL
658 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
659 __BEGIN_DECLS
660 /* system calls */
661 extern int __nexus_open(struct nxctl_init *init, const uint32_t init_len);
662 extern int __nexus_register(int ctl, struct nxprov_reg *reg,
663     const uint32_t reg_len, uuid_t *prov_uuid, const uint32_t prov_uuid_len);
664 extern int __nexus_deregister(int ctl, const uuid_t prov_uuid,
665     const uint32_t prov_uuid_len);
666 extern int __nexus_create(int ctl, const uuid_t prov_uuid,
667     const uint32_t prov_uuid_len, uuid_t *nx_uuid, const uint32_t nx_uuid_len);
668 extern int __nexus_destroy(int ctl, const uuid_t nx_uuid,
669     const uint32_t nx_uuid_len);
670 extern int __nexus_get_opt(int ctl, const uint32_t opt, void *aoptval,
671     uint32_t *aoptlen);
672 extern int __nexus_set_opt(int ctl, const uint32_t opt, const void *aoptval,
673     const uint32_t optlen);
674 
675 /* private nexus controller APIs */
676 extern int __os_nexus_ifattach(const nexus_controller_t ctl,
677     const uuid_t nx_uuid, const char *ifname, const uuid_t netif_uuid,
678     boolean_t host, uuid_t *nx_if_uuid);
679 extern int __os_nexus_ifdetach(const nexus_controller_t ctl,
680     const uuid_t nx_uuid, const uuid_t nx_if_uuid);
681 
682 /* private flow APIs */
683 extern int __os_nexus_flow_add(const nexus_controller_t ncd,
684     const uuid_t nx_uuid, const struct nx_flow_req *nfr);
685 extern int __os_nexus_flow_del(const nexus_controller_t ncd,
686     const uuid_t nx_uuid, const struct nx_flow_req *nfr);
687 extern int __os_nexus_get_llink_info(const nexus_controller_t ncd,
688     const uuid_t nx_uuid, const struct nx_llink_info_req *nlir, size_t len);
689 extern int os_nexus_flow_set_wake_from_sleep(const uuid_t nx_uuid,
690     const uuid_t flow_uuid, bool enable);
691 
692 __END_DECLS
693 #endif  /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
694 #endif /* !KERNEL */
695 #if defined(LIBSYSCALL_INTERFACE) || defined(BSD_KERNEL_PRIVATE)
696 #include <skywalk/nexus_common.h>
697 #include <skywalk/nexus_ioctl.h>
698 #endif /* LIBSYSCALL_INTERFACE || BSD_KERNEL_PRIVATE */
699 #endif /* PRIVATE || BSD_KERNEL_PRIVATE */
700 #endif /* !_SKYWALK_OS_NEXUS_PRIVATE_H_ */
701