xref: /xnu-8796.121.2/bsd/skywalk/nexus/os_nexus_private.h (revision c54f35ca767986246321eb901baf8f5ff7923f6a)
1 /*
2  * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #ifndef _SKYWALK_OS_NEXUS_PRIVATE_H_
30 #define _SKYWALK_OS_NEXUS_PRIVATE_H_
31 
32 #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
33 #include <sys/guarded.h>
34 #include <skywalk/os_channel.h>
35 #include <skywalk/os_nexus.h>
36 #include <netinet/in.h>
37 #include <netinet/tcp.h>
38 #include <net/ethernet.h>
39 
40 /*
41  * Ephemeral port, for NEXUSDOMCAPF_EPHEMERAL capable nexus.
42  */
43 #define NEXUS_PORT_ANY  ((nexus_port_t)-1)
44 #define NEXUS_PORT_MAX  ((nexus_port_t)-1)
45 
46 typedef nexus_port_t nexus_port_size_t;
47 
48 #define NEXUSCTL_INIT_VERSION_1         1
49 #define NEXUSCTL_INIT_CURRENT_VERSION   NEXUSCTL_INIT_VERSION_1
50 
51 /*
52  * Nexus controller init parameters.
53  */
54 struct nxctl_init {
55 	uint32_t        ni_version;     /* in: NEXUSCTL_INIT_CURRENT_VERSION */
56 	uint32_t        __ni_align;     /* reserved */
57 	guardid_t       ni_guard;       /* out: guard ID */
58 };
59 
/*
 * Nexus metadata type.
 *
 * Packets are referenced through tagged pointers, and this type is encoded
 * in the tag together with the subtype.  Object addresses are required to
 * be aligned on a 64-byte boundary at the minimum; that leaves a total of
 * 4 bits, 2 for the type and 2 for the subtype.  No enumerator here may
 * therefore exceed 3.
 */
typedef enum {
	NEXUS_META_TYPE_INVALID  = 0,   /* invalid type */
	NEXUS_META_TYPE_QUANTUM  = 1,   /* struct __quantum */
	NEXUS_META_TYPE_PACKET   = 2,   /* struct __packet */
	NEXUS_META_TYPE_RESERVED = 3,   /* for future */
	NEXUS_META_TYPE_MAX      = NEXUS_META_TYPE_RESERVED
} nexus_meta_type_t;
76 
/*
 * Nexus metadata subtype.  Encoded alongside the metadata type, so the
 * largest enumerator is likewise 3.
 */
typedef enum {
	NEXUS_META_SUBTYPE_INVALID  = 0,        /* invalid subtype */
	NEXUS_META_SUBTYPE_PAYLOAD  = 1,        /* normal payload mode */
	NEXUS_META_SUBTYPE_RAW      = 2,        /* raw (link layer) mode */
	NEXUS_META_SUBTYPE_RESERVED = 3,        /* for future */
	NEXUS_META_SUBTYPE_MAX      = NEXUS_META_SUBTYPE_RESERVED
} nexus_meta_subtype_t;
84 
85 /*
86  * Nexus provider parameters.
87  */
88 struct nxprov_params {
89 	nexus_name_t    nxp_name;       /* name */
90 	uint32_t        nxp_namelen;    /* length of name */
91 	nexus_type_t    nxp_type;       /* NEXUS_TYPE_* */
92 	nexus_meta_type_t nxp_md_type;  /* NEXUS_META_TYPE_* */
93 	nexus_meta_subtype_t nxp_md_subtype; /* NEXUS_META_SUBTYPE_* */
94 	uint32_t        nxp_flags;      /* NXPF_* */
95 	uint32_t        nxp_format;     /* provider-defined */
96 	uint32_t        nxp_tx_rings;   /* # of channel transmit rings */
97 	uint32_t        nxp_rx_rings;   /* # of channel receive rings */
98 	uint32_t        nxp_tx_slots;   /* # of slots per channel TX ring */
99 	uint32_t        nxp_rx_slots;   /* # of slots per channel RX ring */
100 	uint32_t        nxp_buf_size;   /* size of each buffer */
101 	uint32_t        nxp_meta_size;  /* size of metadata per slot */
102 	uint32_t        nxp_stats_size; /* size of statistics region */
103 	uint32_t        nxp_pipes;      /* number of pipes */
104 	nexus_extension_t nxp_extensions;  /* extension specific parameter(s) */
105 	uint32_t        nxp_mhints;        /* memory usage hints */
106 	uint32_t        nxp_ifindex;       /* network interface index */
107 	uint32_t        nxp_flowadv_max;   /* max flow advisory entries */
108 	nexus_qmap_type_t nxp_qmap;        /* queue mapping type */
109 	uint32_t        nxp_capabilities;  /* nexus capabilities */
110 	uint32_t        nxp_nexusadv_size; /* nexus advisory region size */
111 	uint32_t        nxp_max_frags;     /* max fragments per packet */
112 	/*
113 	 * reject channel operations if the peer has closed the channel.
114 	 * Only valid for user-pipe nexus.
115 	 */
116 	boolean_t       nxp_reject_on_close;
117 	uint32_t        nxp_large_buf_size;   /* size of large buffer */
118 } __attribute__((aligned(64)));
119 
/*
 * Valid values for nxp_flags.
 */
#define NXPF_ANONYMOUS          0x1     /* allow anonymous channel clients */
#define NXPF_USER_CHANNEL       0x2     /* allow user channel open */
#define NXPF_NETIF_LLINK        0x4     /* use netif logical link */
#ifdef KERNEL
#define NXPF_MASK    (NXPF_ANONYMOUS | NXPF_USER_CHANNEL | NXPF_NETIF_LLINK)
#endif /* KERNEL */

/*
 * Bit-name string for nxp_flags: a leading base byte (\020) followed by
 * <1-origin bit number><name> pairs.  Every NXPF_* flag above must have
 * an entry here, including NXPF_NETIF_LLINK (bit 3).
 */
#define NXPF_BITS               \
	"\020\01ANONYMOUS\02USER_CHANNEL\03NETIF_LLINK"
130 
/*
 * Valid values for nxp_capabilities.
 */
#define NXPCAP_CHECKSUM_PARTIAL 0x1     /* partial checksum */
#define NXPCAP_USER_PACKET_POOL 0x2     /* user packet pool */
#define NXPCAP_USER_CHANNEL     0x4     /* allow user channel access */

/* Bit-name string for nxp_capabilities (base byte, then bit/name pairs). */
#define NXPCAP_BITS             \
	"\020\01CHECKSUM_PARTIAL\02USER_PKT_POOL\03USER_CHANNEL"
138 
139 #define NXPROV_REG_VERSION_1            1
140 #define NXPROV_REG_CURRENT_VERSION      NXPROV_REG_VERSION_1
141 
142 /*
143  * Nexus provider registration parameters.
144  */
145 struct nxprov_reg {
146 	uint32_t        nxpreg_version;         /* NXPROV_REG_CURRENT_VERSION */
147 	uint32_t        nxpreg_requested;       /* customized attributes */
148 	struct nxprov_params nxpreg_params;     /* Nexus provider parameters */
149 };
150 
/*
 * Flags for nxpreg_requested; keep in sync with NXA_REQ_* flags.
 * Note that these are 32-bit, whereas nxa_requested is 64-bit
 * wide; for now this won't matter.
 */
#define NXPREQ_TX_RINGS         (1U << 0)       /* 0x00000001 */
#define NXPREQ_RX_RINGS         (1U << 1)       /* 0x00000002 */
#define NXPREQ_TX_SLOTS         (1U << 2)       /* 0x00000004 */
#define NXPREQ_RX_SLOTS         (1U << 3)       /* 0x00000008 */
#define NXPREQ_BUF_SIZE         (1U << 4)       /* 0x00000010 */
#define NXPREQ_META_SIZE        (1U << 5)       /* 0x00000020 */
#define NXPREQ_STATS_SIZE       (1U << 6)       /* 0x00000040 */
#define NXPREQ_ANONYMOUS        (1U << 7)       /* 0x00000080 */
#define NXPREQ_PIPES            (1U << 8)       /* 0x00000100 */
#define NXPREQ_EXTENSIONS       (1U << 9)       /* 0x00000200 */
#define NXPREQ_MHINTS           (1U << 10)      /* 0x00000400 */
#define NXPREQ_FLOWADV_MAX      (1U << 11)      /* 0x00000800 */
#define NXPREQ_QMAP             (1U << 12)      /* 0x00001000 */
#define NXPREQ_CHECKSUM_OFFLOAD (1U << 13)      /* 0x00002000 */
#define NXPREQ_USER_PACKET_POOL (1U << 14)      /* 0x00004000 */
#define NXPREQ_CAPABILITIES     (1U << 15)      /* 0x00008000 */
#define NXPREQ_NEXUSADV_SIZE    (1U << 16)      /* 0x00010000 */
#define NXPREQ_IFINDEX          (1U << 17)      /* 0x00020000 */
#define NXPREQ_USER_CHANNEL     (1U << 18)      /* 0x00040000 */
#define NXPREQ_MAX_FRAGS        (1U << 19)      /* 0x00080000 */
#define NXPREQ_REJECT_ON_CLOSE  (1U << 20)      /* 0x00100000 */
#define NXPREQ_LARGE_BUF_SIZE   (1U << 21)      /* 0x00200000 */

/*
 * Bit-name string for nxpreg_requested: a leading base byte (\020)
 * followed by <1-origin bit number, in octal><name> pairs.  The entry for
 * flag (1U << n) must therefore be numbered n + 1; there is no EXTRA_BUFS
 * flag, so PIPES is \011 and each later name follows consecutively.
 */
#define NXPREQ_BITS                                                     \
	"\020\01TX_RINGS\02RX_RINGS\03TX_SLOTS\04RX_SLOTS\05BUF_SIZE"   \
	"\06META_SIZE\07STATS_SIZE\010ANONYMOUS\011PIPES\012EXTENSIONS" \
	"\013MHINTS\014FLOWADV_MAX\015QMAP\016CKSUM_OFFLOAD"            \
	"\017USER_PKT_POOL\020CAPABS\021NEXUSADV_SIZE\022IFINDEX"       \
	"\023USER_CHANNEL\024MAX_FRAGS\025REJ_CLOSE\026LBUF_SIZE"
185 
186 /*
187  * Nexus provider registration entry.  Also argument for NXOPT_NEXUS_PROV_ENTRY.
188  */
189 struct nxprov_reg_ent {
190 	uuid_t          npre_prov_uuid;         /* Nexus provider UUID */
191 	struct nxprov_params npre_prov_params;  /* Nexus provider parameters */
192 };
193 
/*
 * Nexus option identifiers; each is marked (get) or (set).
 */
/* provider queries */
#define NXOPT_NEXUS_PROV_LIST   1       /* (get) list all provider UUIDS */
#define NXOPT_NEXUS_PROV_ENTRY  2       /* (get) get params of a provider */
/* nexus instance queries and port binding */
#define NXOPT_NEXUS_LIST        20      /* (get) list all Nexus instances */
#define NXOPT_NEXUS_BIND        21      /* (set) bind a Nexus port */
#define NXOPT_NEXUS_UNBIND      22      /* (set) unbind a Nexus port */
/* channel queries */
#define NXOPT_CHANNEL_LIST      30      /* (get) list all Channel instances */
/* configuration */
#define NXOPT_NEXUS_CONFIG      40      /* (set) nexus specific config */
204 
205 /*
206  * Argument structure for NXOPT_NEXUS_PROV_LIST.
207  */
208 struct nxprov_list_req {
209 	uint32_t                nrl_num_regs;   /* array count */
210 	uint32_t                __nrl_align;    /* reserved */
211 	user_addr_t             nrl_regs;       /* array of nexus_reg_ent */
212 };
213 
214 /*
215  * Argument structure for NXOPT_NEXUS_LIST.
216  */
217 struct nx_list_req {
218 	uuid_t                  nl_prov_uuid;   /* nexus provider UUID */
219 	uint32_t                nl_num_nx_uuids; /* array count */
220 	uint32_t                __nl_align;     /* reserved */
221 	user_addr_t             nl_nx_uuids;    /* array of nexus UUIDs */
222 };
223 
224 /*
225  * Argument structure for NXOPT_NEXUS_BIND.
226  */
227 struct nx_bind_req {
228 	uuid_t                  nb_nx_uuid;     /* nexus instance UUID */
229 	nexus_port_t            nb_port;        /* nexus instance port */
230 	uint32_t                nb_flags;       /* NBR_* match flags */
231 	uuid_t                  nb_exec_uuid;   /* executable UUID */
232 	user_addr_t             nb_key;         /* key blob */
233 	uint32_t                nb_key_len;     /* key blob length */
234 	pid_t                   nb_pid;         /* client PID */
235 };
236 
237 #define NBR_MATCH_PID           0x1             /* match against PID */
238 #define NBR_MATCH_EXEC_UUID     0x2             /* match executable's UUID */
239 #define NBR_MATCH_KEY           0x4             /* match key blob */
240 #ifdef KERNEL
241 #define NBR_MATCH_MASK          \
242 	(NBR_MATCH_PID | NBR_MATCH_EXEC_UUID | NBR_MATCH_KEY)
243 #endif /* KERNEL */
244 
245 /*
246  * Argument structure for NXOPT_NEXUS_UNBIND.
247  */
248 struct nx_unbind_req {
249 	uuid_t                  nu_nx_uuid;     /* nexus instance UUID */
250 	nexus_port_t            nu_port;        /* nexus instance port */
251 };
252 
253 /*
254  * Argument structure for NXOPT_CHANNEL_LIST.
255  */
256 struct ch_list_req {
257 	uuid_t                  cl_nx_uuid;     /* nexus instance UUID */
258 	uint32_t                cl_num_ch_uuids; /* array count */
259 	uint32_t                __cl_align;     /* reserved */
260 	user_addr_t             cl_ch_uuids;    /* array of channel UUIDs */
261 };
262 
/*
 * Skywalk Nexus MIB
 *
 * "MIB" here refers to the things exposed to the outside world for
 * management/telemetry purposes.
 *
 * The guiding rule for this MIB structure is simplicity: avoid variable
 * length fields and hierarchical representation wherever possible.  A
 * simple retrieval returns either a single object (a simple type or a
 * fixed length compound type) or an array of objects of the same type,
 * which makes the retrieved information much easier to parse.
 *
 * For now the MIB interface is exposed through sysctl.  Additional
 * interfaces could be a syscall (e.g. via a nexus controller), etc.
 */

/* MIB types (32-bit) */
#define NXMIB_NETIF_STATS       (((uint32_t)1) << 1)
#define NXMIB_FSW_STATS         (((uint32_t)1) << 2)
#define NXMIB_FLOW              (((uint32_t)1) << 3)
#define NXMIB_FLOW_ADV          (((uint32_t)1) << 4)
#define NXMIB_FLOW_OWNER        (((uint32_t)1) << 5)
#define NXMIB_FLOW_ROUTE        (((uint32_t)1) << 6)
#define NXMIB_LLINK_LIST        (((uint32_t)1) << 7)
#define NXMIB_NETIF_QUEUE_STATS (((uint32_t)1) << 8)

/* MIB types allocated downward from the top bit */
#define NXMIB_QUIC_STATS        (((uint32_t)1) << 27)
#define NXMIB_UDP_STATS         (((uint32_t)1) << 28)
#define NXMIB_TCP_STATS         (((uint32_t)1) << 29)
#define NXMIB_IP6_STATS         (((uint32_t)1) << 30)
#define NXMIB_IP_STATS          (((uint32_t)1) << 31)

/* union of the IP, IPv6, TCP, UDP and QUIC statistics types */
#define NXMIB_USERSTACK_STATS   (NXMIB_IP_STATS | NXMIB_IP6_STATS \
	                        | NXMIB_TCP_STATS | NXMIB_UDP_STATS \
	                        | NXMIB_QUIC_STATS)

/* filter bitmap bits (64-bit) */
#define NXMIB_FILTER_NX_UUID    (((uint64_t)1) << 0)
#define NXMIB_FILTER_FLOW_ID    (((uint64_t)1) << 1)
#define NXMIB_FILTER_PID        (((uint64_t)1) << 2)
#define NXMIB_FILTER_INFO_TUPLE (((uint64_t)1) << 3)
301 
302 /*
303  * Nexus MIB filter: used to retrieve only those matching the filter value.
304  */
305 struct nexus_mib_filter {
306 	uint32_t                nmf_type;       /* MIB type */
307 	uint64_t                nmf_bitmap;     /* bitmap of following fields */
308 
309 	uuid_t                  nmf_nx_uuid;    /* nexus instance uuid */
310 	uuid_t                  nmf_flow_id;    /* flow rule id */
311 	pid_t                   nmf_pid;        /* owner pid */
312 	struct info_tuple       nmf_info_tuple; /* flow tuple */
313 };
314 
/*
 * Commands for nexus-specific configuration.
 */
typedef enum {
	/* attach an object to a nexus */
	NXCFG_CMD_ATTACH         = 0,
	/* detach an object from a nexus */
	NXCFG_CMD_DETACH         = 1,
	/* bind namespace to a nexus port */
	NXCFG_CMD_FLOW_ADD       = 20,
	/* unbind namespace from a nexus port */
	NXCFG_CMD_FLOW_DEL       = 21,
	/* config packet scheduler */
	NXCFG_CMD_NETEM          = 30,
	/* collect llink info */
	NXCFG_CMD_GET_LLINK_INFO = 40,
} nxcfg_cmd_t;
326 
327 #define NX_SPEC_IF_NAMELEN      64
328 
329 /*
330  * Argument struture for NXOPT_NEXUS_CONFIG.
331  */
332 struct nx_cfg_req {
333 	uuid_t                  nc_nx_uuid;     /* nexus instance UUID */
334 	nxcfg_cmd_t             nc_cmd;         /* NXCFG_CMD_* */
335 	uint32_t                nc_req_len;     /* size of request struct */
336 	user_addr_t             nc_req;         /* address of request struct */
337 };
338 
339 /*
340  * Argument structure for NXCFG_CMD_{ATTACH,DETACH}
341  */
342 struct nx_spec_req {
343 	union {
344 		char            nsru_name[NX_SPEC_IF_NAMELEN];
345 		uuid_t          nsru_uuid;
346 #ifdef KERNEL
347 		struct ifnet    *nsru_ifp;
348 #endif /* KERNEL */
349 	} nsr_u __attribute__((aligned(sizeof(uint64_t))));     /* in */
350 	uint32_t                nsr_flags;                      /* in */
351 	uuid_t                  nsr_if_uuid;    /* attach: out, detach: in */
352 };
353 #define nsr_name        nsr_u.nsru_name
354 #define nsr_uuid        nsr_u.nsru_uuid
355 #ifdef KERNEL
356 #define nsr_ifp         nsr_u.nsru_ifp
357 #endif /* KERNEL */
358 
359 #define NXSPECREQ_UUID          0x1     /* nsr_name is uuid_t else ifname */
360 #define NXSPECREQ_HOST          0x2     /* attach to host port */
361 #ifdef KERNEL
362 /* mask off userland-settable bits */
363 #define NXSPECREQ_MASK          (NXSPECREQ_UUID | NXSPECREQ_HOST)
364 #define NXSPECREQ_IFP           0x1000  /* (embryonic) ifnet */
365 #endif /* KERNEL */
366 
/*
 * Pattern used for demuxing flows between parent and child flows.
 */
#define FLOW_DEMUX_MAX_LEN      32
struct flow_demux_pattern {
	uint16_t                fdp_offset;
	uint16_t                fdp_len;
	/* mask and value buffers, FLOW_DEMUX_MAX_LEN bytes each */
	uint8_t                 fdp_mask[FLOW_DEMUX_MAX_LEN];
	uint8_t                 fdp_value[FLOW_DEMUX_MAX_LEN];
};

/* number of pattern slots carried in struct nx_flow_req */
#define MAX_FLOW_DEMUX_PATTERN  4
379 
380 /*
381  * Argument structure for NXCFG_CMD_FLOW_{BIND,UNBIND}
382  */
383 struct nx_flow_req {
384 	nexus_port_t                    nfr_nx_port;
385 	uint16_t                        nfr_ethertype;
386 	ether_addr_t                    nfr_etheraddr;
387 	union sockaddr_in_4_6           nfr_saddr;
388 	union sockaddr_in_4_6           nfr_daddr;
389 	uint8_t                         nfr_ip_protocol;
390 	uint8_t                         nfr_transport_protocol;
391 	uint16_t                        nfr_flags;
392 	uuid_t                          nfr_flow_uuid;
393 	packet_svc_class_t              nfr_svc_class;
394 	uuid_t                          nfr_euuid;
395 	uint32_t                        nfr_policy_id;
396 	pid_t                           nfr_epid;
397 	flowadv_idx_t                   nfr_flowadv_idx;
398 	uuid_t                          nfr_bind_key;
399 	uint64_t                        nfr_qset_id;
400 	uuid_t                          nfr_parent_flow_uuid;
401 	uint8_t                         nfr_flow_demux_count;
402 	struct flow_demux_pattern       nfr_flow_demux_patterns[MAX_FLOW_DEMUX_PATTERN];
403 	// below is reserved kernel-only fields
404 	union {
405 #ifdef KERNEL
406 		struct {
407 			char                    _nfr_kernel_field_start[0];
408 			void                    *nfr_context;
409 			struct proc             *nfr_proc;
410 			struct ifnet            *nfr_ifp;
411 			struct flow_route       *nfr_route;
412 			struct ns_token         *nfr_port_reservation;
413 			struct protons_token    *nfr_proto_reservation;
414 			struct flow_stats       *nfr_flow_stats;
415 			pid_t                   nfr_pid;
416 			uint32_t                nfr_saddr_gencnt;
417 			void                    *nfr_ipsec_reservation;
418 			uint32_t                nfr_inp_flowhash;
419 #if defined(__LP64__)
420 			uint8_t                 _nfr_kernel_pad[4];
421 #else  /* !__LP64__ */
422 			uint8_t                 _nfr_kernel_pad[36];
423 #endif /* !__LP64__ */
424 			char                    _nfr_kernel_field_end[0];
425 		};
426 #endif  /* KERNEL */
427 		struct {
428 			uint8_t                 _nfr_opaque[80];
429 			/* should be at the same offset as _nfr_kernel_field_end above */
430 			char                    _nfr_common_field_end[0];
431 		};
432 	};
433 };
434 
/*
 * Valid flags for nfr_flags.
 */
#define NXFLOWREQF_TRACK          0x0001  /* enable state tracking */
#define NXFLOWREQF_QOS_MARKING    0x0002  /* allow qos marking */
#define NXFLOWREQF_FILTER         0x0004  /* interpose filter */
#define NXFLOWREQF_CUSTOM_ETHER   0x0008  /* custom ethertype */
#define NXFLOWREQF_IPV6_ULA       0x0010  /* ipv6 ula */
#define NXFLOWREQF_LISTENER       0x0020  /* listener */
#define NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION 0x0040  /* override system address selection */
#define NXFLOWREQF_USE_STABLE_ADDRESS     0x0080  /* if override local, use stable address */
#define NXFLOWREQF_FLOWADV        0x0100  /* allocate flow advisory */
#define NXFLOWREQF_ASIS           0x0200  /* create flow as is in nfr */
#define NXFLOWREQF_LOW_LATENCY    0x0400  /* low latency flow */
#define NXFLOWREQF_NOWAKEFROMSLEEP        0x0800  /* Don't wake for traffic to this flow */
#define NXFLOWREQF_REUSEPORT      0x1000  /* reuse port -- NOTE(review): confirm exact semantics */
#define NXFLOWREQF_PARENT         0x4000  /* Parent flow */

/*
 * Bit-name string for nfr_flags: a base byte (\020) followed by
 * <1-origin bit number><name> pairs.  Bit 14 (0x2000) has no entry.
 */
#define NXFLOWREQF_BITS                                                   \
	"\020\01TRACK\02QOS_MARKING\03FILTER\04CUSTOM_ETHER\05IPV6_ULA" \
	"\06LISTENER\07OVERRIDE_ADDRESS_SELECTION\010USE_STABLE_ADDRESS" \
	"\011ALLOC_FLOWADV\012ASIS\013LOW_LATENCY\014NOWAKEUPFROMSLEEP" \
	"\015REUSEPORT\017PARENT"
456 
/*
 * IP address storage for a flow key: 16 bytes, viewable as an IPv4
 * address, an IPv6 address, or as raw 8/16/32/64-bit words.
 */
struct flow_ip_addr {
	union {
		/* typed views */
		struct in_addr  _v4;
		struct in6_addr _v6;
		/* raw word views */
		uint8_t         _addr8[16];
		uint16_t        _addr16[8];
		uint32_t        _addr32[4];
		uint64_t        _addr64[2];
	};
};
467 
468 struct flow_key {
469 	uint16_t                fk_mask;
470 	uint8_t                 fk_ipver;
471 	uint8_t                 fk_proto;
472 	uint16_t                fk_sport;
473 	uint16_t                fk_dport;
474 	struct flow_ip_addr     fk_src;
475 	struct flow_ip_addr     fk_dst;
476 	uint64_t                fk_pad[1];      /* pad to 48 bytes */
477 } __attribute__((__aligned__(16)));
478 
479 #define fk_src4                 fk_src._v4
480 #define fk_dst4                 fk_dst._v4
481 #define fk_src6                 fk_src._v6
482 #define fk_dst6                 fk_dst._v6
483 
484 #define FLOW_KEY_LEN            sizeof(struct flow_key)
485 #define FK_HASH_SEED            0xabcd
486 
/* Individual flow key mask bits, one per key component. */
#define FKMASK_IPVER            (((uint16_t)1) << 0)
#define FKMASK_PROTO            (((uint16_t)1) << 1)
#define FKMASK_SRC              (((uint16_t)1) << 2)
#define FKMASK_SPORT            (((uint16_t)1) << 3)
#define FKMASK_DST              (((uint16_t)1) << 4)
#define FKMASK_DPORT            (((uint16_t)1) << 5)

/* Tuple masks; each one extends the previous. */
#define FKMASK_2TUPLE           (FKMASK_PROTO | FKMASK_SPORT)
#define FKMASK_3TUPLE           (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_4TUPLE           (FKMASK_3TUPLE | FKMASK_DPORT)
#define FKMASK_5TUPLE           (FKMASK_4TUPLE | FKMASK_DST)
/* IP flow masks (no ports); each one extends the previous. */
#define FKMASK_IPFLOW1          FKMASK_PROTO
#define FKMASK_IPFLOW2          (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_IPFLOW3          (FKMASK_IPFLOW2 | FKMASK_DST)
#define FKMASK_IDX_MAX          7
502 
/* Constant flow keys, one per FKMASK_* tuple / IP flow combination. */
extern const struct flow_key fk_mask_2tuple;
extern const struct flow_key fk_mask_3tuple;
extern const struct flow_key fk_mask_4tuple;
extern const struct flow_key fk_mask_5tuple;
extern const struct flow_key fk_mask_ipflow1;
extern const struct flow_key fk_mask_ipflow2;
extern const struct flow_key fk_mask_ipflow3;

/*
 * Zero the flow key that _fk points to.  The compile-time assertions pin
 * the key size at 48 bytes, which is what sk_zero_48() clears.
 */
#define FLOW_KEY_CLEAR(_fk) do {                                        \
	_CASSERT(FLOW_KEY_LEN == 48);                                   \
	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));              \
	sk_zero_48(_fk);                                                \
} while (0)
516 
517 #ifdef KERNEL
/*
 * Mask of the nfr_flags bits that userland is allowed to set.
 */
#define NXFLOWREQF_MASK \
    (NXFLOWREQF_TRACK | NXFLOWREQF_QOS_MARKING | NXFLOWREQF_FILTER | \
    NXFLOWREQF_CUSTOM_ETHER | NXFLOWREQF_IPV6_ULA | NXFLOWREQF_LISTENER | \
    NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION | NXFLOWREQF_USE_STABLE_ADDRESS | \
    NXFLOWREQF_FLOWADV | NXFLOWREQF_LOW_LATENCY | NXFLOWREQF_REUSEPORT | \
    NXFLOWREQF_PARENT)

/*
 * Additional nfr_flags bits.
 * NOTE(review): NXFLOWREQF_EXT_PORT_RSV has the same value (0x1000) as
 * NXFLOWREQF_REUSEPORT, which is part of NXFLOWREQF_MASK -- confirm that
 * the overlap is intended.
 */
#define NXFLOWREQF_EXT_PORT_RSV   0x1000  /* external port reservation */
#define NXFLOWREQF_EXT_PROTO_RSV  0x2000  /* external proto reservation */
528 
529 static inline void
nx_flow_req_internalize(struct nx_flow_req * req)530 nx_flow_req_internalize(struct nx_flow_req *req)
531 {
532 	/* init kernel only fields */
533 	bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
534 	req->nfr_flags &= NXFLOWREQF_MASK;
535 	req->nfr_context = NULL;
536 	req->nfr_flow_stats = NULL;
537 	req->nfr_port_reservation = NULL;
538 }
539 
540 static inline void
nx_flow_req_externalize(struct nx_flow_req * req)541 nx_flow_req_externalize(struct nx_flow_req *req)
542 {
543 	/* neutralize kernel only fields */
544 	bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
545 	req->nfr_flags &= NXFLOWREQF_MASK;
546 }
547 #endif /* KERNEL */
548 
/*
 * Per-queue-set information: identifier, flags, and the number of RX and
 * TX queues.
 */
struct nx_qset_info {
	uint64_t        nqi_id;
	uint16_t        nqi_flags;
	uint8_t         nqi_num_rx_queues;
	uint8_t         nqi_num_tx_queues;
};
555 
556 #define NETIF_LLINK_MAX_QSETS 256
557 struct nx_llink_info {
558 	uuid_t          nli_netif_uuid;            /* nexus netif instance uuid */
559 	uint64_t        nli_link_id;
560 	uint16_t        nli_link_id_internal;
561 	uint8_t         nli_state;
562 	uint8_t         nli_flags;
563 	uint16_t        nli_qset_cnt;
564 	struct nx_qset_info nli_qset[NETIF_LLINK_MAX_QSETS];
565 };
566 
567 #define NETIF_LLINK_INFO_VERSION  0x01
568 struct nx_llink_info_req {
569 	uint16_t        nlir_version;
570 	uint16_t        nlir_llink_cnt;
571 	struct nx_llink_info nlir_llink[0];
572 };
573 
574 /*
575  * Nexus controller descriptor.
576  */
577 struct nexus_controller {
578 #ifndef KERNEL
579 	int             ncd_fd;
580 	guardid_t       ncd_guard;
581 #else /* KERNEL */
582 	struct nxctl    *ncd_nxctl;
583 #endif /* KERNEL */
584 };
585 
/*
 * Attributes of a nexus.  Every attribute is stored as a 64-bit value.
 */
struct nexus_attr {
	/* customized attributes (NXA_REQ_* bits) */
	uint64_t        nxa_requested;

	/* channel ring geometry */
	uint64_t        nxa_tx_rings;   /* # of channel transmit rings */
	uint64_t        nxa_rx_rings;   /* # of channel receive rings */
	uint64_t        nxa_tx_slots;   /* # of slots per channel TX ring */
	uint64_t        nxa_rx_slots;   /* # of slots per channel RX ring */

	/* region sizes */
	uint64_t        nxa_buf_size;   /* size of each buffer */
	uint64_t        nxa_meta_size;  /* size of metadata per buffer */
	uint64_t        nxa_stats_size; /* size of statistics region */

	uint64_t        nxa_anonymous;  /* bool: allow anonymous clients */
	uint64_t        nxa_pipes;      /* number of pipes */
	uint64_t        nxa_extensions; /* extension-specific attribute */
	uint64_t        nxa_mhints;     /* memory usage hints */
	uint64_t        nxa_ifindex;    /* network interface index */
	uint64_t        nxa_flowadv_max; /* max flow advisory entries */
	uint64_t        nxa_qmap;       /* queue mapping type */
	uint64_t        nxa_checksum_offload;   /* partial checksum offload */
	uint64_t        nxa_user_packet_pool;   /* user packet pool */
	uint64_t        nxa_nexusadv_size;      /* size of advisory region */
	uint64_t        nxa_user_channel;       /* user channel open allowed */
	uint64_t        nxa_max_frags;  /* max fragments per packet */

	/*
	 * When set, channel operations are rejected once the nexus peer
	 * has closed the channel.  Valid only for user-pipe nexus.
	 */
	uint64_t        nxa_reject_on_close;

	uint64_t        nxa_large_buf_size;  /* size of large buffer */
};
617 
/*
 * Flags for nxa_requested; keep in sync with NXPREQ_* flags.
 * Note that these are 64-bit, whereas nxpreg_requested is
 * 32-bit wide; for now this won't matter.
 */
#define NXA_REQ_TX_RINGS        (1ULL << 0)     /* 0x0000000000000001 */
#define NXA_REQ_RX_RINGS        (1ULL << 1)     /* 0x0000000000000002 */
#define NXA_REQ_TX_SLOTS        (1ULL << 2)     /* 0x0000000000000004 */
#define NXA_REQ_RX_SLOTS        (1ULL << 3)     /* 0x0000000000000008 */
#define NXA_REQ_BUF_SIZE        (1ULL << 4)     /* 0x0000000000000010 */
#define NXA_REQ_META_SIZE       (1ULL << 5)     /* 0x0000000000000020 */
#define NXA_REQ_STATS_SIZE      (1ULL << 6)     /* 0x0000000000000040 */
#define NXA_REQ_ANONYMOUS       (1ULL << 7)     /* 0x0000000000000080 */
#define NXA_REQ_PIPES           (1ULL << 8)     /* 0x0000000000000100 */
#define NXA_REQ_EXTENSIONS      (1ULL << 9)     /* 0x0000000000000200 */
#define NXA_REQ_MHINTS          (1ULL << 10)    /* 0x0000000000000400 */
#define NXA_REQ_FLOWADV_MAX     (1ULL << 11)    /* 0x0000000000000800 */
#define NXA_REQ_QMAP            (1ULL << 12)    /* 0x0000000000001000 */
#define NXA_REQ_CHECKSUM_OFFLOAD (1ULL << 13)   /* 0x0000000000002000 */
#define NXA_REQ_USER_PACKET_POOL (1ULL << 14)   /* 0x0000000000004000 */
#define NXA_REQ_CAPABILITIES    (1ULL << 15)    /* 0x0000000000008000 */
#define NXA_REQ_NEXUSADV_SIZE   (1ULL << 16)    /* 0x0000000000010000 */
#define NXA_REQ_IFINDEX         (1ULL << 17)    /* 0x0000000000020000 */
#define NXA_REQ_USER_CHANNEL    (1ULL << 18)    /* 0x0000000000040000 */
#define NXA_REQ_MAX_FRAGS       (1ULL << 19)    /* 0x0000000000080000 */
#define NXA_REQ_REJECT_ON_CLOSE (1ULL << 20)    /* 0x0000000000100000 */
#define NXA_REQ_LARGE_BUF_SIZE  (1ULL << 21)    /* 0x0000000000200000 */
645 
646 #ifndef KERNEL
647 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
648 __BEGIN_DECLS
649 /* system calls */
650 extern int __nexus_open(struct nxctl_init *init, const uint32_t init_len);
651 extern int __nexus_register(int ctl, struct nxprov_reg *reg,
652     const uint32_t reg_len, uuid_t *prov_uuid, const uint32_t prov_uuid_len);
653 extern int __nexus_deregister(int ctl, const uuid_t prov_uuid,
654     const uint32_t prov_uuid_len);
655 extern int __nexus_create(int ctl, const uuid_t prov_uuid,
656     const uint32_t prov_uuid_len, uuid_t *nx_uuid, const uint32_t nx_uuid_len);
657 extern int __nexus_destroy(int ctl, const uuid_t nx_uuid,
658     const uint32_t nx_uuid_len);
659 extern int __nexus_get_opt(int ctl, const uint32_t opt, void *aoptval,
660     uint32_t *aoptlen);
661 extern int __nexus_set_opt(int ctl, const uint32_t opt, const void *aoptval,
662     const uint32_t optlen);
663 
664 /* private nexus controller APIs */
665 extern int __os_nexus_ifattach(const nexus_controller_t ctl,
666     const uuid_t nx_uuid, const char *ifname, const uuid_t netif_uuid,
667     boolean_t host, uuid_t *nx_if_uuid);
668 extern int __os_nexus_ifdetach(const nexus_controller_t ctl,
669     const uuid_t nx_uuid, const uuid_t nx_if_uuid);
670 
671 /* private flow APIs */
672 extern int __os_nexus_flow_add(const nexus_controller_t ncd,
673     const uuid_t nx_uuid, const struct nx_flow_req *nfr);
674 extern int __os_nexus_flow_del(const nexus_controller_t ncd,
675     const uuid_t nx_uuid, const struct nx_flow_req *nfr);
676 extern int __os_nexus_get_llink_info(const nexus_controller_t ncd,
677     const uuid_t nx_uuid, const struct nx_llink_info_req *nlir, size_t len);
678 
679 __END_DECLS
680 #endif  /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
681 #endif /* !KERNEL */
682 #if defined(LIBSYSCALL_INTERFACE) || defined(BSD_KERNEL_PRIVATE)
683 #include <skywalk/nexus_common.h>
684 #include <skywalk/nexus_ioctl.h>
685 #endif /* LIBSYSCALL_INTERFACE || BSD_KERNEL_PRIVATE */
686 #endif /* PRIVATE || BSD_KERNEL_PRIVATE */
687 #endif /* !_SKYWALK_OS_NEXUS_PRIVATE_H_ */
688