1 /*
2 * Copyright (c) 2015-2022 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #ifndef _SKYWALK_OS_NEXUS_PRIVATE_H_
30 #define _SKYWALK_OS_NEXUS_PRIVATE_H_
31
32 #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
33 #include <stdbool.h>
34 #include <sys/guarded.h>
35 #include <skywalk/os_channel.h>
36 #include <skywalk/os_nexus.h>
37 #include <netinet/in.h>
38 #include <netinet/in_private.h>
39 #include <netinet/tcp.h>
40 #include <netinet/tcp_private.h>
41 #include <net/ethernet.h>
42
43 /*
44 * Ephemeral port, for NEXUSDOMCAPF_EPHEMERAL capable nexus.
45 */
46 #define NEXUS_PORT_ANY ((nexus_port_t)-1)
47 #define NEXUS_PORT_MAX ((nexus_port_t)-1)
48
49 typedef nexus_port_t nexus_port_size_t;
50
51 #define NEXUSCTL_INIT_VERSION_1 1
52 #define NEXUSCTL_INIT_CURRENT_VERSION NEXUSCTL_INIT_VERSION_1
53
54 /*
55 * Nexus controller init parameters.
56 */
57 struct nxctl_init {
58 uint32_t ni_version; /* in: NEXUSCTL_INIT_CURRENT_VERSION */
59 uint32_t __ni_align; /* reserved */
60 guardid_t ni_guard; /* out: guard ID */
61 };
62
/*
 * Nexus metadata type.
 *
 * Packets are referenced through tagged pointers, and this type is encoded
 * in the tag together with the subtype.  Objects are required to be aligned
 * on at least a 64-byte boundary; 4 tag bits are used in total, 2 for the
 * type and 2 for the subtype.  Consequently no enumerator here may exceed 3.
 */
typedef enum {
	NEXUS_META_TYPE_INVALID  = 0,   /* invalid type */
	NEXUS_META_TYPE_QUANTUM  = 1,   /* struct __quantum */
	NEXUS_META_TYPE_PACKET   = 2,   /* struct __packet */
	NEXUS_META_TYPE_RESERVED = 3,   /* for future */
	NEXUS_META_TYPE_MAX      = NEXUS_META_TYPE_RESERVED
} nexus_meta_type_t;
79
/*
 * Nexus metadata subtype.  The largest enumerator is 3
 * (NEXUS_META_SUBTYPE_MAX).
 */
typedef enum {
	NEXUS_META_SUBTYPE_INVALID  = 0,        /* invalid subtype */
	NEXUS_META_SUBTYPE_PAYLOAD  = 1,        /* normal payload mode */
	NEXUS_META_SUBTYPE_RAW      = 2,        /* raw (link layer) mode */
	NEXUS_META_SUBTYPE_RESERVED = 3,        /* for future */
	NEXUS_META_SUBTYPE_MAX      = NEXUS_META_SUBTYPE_RESERVED
} nexus_meta_subtype_t;
87
88 /*
89 * Nexus provider parameters.
90 */
91 struct nxprov_params {
92 nexus_name_t nxp_name; /* name */
93 uint32_t nxp_namelen; /* length of name */
94 nexus_type_t nxp_type; /* NEXUS_TYPE_* */
95 nexus_meta_type_t nxp_md_type; /* NEXUS_META_TYPE_* */
96 nexus_meta_subtype_t nxp_md_subtype; /* NEXUS_META_SUBTYPE_* */
97 uint32_t nxp_flags; /* NXPF_* */
98 uint32_t nxp_format; /* provider-defined */
99 uint32_t nxp_tx_rings; /* # of channel transmit rings */
100 uint32_t nxp_rx_rings; /* # of channel receive rings */
101 uint32_t nxp_tx_slots; /* # of slots per channel TX ring */
102 uint32_t nxp_rx_slots; /* # of slots per channel RX ring */
103 uint32_t nxp_buf_size; /* size of each buffer */
104 uint32_t nxp_meta_size; /* size of metadata per slot */
105 uint32_t nxp_stats_size; /* size of statistics region */
106 uint32_t nxp_pipes; /* number of pipes */
107 nexus_extension_t nxp_extensions; /* extension specific parameter(s) */
108 uint32_t nxp_mhints; /* memory usage hints */
109 uint32_t nxp_ifindex; /* network interface index */
110 uint32_t nxp_flowadv_max; /* max flow advisory entries */
111 nexus_qmap_type_t nxp_qmap; /* queue mapping type */
112 uint32_t nxp_capabilities; /* nexus capabilities */
113 uint32_t nxp_nexusadv_size; /* nexus advisory region size */
114 uint32_t nxp_max_frags; /* max fragments per packet */
115 /*
116 * reject channel operations if the peer has closed the channel.
117 * Only valid for user-pipe nexus.
118 */
119 boolean_t nxp_reject_on_close;
120 uint32_t nxp_large_buf_size; /* size of large buffer */
121 } __attribute__((aligned(64)));
122
/*
 * Valid values for nxp_flags.
 */
#define NXPF_ANONYMOUS          0x1     /* allow anonymous channel clients */
#define NXPF_USER_CHANNEL       0x2     /* allow user channel open */
#define NXPF_NETIF_LLINK        0x4     /* use netif logical link */
#ifdef KERNEL
/* every flag bit defined above */
#define NXPF_MASK    (NXPF_ANONYMOUS | NXPF_USER_CHANNEL | NXPF_NETIF_LLINK)
#endif /* KERNEL */

/*
 * Bit-name string for nxp_flags, laid out in the BSD "%b" convention:
 * the first byte is the numeric base (\020 == 16), followed by one
 * <1-based bit number><name> pair per flag.  Keep this in sync with the
 * NXPF_* flags above; NXPF_NETIF_LLINK (0x4) is bit number 3.
 */
#define NXPF_BITS \
	"\020\01ANONYMOUS\02USER_CHANNEL\03NETIF_LLINK"
133
/*
 * Valid values for nxp_capabilities.
 */
#define NXPCAP_CHECKSUM_PARTIAL 0x1     /* partial checksum */
#define NXPCAP_USER_PACKET_POOL 0x2     /* user packet pool */
#define NXPCAP_USER_CHANNEL     0x4     /* allow user channel access */

/*
 * Bit-name string for nxp_capabilities: a base byte (\020) followed by
 * one <1-based bit number><name> entry per capability.
 */
#define NXPCAP_BITS             \
	"\020"                  \
	"\01CHECKSUM_PARTIAL"   \
	"\02USER_PKT_POOL"      \
	"\03USER_CHANNEL"
141
142 #define NXPROV_REG_VERSION_1 1
143 #define NXPROV_REG_CURRENT_VERSION NXPROV_REG_VERSION_1
144
145 /*
146 * Nexus provider registration parameters.
147 */
148 struct nxprov_reg {
149 uint32_t nxpreg_version; /* NXPROV_REG_CURRENT_VERSION */
150 uint32_t nxpreg_requested; /* customized attributes */
151 struct nxprov_params nxpreg_params; /* Nexus provider parameters */
152 };
153
/*
 * Flags for nxpreg_requested; keep in sync with NXA_REQ_* flags.
 * Note that these are 32-bit, whereas nxa_requested is 64-bit
 * wide; for now this won't matter.
 */
#define NXPREQ_TX_RINGS         (1U << 0)       /* 0x00000001 */
#define NXPREQ_RX_RINGS         (1U << 1)       /* 0x00000002 */
#define NXPREQ_TX_SLOTS         (1U << 2)       /* 0x00000004 */
#define NXPREQ_RX_SLOTS         (1U << 3)       /* 0x00000008 */
#define NXPREQ_BUF_SIZE         (1U << 4)       /* 0x00000010 */
#define NXPREQ_META_SIZE        (1U << 5)       /* 0x00000020 */
#define NXPREQ_STATS_SIZE       (1U << 6)       /* 0x00000040 */
#define NXPREQ_ANONYMOUS        (1U << 7)       /* 0x00000080 */
#define NXPREQ_PIPES            (1U << 8)       /* 0x00000100 */
#define NXPREQ_EXTENSIONS       (1U << 9)       /* 0x00000200 */
#define NXPREQ_MHINTS           (1U << 10)      /* 0x00000400 */
#define NXPREQ_FLOWADV_MAX      (1U << 11)      /* 0x00000800 */
#define NXPREQ_QMAP             (1U << 12)      /* 0x00001000 */
#define NXPREQ_CHECKSUM_OFFLOAD (1U << 13)      /* 0x00002000 */
#define NXPREQ_USER_PACKET_POOL (1U << 14)      /* 0x00004000 */
#define NXPREQ_CAPABILITIES     (1U << 15)      /* 0x00008000 */
#define NXPREQ_NEXUSADV_SIZE    (1U << 16)      /* 0x00010000 */
#define NXPREQ_IFINDEX          (1U << 17)      /* 0x00020000 */
#define NXPREQ_USER_CHANNEL     (1U << 18)      /* 0x00040000 */
#define NXPREQ_MAX_FRAGS        (1U << 19)      /* 0x00080000 */
#define NXPREQ_REJECT_ON_CLOSE  (1U << 20)      /* 0x00100000 */
#define NXPREQ_LARGE_BUF_SIZE   (1U << 21)      /* 0x00200000 */

/*
 * Bit-name string for nxpreg_requested in the BSD "%b" layout: a base
 * byte (\020 == 16) followed by <1-based bit number, in octal><name>
 * pairs.  The bit number of a flag defined as (1U << n) is n + 1, so
 * NXPREQ_PIPES (1U << 8) is entry \011 and NXPREQ_LARGE_BUF_SIZE
 * (1U << 21) is entry \026.  There is no EXTRA_BUFS flag; an entry by that
 * name must not appear, otherwise every later name is shifted by one bit.
 */
#define NXPREQ_BITS \
	"\020\01TX_RINGS\02RX_RINGS\03TX_SLOTS\04RX_SLOTS\05BUF_SIZE"   \
	"\06META_SIZE\07STATS_SIZE\010ANONYMOUS\011PIPES\012EXTENSIONS" \
	"\013MHINTS\014FLOWADV_MAX\015QMAP\016CKSUM_OFFLOAD"            \
	"\017USER_PKT_POOL\020CAPABS\021NEXUSADV_SIZE\022IFINDEX"       \
	"\023USER_CHANNEL\024MAX_FRAGS\025REJ_CLOSE\026LBUF_SIZE"
188
189 /*
190 * Nexus provider registration entry. Also argument for NXOPT_NEXUS_PROV_ENTRY.
191 */
192 struct nxprov_reg_ent {
193 uuid_t npre_prov_uuid; /* Nexus provider UUID */
194 struct nxprov_params npre_prov_params; /* Nexus provider parameters */
195 };
196
/*
 * Nexus option identifiers.  Each is marked as a "get" or a "set" option
 * (cf. __nexus_get_opt() and __nexus_set_opt()).
 */
/* provider options */
#define NXOPT_NEXUS_PROV_LIST   1       /* (get) list all provider UUIDS */
#define NXOPT_NEXUS_PROV_ENTRY  2       /* (get) get params of a provider */
/* nexus instance options */
#define NXOPT_NEXUS_LIST        20      /* (get) list all Nexus instances */
#define NXOPT_NEXUS_BIND        21      /* (set) bind a Nexus port */
#define NXOPT_NEXUS_UNBIND      22      /* (set) unbind a Nexus port */
/* channel options */
#define NXOPT_CHANNEL_LIST      30      /* (get) list all Channel instances */
/* nexus-specific configuration */
#define NXOPT_NEXUS_CONFIG      40      /* (set) nexus specific config */
207
208 /*
209 * Argument structure for NXOPT_NEXUS_PROV_LIST.
210 */
211 struct nxprov_list_req {
212 uint32_t nrl_num_regs; /* array count */
213 uint32_t __nrl_align; /* reserved */
214 user_addr_t nrl_regs; /* array of nexus_reg_ent */
215 };
216
217 /*
218 * Argument structure for NXOPT_NEXUS_LIST.
219 */
220 struct nx_list_req {
221 uuid_t nl_prov_uuid; /* nexus provider UUID */
222 uint32_t nl_num_nx_uuids; /* array count */
223 uint32_t __nl_align; /* reserved */
224 user_addr_t nl_nx_uuids; /* array of nexus UUIDs */
225 };
226
227 /*
228 * Argument structure for NXOPT_NEXUS_BIND.
229 */
230 struct nx_bind_req {
231 uuid_t nb_nx_uuid; /* nexus instance UUID */
232 nexus_port_t nb_port; /* nexus instance port */
233 uint32_t nb_flags; /* NBR_* match flags */
234 uuid_t nb_exec_uuid; /* executable UUID */
235 user_addr_t nb_key; /* key blob */
236 uint32_t nb_key_len; /* key blob length */
237 pid_t nb_pid; /* client PID */
238 };
239
240 #define NBR_MATCH_PID 0x1 /* match against PID */
241 #define NBR_MATCH_EXEC_UUID 0x2 /* match executable's UUID */
242 #define NBR_MATCH_KEY 0x4 /* match key blob */
243 #ifdef KERNEL
244 #define NBR_MATCH_MASK \
245 (NBR_MATCH_PID | NBR_MATCH_EXEC_UUID | NBR_MATCH_KEY)
246 #endif /* KERNEL */
247
248 /*
249 * Argument structure for NXOPT_NEXUS_UNBIND.
250 */
251 struct nx_unbind_req {
252 uuid_t nu_nx_uuid; /* nexus instance UUID */
253 nexus_port_t nu_port; /* nexus instance port */
254 };
255
256 /*
257 * Argument structure for NXOPT_CHANNEL_LIST.
258 */
259 struct ch_list_req {
260 uuid_t cl_nx_uuid; /* nexus instance UUID */
261 uint32_t cl_num_ch_uuids; /* array count */
262 uint32_t __cl_align; /* reserved */
263 user_addr_t cl_ch_uuids; /* array of channel UUIDs */
264 };
265
/*
 * Skywalk Nexus MIB
 *
 * "MIB" here names whatever we expose to the outside world for
 * management/telemetry purposes.
 *
 * The guiding rule is to keep the MIB simple: avoid variable-length fields
 * and hierarchical representations where possible.  A simple retrieval
 * returns either one object (a simple type or a fixed-length compound type)
 * or an array of objects of a single type, which keeps parsing easy.
 *
 * For now the MIB is exposed through sysctl.  Other interfaces, such as a
 * syscall (e.g. via a nexus controller), could be added.
 */

/* MIB types: nexus / flow related (32-bit values) */
#define NXMIB_NETIF_STATS       ((uint32_t)1 << 1)
#define NXMIB_FSW_STATS         ((uint32_t)1 << 2)
#define NXMIB_FLOW              ((uint32_t)1 << 3)
#define NXMIB_FLOW_ADV          ((uint32_t)1 << 4)
#define NXMIB_FLOW_OWNER        ((uint32_t)1 << 5)
#define NXMIB_FLOW_ROUTE        ((uint32_t)1 << 6)
#define NXMIB_LLINK_LIST        ((uint32_t)1 << 7)
#define NXMIB_NETIF_QUEUE_STATS ((uint32_t)1 << 8)

/* MIB types: per-protocol statistics, allocated from the top bit down */
#define NXMIB_QUIC_STATS        ((uint32_t)1 << 27)
#define NXMIB_UDP_STATS         ((uint32_t)1 << 28)
#define NXMIB_TCP_STATS         ((uint32_t)1 << 29)
#define NXMIB_IP6_STATS         ((uint32_t)1 << 30)
#define NXMIB_IP_STATS          ((uint32_t)1 << 31)

/* union of all per-protocol statistics types */
#define NXMIB_USERSTACK_STATS                           \
	(NXMIB_IP_STATS | NXMIB_IP6_STATS |             \
	NXMIB_TCP_STATS | NXMIB_UDP_STATS |             \
	NXMIB_QUIC_STATS)

/* bits for nmf_bitmap in struct nexus_mib_filter (64-bit values) */
#define NXMIB_FILTER_NX_UUID    ((uint64_t)1 << 0)
#define NXMIB_FILTER_FLOW_ID    ((uint64_t)1 << 1)
#define NXMIB_FILTER_PID        ((uint64_t)1 << 2)
#define NXMIB_FILTER_INFO_TUPLE ((uint64_t)1 << 3)
304
305 /*
306 * Nexus MIB filter: used to retrieve only those matching the filter value.
307 */
308 struct nexus_mib_filter {
309 uint32_t nmf_type; /* MIB type */
310 uint64_t nmf_bitmap; /* bitmap of following fields */
311
312 uuid_t nmf_nx_uuid; /* nexus instance uuid */
313 uuid_t nmf_flow_id; /* flow rule id */
314 pid_t nmf_pid; /* owner pid */
315 struct info_tuple nmf_info_tuple; /* flow tuple */
316 };
317
/*
 * Nexus-specific configuration commands (nc_cmd of struct nx_cfg_req).
 * The values are grouped in decades by what they act on.
 */
typedef enum {
	/* object attachment */
	NXCFG_CMD_ATTACH         = 0,   /* attach an object to a nexus */
	NXCFG_CMD_DETACH         = 1,   /* detach an object from a nexus */
	/* flows */
	NXCFG_CMD_FLOW_ADD       = 20,  /* add a flow to a nexus */
	NXCFG_CMD_FLOW_DEL       = 21,  /* delete a flow from nexus */
	NXCFG_CMD_FLOW_CONFIG    = 22,  /* configure a flow in nexus */
	/* packet scheduler */
	NXCFG_CMD_NETEM          = 30,  /* config packet scheduler */
	/* logical links */
	NXCFG_CMD_GET_LLINK_INFO = 40,  /* collect llink info */
} nxcfg_cmd_t;
330
331 #define NX_SPEC_IF_NAMELEN 64
332
333 /*
334 * Argument struture for NXOPT_NEXUS_CONFIG.
335 */
336 struct nx_cfg_req {
337 uuid_t nc_nx_uuid; /* nexus instance UUID */
338 nxcfg_cmd_t nc_cmd; /* NXCFG_CMD_* */
339 uint32_t nc_req_len; /* size of request struct */
340 user_addr_t nc_req; /* address of request struct */
341 };
342
343 /*
344 * Argument structure for NXCFG_CMD_{ATTACH,DETACH}
345 */
346 struct nx_spec_req {
347 union {
348 char nsru_name[NX_SPEC_IF_NAMELEN];
349 uuid_t nsru_uuid;
350 #ifdef KERNEL
351 struct ifnet *nsru_ifp;
352 #endif /* KERNEL */
353 } nsr_u __attribute__((aligned(sizeof(uint64_t)))); /* in */
354 uint32_t nsr_flags; /* in */
355 uuid_t nsr_if_uuid; /* attach: out, detach: in */
356 };
357 #define nsr_name nsr_u.nsru_name
358 #define nsr_uuid nsr_u.nsru_uuid
359 #ifdef KERNEL
360 #define nsr_ifp nsr_u.nsru_ifp
361 #endif /* KERNEL */
362
363 #define NXSPECREQ_UUID 0x1 /* nsr_name is uuid_t else ifname */
364 #define NXSPECREQ_HOST 0x2 /* attach to host port */
365 #ifdef KERNEL
366 /* mask off userland-settable bits */
367 #define NXSPECREQ_MASK (NXSPECREQ_UUID | NXSPECREQ_HOST)
368 #define NXSPECREQ_IFP 0x1000 /* (embryonic) ifnet */
369 #endif /* KERNEL */
370
/*
 * Pattern used to demultiplex parent/child flows: an offset and length
 * plus fixed-size mask and value byte arrays of FLOW_DEMUX_MAX_LEN each.
 */
#define FLOW_DEMUX_MAX_LEN      32
struct flow_demux_pattern {
	uint16_t        fdp_offset;
	uint16_t        fdp_len;
	uint8_t         fdp_mask[FLOW_DEMUX_MAX_LEN];
	uint8_t         fdp_value[FLOW_DEMUX_MAX_LEN];
};

/* capacity of nfr_flow_demux_patterns[] in struct nx_flow_req */
#define MAX_FLOW_DEMUX_PATTERN  4
383
384 /*
385 * Argument structure for NXCFG_CMD_FLOW_{BIND,UNBIND}
386 */
387 struct nx_flow_req {
388 nexus_port_t nfr_nx_port;
389 uint16_t nfr_ethertype;
390 ether_addr_t nfr_etheraddr;
391 union sockaddr_in_4_6 nfr_saddr;
392 union sockaddr_in_4_6 nfr_daddr;
393 uint8_t nfr_ip_protocol;
394 uint8_t nfr_transport_protocol;
395 uint16_t nfr_flags;
396 uuid_t nfr_flow_uuid;
397 packet_svc_class_t nfr_svc_class;
398 uuid_t nfr_euuid;
399 uint32_t nfr_policy_id;
400 uint32_t nfr_skip_policy_id;
401 pid_t nfr_epid;
402 flowadv_idx_t nfr_flowadv_idx;
403 uuid_t nfr_bind_key;
404 uint64_t nfr_qset_id;
405 uuid_t nfr_parent_flow_uuid;
406 uint8_t nfr_flow_demux_count;
407 struct flow_demux_pattern nfr_flow_demux_patterns[MAX_FLOW_DEMUX_PATTERN];
408 // below is reserved kernel-only fields
409 union {
410 #ifdef KERNEL
411 struct {
412 char _nfr_kernel_field_start[0];
413 void *nfr_context;
414 struct proc *nfr_proc;
415 struct ifnet *nfr_ifp;
416 struct flow_route *nfr_route;
417 struct ns_token *nfr_port_reservation;
418 struct protons_token *nfr_proto_reservation;
419 struct flow_stats *nfr_flow_stats;
420 pid_t nfr_pid;
421 uint32_t nfr_saddr_gencnt;
422 void *nfr_ipsec_reservation;
423 uint32_t nfr_inp_flowhash;
424 #if defined(__LP64__)
425 uint8_t _nfr_kernel_pad[4];
426 #else /* !__LP64__ */
427 uint8_t _nfr_kernel_pad[36];
428 #endif /* !__LP64__ */
429 char _nfr_kernel_field_end[0];
430 };
431 #endif /* KERNEL */
432 struct {
433 uint8_t _nfr_opaque[80];
434 /* should be at the same offset as _nfr_kernel_field_end above */
435 char _nfr_common_field_end[0];
436 };
437 };
438 };
439
/*
 * Valid flags for nfr_flags (16 bits wide).
 */
#define NXFLOWREQF_TRACK          0x0001  /* enable state tracking */
#define NXFLOWREQF_QOS_MARKING    0x0002  /* allow qos marking */
#define NXFLOWREQF_FILTER         0x0004  /* interpose filter */
#define NXFLOWREQF_CUSTOM_ETHER   0x0008  /* custom ethertype */
#define NXFLOWREQF_IPV6_ULA       0x0010  /* ipv6 ula */
#define NXFLOWREQF_LISTENER       0x0020  /* listener */
#define NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION 0x0040  /* override system address selection */
#define NXFLOWREQF_USE_STABLE_ADDRESS 0x0080  /* if override local, use stable address */
#define NXFLOWREQF_FLOWADV        0x0100  /* allocate flow advisory */
#define NXFLOWREQF_ASIS           0x0200  /* create flow as is in nfr */
#define NXFLOWREQF_LOW_LATENCY    0x0400  /* low latency flow */
#define NXFLOWREQF_NOWAKEFROMSLEEP 0x0800 /* Don't wake for traffic to this flow */
#define NXFLOWREQF_REUSEPORT      0x1000  /* reuse port -- TODO confirm exact semantics */
#define NXFLOWREQF_PARENT         0x4000  /* Parent flow */

/*
 * Bit-name string for nfr_flags: a base byte (\020) followed by one
 * <1-based bit number><name> entry per flag.  Bit 14 (0x2000) has no entry.
 */
#define NXFLOWREQF_BITS                         \
	"\020"                                  \
	"\01TRACK"                              \
	"\02QOS_MARKING"                        \
	"\03FILTER"                             \
	"\04CUSTOM_ETHER"                       \
	"\05IPV6_ULA"                           \
	"\06LISTENER"                           \
	"\07OVERRIDE_ADDRESS_SELECTION"         \
	"\010USE_STABLE_ADDRESS"                \
	"\011ALLOC_FLOWADV"                     \
	"\012ASIS"                              \
	"\013LOW_LATENCY"                       \
	"\014NOWAKEUPFROMSLEEP"                 \
	"\015REUSEPORT"                         \
	"\017PARENT"
461
/*
 * IP address storage for a flow: one anonymous union that overlays an IPv4
 * address, an IPv6 address, and 8/16/32/64-bit word views of 16 bytes.
 */
struct flow_ip_addr {
	union {
		struct in_addr  _v4;            /* IPv4 view */
		struct in6_addr _v6;            /* IPv6 view */
		uint8_t         _addr8[16];     /* byte view */
		uint16_t        _addr16[8];     /* 16-bit word view */
		uint32_t        _addr32[4];     /* 32-bit word view */
		uint64_t        _addr64[2];     /* 64-bit word view */
	};
};
472
473 struct flow_key {
474 uint16_t fk_mask;
475 uint8_t fk_ipver;
476 uint8_t fk_proto;
477 uint16_t fk_sport;
478 uint16_t fk_dport;
479 struct flow_ip_addr fk_src;
480 struct flow_ip_addr fk_dst;
481 uint64_t fk_pad[1]; /* pad to 48 bytes */
482 } __attribute__((__aligned__(16)));
483
484 #define fk_src4 fk_src._v4
485 #define fk_dst4 fk_dst._v4
486 #define fk_src6 fk_src._v6
487 #define fk_dst6 fk_dst._v6
488
489 #define FLOW_KEY_LEN sizeof(struct flow_key)
490 #define FK_HASH_SEED 0xabcd
491
/* individual fk_mask bits, one per struct flow_key field (16-bit values) */
#define FKMASK_IPVER    ((uint16_t)1 << 0)
#define FKMASK_PROTO    ((uint16_t)1 << 1)
#define FKMASK_SRC      ((uint16_t)1 << 2)
#define FKMASK_SPORT    ((uint16_t)1 << 3)
#define FKMASK_DST      ((uint16_t)1 << 4)
#define FKMASK_DPORT    ((uint16_t)1 << 5)

/* port-based masks; each one extends the previous */
#define FKMASK_2TUPLE   (FKMASK_PROTO | FKMASK_SPORT)
#define FKMASK_3TUPLE   (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_4TUPLE   (FKMASK_3TUPLE | FKMASK_DPORT)
#define FKMASK_5TUPLE   (FKMASK_4TUPLE | FKMASK_DST)
/* port-less (IP flow) masks; each one extends the previous */
#define FKMASK_IPFLOW1  FKMASK_PROTO
#define FKMASK_IPFLOW2  (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_IPFLOW3  (FKMASK_IPFLOW2 | FKMASK_DST)
/* matches the number of composite masks defined above */
#define FKMASK_IDX_MAX  7
507
/*
 * Constant flow keys, one per composite FKMASK_* mask; they are only
 * declared here and defined elsewhere.
 */
extern const struct flow_key fk_mask_2tuple;
extern const struct flow_key fk_mask_3tuple;
extern const struct flow_key fk_mask_4tuple;
extern const struct flow_key fk_mask_5tuple;
extern const struct flow_key fk_mask_ipflow1;
extern const struct flow_key fk_mask_ipflow2;
extern const struct flow_key fk_mask_ipflow3;

/*
 * Clear the flow key that _fk points to by handing it to sk_zero_48().
 * The compile-time assertions pin the key length to the 48 bytes that
 * helper's name implies.
 */
#define FLOW_KEY_CLEAR(_fk) do {                                        \
	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));              \
	_CASSERT(FLOW_KEY_LEN == 48);                                   \
	sk_zero_48(_fk);                                                \
} while (0)
521
522 #ifdef KERNEL
/*
 * Userland-settable nfr_flags bits; nx_flow_req_internalize() and
 * nx_flow_req_externalize() clear everything outside this mask.
 * NXFLOWREQF_ASIS is not part of it.
 */
#define NXFLOWREQF_MASK                                                 \
	(NXFLOWREQF_TRACK |                                             \
	NXFLOWREQF_QOS_MARKING |                                        \
	NXFLOWREQF_FILTER |                                             \
	NXFLOWREQF_CUSTOM_ETHER |                                       \
	NXFLOWREQF_IPV6_ULA |                                           \
	NXFLOWREQF_LISTENER |                                           \
	NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION |                         \
	NXFLOWREQF_USE_STABLE_ADDRESS |                                 \
	NXFLOWREQF_FLOWADV |                                            \
	NXFLOWREQF_LOW_LATENCY |                                        \
	NXFLOWREQF_NOWAKEFROMSLEEP |                                    \
	NXFLOWREQF_REUSEPORT |                                          \
	NXFLOWREQF_PARENT)

/*
 * Kernel-only nfr_flags bits.
 *
 * NOTE(review): NXFLOWREQF_EXT_PORT_RSV has the same value (0x1000) as
 * NXFLOWREQF_REUSEPORT, which NXFLOWREQF_MASK lets through from userland.
 * Confirm that this overlap is intended.
 */
#define NXFLOWREQF_EXT_PORT_RSV   0x1000  /* external port reservation */
#define NXFLOWREQF_EXT_PROTO_RSV  0x2000  /* external proto reservation */
533
534 static inline void
nx_flow_req_internalize(struct nx_flow_req * req)535 nx_flow_req_internalize(struct nx_flow_req *req)
536 {
537 _CASSERT(offsetof(struct nx_flow_req, _nfr_kernel_field_end) ==
538 offsetof(struct nx_flow_req, _nfr_common_field_end));
539
540 /* init kernel only fields */
541 bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
542 req->nfr_flags &= NXFLOWREQF_MASK;
543 req->nfr_context = NULL;
544 req->nfr_flow_stats = NULL;
545 req->nfr_port_reservation = NULL;
546 }
547
548 static inline void
nx_flow_req_externalize(struct nx_flow_req * req)549 nx_flow_req_externalize(struct nx_flow_req *req)
550 {
551 /* neutralize kernel only fields */
552 bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
553 req->nfr_flags &= NXFLOWREQF_MASK;
554 }
555 #endif /* KERNEL */
556
/*
 * Description of one queue set: its id, flags and RX/TX queue counts.
 * Element type of nli_qset[] in struct nx_llink_info.
 */
struct nx_qset_info {
	uint64_t        nqi_id;
	uint16_t        nqi_flags;
	uint8_t         nqi_num_rx_queues;
	uint8_t         nqi_num_tx_queues;
};
563
564 #define NETIF_LLINK_MAX_QSETS 256
565 struct nx_llink_info {
566 uuid_t nli_netif_uuid; /* nexus netif instance uuid */
567 uint64_t nli_link_id;
568 uint16_t nli_link_id_internal;
569 uint8_t nli_state;
570 uint8_t nli_flags;
571 uint16_t nli_qset_cnt;
572 struct nx_qset_info nli_qset[NETIF_LLINK_MAX_QSETS];
573 };
574
575 #define NETIF_LLINK_INFO_VERSION 0x01
576 struct nx_llink_info_req {
577 uint16_t nlir_version;
578 uint16_t nlir_llink_cnt;
579 struct nx_llink_info nlir_llink[__counted_by(nlir_llink_cnt)];
580 };
581
582 /*
583 * Nexus controller descriptor.
584 */
585 struct nexus_controller {
586 #ifndef KERNEL
587 int ncd_fd;
588 guardid_t ncd_guard;
589 #else /* KERNEL */
590 struct nxctl *ncd_nxctl;
591 #endif /* KERNEL */
592 };
593
594 /* For nexus ops without having to create a nexus controller */
595 #define __OS_NEXUS_SHARED_USER_CONTROLLER_FD (-1)
596
/*
 * Nexus attributes.  Every member is 64 bits wide; nxa_requested holds
 * NXA_REQ_* bits marking which attributes have been customized.
 */
struct nexus_attr {
	uint64_t        nxa_requested;          /* customized attributes */
	uint64_t        nxa_tx_rings;           /* # of channel TX rings */
	uint64_t        nxa_rx_rings;           /* # of channel RX rings */
	uint64_t        nxa_tx_slots;           /* slots per channel TX ring */
	uint64_t        nxa_rx_slots;           /* slots per channel RX ring */
	uint64_t        nxa_buf_size;           /* size of each buffer */
	uint64_t        nxa_meta_size;          /* metadata size per buffer */
	uint64_t        nxa_stats_size;         /* statistics region size */
	uint64_t        nxa_anonymous;          /* bool: allow anonymous clients */
	uint64_t        nxa_pipes;              /* number of pipes */
	uint64_t        nxa_extensions;         /* extension-specific attribute */
	uint64_t        nxa_mhints;             /* memory usage hints */
	uint64_t        nxa_ifindex;            /* network interface index */
	uint64_t        nxa_flowadv_max;        /* max flow advisory entries */
	uint64_t        nxa_qmap;               /* queue mapping type */
	uint64_t        nxa_checksum_offload;   /* partial checksum offload */
	uint64_t        nxa_user_packet_pool;   /* user packet pool */
	uint64_t        nxa_nexusadv_size;      /* size of advisory region */
	uint64_t        nxa_user_channel;       /* user channel open allowed */
	uint64_t        nxa_max_frags;          /* max fragments per packet */
	/*
	 * When set, channel operations are rejected once the nexus peer
	 * has closed the channel.  Valid only for user-pipe nexus.
	 */
	uint64_t        nxa_reject_on_close;
	uint64_t        nxa_large_buf_size;     /* size of large buffer */
};
628
/*
 * Flags for nxa_requested; keep in sync with the NXPREQ_* flags, which
 * use the same bit positions.  These are 64-bit, whereas nxpreg_requested
 * is 32-bit wide; for now this won't matter.
 */
#define NXA_REQ_TX_RINGS         (1ULL << 0)    /* 0x0000000000000001 */
#define NXA_REQ_RX_RINGS         (1ULL << 1)    /* 0x0000000000000002 */
#define NXA_REQ_TX_SLOTS         (1ULL << 2)    /* 0x0000000000000004 */
#define NXA_REQ_RX_SLOTS         (1ULL << 3)    /* 0x0000000000000008 */
#define NXA_REQ_BUF_SIZE         (1ULL << 4)    /* 0x0000000000000010 */
#define NXA_REQ_META_SIZE        (1ULL << 5)    /* 0x0000000000000020 */
#define NXA_REQ_STATS_SIZE       (1ULL << 6)    /* 0x0000000000000040 */
#define NXA_REQ_ANONYMOUS        (1ULL << 7)    /* 0x0000000000000080 */
#define NXA_REQ_PIPES            (1ULL << 8)    /* 0x0000000000000100 */
#define NXA_REQ_EXTENSIONS       (1ULL << 9)    /* 0x0000000000000200 */
#define NXA_REQ_MHINTS           (1ULL << 10)   /* 0x0000000000000400 */
#define NXA_REQ_FLOWADV_MAX      (1ULL << 11)   /* 0x0000000000000800 */
#define NXA_REQ_QMAP             (1ULL << 12)   /* 0x0000000000001000 */
#define NXA_REQ_CHECKSUM_OFFLOAD (1ULL << 13)   /* 0x0000000000002000 */
#define NXA_REQ_USER_PACKET_POOL (1ULL << 14)   /* 0x0000000000004000 */
#define NXA_REQ_CAPABILITIES     (1ULL << 15)   /* 0x0000000000008000 */
#define NXA_REQ_NEXUSADV_SIZE    (1ULL << 16)   /* 0x0000000000010000 */
#define NXA_REQ_IFINDEX          (1ULL << 17)   /* 0x0000000000020000 */
#define NXA_REQ_USER_CHANNEL     (1ULL << 18)   /* 0x0000000000040000 */
#define NXA_REQ_MAX_FRAGS        (1ULL << 19)   /* 0x0000000000080000 */
#define NXA_REQ_REJECT_ON_CLOSE  (1ULL << 20)   /* 0x0000000000100000 */
#define NXA_REQ_LARGE_BUF_SIZE   (1ULL << 21)   /* 0x0000000000200000 */
656
657 #ifndef KERNEL
658 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
659 __BEGIN_DECLS
660 /* system calls */
661 extern int __nexus_open(struct nxctl_init *init, const uint32_t init_len);
662 extern int __nexus_register(int ctl, struct nxprov_reg *reg,
663 const uint32_t reg_len, uuid_t *prov_uuid, const uint32_t prov_uuid_len);
664 extern int __nexus_deregister(int ctl, const uuid_t prov_uuid,
665 const uint32_t prov_uuid_len);
666 extern int __nexus_create(int ctl, const uuid_t prov_uuid,
667 const uint32_t prov_uuid_len, uuid_t *nx_uuid, const uint32_t nx_uuid_len);
668 extern int __nexus_destroy(int ctl, const uuid_t nx_uuid,
669 const uint32_t nx_uuid_len);
670 extern int __nexus_get_opt(int ctl, const uint32_t opt, void *aoptval,
671 uint32_t *aoptlen);
672 extern int __nexus_set_opt(int ctl, const uint32_t opt, const void *aoptval,
673 const uint32_t optlen);
674
675 /* private nexus controller APIs */
676 extern int __os_nexus_ifattach(const nexus_controller_t ctl,
677 const uuid_t nx_uuid, const char *ifname, const uuid_t netif_uuid,
678 boolean_t host, uuid_t *nx_if_uuid);
679 extern int __os_nexus_ifdetach(const nexus_controller_t ctl,
680 const uuid_t nx_uuid, const uuid_t nx_if_uuid);
681
682 /* private flow APIs */
683 extern int __os_nexus_flow_add(const nexus_controller_t ncd,
684 const uuid_t nx_uuid, const struct nx_flow_req *nfr);
685 extern int __os_nexus_flow_del(const nexus_controller_t ncd,
686 const uuid_t nx_uuid, const struct nx_flow_req *nfr);
687 extern int __os_nexus_get_llink_info(const nexus_controller_t ncd,
688 const uuid_t nx_uuid, const struct nx_llink_info_req *nlir, size_t len);
689 extern int os_nexus_flow_set_wake_from_sleep(const uuid_t nx_uuid,
690 const uuid_t flow_uuid, bool enable);
691
692 __END_DECLS
693 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
694 #endif /* !KERNEL */
695 #if defined(LIBSYSCALL_INTERFACE) || defined(BSD_KERNEL_PRIVATE)
696 #include <skywalk/nexus_common.h>
697 #include <skywalk/nexus_ioctl.h>
698 #endif /* LIBSYSCALL_INTERFACE || BSD_KERNEL_PRIVATE */
699 #endif /* PRIVATE || BSD_KERNEL_PRIVATE */
700 #endif /* !_SKYWALK_OS_NEXUS_PRIVATE_H_ */
701