1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #ifndef _SKYWALK_OS_NEXUS_PRIVATE_H_
30 #define _SKYWALK_OS_NEXUS_PRIVATE_H_
31
32 #if defined(PRIVATE) || defined(BSD_KERNEL_PRIVATE)
33 #include <sys/guarded.h>
34 #include <skywalk/os_channel.h>
35 #include <skywalk/os_nexus.h>
36 #include <netinet/in.h>
37 #include <netinet/tcp.h>
38 #include <net/ethernet.h>
39
/*
 * Ephemeral port value, for use with a NEXUSDOMCAPF_EPHEMERAL capable
 * nexus.  Expands to the all-ones value of nexus_port_t.
 */
#define NEXUS_PORT_ANY                  ((nexus_port_t)-1)

/*
 * Layout versions of struct nxctl_init.  Callers fill ni_version with
 * NEXUSCTL_INIT_CURRENT_VERSION.
 */
#define NEXUSCTL_INIT_VERSION_1         1
#define NEXUSCTL_INIT_CURRENT_VERSION   NEXUSCTL_INIT_VERSION_1
47
48 /*
49 * Nexus controller init parameters.
50 */
51 struct nxctl_init {
52 uint32_t ni_version; /* in: NEXUSCTL_INIT_CURRENT_VERSION */
53 uint32_t __ni_align; /* reserved */
54 guardid_t ni_guard; /* out: guard ID */
55 };
56
/*
 * Nexus metadata type.
 *
 * Packets are referenced through tagged pointers, and this type is
 * encoded in the tag together with the subtype.  Object addresses are
 * required to be aligned on a 64-byte boundary at the minimum, which
 * leaves a total of 4 bits for the tag: 2 for the type and 2 for the
 * subtype.  The maximum enum value is therefore limited to 3.
 */
typedef enum {
	/* invalid type */
	NEXUS_META_TYPE_INVALID         = 0,
	/* struct __quantum */
	NEXUS_META_TYPE_QUANTUM         = 1,
	/* struct __packet */
	NEXUS_META_TYPE_PACKET          = 2,
	/* for future */
	NEXUS_META_TYPE_RESERVED        = 3,
	/* highest valid value; must fit in 2 bits */
	NEXUS_META_TYPE_MAX             = NEXUS_META_TYPE_RESERVED
} nexus_meta_type_t;
73
/*
 * Nexus metadata subtype.  The highest value is 3, i.e. it fits in 2 bits.
 */
typedef enum {
	/* invalid subtype */
	NEXUS_META_SUBTYPE_INVALID      = 0,
	/* normal payload mode */
	NEXUS_META_SUBTYPE_PAYLOAD      = 1,
	/* raw (link layer) mode */
	NEXUS_META_SUBTYPE_RAW          = 2,
	/* for future */
	NEXUS_META_SUBTYPE_RESERVED     = 3,
	/* highest valid value */
	NEXUS_META_SUBTYPE_MAX          = NEXUS_META_SUBTYPE_RESERVED
} nexus_meta_subtype_t;
81
82 /*
83 * Nexus provider parameters.
84 */
85 struct nxprov_params {
86 nexus_name_t nxp_name; /* name */
87 uint32_t nxp_namelen; /* length of name */
88 nexus_type_t nxp_type; /* NEXUS_TYPE_* */
89 nexus_meta_type_t nxp_md_type; /* NEXUS_META_TYPE_* */
90 nexus_meta_subtype_t nxp_md_subtype; /* NEXUS_META_SUBTYPE_* */
91 uint32_t nxp_flags; /* NXPF_* */
92 uint32_t nxp_format; /* provider-defined */
93 uint32_t nxp_tx_rings; /* # of channel transmit rings */
94 uint32_t nxp_rx_rings; /* # of channel receive rings */
95 uint32_t nxp_tx_slots; /* # of slots per channel TX ring */
96 uint32_t nxp_rx_slots; /* # of slots per channel RX ring */
97 uint32_t nxp_buf_size; /* size of each buffer */
98 uint32_t nxp_meta_size; /* size of metadata per slot */
99 uint32_t nxp_stats_size; /* size of statistics region */
100 uint32_t nxp_pipes; /* number of pipes */
101 nexus_extension_t nxp_extensions; /* extension specific parameter(s) */
102 uint32_t nxp_mhints; /* memory usage hints */
103 uint32_t nxp_ifindex; /* network interface index */
104 uint32_t nxp_flowadv_max; /* max flow advisory entries */
105 nexus_qmap_type_t nxp_qmap; /* queue mapping type */
106 uint32_t nxp_capabilities; /* nexus capabilities */
107 uint32_t nxp_nexusadv_size; /* nexus advisory region size */
108 uint32_t nxp_max_frags; /* max fragments per packet */
109 /*
110 * reject channel operations if the peer has closed the channel.
111 * Only valid for user-pipe nexus.
112 */
113 boolean_t nxp_reject_on_close;
114 } __attribute__((aligned(64)));
115
/* valid values for nxp_flags */
#define NXPF_ANONYMOUS          0x1     /* allow anonymous channel clients */
#define NXPF_USER_CHANNEL       0x2     /* allow user channel open */
#define NXPF_NETIF_LLINK        0x4     /* use netif logical link */
#ifdef KERNEL
/* all NXPF_* bits defined above */
#define NXPF_MASK    (NXPF_ANONYMOUS | NXPF_USER_CHANNEL | NXPF_NETIF_LLINK)
#endif /* KERNEL */

/*
 * "%b"-style description of nxp_flags: the first byte is the output base
 * (\020 == 16), followed by <1-based bit number><name> pairs.  Keep one
 * entry per NXPF_* flag above, at the position matching its bit.
 */
#define NXPF_BITS \
	"\020\01ANONYMOUS\02USER_CHANNEL\03NETIF_LLINK"
126
/*
 * Valid values for nxp_capabilities.
 */
/* partial checksum */
#define NXPCAP_CHECKSUM_PARTIAL 0x1
/* user packet pool */
#define NXPCAP_USER_PACKET_POOL 0x2
/* allow user channel access */
#define NXPCAP_USER_CHANNEL     0x4

/*
 * "%b"-style description of nxp_capabilities: base byte (\020 == 16)
 * followed by <1-based bit number><name> pairs, one per NXPCAP_* flag.
 */
#define NXPCAP_BITS \
	"\020\01CHECKSUM_PARTIAL\02USER_PKT_POOL\03USER_CHANNEL"
134
135 #define NXPROV_REG_VERSION_1 1
136 #define NXPROV_REG_CURRENT_VERSION NXPROV_REG_VERSION_1
137
138 /*
139 * Nexus provider registration parameters.
140 */
141 struct nxprov_reg {
142 uint32_t nxpreg_version; /* NXPROV_REG_CURRENT_VERSION */
143 uint32_t nxpreg_requested; /* customized attributes */
144 struct nxprov_params nxpreg_params; /* Nexus provider parameters */
145 };
146
/*
 * Flags for nxpreg_requested; keep in sync with NXA_REQ_* flags.
 * Note that these are 32-bit, whereas nxa_requested is 64-bit
 * wide; for now this won't matter.
 */
#define NXPREQ_TX_RINGS         (1U << 0)       /* 0x00000001 */
#define NXPREQ_RX_RINGS         (1U << 1)       /* 0x00000002 */
#define NXPREQ_TX_SLOTS         (1U << 2)       /* 0x00000004 */
#define NXPREQ_RX_SLOTS         (1U << 3)       /* 0x00000008 */
#define NXPREQ_BUF_SIZE         (1U << 4)       /* 0x00000010 */
#define NXPREQ_META_SIZE        (1U << 5)       /* 0x00000020 */
#define NXPREQ_STATS_SIZE       (1U << 6)       /* 0x00000040 */
#define NXPREQ_ANONYMOUS        (1U << 7)       /* 0x00000080 */
#define NXPREQ_PIPES            (1U << 8)       /* 0x00000100 */
#define NXPREQ_EXTENSIONS       (1U << 9)       /* 0x00000200 */
#define NXPREQ_MHINTS           (1U << 10)      /* 0x00000400 */
#define NXPREQ_FLOWADV_MAX      (1U << 11)      /* 0x00000800 */
#define NXPREQ_QMAP             (1U << 12)      /* 0x00001000 */
#define NXPREQ_CHECKSUM_OFFLOAD (1U << 13)      /* 0x00002000 */
#define NXPREQ_USER_PACKET_POOL (1U << 14)      /* 0x00004000 */
#define NXPREQ_CAPABILITIES     (1U << 15)      /* 0x00008000 */
#define NXPREQ_NEXUSADV_SIZE    (1U << 16)      /* 0x00010000 */
#define NXPREQ_IFINDEX          (1U << 17)      /* 0x00020000 */
#define NXPREQ_USER_CHANNEL     (1U << 18)      /* 0x00040000 */
#define NXPREQ_MAX_FRAGS        (1U << 19)      /* 0x00080000 */
#define NXPREQ_REJECT_ON_CLOSE  (1U << 20)      /* 0x00100000 */

/*
 * "%b"-style description of the NXPREQ_* flags: the first byte is the
 * output base (\020 == 16), followed by <bit number><name> pairs where
 * the bit number is 1-based and written in octal, i.e. (1U << n) is
 * described by the byte value n + 1.
 *
 * There must be exactly one entry per flag above, in bit order.  An
 * entry without a matching flag shifts every later name onto the wrong
 * bit, so add or remove entries together with the flag definitions.
 */
#define NXPREQ_BITS \
	"\020\01TX_RINGS\02RX_RINGS\03TX_SLOTS\04RX_SLOTS\05BUF_SIZE"   \
	"\06META_SIZE\07STATS_SIZE\010ANONYMOUS\011PIPES"               \
	"\012EXTENSIONS\013MHINTS\014FLOWADV_MAX\015QMAP"               \
	"\016CKSUM_OFFLOAD\017USER_PKT_POOL\020CAPABS\021NEXUSADV_SIZE" \
	"\022IFINDEX\023USER_CHANNEL\024MAX_FRAGS\025REJ_CLOSE"
180
181 /*
182 * Nexus provider registration entry. Also argument for NXOPT_NEXUS_PROV_ENTRY.
183 */
184 struct nxprov_reg_ent {
185 uuid_t npre_prov_uuid; /* Nexus provider UUID */
186 struct nxprov_params npre_prov_params; /* Nexus provider parameters */
187 };
188
/*
 * Nexus options.  Each is marked (get) or (set) according to the
 * direction in which it is used.
 */
/* (get) list all provider UUIDS */
#define NXOPT_NEXUS_PROV_LIST   1
/* (get) get params of a provider */
#define NXOPT_NEXUS_PROV_ENTRY  2
/* (get) list all Nexus instances */
#define NXOPT_NEXUS_LIST        20
/* (set) bind a Nexus port */
#define NXOPT_NEXUS_BIND        21
/* (set) unbind a Nexus port */
#define NXOPT_NEXUS_UNBIND      22
/* (get) list all Channel instances */
#define NXOPT_CHANNEL_LIST      30
/* (set) nexus specific config */
#define NXOPT_NEXUS_CONFIG      40
199
200 /*
201 * Argument structure for NXOPT_NEXUS_PROV_LIST.
202 */
203 struct nxprov_list_req {
204 uint32_t nrl_num_regs; /* array count */
205 uint32_t __nrl_align; /* reserved */
206 user_addr_t nrl_regs; /* array of nexus_reg_ent */
207 };
208
209 /*
210 * Argument structure for NXOPT_NEXUS_LIST.
211 */
212 struct nx_list_req {
213 uuid_t nl_prov_uuid; /* nexus provider UUID */
214 uint32_t nl_num_nx_uuids; /* array count */
215 uint32_t __nl_align; /* reserved */
216 user_addr_t nl_nx_uuids; /* array of nexus UUIDs */
217 };
218
219 /*
220 * Argument structure for NXOPT_NEXUS_BIND.
221 */
222 struct nx_bind_req {
223 uuid_t nb_nx_uuid; /* nexus instance UUID */
224 nexus_port_t nb_port; /* nexus instance port */
225 uint32_t nb_flags; /* NBR_* match flags */
226 uuid_t nb_exec_uuid; /* executable UUID */
227 user_addr_t nb_key; /* key blob */
228 uint32_t nb_key_len; /* key blob length */
229 pid_t nb_pid; /* client PID */
230 };
231
/* valid values for nb_flags */
/* match against PID */
#define NBR_MATCH_PID           0x1
/* match executable's UUID */
#define NBR_MATCH_EXEC_UUID     0x2
/* match key blob */
#define NBR_MATCH_KEY           0x4
#ifdef KERNEL
/* all NBR_MATCH_* bits defined above */
#define NBR_MATCH_MASK  \
	(NBR_MATCH_PID | NBR_MATCH_EXEC_UUID | NBR_MATCH_KEY)
#endif /* KERNEL */
239
240 /*
241 * Argument structure for NXOPT_NEXUS_UNBIND.
242 */
243 struct nx_unbind_req {
244 uuid_t nu_nx_uuid; /* nexus instance UUID */
245 nexus_port_t nu_port; /* nexus instance port */
246 };
247
248 /*
249 * Argument structure for NXOPT_CHANNEL_LIST.
250 */
251 struct ch_list_req {
252 uuid_t cl_nx_uuid; /* nexus instance UUID */
253 uint32_t cl_num_ch_uuids; /* array count */
254 uint32_t __cl_align; /* reserved */
255 user_addr_t cl_ch_uuids; /* array of channel UUIDs */
256 };
257
258 /*
259 * Skywalk Nexus MIB
260 *
261 * We will use the name MIB now to refer to things that we expose to outside
262 * world for management/telemetry purpose.
263 *
264 * General rule of thumb of this MIB structure is to keep it simple.
265 * Try to avoid variable length field and hierarchical representation wherever
266 * possible. Simple retrieval would return either a single object (simple type
267 * or fixed length compound type) or an object array of same type. This makes
268 * parsing the retrieved information a lot easier.
269 *
270 * For now, we use sysctl as the way MIB interface is exposed. Additional
271 * interfaces could be syscall (e.g. via a nexus controller), etc.
272 */
/*
 * MIB type bits (32-bit).  Bit 0 is not assigned here; the user-stack
 * statistics types occupy the top five bits.
 */
#define NXMIB_NETIF_STATS       ((uint32_t)1 << 1)
#define NXMIB_FSW_STATS         ((uint32_t)1 << 2)
#define NXMIB_FLOW              ((uint32_t)1 << 3)
#define NXMIB_FLOW_ADV          ((uint32_t)1 << 4)
#define NXMIB_FLOW_OWNER        ((uint32_t)1 << 5)
#define NXMIB_FLOW_ROUTE        ((uint32_t)1 << 6)
#define NXMIB_LLINK_LIST        ((uint32_t)1 << 7)

#define NXMIB_QUIC_STATS        ((uint32_t)1 << 27)
#define NXMIB_UDP_STATS         ((uint32_t)1 << 28)
#define NXMIB_TCP_STATS         ((uint32_t)1 << 29)
#define NXMIB_IP6_STATS         ((uint32_t)1 << 30)
#define NXMIB_IP_STATS          ((uint32_t)1 << 31)

/* union of all user-stack statistics types */
#define NXMIB_USERSTACK_STATS                           \
	(NXMIB_IP_STATS | NXMIB_IP6_STATS |             \
	NXMIB_TCP_STATS | NXMIB_UDP_STATS |             \
	NXMIB_QUIC_STATS)

/*
 * Filter selector bits (64-bit), one per optional field of
 * struct nexus_mib_filter.
 */
#define NXMIB_FILTER_NX_UUID    ((uint64_t)1 << 0)
#define NXMIB_FILTER_FLOW_ID    ((uint64_t)1 << 1)
#define NXMIB_FILTER_PID        ((uint64_t)1 << 2)
#define NXMIB_FILTER_INFO_TUPLE ((uint64_t)1 << 3)
295
296 /*
297 * Nexus MIB filter: used to retrieve only those matching the filter value.
298 */
299 struct nexus_mib_filter {
300 uint32_t nmf_type; /* MIB type */
301 uint64_t nmf_bitmap; /* bitmap of following fields */
302
303 uuid_t nmf_nx_uuid; /* nexus instance uuid */
304 uuid_t nmf_flow_id; /* flow rule id */
305 pid_t nmf_pid; /* owner pid */
306 struct info_tuple nmf_info_tuple; /* flow tuple */
307 };
308
/*
 * Nexus-specific config commands, carried in nx_cfg_req.nc_cmd.
 */
typedef enum {
	/* attach an object to a nexus */
	NXCFG_CMD_ATTACH                = 0,
	/* detach an object from a nexus */
	NXCFG_CMD_DETACH                = 1,
	/* bind namespace to a nexus port */
	NXCFG_CMD_FLOW_ADD              = 20,
	/* unbind namespace from a nexus port */
	NXCFG_CMD_FLOW_DEL              = 21,
	/* config packet scheduler */
	NXCFG_CMD_NETEM                 = 30,
	/* collect llink info */
	NXCFG_CMD_GET_LLINK_INFO        = 40,
} nxcfg_cmd_t;
320
321 #define NX_SPEC_IF_NAMELEN 64
322
323 /*
324 * Argument struture for NXOPT_NEXUS_CONFIG.
325 */
326 struct nx_cfg_req {
327 uuid_t nc_nx_uuid; /* nexus instance UUID */
328 nxcfg_cmd_t nc_cmd; /* NXCFG_CMD_* */
329 uint32_t nc_req_len; /* size of request struct */
330 user_addr_t nc_req; /* address of request struct */
331 };
332
333 /*
334 * Argument structure for NXCFG_CMD_{ATTACH,DETACH}
335 */
336 struct nx_spec_req {
337 union {
338 char nsru_name[NX_SPEC_IF_NAMELEN];
339 uuid_t nsru_uuid;
340 #ifdef KERNEL
341 struct ifnet *nsru_ifp;
342 #endif /* KERNEL */
343 } nsr_u __attribute__((aligned(sizeof(uint64_t)))); /* in */
344 uint32_t nsr_flags; /* in */
345 uuid_t nsr_if_uuid; /* attach: out, detach: in */
346 };
347 #define nsr_name nsr_u.nsru_name
348 #define nsr_uuid nsr_u.nsru_uuid
349 #ifdef KERNEL
350 #define nsr_ifp nsr_u.nsru_ifp
351 #endif /* KERNEL */
352
/* valid values for nsr_flags */
/* nsr_name is uuid_t else ifname */
#define NXSPECREQ_UUID          0x1
/* attach to host port */
#define NXSPECREQ_HOST          0x2
#ifdef KERNEL
/* mask off userland-settable bits */
#define NXSPECREQ_MASK          (NXSPECREQ_UUID | NXSPECREQ_HOST)
/* (embryonic) ifnet; not part of NXSPECREQ_MASK */
#define NXSPECREQ_IFP           0x1000
#endif /* KERNEL */
360
361 /*
362 * Argument structure for NXCFG_CMD_FLOW_{BIND,UNBIND}
363 */
364 struct nx_flow_req {
365 nexus_port_t nfr_nx_port;
366 uint16_t nfr_ethertype;
367 ether_addr_t nfr_etheraddr;
368 union sockaddr_in_4_6 nfr_saddr;
369 union sockaddr_in_4_6 nfr_daddr;
370 uint8_t nfr_ip_protocol;
371 uint8_t nfr_transport_protocol;
372 uint16_t nfr_flags;
373 uuid_t nfr_flow_uuid;
374 packet_svc_class_t nfr_svc_class;
375 uuid_t nfr_euuid;
376 uint32_t nfr_policy_id;
377 pid_t nfr_epid;
378 flowadv_idx_t nfr_flowadv_idx;
379 uuid_t nfr_bind_key;
380 uint64_t nfr_qset_id;
381 // below is reserved kernel-only fields
382 union {
383 #ifdef KERNEL
384 struct {
385 char _nfr_kernel_field_start[0];
386 void *nfr_context;
387 struct proc *nfr_proc;
388 struct ifnet *nfr_ifp;
389 struct flow_route *nfr_route;
390 struct ns_token *nfr_port_reservation;
391 struct protons_token *nfr_proto_reservation;
392 struct flow_stats *nfr_flow_stats;
393 pid_t nfr_pid;
394 uint32_t nfr_saddr_gencnt;
395 void *nfr_ipsec_reservation;
396 uint32_t nfr_inp_flowhash;
397 #if defined(__LP64__)
398 uint8_t _nfr_kernel_pad[4];
399 #else /* !__LP64__ */
400 uint8_t _nfr_kernel_pad[36];
401 #endif /* !__LP64__ */
402 char _nfr_kernel_field_end[0];
403 };
404 #endif /* KERNEL */
405 struct {
406 uint8_t _nfr_opaque[80];
407 /* should be at the same offset as _nfr_kernel_field_end above */
408 char _nfr_common_field_end[0];
409 };
410 };
411 };
412
/* valid flags for nfr_flags */
#define NXFLOWREQF_TRACK          0x0001  /* enable state tracking */
#define NXFLOWREQF_QOS_MARKING    0x0002  /* allow qos marking */
#define NXFLOWREQF_FILTER         0x0004  /* interpose filter */
#define NXFLOWREQF_CUSTOM_ETHER   0x0008  /* custom ethertype */
#define NXFLOWREQF_IPV6_ULA       0x0010  /* ipv6 ula */
#define NXFLOWREQF_LISTENER       0x0020  /* listener */
#define NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION 0x0040  /* override system address selection */
#define NXFLOWREQF_USE_STABLE_ADDRESS     0x0080  /* if override local, use stable address */
#define NXFLOWREQF_FLOWADV        0x0100  /* allocate flow advisory */
#define NXFLOWREQF_ASIS           0x0200  /* create flow as is in nfr */
#define NXFLOWREQF_LOW_LATENCY    0x0400  /* low latency flow */
#define NXFLOWREQF_NOWAKEFROMSLEEP 0x0800 /* Don't wake for traffic to this flow */

/*
 * "%b"-style description of nfr_flags: the first byte is the output base
 * (\020 == 16), followed by <bit number><name> pairs where the bit number
 * is 1-based and written in octal.  Keep one entry per NXFLOWREQF_* flag
 * above, in bit order.
 */
#define NXFLOWREQF_BITS \
	"\020\01TRACK\02QOS_MARKING\03FILTER\04CUSTOM_ETHER\05IPV6_ULA" \
	"\06LISTENER\07OVERRIDE_ADDRESS_SELECTION\010USE_STABLE_ADDRESS" \
	"\011ALLOC_FLOWADV\012ASIS\013LOW_LATENCY\014NOWAKEFROMSLEEP"
431
/*
 * A 16-byte IP address container.  The anonymous union exposes the same
 * storage as an IPv4 address, an IPv6 address, or as 8/16/32/64-bit
 * words.
 */
struct flow_ip_addr {
	union {
		struct in_addr  _v4;
		struct in6_addr _v6;
		uint8_t         _addr8[16];
		uint16_t        _addr16[8];
		uint32_t        _addr32[4];
		uint64_t        _addr64[2];
	};
};

/*
 * Flow lookup key: 8 bytes of scalars, two 16-byte addresses and 8 bytes
 * of padding, for 48 bytes in total; aligned to 16 bytes.
 */
struct flow_key {
	uint16_t                fk_mask;        /* FKMASK_* */
	uint8_t                 fk_ipver;
	uint8_t                 fk_proto;
	uint16_t                fk_sport;
	uint16_t                fk_dport;
	struct flow_ip_addr     fk_src;
	struct flow_ip_addr     fk_dst;
	/* pad to 48 bytes */
	uint64_t                fk_pad[1];
} __attribute__((__aligned__(16)));

/* typed views of the source and destination addresses */
#define fk_src4                 fk_src._v4
#define fk_dst4                 fk_dst._v4
#define fk_src6                 fk_src._v6
#define fk_dst6                 fk_dst._v6
458
/* size of a flow key in bytes, and the seed used when hashing one */
#define FLOW_KEY_LEN            sizeof(struct flow_key)
#define FK_HASH_SEED            0xabcd

/* individual fk_mask bits, one per flow_key field */
#define FKMASK_IPVER            (((uint16_t)1) << 0)
#define FKMASK_PROTO            (((uint16_t)1) << 1)
#define FKMASK_SRC              (((uint16_t)1) << 2)
#define FKMASK_SPORT            (((uint16_t)1) << 3)
#define FKMASK_DST              (((uint16_t)1) << 4)
#define FKMASK_DPORT            (((uint16_t)1) << 5)

/* composite masks; each N-tuple/ipflow level extends the previous one */
#define FKMASK_2TUPLE           (FKMASK_PROTO | FKMASK_SPORT)
#define FKMASK_3TUPLE           (FKMASK_2TUPLE | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_4TUPLE           (FKMASK_3TUPLE | FKMASK_DPORT)
#define FKMASK_5TUPLE           (FKMASK_4TUPLE | FKMASK_DST)
#define FKMASK_IPFLOW1          FKMASK_PROTO
#define FKMASK_IPFLOW2          (FKMASK_IPFLOW1 | FKMASK_IPVER | FKMASK_SRC)
#define FKMASK_IPFLOW3          (FKMASK_IPFLOW2 | FKMASK_DST)
#define FKMASK_IDX_MAX          7

/* flow keys declared for each of the seven composite masks above */
extern const struct flow_key fk_mask_2tuple;
extern const struct flow_key fk_mask_3tuple;
extern const struct flow_key fk_mask_4tuple;
extern const struct flow_key fk_mask_5tuple;
extern const struct flow_key fk_mask_ipflow1;
extern const struct flow_key fk_mask_ipflow2;
extern const struct flow_key fk_mask_ipflow3;

/*
 * Clear the flow key at _fk by handing it to sk_zero_48(), after
 * checking at compile time that a flow key is exactly 48 bytes.
 */
#define FLOW_KEY_CLEAR(_fk) do {                                        \
	_CASSERT(FLOW_KEY_LEN == 48);                                   \
	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));              \
	sk_zero_48(_fk);                                                \
} while (0)
491
492 #ifdef KERNEL
/*
 * mask off userland-settable bits
 *
 * NOTE(review): NXFLOWREQF_ASIS and NXFLOWREQF_NOWAKEFROMSLEEP are not
 * part of this mask, so both get cleared wherever the mask is applied;
 * confirm that this is intentional.
 */
#define NXFLOWREQF_MASK                                 \
	(NXFLOWREQF_TRACK |                             \
	NXFLOWREQF_QOS_MARKING |                        \
	NXFLOWREQF_FILTER |                             \
	NXFLOWREQF_CUSTOM_ETHER |                       \
	NXFLOWREQF_IPV6_ULA |                           \
	NXFLOWREQF_LISTENER |                           \
	NXFLOWREQF_OVERRIDE_ADDRESS_SELECTION |         \
	NXFLOWREQF_USE_STABLE_ADDRESS |                 \
	NXFLOWREQF_FLOWADV |                            \
	NXFLOWREQF_LOW_LATENCY)

/* additional nfr_flags bits, defined for kernel builds only */
#define NXFLOWREQF_EXT_PORT_RSV   0x1000  /* external port reservation */
#define NXFLOWREQF_EXT_PROTO_RSV  0x2000  /* external proto reservation */
502
503 static inline void
nx_flow_req_internalize(struct nx_flow_req * req)504 nx_flow_req_internalize(struct nx_flow_req *req)
505 {
506 /* init kernel only fields */
507 bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
508 req->nfr_flags &= NXFLOWREQF_MASK;
509 req->nfr_context = NULL;
510 req->nfr_flow_stats = NULL;
511 req->nfr_port_reservation = NULL;
512 }
513
514 static inline void
nx_flow_req_externalize(struct nx_flow_req * req)515 nx_flow_req_externalize(struct nx_flow_req *req)
516 {
517 /* neutralize kernel only fields */
518 bzero(&req->_nfr_opaque, sizeof(req->_nfr_opaque));
519 req->nfr_flags &= NXFLOWREQF_MASK;
520 }
521 #endif /* KERNEL */
522
/*
 * Per-qset record: an ID, flags, and the RX and TX queue counts.
 * Embedded as an array in struct nx_llink_info.
 */
struct nx_qset_info {
	uint64_t        nqi_id;
	uint16_t        nqi_flags;
	uint8_t         nqi_num_rx_queues;
	uint8_t         nqi_num_tx_queues;
};
529
530 #define NETIF_LLINK_MAX_QSETS 256
531 struct nx_llink_info {
532 uuid_t nli_netif_uuid; /* nexus netif instance uuid */
533 uint64_t nli_link_id;
534 uint16_t nli_link_id_internal;
535 uint8_t nli_state;
536 uint8_t nli_flags;
537 uint16_t nli_qset_cnt;
538 struct nx_qset_info nli_qset[NETIF_LLINK_MAX_QSETS];
539 };
540
541 #define NETIF_LLINK_INFO_VERSION 0x01
542 struct nx_llink_info_req {
543 uint16_t nlir_version;
544 uint16_t nlir_llink_cnt;
545 struct nx_llink_info nlir_llink[0];
546 };
547
548 /*
549 * Nexus controller descriptor.
550 */
551 struct nexus_controller {
552 #ifndef KERNEL
553 int ncd_fd;
554 guardid_t ncd_guard;
555 #else /* KERNEL */
556 struct nxctl *ncd_nxctl;
557 #endif /* KERNEL */
558 };
559
/*
 * Nexus attributes.
 *
 * Every attribute is stored as a 64-bit value.  nxa_requested is a bitmap
 * of NXA_REQ_* flags.
 */
struct nexus_attr {
	uint64_t        nxa_requested;          /* customized attributes */

	/* ring and slot geometry */
	uint64_t        nxa_tx_rings;           /* # of channel transmit rings */
	uint64_t        nxa_rx_rings;           /* # of channel receive rings */
	uint64_t        nxa_tx_slots;           /* # of slots per channel TX ring */
	uint64_t        nxa_rx_slots;           /* # of slots per channel RX ring */

	/* region sizes */
	uint64_t        nxa_buf_size;           /* size of each buffer */
	uint64_t        nxa_meta_size;          /* size of metadata per buffer */
	uint64_t        nxa_stats_size;         /* size of statistics region */

	uint64_t        nxa_anonymous;          /* bool: allow anonymous clients */
	uint64_t        nxa_pipes;              /* number of pipes */
	uint64_t        nxa_extensions;         /* extension-specific attribute */
	uint64_t        nxa_mhints;             /* memory usage hints */
	uint64_t        nxa_ifindex;            /* network interface index */
	uint64_t        nxa_flowadv_max;        /* max flow advisory entries */
	uint64_t        nxa_qmap;               /* queue mapping type */
	uint64_t        nxa_checksum_offload;   /* partial checksum offload */
	uint64_t        nxa_user_packet_pool;   /* user packet pool */
	uint64_t        nxa_nexusadv_size;      /* size of advisory region */
	uint64_t        nxa_user_channel;       /* user channel open allowed */
	uint64_t        nxa_max_frags;          /* max fragments per packet */

	/*
	 * When set, channel operations are rejected once the nexus peer
	 * has closed the channel.  Valid only for user-pipe nexus.
	 */
	uint64_t        nxa_reject_on_close;
};
590
/*
 * Flags for nxa_requested; keep in sync with NXPREQ_* flags.
 * Note that these are 64-bit, whereas nxpreg_requested is
 * 32-bit wide; for now this won't matter.
 */
#define NXA_REQ_TX_RINGS        (1ULL << 0)     /* 0x0000000000000001 */
#define NXA_REQ_RX_RINGS        (1ULL << 1)     /* 0x0000000000000002 */
#define NXA_REQ_TX_SLOTS        (1ULL << 2)     /* 0x0000000000000004 */
#define NXA_REQ_RX_SLOTS        (1ULL << 3)     /* 0x0000000000000008 */
#define NXA_REQ_BUF_SIZE        (1ULL << 4)     /* 0x0000000000000010 */
#define NXA_REQ_META_SIZE       (1ULL << 5)     /* 0x0000000000000020 */
#define NXA_REQ_STATS_SIZE      (1ULL << 6)     /* 0x0000000000000040 */
#define NXA_REQ_ANONYMOUS       (1ULL << 7)     /* 0x0000000000000080 */
#define NXA_REQ_PIPES           (1ULL << 8)     /* 0x0000000000000100 */
#define NXA_REQ_EXTENSIONS      (1ULL << 9)     /* 0x0000000000000200 */
#define NXA_REQ_MHINTS          (1ULL << 10)    /* 0x0000000000000400 */
#define NXA_REQ_FLOWADV_MAX     (1ULL << 11)    /* 0x0000000000000800 */
#define NXA_REQ_QMAP            (1ULL << 12)    /* 0x0000000000001000 */
#define NXA_REQ_CHECKSUM_OFFLOAD (1ULL << 13)   /* 0x0000000000002000 */
#define NXA_REQ_USER_PACKET_POOL (1ULL << 14)   /* 0x0000000000004000 */
#define NXA_REQ_CAPABILITIES    (1ULL << 15)    /* 0x0000000000008000 */
#define NXA_REQ_NEXUSADV_SIZE   (1ULL << 16)    /* 0x0000000000010000 */
#define NXA_REQ_IFINDEX         (1ULL << 17)    /* 0x0000000000020000 */
#define NXA_REQ_USER_CHANNEL    (1ULL << 18)    /* 0x0000000000040000 */
#define NXA_REQ_MAX_FRAGS       (1ULL << 19)    /* 0x0000000000080000 */
#define NXA_REQ_REJECT_ON_CLOSE (1ULL << 20)    /* 0x0000000000100000 */
617
618 #ifndef KERNEL
619 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
620 __BEGIN_DECLS
621 /* system calls */
622 extern int __nexus_open(struct nxctl_init *init, const uint32_t init_len);
623 extern int __nexus_register(int ctl, struct nxprov_reg *reg,
624 const uint32_t reg_len, uuid_t *prov_uuid, const uint32_t prov_uuid_len);
625 extern int __nexus_deregister(int ctl, const uuid_t prov_uuid,
626 const uint32_t prov_uuid_len);
627 extern int __nexus_create(int ctl, const uuid_t prov_uuid,
628 const uint32_t prov_uuid_len, uuid_t *nx_uuid, const uint32_t nx_uuid_len);
629 extern int __nexus_destroy(int ctl, const uuid_t nx_uuid,
630 const uint32_t nx_uuid_len);
631 extern int __nexus_get_opt(int ctl, const uint32_t opt, void *aoptval,
632 uint32_t *aoptlen);
633 extern int __nexus_set_opt(int ctl, const uint32_t opt, const void *aoptval,
634 const uint32_t optlen);
635
636 /* private nexus controller APIs */
637 extern int __os_nexus_ifattach(const nexus_controller_t ctl,
638 const uuid_t nx_uuid, const char *ifname, const uuid_t netif_uuid,
639 boolean_t host, uuid_t *nx_if_uuid);
640 extern int __os_nexus_ifdetach(const nexus_controller_t ctl,
641 const uuid_t nx_uuid, const uuid_t nx_if_uuid);
642
643 /* private flow APIs */
644 extern int __os_nexus_flow_add(const nexus_controller_t ncd,
645 const uuid_t nx_uuid, const struct nx_flow_req *nfr);
646 extern int __os_nexus_flow_del(const nexus_controller_t ncd,
647 const uuid_t nx_uuid, const struct nx_flow_req *nfr);
648 extern int __os_nexus_get_llink_info(const nexus_controller_t ncd,
649 const uuid_t nx_uuid, const struct nx_llink_info_req *nlir, size_t len);
650
651 __END_DECLS
652 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
653 #endif /* !KERNEL */
654 #if defined(LIBSYSCALL_INTERFACE) || defined(BSD_KERNEL_PRIVATE)
655 #include <skywalk/nexus_common.h>
656 #endif /* LIBSYSCALL_INTERFACE || BSD_KERNEL_PRIVATE */
657 #endif /* PRIVATE || BSD_KERNEL_PRIVATE */
658 #endif /* !_SKYWALK_OS_NEXUS_PRIVATE_H_ */
659