1 /*
2 * Copyright (c) 2016-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Once a packet is classified, it goes through checks to see if there
31 * is a matching flow entry in the flow table. The key used to search
32 * the entry is composed of the fields contained in struct flow_ptrs.
33 *
34 * Flow entry insertion and deletion to the flow table, on behalf of
35 * the owning client process, requires the use of the rule ID (UUID)
36 * as the search key.
37 *
38 * Because of the above, each flow entry simultaneously exists in two
39 * respective trees: flow_entry_tree and flow_entry_id_tree.
40 *
41 * Using a single RW lock to protect the two trees is simple, but the
42 * data path performance is impacted during flow insertion and deletion,
43 * especially as the number of client processes and flows grow.
44 *
45 * To solve that, we deploy the following scheme:
46 *
47 * Given that the flow_entry_tree is searched on a per-packet basis,
48 * we break it down into a series of trees, each one contained within
49 * a flow_bucket structure. The hash from flow_ptrs determines the
50 * index of the flow_bucket to search the flow_entry_tree from.
51 *
52 * The flow_entry_id_tree is searched on each flow insertion and
53 * deletion, and similarly we break it down into a series of trees,
54 * each contained within a flow_owner_bucket structure. We use the
55 * client process ID (pid_t) to determine the bucket index.
56 *
 * Each flow_bucket and flow_owner_bucket structure is dynamically
 * created, and is aligned on the CPU cache boundary. The alignment is
 * done to avoid false sharing, especially given that each bucket has
 * its own lock. The number of those buckets is determined by the
 * client module at the time the flow manager context is initialized.
62 */
63
64 #ifndef _SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_
65 #define _SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_
66
67 #ifdef BSD_KERNEL_PRIVATE
68 #include <skywalk/core/skywalk_var.h>
69 #include <skywalk/lib/cuckoo_hashtable.h>
70 #include <skywalk/namespace/netns.h>
71 #include <skywalk/namespace/protons.h>
72 #include <skywalk/packet/packet_var.h>
73 #include <net/flowhash.h>
74 #include <netinet/ip.h>
75 #include <netinet/in_stat.h>
76 #include <netinet/ip6.h>
77 #include <sys/eventhandler.h>
78
79 RB_HEAD(flow_owner_tree, flow_owner);
80
81 struct flow_owner_bucket {
82 decl_lck_mtx_data(, fob_lock);
83 struct flow_owner_tree fob_owner_head;
84 uint16_t fob_busy_flags;
85 uint16_t fob_open_waiters;
86 uint16_t fob_close_waiters;
87 uint16_t fob_dtor_waiters;
88 const size_t fob_idx;
89 };
90
/* FOBF_* flag bits */
#define FOBF_OPEN_BUSY                  0x1     /* flow open monitor */
#define FOBF_CLOSE_BUSY                 0x2     /* flow close monitor */
#define FOBF_DEAD                       0x4     /* no longer usable */

/*
 * Locking helpers for a flow_owner_bucket; each one forwards to the
 * matching lck_mtx primitive on (_fob)->fob_lock.
 */
#define FOB_LOCK(_fob)                  lck_mtx_lock(&(_fob)->fob_lock)
#define FOB_LOCK_SPIN(_fob)             lck_mtx_lock_spin(&(_fob)->fob_lock)
#define FOB_LOCK_CONVERT(_fob)          lck_mtx_convert_spin(&(_fob)->fob_lock)
#define FOB_TRY_LOCK(_fob)              lck_mtx_try_lock(&(_fob)->fob_lock)
#define FOB_LOCK_ASSERT_HELD(_fob)      LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_OWNED)
#define FOB_LOCK_ASSERT_NOTHELD(_fob)   LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_NOTOWNED)
#define FOB_UNLOCK(_fob)                lck_mtx_unlock(&(_fob)->fob_lock)
109
110 RB_HEAD(flow_entry_id_tree, flow_entry);
111
112 #define FLOW_PROCESS_NAME_LENGTH 24
113
114 struct flow_owner {
115 RB_ENTRY(flow_owner) fo_link;
116 struct flow_entry_id_tree fo_flow_entry_id_head;
117 const struct flow_owner_bucket *fo_bucket;
118 void *fo_context;
119 pid_t fo_pid;
120 bool fo_nx_port_pid_bound;
121 bool fo_nx_port_destroyed;
122 bool fo_low_latency;
123 nexus_port_t fo_nx_port;
124 uuid_t fo_key;
125
126 struct nexus_adapter * const fo_nx_port_na;
127 struct nx_flowswitch * const fo_fsw;
128
129 /*
130 * Array of bitmaps to manage the flow advisory table indices.
131 * Currently we are restricting a flow owner to a single nexus
132 * port, so this structure is effectively managing the flow advisory
133 * indices for a port.
134 */
135 bitmap_t *__counted_by(fo_num_flowadv_bmaps)fo_flowadv_bmap;
136 uint32_t fo_flowadv_max;
137 uint32_t fo_num_flowadv;
138 uint32_t fo_num_flowadv_bmaps;
139
140 /* for debugging */
141 char fo_name[FLOW_PROCESS_NAME_LENGTH];
142 };
143
144 #define FO_BUCKET(_fo) \
145 __DECONST(struct flow_owner_bucket *, (_fo)->fo_bucket)
146
147 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_owner_tree, flow_owner,
148 fo_link, fo_cmp);
149 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_entry_id_tree, flow_entry,
150 fe_id_link, fe_id_cmp);
151
/*
 * Flow tracking states.  Values 0..10 are the TCP states, values
 * 20..22 are the UDP states, and FT_STATE_MAX is 255.
 */
typedef enum {
	/*
	 * TCP states.
	 */
	FT_STATE_CLOSED         = 0,    /* closed */
	FT_STATE_LISTEN         = 1,    /* listening for connection */
	FT_STATE_SYN_SENT       = 2,    /* active, have sent SYN */
	FT_STATE_SYN_RECEIVED   = 3,    /* have sent and rcvd SYN */
	FT_STATE_ESTABLISHED    = 4,    /* established */
	FT_STATE_CLOSE_WAIT     = 5,    /* rcvd FIN, waiting close */
	FT_STATE_FIN_WAIT_1     = 6,    /* have sent FIN */
	FT_STATE_CLOSING        = 7,    /* exchanged FINs, waiting FIN|ACK */
	FT_STATE_LAST_ACK       = 8,    /* rcvd FIN, closed, waiting FIN|ACK */
	FT_STATE_FIN_WAIT_2     = 9,    /* closed, FIN is ACK'd */
	FT_STATE_TIME_WAIT      = 10,   /* quiet wait after close */

	/*
	 * UDP states.
	 */
	FT_STATE_NO_TRAFFIC     = 20,   /* no packet observed */
	FT_STATE_SINGLE         = 21,   /* single packet */
	FT_STATE_MULTIPLE       = 22,   /* multiple packets */

	FT_STATE_MAX            = 255
} flow_track_state_t;
177
/* RTT tracking state (embedded in struct flow_track as fse_rtt). */
struct flow_track_rtt {
	uint64_t        frtt_timestamp; /* tracked segment timestamp */
	uint64_t        frtt_last;      /* previous net_uptime(rate limiting) */
	uint32_t        frtt_seg_begin; /* tracked segment begin SEQ */
	uint32_t        frtt_seg_end;   /* tracked segment end SEQ */
	uint32_t        frtt_usec;      /* avg RTT in usec */
};

/* sample ACK RTT every 2 sec */
#define FLOWTRACK_RTT_SAMPLE_INTERVAL   2
187
188 struct flow_track {
189 /*
190 * TCP specific tracking info.
191 */
192 uint32_t fse_seqlo; /* max sequence number sent */
193 uint32_t fse_seqhi; /* max the other end ACKd + win */
194 uint32_t fse_seqlast; /* last sequence number (FIN) */
195 uint16_t fse_max_win; /* largest window (pre scaling) */
196 uint16_t fse_mss; /* maximum segment size option */
197 uint8_t fse_state; /* active state level (FT_STATE_*) */
198 uint8_t fse_wscale; /* window scaling factor */
199 uint16_t fse_flags; /* FLOWSTATEF_* */
200 uint32_t fse_syn_ts; /* SYN timestamp */
201 uint32_t fse_syn_cnt; /* # of SYNs per second */
202
203 struct flow_track_rtt fse_rtt; /* ACK RTT tracking */
204 #define fse_rtt_usec fse_rtt.frtt_usec
205 } __sk_aligned(8);
206
207 /* valid values for fse_flags */
208 #define FLOWSTATEF_WSCALE 0x1 /* fse_wscale is valid */
209
210 struct flow_llhdr {
211 uint32_t flh_gencnt; /* link-layer address gencnt */
212
213 const uint8_t flh_off;
214 const uint8_t flh_len;
215 uint16_t flh_pad; /* for future */
216
217 union _flh_u {
218 uint64_t _buf[2];
219 struct {
220 uint16_t _eth_pad;
221 struct ether_header _eth;
222 } _eth_padded;
223 } __sk_aligned(8) _flh;
224 #define flh_eth_padded _flh._eth_padded
225 #define flh_eth _flh._eth_padded._eth
226 };
227
228 typedef enum {
229 FE_QSET_SELECT_NONE,
230 FE_QSET_SELECT_FIXED,
231 FE_QSET_SELECT_DYNAMIC
232 } flow_qset_select_t;
233
234 extern kern_allocation_name_t skmem_tag_flow_demux;
235 typedef int (*flow_demux_memcmp_mask_t)(const uint8_t *src1, const uint8_t *src2,
236 const uint8_t *byte_mask);
237
238 struct kern_flow_demux_pattern {
239 struct flow_demux_pattern fdp_demux_pattern;
240 flow_demux_memcmp_mask_t fdp_memcmp_mask;
241 };
242
243 #define MAX_PKT_DEMUX_LIMIT 1000
244
245 TAILQ_HEAD(flow_entry_list, flow_entry);
246
247 #define FLOW_PROC_FLAG_GSO 0x0001
248 typedef void (*flow_tx_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe,
249 uint32_t flags);
250
251 #define FLOW_PROC_FLAG_FRAGMENTS 0x0001
252 typedef void (*flow_rx_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe,
253 struct pktq *pkts, uint32_t rx_bytes, struct mbufq *host_mq,
254 uint32_t flags);
255
256 struct flow_entry {
257 /**** Common Group ****/
258 os_refcnt_t fe_refcnt;
259 struct flow_key fe_key;
260 uint32_t fe_flags;
261 uint32_t fe_key_hash;
262 struct cuckoo_node fe_cnode;
263
264 uuid_t fe_uuid __sk_aligned(8);
265 nexus_port_t fe_nx_port;
266 uint32_t fe_laddr_gencnt;
267 uint32_t fe_want_nonviable;
268 uint32_t fe_want_withdraw;
269 uint8_t fe_transport_protocol;
270
271 /**** Rx Group ****/
272 /*
273 * If multiple threads end up working on the same flow entry, the one
274 * that reaches rx_flow_batch_packets first will be responsible for
275 * sending up all the packets from different RX completion queues.
276 * fe_rx_worker_tid marks its thread ID. Other threads only enqueues their
277 * packets into fe_rx_pktq but do not call fe_rx_process on the flow entry.
278 */
279 uint16_t fe_rx_frag_count;
280 uint32_t fe_rx_pktq_bytes;
281 decl_lck_mtx_data(, fe_rx_pktq_lock);
282 struct pktq fe_rx_pktq;
283 TAILQ_ENTRY(flow_entry) fe_rx_link;
284 flow_rx_action_t fe_rx_process;
285 uint64_t fe_rx_worker_tid;
286
287 /*
288 * largest allocated packet size.
289 * used by:
290 * - mbuf batch allocation logic during RX aggregtion and netif copy.
291 * - packet allocation logic during RX aggregation.
292 */
293 uint32_t fe_rx_largest_size;
294
295 /**** Tx Group ****/
296 bool fe_tx_is_cont_frag;
297 uint32_t fe_tx_frag_id;
298 struct pktq fe_tx_pktq;
299 TAILQ_ENTRY(flow_entry) fe_tx_link;
300 flow_tx_action_t fe_tx_process;
301
302 uuid_t fe_eproc_uuid __sk_aligned(8);
303 flowadv_idx_t fe_adv_idx;
304 kern_packet_svc_class_t fe_svc_class;
305 uint32_t fe_policy_id; /* policy id matched to flow */
306 uint32_t fe_skip_policy_id; /* skip policy id matched to flow */
307
308 /**** Misc Group ****/
309 struct nx_flowswitch * const fe_fsw;
310 struct ns_token *fe_port_reservation;
311 struct protons_token *fe_proto_reservation;
312 void *fe_ipsec_reservation;
313
314 struct flow_track fe_ltrack; /* local endpoint state */
315 struct flow_track fe_rtrack; /* remote endpoint state */
316
317 /*
318 * Flow stats are kept externally stand-alone, refcnt'ed by various
319 * users (e.g. flow_entry, necp_client_flow, etc.)
320 */
321 struct flow_stats *fe_stats;
322 struct flow_route *fe_route;
323
324 RB_ENTRY(flow_entry) fe_id_link;
325
326 TAILQ_ENTRY(flow_entry) fe_linger_link;
327 uint64_t fe_linger_expire; /* expiration deadline */
328 uint32_t fe_linger_wait; /* linger time (seconds) */
329
330 pid_t fe_pid;
331 pid_t fe_epid;
332 char fe_proc_name[FLOW_PROCESS_NAME_LENGTH];
333 char fe_eproc_name[FLOW_PROCESS_NAME_LENGTH];
334
335 uint32_t fe_flowid; /* globally unique flow ID */
336
337 /* Logical link related information */
338 struct netif_qset *fe_qset;
339 uint64_t fe_qset_id;
340 flow_qset_select_t fe_qset_select;
341 uint32_t fe_tr_genid;
342
343 /* Parent child information */
344 decl_lck_rw_data(, fe_child_list_lock);
345 struct flow_entry_list fe_child_list;
346 TAILQ_ENTRY(flow_entry) fe_child_link;
347 #if DEVELOPMENT || DEBUG
348 int16_t fe_child_count;
349 #endif // DEVELOPMENT || DEBUG
350 uint8_t fe_demux_pattern_count;
351 struct kern_flow_demux_pattern *__counted_by(fe_demux_pattern_count)fe_demux_patterns;
352 uint8_t *__sized_by_or_null(FLOW_DEMUX_MAX_LEN) fe_demux_pkt_data;
353
354 TAILQ_ENTRY(flow_entry) fe_rxstrc_link;
355 };
356
/* valid values for fe_flags */
#define FLOWENTF_INITED         0x00000001 /* {src,dst} states initialized */
#define FLOWENTF_AOP_OFFLOAD    0x00000002 /* AOP Offload flow */
#define FLOWENTF_RX_STEERING    0x00000004 /* RX flow steering configured */
#define FLOWENTF_TRACK          0x00000010 /* enable state tracking */
#define FLOWENTF_CONNECTED      0x00000020 /* connected mode */
#define FLOWENTF_LISTENER       0x00000040 /* listener mode */
#define FLOWENTF_RXSTRC_PENDING 0x00000080 /* Rx steering rule cleanup pending */
#define FLOWENTF_QOS_MARKING    0x00000100 /* flow can have qos marking */
#define FLOWENTF_LOW_LATENCY    0x00000200 /* low latency flow */
#define FLOWENTF_WAIT_CLOSE     0x00001000 /* defer free after close */
#define FLOWENTF_CLOSE_NOTIFY   0x00002000 /* notify NECP upon tear down */
#define FLOWENTF_EXTRL_PORT     0x00004000 /* port reservation is held externally */
#define FLOWENTF_EXTRL_PROTO    0x00008000 /* proto reservation is held externally */
#define FLOWENTF_EXTRL_FLOWID   0x00010000 /* flowid reservation is held externally */
#define FLOWENTF_CHILD          0x00020000 /* child flow */
#define FLOWENTF_PARENT         0x00040000 /* parent flow */
#define FLOWENTF_NOWAKEFROMSLEEP 0x00080000 /* don't wake for this flow */
#define FLOWENTF_CONNECTION_IDLE 0x00100000 /* connection is idle */
#define FLOWENTF_ABORTED        0x01000000 /* has sent RST to peer */
#define FLOWENTF_NONVIABLE      0x02000000 /* disabled; awaiting tear down */
#define FLOWENTF_WITHDRAWN      0x04000000 /* flow has been withdrawn */
#define FLOWENTF_TORN_DOWN      0x08000000 /* torn down and awaiting destroy */
#define FLOWENTF_HALF_CLOSED    0x10000000 /* flow is half closed */
#define FLOWENTF_DESTROYED      0x40000000 /* not in RB trees anymore */
#define FLOWENTF_LINGERING      0x80000000 /* destroyed and in linger list */

/*
 * Bit-name table for fe_flags, laid out in the kernel "%b" style: the
 * first byte is the output base (\020 == 16), and every following entry
 * is a 1-based bit number written as an octal escape, immediately
 * followed by that bit's name.
 *
 * Every FLOWENTF_* flag above must have an entry here; a flag without
 * one is silently dropped when fe_flags is decoded.  Keep the entries in
 * ascending bit order.
 */
#define FLOWENTF_BITS                                                   \
	"\020\01INITED\02AOP_OFFLOAD\03RX_STEERING\05TRACK\06CONNECTED" \
	"\07LISTENER\010RXSTRC_PENDING\011QOS_MARKING"                  \
	"\012LOW_LATENCY\015WAIT_CLOSE\016CLOSE_NOTIFY\017EXT_PORT"     \
	"\020EXT_PROTO\021EXT_FLOWID\022CHILD\023PARENT"                \
	"\024NOWAKEFROMSLEEP\025CONNECTION_IDLE"                        \
	"\031ABORTED\032NONVIABLE\033WITHDRAWN"                         \
	"\034TORN_DOWN\035HALF_CLOSED\037DESTROYED\40LINGERING"
390
391 TAILQ_HEAD(flow_entry_linger_head, flow_entry);
392
393 struct flow_entry_dead {
394 LIST_ENTRY(flow_entry_dead) fed_link;
395
396 boolean_t fed_want_nonviable;
397 boolean_t fed_want_clonotify;
398
399 /* rule (flow) UUID */
400 union {
401 uint64_t fed_uuid_64[2];
402 uint32_t fed_uuid_32[4];
403 uuid_t fed_uuid;
404 } __sk_aligned(8);
405 };
406
407 TAILQ_HEAD(flow_entry_rxstrc_head, flow_entry);
408
409 /*
410 * Minimum refcnt for a flow route entry to be considered as idle.
411 */
412 #define FLOW_ROUTE_MINREF 2 /* for the 2 RB trees */
413
414 struct flow_route {
415 RB_ENTRY(flow_route) fr_link;
416 RB_ENTRY(flow_route) fr_id_link;
417
418 /*
419 * fr_laddr represents the local address that the system chooses
420 * for the foreign destination in fr_faddr. The flow entry that
421 * is referring to this flow route object may choose a different
422 * local address if it wishes.
423 *
424 * fr_gaddr represents the gateway address to reach the final
425 * foreign destination fr_faddr, valid only if the destination is
426 * not directly attached (FLOWRTF_GATEWAY is set).
427 *
428 * The use of sockaddr for storage is for convenience; the port
429 * value is not applicable for this object, as this is shared
430 * among flow entries.
431 */
432 union sockaddr_in_4_6 fr_laddr; /* local IP address */
433 union sockaddr_in_4_6 fr_faddr; /* remote IP address */
434 #define fr_af fr_faddr.sa.sa_family
435 union sockaddr_in_4_6 fr_gaddr; /* gateway IP address */
436
437 struct flow_llhdr fr_llhdr;
438 #define fr_eth_padded fr_llhdr.flh_eth_padded
439 #define fr_eth fr_llhdr.flh_eth
440
441 /*
442 * In flow_route_tree, we use the destination address as key.
443 * To speed up searches, we initialize fr_addr_key to the address
444 * portion of fr_faddr depending on the address family.
445 */
446 void *fr_addr_key;
447
448 /* flow route UUID */
449 uuid_t fr_uuid __sk_aligned(8);
450
451 /*
452 * fr_usecnt is updated atomically; incremented when a flow entry
453 * refers to this object and decremented otherwise. Periodically,
454 * the flowswitch instance garbage collects flow_route objects
455 * that aren't being referred to by any flow entries.
456 *
457 * fr_expire is set when fr_usecnt reaches its minimum count, and
458 * is cleared when it goes above the minimum count.
459 *
460 * The spin lock fr_reflock is used to serialize both.
461 */
462 decl_lck_spin_data(, fr_reflock);
463 uint64_t fr_expire;
464 volatile uint32_t fr_usecnt;
465
466 uint32_t fr_flags;
467 uint32_t fr_laddr_gencnt; /* local IP gencnt */
468 uint32_t fr_addr_len; /* sizeof {in,in6}_addr */
469
470 volatile uint32_t fr_want_configure;
471 volatile uint32_t fr_want_probe;
472
473 /* lock to serialize resolver */
474 decl_lck_mtx_data(, fr_lock);
475
476 /*
477 * fr_rt_dst is the route to final destination, and along with
478 * fr_rt_evhdlr_tag, they are used in route event registration.
479 *
480 * fr_rt_gw is valid only if FLOWRTF_GATEWAY is set.
481 */
482 eventhandler_tag fr_rt_evhdlr_tag;
483 struct rtentry *fr_rt_dst;
484 struct rtentry *fr_rt_gw;
485
486 /* nexus UUID */
487 uuid_t fr_nx_uuid __sk_aligned(8);
488
489 const struct flow_mgr *fr_mgr;
490 const struct flow_route_bucket *fr_frb;
491 const struct flow_route_id_bucket *fr_frib;
492 };
493
/*
 * valid values for fr_flags
 *
 * Note that bits 0x2, 0x4 and 0x8 are not assigned.
 */
#define FLOWRTF_ATTACHED        0x00000001      /* attached to RB trees */
#define FLOWRTF_ONLINK          0x00000010      /* dst directly on the link */
#define FLOWRTF_GATEWAY         0x00000020      /* gw IP address is valid */
#define FLOWRTF_RESOLVED        0x00000040      /* flow route is resolved */
#define FLOWRTF_HAS_LLINFO      0x00000080      /* has dst link-layer address */
#define FLOWRTF_DELETED         0x00000100      /* route has been deleted */
#define FLOWRTF_DST_LL_MCAST    0x00000200      /* dst is link layer multicast */
#define FLOWRTF_DST_LL_BCAST    0x00000400      /* dst is link layer broadcast */
#define FLOWRTF_STABLE_ADDR     0x00000800      /* local address prefers stable */
504
/*
 * Locking helpers for a flow_route; each one forwards to the matching
 * lck_mtx primitive on (_fr)->fr_lock.
 */
#define FR_LOCK(_fr)                    lck_mtx_lock(&(_fr)->fr_lock)
#define FR_TRY_LOCK(_fr)                lck_mtx_try_lock(&(_fr)->fr_lock)
#define FR_LOCK_ASSERT_HELD(_fr)        LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_OWNED)
#define FR_LOCK_ASSERT_NOTHELD(_fr)     LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_NOTOWNED)
#define FR_UNLOCK(_fr)                  lck_mtx_unlock(&(_fr)->fr_lock)

/*
 * Store _addr as the Ethernet destination of _fr and recompute the
 * link-layer destination flags: both FLOWRTF_DST_LL_MCAST and
 * FLOWRTF_DST_LL_BCAST are cleared first, then exactly one of them is
 * set when ETHER_IS_MULTICAST(_addr) holds (BCAST when the address
 * equals etherbroadcastaddr, MCAST otherwise).
 *
 * Both arguments are evaluated more than once; pass expressions
 * without side effects.
 */
#define FLOWRT_UPD_ETH_DST(_fr, _addr)  do {                            \
	bcopy((_addr), (_fr)->fr_eth.ether_dhost, ETHER_ADDR_LEN);      \
	(_fr)->fr_flags &= ~(FLOWRTF_DST_LL_MCAST|FLOWRTF_DST_LL_BCAST);\
	if (ETHER_IS_MULTICAST(_addr)) {                                \
	        if (_ether_cmp(etherbroadcastaddr, (_addr)) == 0) {     \
	                (_fr)->fr_flags |= FLOWRTF_DST_LL_BCAST;        \
	        } else {                                                \
	                (_fr)->fr_flags |= FLOWRTF_DST_LL_MCAST;        \
	        }                                                       \
	}                                                               \
} while (0)
526
527 RB_HEAD(flow_route_tree, flow_route);
528 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_tree, flow_route,
529 fr_link, fr_cmp);
530
531 struct flow_route_bucket {
532 decl_lck_rw_data(, frb_lock);
533 struct flow_route_tree frb_head;
534 const uint32_t frb_idx;
535 };
536
537 #define FRB_WLOCK(_frb) \
538 lck_rw_lock_exclusive(&(_frb)->frb_lock)
539 #define FRB_WLOCKTORLOCK(_frb) \
540 lck_rw_lock_exclusive_to_shared(&(_frb)->frb_lock)
541 #define FRB_WTRYLOCK(_frb) \
542 lck_rw_try_lock_exclusive(&(_frb)->frb_lock)
543 #define FRB_WUNLOCK(_frb) \
544 lck_rw_unlock_exclusive(&(_frb)->frb_lock)
545 #define FRB_RLOCK(_frb) \
546 lck_rw_lock_shared(&(_frb)->frb_lock)
547 #define FRB_RLOCKTOWLOCK(_frb) \
548 lck_rw_lock_shared_to_exclusive(&(_frb)->frb_lock)
549 #define FRB_RTRYLOCK(_frb) \
550 lck_rw_try_lock_shared(&(_frb)->frb_lock)
551 #define FRB_RUNLOCK(_frb) \
552 lck_rw_unlock_shared(&(_frb)->frb_lock)
553 #define FRB_UNLOCK(_frb) \
554 lck_rw_done(&(_frb)->frb_lock)
555 #define FRB_WLOCK_ASSERT_HELD(_frb) \
556 LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_EXCLUSIVE)
557 #define FRB_RLOCK_ASSERT_HELD(_frb) \
558 LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_SHARED)
559 #define FRB_LOCK_ASSERT_HELD(_frb) \
560 LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_HELD)
561
562 RB_HEAD(flow_route_id_tree, flow_route);
563 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_id_tree, flow_route,
564 fr_id_link, fr_id_cmp);
565
566 struct flow_route_id_bucket {
567 decl_lck_rw_data(, frib_lock);
568 struct flow_route_id_tree frib_head;
569 const uint32_t frib_idx;
570 };
571
572 #define FRIB_WLOCK(_frib) \
573 lck_rw_lock_exclusive(&(_frib)->frib_lock)
574 #define FRIB_WLOCKTORLOCK(_frib) \
575 lck_rw_lock_exclusive_to_shared(&(_frib)->frib_lock)
576 #define FRIB_WTRYLOCK(_frib) \
577 lck_rw_try_lock_exclusive(&(_frib)->frib_lock)
578 #define FRIB_WUNLOCK(_frib) \
579 lck_rw_unlock_exclusive(&(_frib)->frib_lock)
580 #define FRIB_RLOCK(_frib) \
581 lck_rw_lock_shared(&(_frib)->frib_lock)
582 #define FRIB_RLOCKTOWLOCK(_frib) \
583 lck_rw_lock_shared_to_exclusive(&(_frib)->frib_lock)
584 #define FRIB_RTRYLOCK(_frib) \
585 lck_rw_try_lock_shared(&(_frib)->frib_lock)
586 #define FRIB_RUNLOCK(_frib) \
587 lck_rw_unlock_shared(&(_frib)->frib_lock)
588 #define FRIB_UNLOCK(_frib) \
589 lck_rw_done(&(_frib)->frib_lock)
590 #define FRIB_WLOCK_ASSERT_HELD(_frib) \
591 LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_EXCLUSIVE)
592 #define FRIB_RLOCK_ASSERT_HELD(_frib) \
593 LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_SHARED)
594 #define FRIB_LOCK_ASSERT_HELD(_frib) \
595 LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_HELD)
596
597 struct flow_mgr {
598 char fm_name[IFNAMSIZ];
599 uuid_t fm_uuid;
600 RB_ENTRY(flow_mgr) fm_link;
601
602 struct cuckoo_hashtable *fm_flow_table;
603 size_t fm_flow_hash_count[FKMASK_IDX_MAX]; /* # of flows with mask */
604 uint16_t fm_flow_hash_masks[FKMASK_IDX_MAX];
605
606 void *__sized_by(fm_owner_bucket_tot_sz) fm_owner_buckets; /* cache-aligned fob */
607 size_t fm_owner_buckets_cnt; /* total # of fobs */
608 size_t fm_owner_bucket_sz; /* size of each fob */
609 size_t fm_owner_bucket_tot_sz; /* allocated size of each fob */
610
611 void *__sized_by(fm_route_bucket_tot_sz) fm_route_buckets; /* cache-aligned frb */
612 size_t fm_route_buckets_cnt; /* total # of frb */
613 size_t fm_route_bucket_sz; /* size of each frb */
614 size_t fm_route_bucket_tot_sz; /* allocated size of each frb */
615
616 void *__sized_by(fm_route_id_bucket_tot_sz) fm_route_id_buckets; /* cache-aligned frib */
617 size_t fm_route_id_buckets_cnt; /* total # of frib */
618 size_t fm_route_id_bucket_sz; /* size of each frib */
619 size_t fm_route_id_bucket_tot_sz; /* allocated size of each frib */
620 };
621
622 /*
623 * this func compare match with key;
624 * return values:
625 * 0 as long as @key(exact) matches what @match(wildcard) wants to match on.
626 * 1 when it doesn't match
627 */
628 static inline int
flow_key_cmp(const struct flow_key * match,const struct flow_key * key)629 flow_key_cmp(const struct flow_key *match, const struct flow_key *key)
630 {
631 #define FK_CMP(field, mask) \
632 if ((match->fk_mask & mask) != 0) { \
633 if ((key->fk_mask & mask) == 0) { \
634 return 1; \
635 } \
636 int d = memcmp(&match->field, &key->field, sizeof(match->field)); \
637 if (d != 0) { \
638 return d; \
639 } \
640 }
641
642 FK_CMP(fk_ipver, FKMASK_IPVER);
643 FK_CMP(fk_proto, FKMASK_PROTO);
644 FK_CMP(fk_src, FKMASK_SRC);
645 FK_CMP(fk_dst, FKMASK_DST);
646 FK_CMP(fk_sport, FKMASK_SPORT);
647 FK_CMP(fk_dport, FKMASK_DPORT);
648
649 return 0;
650 }
651
652 /*
653 * Similar to flow_key_cmp() except using memory compare with mask,
654 * done with SIMD instructions, if available for the platform.
655 */
656 static inline int
flow_key_cmp_mask(const struct flow_key * match,const struct flow_key * key,const struct flow_key * mask)657 flow_key_cmp_mask(const struct flow_key *match,
658 const struct flow_key *key, const struct flow_key *mask)
659 {
660 static_assert(FLOW_KEY_LEN == 48);
661 static_assert(FLOW_KEY_LEN == sizeof(struct flow_key));
662 static_assert((sizeof(struct flow_entry) % 16) == 0);
663 static_assert((offsetof(struct flow_entry, fe_key) % 16) == 0);
664
665 /* local variables are __bidi_indexable with -fbounds-safety */
666 const struct flow_key *match_idx = match;
667 const struct flow_key *key_idx = key;
668 const struct flow_key *mask_idx = mask;
669
670 return sk_memcmp_mask_48B((const uint8_t *)match_idx,
671 (const uint8_t *)key_idx, (const uint8_t *)mask_idx);
672 }
673
674 static inline uint32_t
flow_key_hash(const struct flow_key * key)675 flow_key_hash(const struct flow_key *key)
676 {
677 uint32_t hash = FK_HASH_SEED;
678 #define FK_HASH(field, mask) \
679 if ((key->fk_mask & mask) != 0) { \
680 hash = net_flowhash(&key->field, sizeof(key->field), hash); \
681 }
682
683 FK_HASH(fk_ipver, FKMASK_IPVER);
684 FK_HASH(fk_proto, FKMASK_PROTO);
685 FK_HASH(fk_src, FKMASK_SRC);
686 FK_HASH(fk_dst, FKMASK_DST);
687 FK_HASH(fk_sport, FKMASK_SPORT);
688 FK_HASH(fk_dport, FKMASK_DPORT);
689
690 return hash;
691 }
692
693 __attribute__((always_inline))
694 static inline void
flow_key_unpack(const struct flow_key * key,union sockaddr_in_4_6 * laddr,union sockaddr_in_4_6 * faddr,uint8_t * protocol)695 flow_key_unpack(const struct flow_key *key, union sockaddr_in_4_6 *laddr,
696 union sockaddr_in_4_6 *faddr, uint8_t *protocol)
697 {
698 *protocol = key->fk_proto;
699 if (key->fk_ipver == IPVERSION) {
700 laddr->sa.sa_family = AF_INET;
701 laddr->sin.sin_addr = key->fk_src4;
702 laddr->sin.sin_port = key->fk_sport;
703 faddr->sa.sa_family = AF_INET;
704 faddr->sin.sin_addr = key->fk_dst4;
705 faddr->sin.sin_port = key->fk_dport;
706 } else if (key->fk_ipver == IPV6_VERSION) {
707 laddr->sa.sa_family = AF_INET6;
708 laddr->sin6.sin6_addr = key->fk_src6;
709 laddr->sin6.sin6_port = key->fk_sport;
710 faddr->sa.sa_family = AF_INET6;
711 faddr->sin6.sin6_addr = key->fk_dst6;
712 faddr->sin6.sin6_port = key->fk_dport;
713 }
714 }
715
716 __attribute__((always_inline))
717 static inline int
flow_req2key(struct nx_flow_req * req,struct flow_key * key)718 flow_req2key(struct nx_flow_req *req, struct flow_key *key)
719 {
720 FLOW_KEY_CLEAR(key);
721
722 if (req->nfr_saddr.sa.sa_family == AF_INET) {
723 key->fk_ipver = IPVERSION;
724 key->fk_proto = req->nfr_ip_protocol;
725 key->fk_mask |= FKMASK_PROTO;
726 if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
727 key->fk_src4 = req->nfr_saddr.sin.sin_addr;
728 key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
729 }
730 if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
731 key->fk_dst4 = req->nfr_daddr.sin.sin_addr;
732 key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
733 }
734 if (sk_sa_has_port(SA(&req->nfr_saddr))) {
735 key->fk_sport = req->nfr_saddr.sin.sin_port;
736 key->fk_mask |= FKMASK_SPORT;
737 }
738 if (sk_sa_has_port(SA(&req->nfr_daddr))) {
739 key->fk_dport = req->nfr_daddr.sin.sin_port;
740 key->fk_mask |= FKMASK_DPORT;
741 }
742 } else if (req->nfr_saddr.sa.sa_family == AF_INET6) {
743 key->fk_ipver = IPV6_VERSION;
744 key->fk_proto = req->nfr_ip_protocol;
745 key->fk_mask |= FKMASK_PROTO;
746 if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
747 key->fk_src6 = req->nfr_saddr.sin6.sin6_addr;
748 key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
749 }
750 if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
751 key->fk_dst6 = req->nfr_daddr.sin6.sin6_addr;
752 key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
753 }
754 if (sk_sa_has_port(SA(&req->nfr_saddr))) {
755 key->fk_sport = req->nfr_saddr.sin6.sin6_port;
756 key->fk_mask |= FKMASK_SPORT;
757 }
758 if (sk_sa_has_port(SA(&req->nfr_daddr))) {
759 key->fk_dport = req->nfr_daddr.sin6.sin6_port;
760 key->fk_mask |= FKMASK_DPORT;
761 }
762 } else {
763 SK_ERR("unknown AF %d", req->nfr_saddr.sa.sa_family);
764 return ENOTSUP;
765 }
766
767 switch (key->fk_mask) {
768 case FKMASK_5TUPLE:
769 case FKMASK_4TUPLE:
770 case FKMASK_3TUPLE:
771 case FKMASK_2TUPLE:
772 case FKMASK_IPFLOW3:
773 case FKMASK_IPFLOW2:
774 case FKMASK_IPFLOW1:
775 break;
776 default:
777 SK_ERR("unknown flow key mask 0x%04x", key->fk_mask);
778 return ENOTSUP;
779 }
780
781 return 0;
782 }
783
784 __attribute__((always_inline))
785 static inline void
flow_pkt2key(struct __kern_packet * pkt,boolean_t input,struct flow_key * key)786 flow_pkt2key(struct __kern_packet *pkt, boolean_t input,
787 struct flow_key *key)
788 {
789 struct __flow *flow = pkt->pkt_flow;
790
791 FLOW_KEY_CLEAR(key);
792
793 if (__improbable((pkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) == 0)) {
794 return;
795 }
796
797 ASSERT(flow->flow_l3._l3_ip_ver != 0);
798
799 key->fk_ipver = flow->flow_l3._l3_ip_ver;
800 key->fk_proto = flow->flow_ip_proto;
801 if (input) {
802 if (flow->flow_ip_ver == IPVERSION) {
803 key->fk_src4 = flow->flow_ipv4_dst;
804 key->fk_sport = flow->flow_tcp_dst;
805 key->fk_dst4 = flow->flow_ipv4_src;
806 key->fk_dport = flow->flow_tcp_src;
807 } else {
808 key->fk_src6 = flow->flow_ipv6_dst;
809 key->fk_sport = flow->flow_tcp_dst;
810 key->fk_dst6 = flow->flow_ipv6_src;
811 key->fk_dport = flow->flow_tcp_src;
812 }
813 } else {
814 if (flow->flow_ip_ver == IPVERSION) {
815 key->fk_src4 = flow->flow_ipv4_src;
816 key->fk_sport = flow->flow_tcp_src;
817 key->fk_dst4 = flow->flow_ipv4_dst;
818 key->fk_dport = flow->flow_tcp_dst;
819 } else {
820 key->fk_src6 = flow->flow_ipv6_src;
821 key->fk_sport = flow->flow_tcp_src;
822 key->fk_dst6 = flow->flow_ipv6_dst;
823 key->fk_dport = flow->flow_tcp_dst;
824 }
825 }
826 }
827
828 __attribute__((always_inline))
829 static inline int
flow_ip_cmp(const void * a0,const void * b0,size_t alen)830 flow_ip_cmp(const void *a0, const void *b0, size_t alen)
831 {
832 struct flow_ip_addr *a = __DECONST(struct flow_ip_addr *, a0),
833 *b = __DECONST(struct flow_ip_addr *, b0);
834
835 switch (alen) {
836 case sizeof(struct in_addr):
837 if (a->_addr32[0] > b->_addr32[0]) {
838 return 1;
839 }
840 if (a->_addr32[0] < b->_addr32[0]) {
841 return -1;
842 }
843 break;
844
845 case sizeof(struct in6_addr):
846 if (a->_addr64[1] > b->_addr64[1]) {
847 return 1;
848 }
849 if (a->_addr64[1] < b->_addr64[1]) {
850 return -1;
851 }
852 if (a->_addr64[0] > b->_addr64[0]) {
853 return 1;
854 }
855 if (a->_addr64[0] < b->_addr64[0]) {
856 return -1;
857 }
858 break;
859
860 default:
861 VERIFY(0);
862 /* NOTREACHED */
863 __builtin_unreachable();
864 }
865 return 0;
866 }
867
868 __attribute__((always_inline))
869 static inline struct flow_owner_bucket *
flow_mgr_get_fob_at_idx(struct flow_mgr * fm,uint32_t idx)870 flow_mgr_get_fob_at_idx(struct flow_mgr *fm, uint32_t idx)
871 {
872 char *buckets = fm->fm_owner_buckets;
873 void *bucket = buckets + (idx * fm->fm_owner_bucket_sz);
874 return bucket;
875 }
876
877 __attribute__((always_inline))
878 static inline struct flow_route_bucket *
flow_mgr_get_frb_at_idx(struct flow_mgr * fm,uint32_t idx)879 flow_mgr_get_frb_at_idx(struct flow_mgr *fm, uint32_t idx)
880 {
881 char *buckets = fm->fm_route_buckets;
882 void *bucket = buckets + (idx * fm->fm_route_bucket_sz);
883 return bucket;
884 }
885
886 __attribute__((always_inline))
887 static inline struct flow_route_id_bucket *
flow_mgr_get_frib_at_idx(struct flow_mgr * fm,uint32_t idx)888 flow_mgr_get_frib_at_idx(struct flow_mgr *fm, uint32_t idx)
889 {
890 char *buckets = fm->fm_route_id_buckets;
891 void *bucket = buckets + (idx * fm->fm_route_id_bucket_sz);
892 return bucket;
893 }
894
895 __attribute__((always_inline))
896 static inline uint32_t
flow_mgr_get_fob_idx(struct flow_mgr * fm,struct flow_owner_bucket * bkt)897 flow_mgr_get_fob_idx(struct flow_mgr *fm,
898 struct flow_owner_bucket *bkt)
899 {
900 ASSERT(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) %
901 fm->fm_owner_bucket_sz == 0);
902 return (uint32_t)(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) /
903 fm->fm_owner_bucket_sz);
904 }
905
906 __attribute__((always_inline))
907 static inline size_t
flow_mgr_get_num_flows(struct flow_mgr * mgr)908 flow_mgr_get_num_flows(struct flow_mgr *mgr)
909 {
910 ASSERT(mgr->fm_flow_table != NULL);
911 return cuckoo_hashtable_entries(mgr->fm_flow_table);
912 }
913
914 extern unsigned int sk_fo_size;
915 extern struct skmem_cache *sk_fo_cache;
916
917 extern unsigned int sk_fe_size;
918 extern struct skmem_cache *sk_fe_cache;
919
920 extern unsigned int sk_fab_size;
921 extern struct skmem_cache *sk_fab_cache;
922
923 extern uint32_t flow_seed;
924
925 extern struct skmem_cache *flow_route_cache;
926 extern struct skmem_cache *flow_stats_cache;
927
928 __BEGIN_DECLS
929
/*
 * Callback types accepted by flow_mgr_flow_add() (its fr_ctor and
 * fr_resolve parameters) and by flow_route_find().
 *
 * Both receive an opaque arg and a struct flow_route.  The resolve
 * callback additionally takes a struct __kern_packet and returns an int.
 * NOTE(review): arg is presumably the void * passed alongside the
 * callbacks (fr_arg) -- confirm against the implementation.
 */
930 typedef void (*flow_route_ctor_fn_t)(void *arg, struct flow_route *);
931 typedef int (*flow_route_resolve_fn_t)(void *arg, struct flow_route *,
932 struct __kern_packet *);
933
934 extern int flow_init(void);
935 extern void flow_fini(void);
936
937 extern void flow_mgr_init(void);
938 extern void flow_mgr_fini(void);
939 extern struct flow_mgr *flow_mgr_find_lock(uuid_t);
940 extern void flow_mgr_unlock(void);
941 extern struct flow_mgr * flow_mgr_create(size_t, size_t, size_t, size_t);
942 extern void flow_mgr_destroy(struct flow_mgr *);
943 extern void flow_mgr_terminate(struct flow_mgr *);
944 extern int flow_mgr_flow_add(struct kern_nexus *nx, struct flow_mgr *fm,
945 struct flow_owner *fo, struct ifnet *ifp, struct nx_flow_req *req,
946 flow_route_ctor_fn_t fr_ctor, flow_route_resolve_fn_t fr_resolve, void *fr_arg);
947 extern struct flow_owner_bucket *flow_mgr_get_fob_by_pid(
948 struct flow_mgr *, pid_t);
949 extern struct flow_entry *flow_mgr_get_fe_by_uuid_rlock(
950 struct flow_mgr *, uuid_t);
951 extern struct flow_route_bucket *flow_mgr_get_frb_by_addr(
952 struct flow_mgr *, union sockaddr_in_4_6 *);
953 extern struct flow_route_id_bucket *flow_mgr_get_frib_by_uuid(
954 struct flow_mgr *, uuid_t);
955 extern int flow_mgr_flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask);
956 extern int flow_mgr_flow_hash_mask_del(struct flow_mgr *fm, uint32_t mask);
957
958 extern struct flow_entry * fe_alloc(boolean_t can_block);
959
960 extern int flow_namespace_create(union sockaddr_in_4_6 *, uint8_t protocol,
961 netns_token *, uint32_t, struct ns_flow_info *);
962 extern void flow_namespace_half_close(netns_token *token);
963 extern void flow_namespace_withdraw(netns_token *);
964 extern void flow_namespace_destroy(netns_token *);
965
966 extern struct flow_owner_bucket *__sized_by(*tot_sz)
967 flow_owner_buckets_alloc(size_t, size_t *, size_t * tot_sz);
968 extern void flow_owner_buckets_free(struct flow_owner_bucket *, size_t);
969 extern void flow_owner_bucket_init(struct flow_owner_bucket *);
970 extern void flow_owner_bucket_destroy(struct flow_owner_bucket *);
971 extern void flow_owner_bucket_purge_all(struct flow_owner_bucket *);
972 extern void flow_owner_attach_nexus_port(struct flow_mgr *, boolean_t,
973 pid_t, nexus_port_t);
974 extern uint32_t flow_owner_detach_nexus_port(struct flow_mgr *,
975 boolean_t, pid_t, nexus_port_t, boolean_t);
976 extern struct flow_owner *flow_owner_alloc(struct flow_owner_bucket *,
977 struct proc *, nexus_port_t, bool, bool, struct nx_flowswitch*,
978 struct nexus_adapter *, void *, bool);
979 extern void flow_owner_free(struct flow_owner_bucket *, struct flow_owner *);
980 extern struct flow_entry *flow_owner_create_entry(struct flow_owner *,
981 struct nx_flow_req *, boolean_t, uint32_t, boolean_t,
982 struct flow_route *, int *);
983 extern int flow_owner_destroy_entry(struct flow_owner *, uuid_t, bool, void *);
984 extern struct flow_owner *flow_owner_find_by_pid(struct flow_owner_bucket *,
985 pid_t, void *, bool);
986 extern int flow_owner_flowadv_index_alloc(struct flow_owner *, flowadv_idx_t *);
987 extern void flow_owner_flowadv_index_free(struct flow_owner *, flowadv_idx_t);
988 extern uint32_t flow_owner_activate_nexus_port(struct flow_mgr *,
989 boolean_t, pid_t, nexus_port_t, struct nexus_adapter *,
990 na_activate_mode_t);
991
992 extern struct flow_entry *flow_mgr_find_fe_by_key(struct flow_mgr *,
993 struct flow_key *);
994 extern struct flow_entry * flow_mgr_find_conflicting_fe(struct flow_mgr *fm,
995 struct flow_key *fe_key);
996 extern void flow_mgr_foreach_flow(struct flow_mgr *fm,
997 void (^flow_handler)(struct flow_entry *fe));
998 extern struct flow_entry *flow_entry_find_by_uuid(struct flow_owner *,
999 uuid_t);
1000 extern struct flow_entry * flow_entry_alloc(struct flow_owner *fo,
1001 struct nx_flow_req *req, int *perr);
1002 extern void flow_entry_teardown(struct flow_owner *, struct flow_entry *);
1003 extern void flow_entry_destroy(struct flow_owner *, struct flow_entry *, bool,
1004 void *);
1005 extern int flow_entry_add_rx_steering_rule(struct nx_flowswitch *fsw,
1006 struct flow_entry *fe);
1007 extern void flow_entry_rx_steering_rule_cleanup(struct nx_flowswitch *,
1008 struct flow_entry *);
1009 extern void flow_entry_retain(struct flow_entry *fe);
1010 extern void flow_entry_release(struct flow_entry **pfe);
1011 extern uint32_t flow_entry_refcnt(struct flow_entry *fe);
1012 extern bool rx_flow_demux_match(struct nx_flowswitch *, struct flow_entry *, struct __kern_packet *);
1013 extern struct flow_entry *rx_lookup_child_flow(struct nx_flowswitch *fsw,
1014 struct flow_entry *, struct __kern_packet *);
1015 extern struct flow_entry *tx_lookup_child_flow(struct flow_entry *, uuid_t);
1016
1017 extern struct flow_entry_dead *flow_entry_dead_alloc(zalloc_flags_t);
1018 extern void flow_entry_dead_free(struct flow_entry_dead *);
1019
1020 extern void flow_entry_stats_get(struct flow_entry *, struct sk_stats_flow *);
1021 extern void fe_stats_update(struct flow_entry *);
1022
1023 extern int flow_pkt_classify(struct __kern_packet *pkt, struct ifnet *ifp,
1024 sa_family_t af, bool input);
1025
1026 extern void flow_track_stats(struct flow_entry *, uint64_t, uint64_t,
1027 bool, bool);
1028 extern int flow_pkt_track(struct flow_entry *, struct __kern_packet *, bool);
1029 extern boolean_t flow_track_tcp_want_abort(struct flow_entry *);
1030 extern void flow_track_abort_tcp( struct flow_entry *fe,
1031 struct __kern_packet *in_pkt, struct __kern_packet *rst_pkt);
1032 extern void flow_track_abort_quic(struct flow_entry *fe,
1033 uint8_t *__counted_by(QUIC_STATELESS_RESET_TOKEN_SIZE)token);
1034
1035 extern void fsw_host_rx_cb(struct nx_flowswitch *fsw, struct pktq *pktq);
1036 extern void fsw_host_rx_enqueue_mbq(struct nx_flowswitch *fsw, struct pktq *pktq,
1037 struct mbufq *host_mq);
1038 extern void fsw_host_sendup(struct ifnet *ifp, struct mbufq *host_mq);
1039
1040 extern void flow_rx_agg_tcp(struct nx_flowswitch *fsw, struct flow_entry *fe,
1041 struct pktq *rx_pkts, uint32_t rx_bytes, struct mbufq *host_mq,
1042 uint32_t flags);
1043
1044 extern void flow_route_init(void);
1045 extern void flow_route_fini(void);
1046 extern struct flow_route_bucket *__sized_by(*tot_sz)
1047 flow_route_buckets_alloc(size_t, size_t *, size_t * tot_sz);
1048 extern void flow_route_buckets_free(struct flow_route_bucket *, size_t);
1049 extern void flow_route_bucket_init(struct flow_route_bucket *);
1050 extern void flow_route_bucket_destroy(struct flow_route_bucket *);
1051 extern void flow_route_bucket_purge_all(struct flow_route_bucket *);
1052 extern struct flow_route_id_bucket *__sized_by(*tot_sz)
1053 flow_route_id_buckets_alloc(size_t, size_t *, size_t * tot_sz);
1054 extern void flow_route_id_buckets_free(struct flow_route_id_bucket *, size_t);
1055 extern void flow_route_id_bucket_init(struct flow_route_id_bucket *);
1056 extern void flow_route_id_bucket_destroy(struct flow_route_id_bucket *);
1057
1058 extern int flow_route_select_laddr(union sockaddr_in_4_6 *,
1059 union sockaddr_in_4_6 *, struct ifnet *, struct rtentry *, uint32_t *, int);
1060 extern int flow_route_find(struct kern_nexus *, struct flow_mgr *,
1061 struct ifnet *, struct nx_flow_req *, flow_route_ctor_fn_t,
1062 flow_route_resolve_fn_t, void *, struct flow_route **);
1063 extern int flow_route_configure(struct flow_route *, struct ifnet *, struct nx_flow_req *);
1064 extern void flow_route_retain(struct flow_route *);
1065 extern void flow_route_release(struct flow_route *);
1066 extern uint32_t flow_route_prune(struct flow_mgr *, struct ifnet *,
1067 uint32_t *);
1068 extern void flow_route_cleanup(struct flow_route *);
1069 extern boolean_t flow_route_laddr_validate(union sockaddr_in_4_6 *,
1070 struct ifnet *, uint32_t *);
1071 extern boolean_t flow_route_key_validate(struct flow_key *, struct ifnet *,
1072 uint32_t *);
1073 extern void flow_qset_select_dynamic(struct nx_flowswitch *,
1074 struct flow_entry *, boolean_t);
1075 extern void flow_stats_init(void);
1076 extern void flow_stats_fini(void);
1077 extern struct flow_stats *flow_stats_alloc(boolean_t cansleep);
1078
/*
 * Debug string helpers, declared only when SK_LOG is enabled.
 *
 * fk2str() takes a flow key and fe2str() takes a flow entry.  Each also
 * takes a destination buffer dst annotated as holding dsz chars, and
 * returns a char *.
 * NOTE(review): the *_DBGBUF_SIZE constants are presumably the buffer
 * sizes callers pass as dsz -- confirm against call sites.
 */
1079 #if SK_LOG
1080 #define FLOWKEY_DBGBUF_SIZE 256
1081 #define FLOWENTRY_DBGBUF_SIZE 512
1082 extern char *fk2str(const struct flow_key *fk, char *__counted_by(dsz)dst, size_t dsz);
1083 extern char *fe2str(const struct flow_entry *fe, char *__counted_by(dsz)dst, size_t dsz);
1084 #endif /* SK_LOG */
1085 __END_DECLS
1086 #endif /* BSD_KERNEL_PRIVATE */
1087 #endif /* !_SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_ */
1088