xref: /xnu-10063.121.3/bsd/skywalk/nexus/flowswitch/flow/flow_var.h (revision 2c2f96dc2b9a4408a43d3150ae9c105355ca3daa)
1 /*
2  * Copyright (c) 2016-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Once a packet is classified, it goes through checks to see if there
31  * is a matching flow entry in the flow table.  The key used to search
32  * the entry is composed of the fields contained in struct flow_ptrs.
33  *
34  * Flow entry insertion and deletion to the flow table, on behalf of
35  * the owning client process, requires the use of the rule ID (UUID)
36  * as the search key.
37  *
38  * Because of the above, each flow entry simultaneously exists in two
39  * respective trees: flow_entry_tree and flow_entry_id_tree.
40  *
41  * Using a single RW lock to protect the two trees is simple, but the
42  * data path performance is impacted during flow insertion and deletion,
43  * especially as the number of client processes and flows grow.
44  *
45  * To solve that, we deploy the following scheme:
46  *
47  * Given that the flow_entry_tree is searched on a per-packet basis,
48  * we break it down into a series of trees, each one contained within
49  * a flow_bucket structure.  The hash from flow_ptrs determines the
50  * index of the flow_bucket to search the flow_entry_tree from.
51  *
52  * The flow_entry_id_tree is searched on each flow insertion and
53  * deletion, and similarly we break it down into a series of trees,
54  * each contained within a flow_owner_bucket structure. We use the
55  * client process ID (pid_t) to determine the bucket index.
56  *
57  * Each flow_bucket and flow_owner_bucket structure is dynamically
58  * created, and is aligned on the CPU cache boundary.  The number
59  * of buckets is determined by the client module when the flow manager
60  * context is initialized.  The alignment is done to avoid false
61  * sharing, especially given that each bucket has its own RW lock.
62  */
63 
64 #ifndef _SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_
65 #define _SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_
66 
67 #ifdef BSD_KERNEL_PRIVATE
68 #include <skywalk/core/skywalk_var.h>
69 #include <skywalk/lib/cuckoo_hashtable.h>
70 #include <skywalk/namespace/netns.h>
71 #include <skywalk/namespace/protons.h>
72 #include <skywalk/packet/packet_var.h>
73 #include <net/flowhash.h>
74 #include <netinet/ip.h>
75 #include <netinet/in_stat.h>
76 #include <netinet/ip6.h>
77 #include <sys/eventhandler.h>
78 
79 RB_HEAD(flow_owner_tree, flow_owner);
80 
/*
 * A bucket of flow owners.  fob_owner_head roots the red-black tree of
 * struct flow_owner kept in this bucket, and fob_lock is the mutex that
 * the FOB_LOCK*() macros operate on.
 *
 * NOTE(review): fob_busy_flags presumably holds FOBF_* bits and the
 * fob_*_waiters fields presumably count sleepers on the corresponding
 * busy conditions; fob_idx is presumably this bucket's index in the
 * flow manager's owner bucket array -- confirm against the users.
 */
81 struct flow_owner_bucket {
82 	decl_lck_mtx_data(, fob_lock);
83 	struct flow_owner_tree  fob_owner_head;
84 	uint16_t                fob_busy_flags;
85 	uint16_t                fob_open_waiters;
86 	uint16_t                fob_close_waiters;
87 	uint16_t                fob_dtor_waiters;
88 	const size_t            fob_idx;
89 };
90 
91 #define FOBF_OPEN_BUSY          0x1     /* flow open monitor */
92 #define FOBF_CLOSE_BUSY         0x2     /* flow close monitor */
93 #define FOBF_DEAD               0x4     /* no longer usable */
94 
/*
 * Locking helpers for struct flow_owner_bucket.  Each one applies the
 * lck_mtx primitive named in its expansion to the bucket's fob_lock;
 * the two ASSERT variants check mutex ownership via LCK_MTX_ASSERT().
 */
95 #define FOB_LOCK(_fob)                  \
96 	lck_mtx_lock(&(_fob)->fob_lock)
97 #define FOB_LOCK_SPIN(_fob)             \
98 	lck_mtx_lock_spin(&(_fob)->fob_lock)
99 #define FOB_LOCK_CONVERT(_fob)          \
100 	lck_mtx_convert_spin(&(_fob)->fob_lock)
101 #define FOB_TRY_LOCK(_fob)              \
102 	lck_mtx_try_lock(&(_fob)->fob_lock)
103 #define FOB_LOCK_ASSERT_HELD(_fob)      \
104 	LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_OWNED)
105 #define FOB_LOCK_ASSERT_NOTHELD(_fob)   \
106 	LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_NOTOWNED)
107 #define FOB_UNLOCK(_fob)                \
108 	lck_mtx_unlock(&(_fob)->fob_lock)
109 
110 RB_HEAD(flow_entry_id_tree, flow_entry);
111 
112 #define FLOW_PROCESS_NAME_LENGTH        24
113 
/*
 * A flow owner.  fo_link is its linkage in a bucket's flow_owner_tree,
 * and fo_flow_entry_id_head roots the flow_entry_id_tree of the flow
 * entries that belong to this owner.  fo_bucket points back at the
 * containing bucket; it is stored const, use FO_BUCKET() to obtain a
 * writable pointer.
 *
 * NOTE(review): fo_pid/fo_name presumably identify the owning client
 * process (fo_name is sized by FLOW_PROCESS_NAME_LENGTH) -- confirm.
 */
114 struct flow_owner {
115 	RB_ENTRY(flow_owner)    fo_link;
116 	struct flow_entry_id_tree fo_flow_entry_id_head;
117 	const struct flow_owner_bucket *fo_bucket;
118 	void                    *fo_context;
119 	pid_t                   fo_pid;
120 	bool                    fo_nx_port_pid_bound;
121 	bool                    fo_nx_port_destroyed;
122 	bool                    fo_low_latency;
123 	nexus_port_t            fo_nx_port;
124 	uuid_t                  fo_key;
125 
126 	struct nexus_adapter *  const fo_nx_port_na;
127 	struct nx_flowswitch *  const fo_fsw;
128 
129 	/*
130 	 * Array of bitmaps to manage the flow advisory table indices.
131 	 * Currently we are restricting a flow owner to a single nexus
132 	 * port, so this structure is effectively managing the flow advisory
133 	 * indices for a port.
134 	 */
135 	bitmap_t                *fo_flowadv_bmap;
136 	uint32_t                fo_flowadv_max;
137 	uint32_t                fo_num_flowadv;
138 
139 	/* for debugging */
140 	char                    fo_name[FLOW_PROCESS_NAME_LENGTH];
141 };
142 
143 #define FO_BUCKET(_fo)  \
144 	__DECONST(struct flow_owner_bucket *, (_fo)->fo_bucket)
145 
146 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_owner_tree, flow_owner,
147     fo_link, fo_cmp);
148 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_entry_id_tree, flow_entry,
149     fe_id_link, fe_id_cmp);
150 
/*
 * Flow tracking states.  The TCP states are numbered from 0, the UDP
 * states start at 20, and FT_STATE_MAX is 255, so every value fits in
 * the 8-bit fse_state member of struct flow_track.
 */
151 typedef enum {
152 	/*
153 	 * TCP states.
154 	 */
155 	FT_STATE_CLOSED = 0,            /* closed */
156 	FT_STATE_LISTEN,                /* listening for connection */
157 	FT_STATE_SYN_SENT,              /* active, have sent SYN */
158 	FT_STATE_SYN_RECEIVED,          /* have sent and rcvd SYN */
159 	FT_STATE_ESTABLISHED,           /* established */
160 	FT_STATE_CLOSE_WAIT,            /* rcvd FIN, waiting close */
161 	FT_STATE_FIN_WAIT_1,            /* have sent FIN */
162 	FT_STATE_CLOSING,               /* exchanged FINs, waiting FIN|ACK */
163 	FT_STATE_LAST_ACK,              /* rcvd FIN, closed, waiting FIN|ACK */
164 	FT_STATE_FIN_WAIT_2,            /* closed, FIN is ACK'd */
165 	FT_STATE_TIME_WAIT,             /* quiet wait after close */
166 
167 	/*
168 	 * UDP states.
169 	 */
170 	FT_STATE_NO_TRAFFIC = 20,       /* no packet observed */
171 	FT_STATE_SINGLE,                /* single packet */
172 	FT_STATE_MULTIPLE,              /* multiple packets */
173 
174 	FT_STATE_MAX = 255
175 } flow_track_state_t;
176 
/*
 * ACK round-trip-time tracking state; embedded in struct flow_track as
 * fse_rtt.  One segment (frtt_seg_begin..frtt_seg_end) is tracked at a
 * time.
 */
177 struct flow_track_rtt {
178 	uint64_t        frtt_timestamp; /* tracked segment timestamp */
179 	uint64_t        frtt_last;      /* previous net_uptime(rate limiting) */
180 	uint32_t        frtt_seg_begin; /* tracked segment begin SEQ */
181 	uint32_t        frtt_seg_end;   /* tracked segment end SEQ */
182 	uint32_t        frtt_usec;      /* avg RTT in usec */
183 };
184 
185 #define FLOWTRACK_RTT_SAMPLE_INTERVAL   2       /* sample ACK RTT every 2 sec */
186 
/*
 * Tracking state for one endpoint of a flow.  A flow entry carries two
 * of these, fe_ltrack (local) and fe_rtrack (remote).  The structure is
 * 8-byte aligned, and fse_rtt_usec is shorthand for the average RTT
 * held in the embedded fse_rtt.
 */
187 struct flow_track {
188 	/*
189 	 * TCP specific tracking info.
190 	 */
191 	uint32_t fse_seqlo;     /* max sequence number sent */
192 	uint32_t fse_seqhi;     /* max the other end ACKd + win	*/
193 	uint32_t fse_seqlast;   /* last sequence number (FIN) */
194 	uint16_t fse_max_win;   /* largest window (pre scaling)	*/
195 	uint16_t fse_mss;       /* maximum segment size option */
196 	uint8_t fse_state;      /* active state level (FT_STATE_*) */
197 	uint8_t fse_wscale;     /* window scaling factor */
198 	uint16_t fse_flags;     /* FLOWSTATEF_* */
199 	uint32_t fse_syn_ts;    /* SYN timestamp */
200 	uint32_t fse_syn_cnt;   /* # of SYNs per second */
201 
202 	struct flow_track_rtt   fse_rtt;        /* ACK RTT tracking */
203 #define fse_rtt_usec    fse_rtt.frtt_usec
204 } __sk_aligned(8);
205 
206 /* valid values for fse_flags */
207 #define FLOWSTATEF_WSCALE       0x1     /* fse_wscale is valid */
208 
/*
 * Link-layer header storage; struct flow_route embeds one as fr_llhdr.
 * The storage union is 16 bytes (_buf[2]) and 8-byte aligned; in the
 * Ethernet view a 2-byte pad precedes struct ether_header.
 *
 * NOTE(review): the pad presumably makes the Ethernet header end on the
 * 16-byte boundary, and flh_off/flh_len presumably give the offset and
 * length of the valid header bytes within the storage -- confirm
 * against the code that initializes them.
 */
209 struct flow_llhdr {
210 	uint32_t                flh_gencnt;     /* link-layer address gencnt */
211 
212 	const uint8_t           flh_off;
213 	const uint8_t           flh_len;
214 	uint16_t                flh_pad;        /* for future */
215 
216 	union _flh_u {
217 		uint64_t        _buf[2];
218 		struct {
219 			uint16_t _eth_pad;
220 			struct ether_header _eth;
221 		} _eth_padded;
222 	}  __sk_aligned(8)      _flh;
223 #define flh_eth_padded          _flh._eth_padded
224 #define flh_eth                 _flh._eth_padded._eth
225 };
226 
/*
 * Queue set selection mode, stored in a flow entry's fe_qset_select.
 * NOTE(review): presumably NONE means no qset is bound, FIXED a
 * caller-specified one and DYNAMIC one chosen at run time -- confirm.
 */
227 typedef enum {
228 	FE_QSET_SELECT_NONE,
229 	FE_QSET_SELECT_FIXED,
230 	FE_QSET_SELECT_DYNAMIC
231 } flow_qset_select_t;
232 
233 extern kern_allocation_name_t skmem_tag_flow_demux;
/*
 * Masked byte comparison callback: compares src1 against src2 under
 * byte_mask.  NOTE(review): presumably returns 0 on a match, in the
 * manner of memcmp() -- confirm against the implementations.
 */
234 typedef int (*flow_demux_memcmp_mask_t)(const uint8_t *src1, const uint8_t *src2,
235     const uint8_t *byte_mask);
236 
/*
 * Kernel-side demux pattern: a flow_demux_pattern paired with the
 * masked compare routine to use for it.
 */
237 struct kern_flow_demux_pattern {
238 	struct flow_demux_pattern  fdp_demux_pattern;
239 	flow_demux_memcmp_mask_t   fdp_memcmp_mask;
240 };
241 
242 #define MAX_PKT_DEMUX_LIMIT        1000
243 
244 TAILQ_HEAD(flow_entry_list, flow_entry);
245 
246 #define FLOW_PROC_FLAG_GSO        0x0001
247 typedef void (*flow_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe,
248     uint32_t flags);
249 
/*
 * A single flow.  The members are laid out in groups (Common, Rx, Tx,
 * Misc).  flow_key_cmp_mask() statically asserts that fe_key sits at a
 * 16-byte-aligned offset and that the structure size is a multiple of
 * 16 bytes, so preserve both properties when adding or moving members.
 *
 * fe_id_link is the linkage for flow_entry_id_tree; fe_rx_link,
 * fe_tx_link, fe_linger_link and fe_child_link are independent list
 * linkages.  fe_flags takes FLOWENTF_* values.
 */
250 struct flow_entry {
251 	/**** Common Group ****/
252 	os_refcnt_t             fe_refcnt;
253 	struct flow_key         fe_key;
254 	uint32_t                fe_flags;
255 	uint32_t                fe_key_hash;
256 	struct cuckoo_node      fe_cnode;
257 
258 	uuid_t                  fe_uuid __sk_aligned(8);
259 	nexus_port_t            fe_nx_port;
260 	uint32_t                fe_laddr_gencnt;
261 	uint32_t                fe_want_nonviable;
262 	uint32_t                fe_want_withdraw;
263 	uint8_t                 fe_transport_protocol;
264 
265 	/**** Rx Group ****/
266 	uint16_t                fe_rx_frag_count;
267 	uint32_t                fe_rx_pktq_bytes;
268 	struct pktq             fe_rx_pktq;
269 	TAILQ_ENTRY(flow_entry) fe_rx_link;
270 	flow_action_t           fe_rx_process;
271 
272 	/*
273 	 * largest allocated packet size.
274 	 * used by:
275 	 *  - mbuf batch allocation logic during RX aggregation and netif copy.
276 	 *  - packet allocation logic during RX aggregation.
277 	 */
278 	uint32_t                fe_rx_largest_size;
279 
280 	/**** Tx Group ****/
281 	bool                    fe_tx_is_cont_frag;
282 	uint32_t                fe_tx_frag_id;
283 	struct pktq             fe_tx_pktq;
284 	TAILQ_ENTRY(flow_entry) fe_tx_link;
285 	flow_action_t           fe_tx_process;
286 
287 	uuid_t                  fe_eproc_uuid __sk_aligned(8);
288 	flowadv_idx_t           fe_adv_idx;
289 	kern_packet_svc_class_t fe_svc_class;
290 	uint32_t                fe_policy_id;   /* policy id matched to flow */
291 	uint32_t                fe_skip_policy_id; /* skip policy id matched to flow */
292 
293 	/**** Misc Group ****/
294 	struct nx_flowswitch *  const fe_fsw;
295 	struct ns_token         *fe_port_reservation;
296 	struct protons_token    *fe_proto_reservation;
297 	void                    *fe_ipsec_reservation;
298 
299 	struct flow_track       fe_ltrack;      /* local endpoint state */
300 	struct flow_track       fe_rtrack;      /* remote endpoint state */
301 
302 	/*
303 	 * Flow stats are kept externally stand-alone, refcnt'ed by various
304 	 * users (e.g. flow_entry, necp_client_flow, etc.)
305 	 */
306 	struct flow_stats       *fe_stats;
307 	struct flow_route       *fe_route;
308 
309 	RB_ENTRY(flow_entry)    fe_id_link;
310 
311 	TAILQ_ENTRY(flow_entry) fe_linger_link;
312 	uint64_t                fe_linger_expire; /* expiration deadline */
313 	uint32_t                fe_linger_wait;   /* linger time (seconds) */
314 
315 	pid_t                   fe_pid;
316 	pid_t                   fe_epid;
317 	char                    fe_proc_name[FLOW_PROCESS_NAME_LENGTH];
318 	char                    fe_eproc_name[FLOW_PROCESS_NAME_LENGTH];
319 
320 	uint32_t                fe_flowid; /* globally unique flow ID */
321 
322 	/* Logical link related information */
323 	struct netif_qset      *fe_qset;
324 	uint64_t                fe_qset_id;
325 	flow_qset_select_t      fe_qset_select;
326 	uint32_t                fe_tr_genid;
327 
328 	/* Parent child information */
329 	decl_lck_rw_data(, fe_child_list_lock);
330 	struct flow_entry_list          fe_child_list;
331 	TAILQ_ENTRY(flow_entry)         fe_child_link;
332 #if DEVELOPMENT || DEBUG
333 	int16_t                         fe_child_count;
334 #endif // DEVELOPMENT || DEBUG
335 	uint8_t                         fe_demux_pattern_count;
336 	struct kern_flow_demux_pattern  *fe_demux_patterns;
337 	uint8_t                         *fe_demux_pkt_data;
338 };
339 
340 /* valid values for fe_flags */
341 #define FLOWENTF_INITED                 0x00000001 /* {src,dst} states initialized */
342 #define FLOWENTF_TRACK                  0x00000010 /* enable state tracking */
343 #define FLOWENTF_CONNECTED              0x00000020 /* connected mode */
344 #define FLOWENTF_LISTENER               0x00000040 /* listener mode */
345 #define FLOWENTF_QOS_MARKING            0x00000100 /* flow can have qos marking */
346 #define FLOWENTF_LOW_LATENCY            0x00000200 /* low latency flow */
347 #define FLOWENTF_WAIT_CLOSE             0x00001000 /* defer free after close */
348 #define FLOWENTF_CLOSE_NOTIFY           0x00002000 /* notify NECP upon tear down */
349 #define FLOWENTF_EXTRL_PORT             0x00004000 /* port reservation is held externally */
350 #define FLOWENTF_EXTRL_PROTO            0x00008000 /* proto reservation is held externally */
351 #define FLOWENTF_EXTRL_FLOWID           0x00010000 /* flowid reservation is held externally */
352 #define FLOWENTF_CHILD                  0x00020000 /* child flow */
353 #define FLOWENTF_PARENT                 0x00040000 /* parent flow */
354 #define FLOWENTF_NOWAKEFROMSLEEP        0x00080000 /* don't wake for this flow */
355 #define FLOWENTF_ABORTED                0x01000000 /* has sent RST to peer */
356 #define FLOWENTF_NONVIABLE              0x02000000 /* disabled; awaiting tear down */
357 #define FLOWENTF_WITHDRAWN              0x04000000 /* flow has been withdrawn */
358 #define FLOWENTF_TORN_DOWN              0x08000000 /* torn down and awaiting destroy */
359 #define FLOWENTF_HALF_CLOSED            0x10000000 /* flow is half closed */
360 #define FLOWENTF_DESTROYED              0x40000000 /* not in RB trees anymore */
361 #define FLOWENTF_LINGERING              0x80000000 /* destroyed and in linger list */
362 
/*
 * Bit-name table for fe_flags, in the BSD kernel printf "%b" layout:
 * the leading \020 selects hexadecimal output, and every following
 * entry is a 1-based bit number (written as an octal escape) followed
 * by the name printed when that bit is set.  Keep this table in sync
 * with the FLOWENTF_* definitions above; a flag without an entry here
 * is silently omitted from the decoded output.
 */
#define FLOWENTF_BITS                                            \
    "\020\01INITED\05TRACK\06CONNECTED\07LISTENER\011QOS_MARKING" \
    "\012LOW_LATENCY\015WAIT_CLOSE\016CLOSE_NOTIFY\017EXT_PORT"  \
    "\020EXT_PROTO\021EXT_FLOWID\022CHILD\023PARENT"             \
    "\024NOWAKEFROMSLEEP\031ABORTED\032NONVIABLE\033WITHDRAWN"   \
    "\034TORN_DOWN\035HALF_CLOSED\037DESTROYED\40LINGERING"
368 
369 TAILQ_HEAD(flow_entry_linger_head, flow_entry);
370 
/*
 * List-linked record holding a flow's rule UUID together with two
 * boolean request flags.  The UUID storage is a union, so the same
 * 16 bytes can also be accessed as two 64-bit or four 32-bit words.
 *
 * NOTE(review): presumably queued when a flow goes away so that the
 * nonviable / close notifications can be issued later -- confirm
 * against the consumer of this list.
 */
371 struct flow_entry_dead {
372 	LIST_ENTRY(flow_entry_dead)     fed_link;
373 
374 	boolean_t               fed_want_nonviable;
375 	boolean_t               fed_want_clonotify;
376 
377 	/* rule (flow) UUID */
378 	union {
379 		uint64_t        fed_uuid_64[2];
380 		uint32_t        fed_uuid_32[4];
381 		uuid_t          fed_uuid;
382 	} __sk_aligned(8);
383 };
384 
385 /*
386  * Minimum refcnt for a flow route entry to be considered as idle.
387  */
388 #define FLOW_ROUTE_MINREF       2       /* for the 2 RB trees */
389 
/*
 * A route used by flow entries to reach a foreign address.  The object
 * is linked into two red-black trees at once: flow_route_tree through
 * fr_link and flow_route_id_tree through fr_id_link (hence
 * FLOW_ROUTE_MINREF).  fr_flags takes FLOWRTF_* values, and fr_lock is
 * the mutex behind the FR_LOCK*() macros.
 */
390 struct flow_route {
391 	RB_ENTRY(flow_route)    fr_link;
392 	RB_ENTRY(flow_route)    fr_id_link;
393 
394 	/*
395 	 * fr_laddr represents the local address that the system chooses
396 	 * for the foreign destination in fr_faddr.  The flow entry that
397 	 * is referring to this flow route object may choose a different
398 	 * local address if it wishes.
399 	 *
400 	 * fr_gaddr represents the gateway address to reach the final
401 	 * foreign destination fr_faddr, valid only if the destination is
402 	 * not directly attached (FLOWRTF_GATEWAY is set).
403 	 *
404 	 * The use of sockaddr for storage is for convenience; the port
405 	 * value is not applicable for this object, as this is shared
406 	 * among flow entries.
407 	 */
408 	union sockaddr_in_4_6   fr_laddr;       /* local IP address */
409 	union sockaddr_in_4_6   fr_faddr;       /* remote IP address */
410 #define fr_af                   fr_faddr.sa.sa_family
411 	union sockaddr_in_4_6   fr_gaddr;       /* gateway IP address */
412 
413 	struct flow_llhdr       fr_llhdr;
414 #define fr_eth_padded           fr_llhdr.flh_eth_padded
415 #define fr_eth                  fr_llhdr.flh_eth
416 
417 	/*
418 	 * In flow_route_tree, we use the destination address as key.
419 	 * To speed up searches, we initialize fr_addr_key to the address
420 	 * portion of fr_faddr depending on the address family.
421 	 */
422 	void                    *fr_addr_key;
423 
424 	/* flow route UUID */
425 	uuid_t                  fr_uuid __sk_aligned(8);
426 
427 	/*
428 	 * fr_usecnt is updated atomically; incremented when a flow entry
429 	 * refers to this object and decremented otherwise.  Periodically,
430 	 * the flowswitch instance garbage collects flow_route objects
431 	 * that aren't being referred to by any flow entries.
432 	 *
433 	 * fr_expire is set when fr_usecnt reaches its minimum count, and
434 	 * is cleared when it goes above the minimum count.
435 	 *
436 	 * The spin lock fr_reflock is used to serialize both.
437 	 */
438 	decl_lck_spin_data(, fr_reflock);
439 	uint64_t                fr_expire;
440 	volatile uint32_t       fr_usecnt;
441 
442 	uint32_t                fr_flags;
443 	uint32_t                fr_laddr_gencnt; /* local IP gencnt */
444 	uint32_t                fr_addr_len;     /* sizeof {in,in6}_addr */
445 
446 	volatile uint32_t       fr_want_configure;
447 	volatile uint32_t       fr_want_probe;
448 
449 	/* lock to serialize resolver */
450 	decl_lck_mtx_data(, fr_lock);
451 
452 	/*
453 	 * fr_rt_dst is the route to final destination, and along with
454 	 * fr_rt_evhdlr_tag, they are used in route event registration.
455 	 *
456 	 * fr_rt_gw is valid only if FLOWRTF_GATEWAY is set.
457 	 */
458 	eventhandler_tag        fr_rt_evhdlr_tag;
459 	struct rtentry          *fr_rt_dst;
460 	struct rtentry          *fr_rt_gw;
461 
462 	/* nexus UUID */
463 	uuid_t                  fr_nx_uuid __sk_aligned(8);
464 
465 	const struct flow_mgr   *fr_mgr;
466 	const struct flow_route_bucket  *fr_frb;
467 	const struct flow_route_id_bucket *fr_frib;
468 };
469 
470 /* valid values for fr_flags */
471 #define FLOWRTF_ATTACHED        0x00000001 /* attached to RB trees */
472 #define FLOWRTF_ONLINK          0x00000010 /* dst directly on the link */
473 #define FLOWRTF_GATEWAY         0x00000020 /* gw IP address is valid */
474 #define FLOWRTF_RESOLVED        0x00000040 /* flow route is resolved */
475 #define FLOWRTF_HAS_LLINFO      0x00000080 /* has dst link-layer address */
476 #define FLOWRTF_DELETED         0x00000100 /* route has been deleted */
477 #define FLOWRTF_DST_LL_MCAST    0x00000200 /* dst is link layer multicast */
478 #define FLOWRTF_DST_LL_BCAST    0x00000400 /* dst is link layer broadcast */
479 #define FLOWRTF_STABLE_ADDR     0x00000800 /* local address prefers stable */
480 
/*
 * Locking helpers for struct flow_route: thin wrappers that apply the
 * lck_mtx primitive named in each expansion to the route's fr_lock.
 */
481 #define FR_LOCK(_fr)                    \
482 	lck_mtx_lock(&(_fr)->fr_lock)
483 #define FR_TRY_LOCK(_fr)                \
484 	lck_mtx_try_lock(&(_fr)->fr_lock)
485 #define FR_LOCK_ASSERT_HELD(_fr)        \
486 	LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_OWNED)
487 #define FR_LOCK_ASSERT_NOTHELD(_fr)     \
488 	LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_NOTOWNED)
489 #define FR_UNLOCK(_fr)                  \
490 	lck_mtx_unlock(&(_fr)->fr_lock)
491 
/*
 * Set the destination MAC address in a flow route's Ethernet header.
 *
 * Copies ETHER_ADDR_LEN bytes from _addr into fr_eth.ether_dhost, then
 * recomputes the link-layer destination flags: both
 * FLOWRTF_DST_LL_MCAST and FLOWRTF_DST_LL_BCAST are cleared first; if
 * the address is multicast, FLOWRTF_DST_LL_BCAST is set when it equals
 * etherbroadcastaddr and FLOWRTF_DST_LL_MCAST is set otherwise.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.
 */
492 #define FLOWRT_UPD_ETH_DST(_fr, _addr)  do {                            \
493 	bcopy((_addr), (_fr)->fr_eth.ether_dhost, ETHER_ADDR_LEN);      \
494 	(_fr)->fr_flags &= ~(FLOWRTF_DST_LL_MCAST|FLOWRTF_DST_LL_BCAST);\
495 	if (ETHER_IS_MULTICAST(_addr)) {                                \
496 	        if (_ether_cmp(etherbroadcastaddr, (_addr)) == 0)       \
497 	                (_fr)->fr_flags |= FLOWRTF_DST_LL_BCAST;        \
498 	        else                                                    \
499 	                (_fr)->fr_flags |= FLOWRTF_DST_LL_MCAST;        \
500 	}                                                               \
501 } while (0)
502 
503 RB_HEAD(flow_route_tree, flow_route);
504 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_tree, flow_route,
505     fr_link, fr_cmp);
506 
/*
 * A bucket of flow routes: frb_head roots this bucket's flow_route_tree
 * (routes linked through fr_link) and frb_lock is the read-write lock
 * that the FRB_*() macros operate on.
 * NOTE(review): frb_idx is presumably the bucket's index in the flow
 * manager's route bucket array -- confirm.
 */
507 struct flow_route_bucket {
508 	decl_lck_rw_data(, frb_lock);
509 	struct flow_route_tree  frb_head;
510 	const uint32_t          frb_idx;
511 };
512 
/*
 * Locking helpers for struct flow_route_bucket.  The W variants work
 * on frb_lock in exclusive mode and the R variants in shared mode;
 * FRB_UNLOCK() releases via lck_rw_done() for whichever mode is held.
 * The *_ASSERT_HELD variants check the lock state via LCK_RW_ASSERT().
 */
513 #define FRB_WLOCK(_frb)                 \
514 	lck_rw_lock_exclusive(&(_frb)->frb_lock)
515 #define FRB_WLOCKTORLOCK(_frb)          \
516 	lck_rw_lock_exclusive_to_shared(&(_frb)->frb_lock)
517 #define FRB_WTRYLOCK(_frb)              \
518 	lck_rw_try_lock_exclusive(&(_frb)->frb_lock)
519 #define FRB_WUNLOCK(_frb)               \
520 	lck_rw_unlock_exclusive(&(_frb)->frb_lock)
521 #define FRB_RLOCK(_frb)                 \
522 	lck_rw_lock_shared(&(_frb)->frb_lock)
523 #define FRB_RLOCKTOWLOCK(_frb)          \
524 	lck_rw_lock_shared_to_exclusive(&(_frb)->frb_lock)
525 #define FRB_RTRYLOCK(_frb)              \
526 	lck_rw_try_lock_shared(&(_frb)->frb_lock)
527 #define FRB_RUNLOCK(_frb)               \
528 	lck_rw_unlock_shared(&(_frb)->frb_lock)
529 #define FRB_UNLOCK(_frb)                \
530 	lck_rw_done(&(_frb)->frb_lock)
531 #define FRB_WLOCK_ASSERT_HELD(_frb)     \
532 	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_EXCLUSIVE)
533 #define FRB_RLOCK_ASSERT_HELD(_frb)     \
534 	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_SHARED)
535 #define FRB_LOCK_ASSERT_HELD(_frb)      \
536 	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_HELD)
537 
538 RB_HEAD(flow_route_id_tree, flow_route);
539 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_id_tree, flow_route,
540     fr_id_link, fr_id_cmp);
541 
/*
 * A bucket of flow routes indexed by ID: frib_head roots this bucket's
 * flow_route_id_tree (routes linked through fr_id_link) and frib_lock
 * is the read-write lock that the FRIB_*() macros operate on.
 * NOTE(review): frib_idx is presumably the bucket's index in the flow
 * manager's route ID bucket array -- confirm.
 */
542 struct flow_route_id_bucket {
543 	decl_lck_rw_data(, frib_lock);
544 	struct flow_route_id_tree       frib_head;
545 	const uint32_t                  frib_idx;
546 };
547 
/*
 * Locking helpers for struct flow_route_id_bucket.  The W variants work
 * on frib_lock in exclusive mode and the R variants in shared mode;
 * FRIB_UNLOCK() releases via lck_rw_done() for whichever mode is held.
 * The *_ASSERT_HELD variants check the lock state via LCK_RW_ASSERT().
 */
548 #define FRIB_WLOCK(_frib)               \
549 	lck_rw_lock_exclusive(&(_frib)->frib_lock)
550 #define FRIB_WLOCKTORLOCK(_frib)        \
551 	lck_rw_lock_exclusive_to_shared(&(_frib)->frib_lock)
552 #define FRIB_WTRYLOCK(_frib)            \
553 	lck_rw_try_lock_exclusive(&(_frib)->frib_lock)
554 #define FRIB_WUNLOCK(_frib)             \
555 	lck_rw_unlock_exclusive(&(_frib)->frib_lock)
556 #define FRIB_RLOCK(_frib)               \
557 	lck_rw_lock_shared(&(_frib)->frib_lock)
558 #define FRIB_RLOCKTOWLOCK(_frib)        \
559 	lck_rw_lock_shared_to_exclusive(&(_frib)->frib_lock)
560 #define FRIB_RTRYLOCK(_frib)            \
561 	lck_rw_try_lock_shared(&(_frib)->frib_lock)
562 #define FRIB_RUNLOCK(_frib)             \
563 	lck_rw_unlock_shared(&(_frib)->frib_lock)
564 #define FRIB_UNLOCK(_frib)              \
565 	lck_rw_done(&(_frib)->frib_lock)
566 #define FRIB_WLOCK_ASSERT_HELD(_frib)   \
567 	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_EXCLUSIVE)
568 #define FRIB_RLOCK_ASSERT_HELD(_frib)   \
569 	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_SHARED)
570 #define FRIB_LOCK_ASSERT_HELD(_frib)    \
571 	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_HELD)
572 
/*
 * Flow manager context.  Holds the flow table (a cuckoo hashtable) and
 * three bucket arrays: flow owner buckets (fob), flow route buckets
 * (frb) and flow route ID buckets (frib).  Each array is described by
 * a base pointer, a bucket count, a per-bucket size and the byte size
 * of the whole allocation; the latter is what the __sized_by()
 * annotation on the base pointer refers to.
 */
573 struct flow_mgr {
574 	char            fm_name[IFNAMSIZ];
575 	uuid_t          fm_uuid;
576 	RB_ENTRY(flow_mgr) fm_link;
577 
578 	struct cuckoo_hashtable *fm_flow_table;
579 	size_t   fm_flow_hash_count[FKMASK_IDX_MAX]; /* # of flows with mask */
580 	uint16_t fm_flow_hash_masks[FKMASK_IDX_MAX];
581 
582 	void            *fm_owner_buckets __sized_by(fm_owner_bucket_tot_sz);     /* cache-aligned fob */
583 	const size_t    fm_owner_buckets_cnt;  /* total # of fobs */
584 	const size_t    fm_owner_bucket_sz;    /* size of each fob */
585 	const size_t    fm_owner_bucket_tot_sz; /* byte size of the fm_owner_buckets allocation */
586 
587 	void            *fm_route_buckets __sized_by(fm_route_bucket_tot_sz);     /* cache-aligned frb */
588 	const size_t    fm_route_buckets_cnt;  /* total # of frb */
589 	const size_t    fm_route_bucket_sz;    /* size of each frb */
590 	const size_t    fm_route_bucket_tot_sz; /* byte size of the fm_route_buckets allocation */
591 
592 	void            *fm_route_id_buckets __sized_by(fm_route_id_bucket_tot_sz);    /* cache-aligned frib */
593 	const size_t    fm_route_id_buckets_cnt; /* total # of frib */
594 	const size_t    fm_route_id_bucket_sz;   /* size of each frib */
595 	const size_t    fm_route_id_bucket_tot_sz; /* byte size of the fm_route_id_buckets allocation */
596 };
597 
598 /*
599  * this func compare match with key;
600  * return values:
601  * 0 as long as @key(exact) matches what @match(wildcard) wants to match on.
602  * 1 when it doesn't match
603  */
604 static inline int
flow_key_cmp(const struct flow_key * match,const struct flow_key * key)605 flow_key_cmp(const struct flow_key *match, const struct flow_key *key)
606 {
607 #define FK_CMP(field, mask)     \
608 	if ((match->fk_mask & mask) != 0) {     \
609 	        if ((key->fk_mask & mask) == 0) {       \
610 	                return 1;       \
611 	        }       \
612 	        int d = memcmp(&match->field, &key->field, sizeof(match->field));       \
613 	        if (d != 0) {   \
614 	                return d;       \
615 	        }       \
616 	}
617 
618 	FK_CMP(fk_ipver, FKMASK_IPVER);
619 	FK_CMP(fk_proto, FKMASK_PROTO);
620 	FK_CMP(fk_src, FKMASK_SRC);
621 	FK_CMP(fk_dst, FKMASK_DST);
622 	FK_CMP(fk_sport, FKMASK_SPORT);
623 	FK_CMP(fk_dport, FKMASK_DPORT);
624 
625 	return 0;
626 }
627 
628 /*
629  * Similar to flow_key_cmp() except using memory compare with mask,
630  * done with SIMD instructions, if available for the platform.
631  */
632 static inline int
flow_key_cmp_mask(const struct flow_key * match,const struct flow_key * key,const struct flow_key * mask)633 flow_key_cmp_mask(const struct flow_key *match,
634     const struct flow_key *key, const struct flow_key *mask)
635 {
636 	_CASSERT(FLOW_KEY_LEN == 48);
637 	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));
638 	_CASSERT((sizeof(struct flow_entry) % 16) == 0);
639 	_CASSERT((offsetof(struct flow_entry, fe_key) % 16) == 0);
640 
641 	/* local variables are __bidi_indexable with -fbounds-safety */
642 	const struct flow_key *match_idx = match;
643 	const struct flow_key *key_idx = key;
644 	const struct flow_key *mask_idx = mask;
645 
646 	return sk_memcmp_mask_48B((const uint8_t *)match_idx,
647 	           (const uint8_t *)key_idx, (const uint8_t *)mask_idx);
648 }
649 
650 static inline uint32_t
flow_key_hash(const struct flow_key * key)651 flow_key_hash(const struct flow_key *key)
652 {
653 	uint32_t hash = FK_HASH_SEED;
654 #define FK_HASH(field, mask)    \
655 	if ((key->fk_mask & mask) != 0) {       \
656 	        hash = net_flowhash(&key->field, sizeof(key->field), hash);     \
657 	}
658 
659 	FK_HASH(fk_ipver, FKMASK_IPVER);
660 	FK_HASH(fk_proto, FKMASK_PROTO);
661 	FK_HASH(fk_src, FKMASK_SRC);
662 	FK_HASH(fk_dst, FKMASK_DST);
663 	FK_HASH(fk_sport, FKMASK_SPORT);
664 	FK_HASH(fk_dport, FKMASK_DPORT);
665 
666 	return hash;
667 }
668 
669 __attribute__((always_inline))
670 static inline void
flow_key_unpack(const struct flow_key * key,union sockaddr_in_4_6 * laddr,union sockaddr_in_4_6 * faddr,uint8_t * protocol)671 flow_key_unpack(const struct flow_key *key, union sockaddr_in_4_6 *laddr,
672     union sockaddr_in_4_6 *faddr, uint8_t *protocol)
673 {
674 	*protocol = key->fk_proto;
675 	if (key->fk_ipver == IPVERSION) {
676 		laddr->sa.sa_family = AF_INET;
677 		laddr->sin.sin_addr = key->fk_src4;
678 		laddr->sin.sin_port = key->fk_sport;
679 		faddr->sa.sa_family = AF_INET;
680 		faddr->sin.sin_addr = key->fk_dst4;
681 		faddr->sin.sin_port = key->fk_dport;
682 	} else if (key->fk_ipver == IPV6_VERSION) {
683 		laddr->sa.sa_family = AF_INET6;
684 		laddr->sin6.sin6_addr = key->fk_src6;
685 		laddr->sin6.sin6_port = key->fk_sport;
686 		faddr->sa.sa_family = AF_INET6;
687 		faddr->sin6.sin6_addr = key->fk_dst6;
688 		faddr->sin6.sin6_port = key->fk_dport;
689 	}
690 }
691 
692 __attribute__((always_inline))
693 static inline int
flow_req2key(struct nx_flow_req * req,struct flow_key * key)694 flow_req2key(struct nx_flow_req *req, struct flow_key *key)
695 {
696 	FLOW_KEY_CLEAR(key);
697 
698 	if (req->nfr_saddr.sa.sa_family == AF_INET) {
699 		key->fk_ipver = IPVERSION;
700 		key->fk_proto = req->nfr_ip_protocol;
701 		key->fk_mask |= FKMASK_PROTO;
702 		if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
703 			key->fk_src4 = req->nfr_saddr.sin.sin_addr;
704 			key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
705 		}
706 		if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
707 			key->fk_dst4 = req->nfr_daddr.sin.sin_addr;
708 			key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
709 		}
710 		if (sk_sa_has_port(SA(&req->nfr_saddr))) {
711 			key->fk_sport = req->nfr_saddr.sin.sin_port;
712 			key->fk_mask |= FKMASK_SPORT;
713 		}
714 		if (sk_sa_has_port(SA(&req->nfr_daddr))) {
715 			key->fk_dport = req->nfr_daddr.sin.sin_port;
716 			key->fk_mask |= FKMASK_DPORT;
717 		}
718 	} else if (req->nfr_saddr.sa.sa_family == AF_INET6) {
719 		key->fk_ipver = IPV6_VERSION;
720 		key->fk_proto = req->nfr_ip_protocol;
721 		key->fk_mask |= FKMASK_PROTO;
722 		if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
723 			key->fk_src6 = req->nfr_saddr.sin6.sin6_addr;
724 			key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
725 		}
726 		if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
727 			key->fk_dst6 = req->nfr_daddr.sin6.sin6_addr;
728 			key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
729 		}
730 		if (sk_sa_has_port(SA(&req->nfr_saddr))) {
731 			key->fk_sport = req->nfr_saddr.sin6.sin6_port;
732 			key->fk_mask |= FKMASK_SPORT;
733 		}
734 		if (sk_sa_has_port(SA(&req->nfr_daddr))) {
735 			key->fk_dport = req->nfr_daddr.sin6.sin6_port;
736 			key->fk_mask |= FKMASK_DPORT;
737 		}
738 	} else {
739 		SK_ERR("unknown AF %d", req->nfr_saddr.sa.sa_family);
740 		return ENOTSUP;
741 	}
742 
743 	switch (key->fk_mask) {
744 	case FKMASK_5TUPLE:
745 	case FKMASK_4TUPLE:
746 	case FKMASK_3TUPLE:
747 	case FKMASK_2TUPLE:
748 	case FKMASK_IPFLOW3:
749 	case FKMASK_IPFLOW2:
750 	case FKMASK_IPFLOW1:
751 		break;
752 	default:
753 		SK_ERR("unknown flow key mask 0x%04x", key->fk_mask);
754 		return ENOTSUP;
755 	}
756 
757 	return 0;
758 }
759 
760 __attribute__((always_inline))
761 static inline void
flow_pkt2key(struct __kern_packet * pkt,boolean_t input,struct flow_key * key)762 flow_pkt2key(struct __kern_packet *pkt, boolean_t input,
763     struct flow_key *key)
764 {
765 	struct __flow *flow = pkt->pkt_flow;
766 
767 	FLOW_KEY_CLEAR(key);
768 
769 	if (__improbable((pkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) == 0)) {
770 		return;
771 	}
772 
773 	ASSERT(flow->flow_l3._l3_ip_ver != 0);
774 
775 	key->fk_ipver = flow->flow_l3._l3_ip_ver;
776 	key->fk_proto = flow->flow_ip_proto;
777 	if (input) {
778 		if (flow->flow_ip_ver == IPVERSION) {
779 			key->fk_src4 = flow->flow_ipv4_dst;
780 			key->fk_sport = flow->flow_tcp_dst;
781 			key->fk_dst4 = flow->flow_ipv4_src;
782 			key->fk_dport = flow->flow_tcp_src;
783 		} else {
784 			key->fk_src6 = flow->flow_ipv6_dst;
785 			key->fk_sport = flow->flow_tcp_dst;
786 			key->fk_dst6 = flow->flow_ipv6_src;
787 			key->fk_dport = flow->flow_tcp_src;
788 		}
789 	} else {
790 		if (flow->flow_ip_ver == IPVERSION) {
791 			key->fk_src4 = flow->flow_ipv4_src;
792 			key->fk_sport = flow->flow_tcp_src;
793 			key->fk_dst4 = flow->flow_ipv4_dst;
794 			key->fk_dport = flow->flow_tcp_dst;
795 		} else {
796 			key->fk_src6 = flow->flow_ipv6_src;
797 			key->fk_sport = flow->flow_tcp_src;
798 			key->fk_dst6 = flow->flow_ipv6_dst;
799 			key->fk_dport = flow->flow_tcp_dst;
800 		}
801 	}
802 }
803 
804 __attribute__((always_inline))
805 static inline int
flow_ip_cmp(const void * a0,const void * b0,size_t alen)806 flow_ip_cmp(const void *a0, const void *b0, size_t alen)
807 {
808 	struct flow_ip_addr *a = __DECONST(struct flow_ip_addr *, a0),
809 	    *b = __DECONST(struct flow_ip_addr *, b0);
810 
811 	switch (alen) {
812 	case sizeof(struct in_addr):
813 		if (a->_addr32[0] > b->_addr32[0]) {
814 			return 1;
815 		}
816 		if (a->_addr32[0] < b->_addr32[0]) {
817 			return -1;
818 		}
819 		break;
820 
821 	case sizeof(struct in6_addr):
822 		if (a->_addr64[1] > b->_addr64[1]) {
823 			return 1;
824 		}
825 		if (a->_addr64[1] < b->_addr64[1]) {
826 			return -1;
827 		}
828 		if (a->_addr64[0] > b->_addr64[0]) {
829 			return 1;
830 		}
831 		if (a->_addr64[0] < b->_addr64[0]) {
832 			return -1;
833 		}
834 		break;
835 
836 	default:
837 		VERIFY(0);
838 		/* NOTREACHED */
839 		__builtin_unreachable();
840 	}
841 	return 0;
842 }
843 
844 __attribute__((always_inline))
845 static inline struct flow_owner_bucket *
flow_mgr_get_fob_at_idx(struct flow_mgr * fm,uint32_t idx)846 flow_mgr_get_fob_at_idx(struct flow_mgr *fm, uint32_t idx)
847 {
848 	char *buckets = fm->fm_owner_buckets;
849 	void *bucket = buckets + (idx * fm->fm_owner_bucket_sz);
850 	return bucket;
851 }
852 
853 __attribute__((always_inline))
854 static inline struct flow_route_bucket *
flow_mgr_get_frb_at_idx(struct flow_mgr * fm,uint32_t idx)855 flow_mgr_get_frb_at_idx(struct flow_mgr *fm, uint32_t idx)
856 {
857 	char *buckets = fm->fm_route_buckets;
858 	void *bucket = buckets + (idx * fm->fm_route_bucket_sz);
859 	return bucket;
860 }
861 
862 __attribute__((always_inline))
863 static inline struct flow_route_id_bucket *
flow_mgr_get_frib_at_idx(struct flow_mgr * fm,uint32_t idx)864 flow_mgr_get_frib_at_idx(struct flow_mgr *fm, uint32_t idx)
865 {
866 	char *buckets = fm->fm_route_id_buckets;
867 	void *bucket = buckets + (idx * fm->fm_route_id_bucket_sz);
868 	return bucket;
869 }
870 
871 __attribute__((always_inline))
872 static inline uint32_t
flow_mgr_get_fob_idx(struct flow_mgr * fm,struct flow_owner_bucket * bkt)873 flow_mgr_get_fob_idx(struct flow_mgr *fm,
874     struct flow_owner_bucket *bkt)
875 {
876 	ASSERT(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) %
877 	    fm->fm_owner_bucket_sz == 0);
878 	return (uint32_t)(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) /
879 	       fm->fm_owner_bucket_sz);
880 }
881 
882 __attribute__((always_inline))
883 static inline size_t
flow_mgr_get_num_flows(struct flow_mgr * mgr)884 flow_mgr_get_num_flows(struct flow_mgr *mgr)
885 {
886 	ASSERT(mgr->fm_flow_table != NULL);
887 	return cuckoo_hashtable_entries(mgr->fm_flow_table);
888 }
889 
/*
 * Globals shared across the flow code; only declared here, the
 * definitions live elsewhere.
 */

/* object sizes */
extern unsigned int sk_fo_size;
extern unsigned int sk_fe_size;
extern unsigned int sk_fab_size;

/* skmem caches */
extern struct skmem_cache *sk_fo_cache;
extern struct skmem_cache *sk_fe_cache;
extern struct skmem_cache *sk_fab_cache;
extern struct skmem_cache *flow_route_cache;
extern struct skmem_cache *flow_stats_cache;

extern uint32_t flow_seed;
903 
904 __BEGIN_DECLS
905 
906 typedef void (*flow_route_ctor_fn_t)(void *arg, struct flow_route *);
907 typedef int (*flow_route_resolve_fn_t)(void *arg, struct flow_route *,
908     struct __kern_packet *);
909 
910 extern int flow_init(void);
911 extern void flow_fini(void);
912 
913 extern void flow_mgr_init(void);
914 extern void flow_mgr_fini(void);
915 extern struct flow_mgr *flow_mgr_find_lock(uuid_t);
916 extern void flow_mgr_unlock(void);
917 extern struct flow_mgr * flow_mgr_create(size_t, size_t, size_t, size_t);
918 extern void flow_mgr_destroy(struct flow_mgr *);
919 extern void flow_mgr_terminate(struct flow_mgr *);
920 extern int flow_mgr_flow_add(struct kern_nexus *nx, struct flow_mgr *fm,
921     struct flow_owner *fo, struct ifnet *ifp, struct nx_flow_req *req,
922     flow_route_ctor_fn_t fr_ctor, flow_route_resolve_fn_t fr_resolve, void *fr_arg);
923 extern struct flow_owner_bucket *flow_mgr_get_fob_by_pid(
924 	struct flow_mgr *, pid_t);
925 extern struct flow_entry *flow_mgr_get_fe_by_uuid_rlock(
926 	struct flow_mgr *, uuid_t);
927 extern struct flow_route_bucket *flow_mgr_get_frb_by_addr(
928 	struct flow_mgr *, union sockaddr_in_4_6 *);
929 extern struct flow_route_id_bucket *flow_mgr_get_frib_by_uuid(
930 	struct flow_mgr *, uuid_t);
931 extern int flow_mgr_flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask);
932 extern int flow_mgr_flow_hash_mask_del(struct flow_mgr *fm, uint32_t mask);
933 
934 extern struct flow_entry * fe_alloc(boolean_t can_block);
935 
936 extern int flow_namespace_create(union sockaddr_in_4_6 *, uint8_t protocol,
937     netns_token *, uint16_t, struct ns_flow_info *);
938 extern void flow_namespace_half_close(netns_token *token);
939 extern void flow_namespace_withdraw(netns_token *);
940 extern void flow_namespace_destroy(netns_token *);
941 
942 extern struct flow_owner_bucket *flow_owner_buckets_alloc(size_t, size_t *, size_t *);
943 extern void flow_owner_buckets_free(struct flow_owner_bucket *, size_t);
944 extern void flow_owner_bucket_init(struct flow_owner_bucket *);
945 extern void flow_owner_bucket_destroy(struct flow_owner_bucket *);
946 extern void flow_owner_bucket_purge_all(struct flow_owner_bucket *);
947 extern void flow_owner_attach_nexus_port(struct flow_mgr *, boolean_t,
948     pid_t, nexus_port_t);
949 extern uint32_t flow_owner_detach_nexus_port(struct flow_mgr *,
950     boolean_t, pid_t, nexus_port_t, boolean_t);
951 extern struct flow_owner *flow_owner_alloc(struct flow_owner_bucket *,
952     struct proc *, nexus_port_t, bool, bool, struct nx_flowswitch*,
953     struct nexus_adapter *, void *, bool);
954 extern void flow_owner_free(struct flow_owner_bucket *, struct flow_owner *);
955 extern struct flow_entry *flow_owner_create_entry(struct flow_owner *,
956     struct nx_flow_req *, boolean_t, uint32_t, boolean_t,
957     struct flow_route *, int *);
958 extern int flow_owner_destroy_entry(struct flow_owner *, uuid_t, bool, void *);
959 extern struct flow_owner *flow_owner_find_by_pid(struct flow_owner_bucket *,
960     pid_t, void *, bool);
961 extern int flow_owner_flowadv_index_alloc(struct flow_owner *, flowadv_idx_t *);
962 extern void flow_owner_flowadv_index_free(struct flow_owner *, flowadv_idx_t);
963 extern uint32_t flow_owner_activate_nexus_port(struct flow_mgr *,
964     boolean_t, pid_t, nexus_port_t, struct nexus_adapter *,
965     na_activate_mode_t);
966 
967 extern struct flow_entry *flow_mgr_find_fe_by_key(struct flow_mgr *,
968     struct flow_key *);
969 extern struct flow_entry * flow_mgr_find_conflicting_fe(struct flow_mgr *fm,
970     struct flow_key *fe_key);
971 extern void flow_mgr_foreach_flow(struct flow_mgr *fm,
972     void (^flow_handler)(struct flow_entry *fe));
973 extern struct flow_entry *flow_entry_find_by_uuid(struct flow_owner *,
974     uuid_t);
975 extern struct flow_entry * flow_entry_alloc(struct flow_owner *fo,
976     struct nx_flow_req *req, int *perr);
977 extern void flow_entry_teardown(struct flow_owner *, struct flow_entry *);
978 extern void flow_entry_destroy(struct flow_owner *, struct flow_entry *, bool,
979     void *);
980 extern void flow_entry_retain(struct flow_entry *fe);
981 extern void flow_entry_release(struct flow_entry **pfe);
982 extern uint32_t flow_entry_refcnt(struct flow_entry *fe);
983 extern bool rx_flow_demux_match(struct nx_flowswitch *, struct flow_entry *, struct __kern_packet *);
984 extern struct flow_entry *rx_lookup_child_flow(struct nx_flowswitch *fsw,
985     struct flow_entry *, struct __kern_packet *);
986 extern struct flow_entry *tx_lookup_child_flow(struct flow_entry *, uuid_t);
987 
988 extern struct flow_entry_dead *flow_entry_dead_alloc(zalloc_flags_t);
989 extern void flow_entry_dead_free(struct flow_entry_dead *);
990 
991 extern void flow_entry_stats_get(struct flow_entry *, struct sk_stats_flow *);
992 
993 extern int flow_pkt_classify(struct __kern_packet *pkt, struct ifnet *ifp,
994     sa_family_t af, bool input);
995 
996 extern void flow_track_stats(struct flow_entry *, uint64_t, uint64_t,
997     bool, bool);
998 extern int flow_pkt_track(struct flow_entry *, struct __kern_packet *, bool);
999 extern boolean_t flow_track_tcp_want_abort(struct flow_entry *);
1000 extern void flow_track_abort_tcp( struct flow_entry *fe,
1001     struct __kern_packet *in_pkt, struct __kern_packet *rst_pkt);
1002 extern void flow_track_abort_quic(struct flow_entry *fe, uint8_t *token);
1003 
1004 extern void fsw_host_rx(struct nx_flowswitch *, struct pktq *);
1005 extern void fsw_host_sendup(struct ifnet *, struct mbuf *, struct mbuf *,
1006     uint32_t, uint32_t);
1007 
1008 extern void flow_rx_agg_tcp(struct nx_flowswitch *fsw, struct flow_entry *fe,
1009     uint32_t flags);
1010 
1011 extern void flow_route_init(void);
1012 extern void flow_route_fini(void);
1013 extern struct flow_route_bucket *flow_route_buckets_alloc(size_t, size_t *, size_t *);
1014 extern void flow_route_buckets_free(struct flow_route_bucket *, size_t);
1015 extern void flow_route_bucket_init(struct flow_route_bucket *);
1016 extern void flow_route_bucket_destroy(struct flow_route_bucket *);
1017 extern void flow_route_bucket_purge_all(struct flow_route_bucket *);
1018 extern struct flow_route_id_bucket *flow_route_id_buckets_alloc(size_t,
1019     size_t *, size_t *);
1020 extern void flow_route_id_buckets_free(struct flow_route_id_bucket *, size_t);
1021 extern void flow_route_id_bucket_init(struct flow_route_id_bucket *);
1022 extern void flow_route_id_bucket_destroy(struct flow_route_id_bucket *);
1023 
1024 extern int flow_route_select_laddr(union sockaddr_in_4_6 *,
1025     union sockaddr_in_4_6 *, struct ifnet *, struct rtentry *, uint32_t *, int);
1026 extern int flow_route_find(struct kern_nexus *, struct flow_mgr *,
1027     struct ifnet *, struct nx_flow_req *, flow_route_ctor_fn_t,
1028     flow_route_resolve_fn_t, void *, struct flow_route **);
1029 extern int flow_route_configure(struct flow_route *, struct ifnet *, struct nx_flow_req *);
1030 extern void flow_route_retain(struct flow_route *);
1031 extern void flow_route_release(struct flow_route *);
1032 extern uint32_t flow_route_prune(struct flow_mgr *, struct ifnet *,
1033     uint32_t *);
1034 extern void flow_route_cleanup(struct flow_route *);
1035 extern boolean_t flow_route_laddr_validate(union sockaddr_in_4_6 *,
1036     struct ifnet *, uint32_t *);
1037 extern boolean_t flow_route_key_validate(struct flow_key *, struct ifnet *,
1038     uint32_t *);
1039 extern void flow_qset_select_dynamic(struct nx_flowswitch *,
1040     struct flow_entry *, boolean_t);
1041 extern void flow_stats_init(void);
1042 extern void flow_stats_fini(void);
1043 extern struct flow_stats *flow_stats_alloc(boolean_t cansleep);
1044 
1045 #if SK_LOG
1046 #define FLOWKEY_DBGBUF_SIZE   256
1047 #define FLOWENTRY_DBGBUF_SIZE   512
1048 extern char *fk_as_string(const struct flow_key *fk, char *, size_t);
1049 extern char *fe_as_string(const struct flow_entry *fe, char *, size_t);
1050 #endif /* SK_LOG */
1051 __END_DECLS
1052 #endif /* BSD_KERNEL_PRIVATE */
1053 #endif /* !_SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_ */
1054