xref: /xnu-12377.41.6/bsd/skywalk/nexus/flowswitch/flow/flow_var.h (revision bbb1b6f9e71b8cdde6e5cd6f4841f207dee3d828)
1 /*
2  * Copyright (c) 2016-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Once a packet is classified, it goes through checks to see if there
31  * is a matching flow entry in the flow table.  The key used to search
32  * the entry is composed of the fields contained in struct flow_ptrs.
33  *
34  * Flow entry insertion and deletion to the flow table, on behalf of
35  * the owning client process, requires the use of the rule ID (UUID)
36  * as the search key.
37  *
38  * Because of the above, each flow entry simultaneously exists in two
39  * respective trees: flow_entry_tree and flow_entry_id_tree.
40  *
41  * Using a single RW lock to protect the two trees is simple, but the
42  * data path performance is impacted during flow insertion and deletion,
43  * especially as the number of client processes and flows grow.
44  *
45  * To solve that, we deploy the following scheme:
46  *
47  * Given that the flow_entry_tree is searched on a per-packet basis,
48  * we break it down into a series of trees, each one contained within
49  * a flow_bucket structure.  The hash from flow_ptrs determines the
50  * index of the flow_bucket to search the flow_entry_tree from.
51  *
52  * The flow_entry_id_tree is searched on each flow insertion and
53  * deletion, and similarly we break it down into a series of trees,
54  * each contained within a flow_owner_bucket structure. We use the
55  * client process ID (pid_t) to determine the bucket index.
56  *
57  * Each flow_bucket and flow_owner_bucket structure is dynamically
58  * created and aligned on the CPU cache line boundary; the alignment
59  * avoids false sharing, especially given that each bucket has its own
60  * lock.  The number of buckets is chosen by the client module at the
61  * time the flow manager context is initialized.
62  */
63 
64 #ifndef _SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_
65 #define _SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_
66 
67 #ifdef BSD_KERNEL_PRIVATE
68 #include <skywalk/core/skywalk_var.h>
69 #include <skywalk/lib/cuckoo_hashtable.h>
70 #include <skywalk/namespace/netns.h>
71 #include <skywalk/namespace/protons.h>
72 #include <skywalk/packet/packet_var.h>
73 #include <net/flowhash.h>
74 #include <netinet/ip.h>
75 #include <netinet/in_stat.h>
76 #include <netinet/ip6.h>
77 #include <sys/eventhandler.h>
78 
79 RB_HEAD(flow_owner_tree, flow_owner);
80 
/*
 * One bucket of flow owners: a red-black tree of struct flow_owner
 * (flow_owner_tree) plus the mutex that the FOB_*LOCK*() macros operate
 * on.  Per the comment at the top of this file, the client process ID
 * selects which bucket an owner lives in.
 */
81 struct flow_owner_bucket {
82 	decl_lck_mtx_data(, fob_lock);          /* taken via FOB_LOCK() et al. */
83 	struct flow_owner_tree  fob_owner_head; /* RB tree of struct flow_owner */
84 	uint16_t                fob_busy_flags; /* presumably FOBF_* -- confirm */
85 	uint16_t                fob_open_waiters;
86 	uint16_t                fob_close_waiters;
87 	uint16_t                fob_dtor_waiters;
88 	const size_t            fob_idx;        /* const; NOTE(review): looks like the bucket's own index -- confirm */
89 };
90 
91 #define FOBF_OPEN_BUSY          0x1     /* flow open monitor */
92 #define FOBF_CLOSE_BUSY         0x2     /* flow close monitor */
93 #define FOBF_DEAD               0x4     /* no longer usable */
94 
/*
 * Locking helpers for struct flow_owner_bucket.  Each macro is a direct
 * wrapper around the corresponding lck_mtx_*() call (or LCK_MTX_ASSERT)
 * applied to the bucket's fob_lock.  FOB_LOCK_SPIN() acquires through
 * lck_mtx_lock_spin() and FOB_LOCK_CONVERT() calls lck_mtx_convert_spin()
 * on the same mutex.
 */
95 #define FOB_LOCK(_fob)                  \
96 	lck_mtx_lock(&(_fob)->fob_lock)
97 #define FOB_LOCK_SPIN(_fob)             \
98 	lck_mtx_lock_spin(&(_fob)->fob_lock)
99 #define FOB_LOCK_CONVERT(_fob)          \
100 	lck_mtx_convert_spin(&(_fob)->fob_lock)
101 #define FOB_TRY_LOCK(_fob)              \
102 	lck_mtx_try_lock(&(_fob)->fob_lock)
103 #define FOB_LOCK_ASSERT_HELD(_fob)      \
104 	LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_OWNED)
105 #define FOB_LOCK_ASSERT_NOTHELD(_fob)   \
106 	LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_NOTOWNED)
107 #define FOB_UNLOCK(_fob)                \
108 	lck_mtx_unlock(&(_fob)->fob_lock)
109 
110 RB_HEAD(flow_entry_id_tree, flow_entry);
111 
112 #define FLOW_PROCESS_NAME_LENGTH        24
113 
/*
 * A flow owner.  It is linked into a flow_owner_bucket's flow_owner_tree
 * through fo_link, and it heads its own flow_entry_id_tree of the flow
 * entries it owns (those entries are linked through
 * flow_entry::fe_id_link).
 */
114 struct flow_owner {
115 	RB_ENTRY(flow_owner)    fo_link;        /* linkage in flow_owner_tree */
116 	struct flow_entry_id_tree fo_flow_entry_id_head; /* owned flow entries */
117 	const struct flow_owner_bucket *fo_bucket; /* const view; FO_BUCKET() drops the const */
118 	void                    *fo_context;
119 	pid_t                   fo_pid;
120 	bool                    fo_nx_port_pid_bound;
121 	bool                    fo_nx_port_destroyed;
122 	bool                    fo_low_latency;
123 	nexus_port_t            fo_nx_port;
124 	uuid_t                  fo_key;
125 
126 	struct nexus_adapter *  const fo_nx_port_na;
127 	struct nx_flowswitch *  const fo_fsw;
128 
129 	/*
130 	 * Array of bitmaps to manage the flow advisory table indices.
131 	 * Currently we are restricting a flow owner to a single nexus
132 	 * port, so this structure is effectively managing the flow advisory
133 	 * indices for a port.
	 *
	 * fo_num_flowadv_bmaps is the element count of fo_flowadv_bmap
	 * (it is the __counted_by bound of the pointer).
134 	 */
135 	bitmap_t                *__counted_by(fo_num_flowadv_bmaps)fo_flowadv_bmap;
136 	uint32_t                fo_flowadv_max;
137 	uint32_t                fo_num_flowadv;
138 	uint32_t                fo_num_flowadv_bmaps;
139 
140 	/* for debugging */
141 	char                    fo_name[FLOW_PROCESS_NAME_LENGTH];
142 };
143 
/*
 * Yield the owner's bucket as a non-const struct flow_owner_bucket
 * pointer; fo_bucket itself is declared pointer-to-const.
 */
144 #define FO_BUCKET(_fo)  \
145 	__DECONST(struct flow_owner_bucket *, (_fo)->fo_bucket)
146 
147 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_owner_tree, flow_owner,
148     fo_link, fo_cmp);
149 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_entry_id_tree, flow_entry,
150     fe_id_link, fe_id_cmp);
151 
/*
 * Flow tracking states (see flow_track::fse_state, a uint8_t documented as
 * holding FT_STATE_* values).  The TCP states take the values 0 through
 * 10, the UDP states start at 20, and FT_STATE_MAX is 255.
 */
151 typedef enum {
152 	/*
153 	 * TCP states.
154 	 */
155 	FT_STATE_CLOSED = 0,            /* closed */
156 	FT_STATE_LISTEN,                /* listening for connection */
157 	FT_STATE_SYN_SENT,              /* active, have sent SYN */
158 	FT_STATE_SYN_RECEIVED,          /* have sent and rcvd SYN */
159 	FT_STATE_ESTABLISHED,           /* established */
160 	FT_STATE_CLOSE_WAIT,            /* rcvd FIN, waiting close */
161 	FT_STATE_FIN_WAIT_1,            /* have sent FIN */
162 	FT_STATE_CLOSING,               /* exchanged FINs, waiting FIN|ACK */
163 	FT_STATE_LAST_ACK,              /* rcvd FIN, closed, waiting FIN|ACK */
164 	FT_STATE_FIN_WAIT_2,            /* closed, FIN is ACK'd */
165 	FT_STATE_TIME_WAIT,             /* quiet wait after close */
166 
167 	/*
168 	 * UDP states.
169 	 */
170 	FT_STATE_NO_TRAFFIC = 20,       /* no packet observed */
171 	FT_STATE_SINGLE,                /* single packet */
172 	FT_STATE_MULTIPLE,              /* multiple packets */
173 
174 	FT_STATE_MAX = 255
175 } flow_track_state_t;
177 
/*
 * ACK round-trip-time tracking state; embedded in struct flow_track as
 * fse_rtt.
 */
177 struct flow_track_rtt {
178 	uint64_t        frtt_timestamp; /* tracked segment timestamp */
179 	uint64_t        frtt_last;      /* previous net_uptime(rate limiting) */
180 	uint32_t        frtt_seg_begin; /* tracked segment begin SEQ */
181 	uint32_t        frtt_seg_end;   /* tracked segment end SEQ */
182 	uint32_t        frtt_usec;      /* avg RTT in usec */
183 };
185 
186 #define FLOWTRACK_RTT_SAMPLE_INTERVAL   2       /* sample ACK RTT every 2 sec */
187 
/*
 * Per-endpoint tracking state.  struct flow_entry embeds two of these:
 * fe_ltrack for the local endpoint and fe_rtrack for the remote one.
 */
187 struct flow_track {
188 	/*
189 	 * TCP specific tracking info.
190 	 */
191 	uint32_t fse_seqlo;     /* max sequence number sent */
192 	uint32_t fse_seqhi;     /* max the other end ACKd + win */
193 	uint32_t fse_seqlast;   /* last sequence number (FIN) */
194 	uint16_t fse_max_win;   /* largest window (pre scaling) */
195 	uint16_t fse_mss;       /* maximum segment size option */
196 	uint8_t fse_state;      /* active state level (FT_STATE_*) */
197 	uint8_t fse_wscale;     /* window scaling factor */
198 	uint16_t fse_flags;     /* FLOWSTATEF_* */
199 	uint32_t fse_syn_ts;    /* SYN timestamp */
200 	uint32_t fse_syn_cnt;   /* # of SYNs per second */
201 
202 	struct flow_track_rtt   fse_rtt;        /* ACK RTT tracking */
	/* shorthand for the average RTT held inside fse_rtt */
203 #define fse_rtt_usec    fse_rtt.frtt_usec
204 } __sk_aligned(8);
206 
207 /* valid values for fse_flags */
208 #define FLOWSTATEF_WSCALE       0x1     /* fse_wscale is valid */
209 
/*
 * Link-layer header storage; struct flow_route embeds one as fr_llhdr.
 * The _flh union overlays two 64-bit words with a 16-bit pad followed by
 * a struct ether_header; the flh_eth_padded and flh_eth macros name the
 * padded and unpadded Ethernet views of that storage.
 */
209 struct flow_llhdr {
210 	uint32_t                flh_gencnt;     /* link-layer address gencnt */
211 
	/* NOTE(review): presumably offset/length of the header bytes inside _flh -- confirm */
212 	const uint8_t           flh_off;
213 	const uint8_t           flh_len;
214 	uint16_t                flh_pad;        /* for future */
215 
216 	union _flh_u {
217 		uint64_t        _buf[2];
218 		struct {
219 			uint16_t _eth_pad;
220 			struct ether_header _eth;
221 		} _eth_padded;
222 	}  __sk_aligned(8)      _flh;
223 #define flh_eth_padded          _flh._eth_padded
224 #define flh_eth                 _flh._eth_padded._eth
225 };
227 
/*
 * Queue-set selection mode; stored in flow_entry::fe_qset_select next to
 * fe_qset and fe_qset_id.
 */
227 typedef enum {
228 	FE_QSET_SELECT_NONE,
229 	FE_QSET_SELECT_FIXED,
230 	FE_QSET_SELECT_DYNAMIC
231 } flow_qset_select_t;
233 
234 extern kern_allocation_name_t skmem_tag_flow_demux;
235 typedef int (*flow_demux_memcmp_mask_t)(const uint8_t *src1, const uint8_t *src2,
236     const uint8_t *byte_mask);
237 
/*
 * Kernel-side demux pattern: a struct flow_demux_pattern paired with the
 * masked-compare callback (flow_demux_memcmp_mask_t) stored alongside it.
 * struct flow_entry holds an array of these in fe_demux_patterns.
 */
237 struct kern_flow_demux_pattern {
238 	struct flow_demux_pattern  fdp_demux_pattern;
239 	flow_demux_memcmp_mask_t   fdp_memcmp_mask;
240 };
242 
243 #define MAX_PKT_DEMUX_LIMIT        1000
244 
245 TAILQ_HEAD(flow_entry_list, flow_entry);
246 
/*
 * Per-flow Tx and Rx processing callbacks, stored in
 * flow_entry::fe_tx_process and flow_entry::fe_rx_process.
 *
 * FLOW_PROC_FLAG_GSO and FLOW_PROC_FLAG_FRAGMENTS share the value 0x0001.
 * NOTE(review): presumably each is only meaningful in the "flags" argument
 * of the callback type it is declared next to (GSO for Tx, FRAGMENTS for
 * Rx) -- confirm against the callers.
 */
246 #define FLOW_PROC_FLAG_GSO        0x0001
247 typedef void (*flow_tx_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe,
248     uint32_t flags);
249 
250 #define FLOW_PROC_FLAG_FRAGMENTS  0x0001
251 typedef void (*flow_rx_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe,
252     struct pktq *pkts, uint32_t rx_bytes, struct mbufq *host_mq,
253     uint32_t flags);
255 
/*
 * A flow entry.  As the comment at the top of this file explains, an
 * entry is looked up both by flow key (fe_key) and by UUID; the UUID
 * linkage is fe_id_link, which places the entry in a flow_entry_id_tree.
 *
 * Layout constraint: flow_key_cmp_mask() statically asserts that
 * sizeof(struct flow_entry) and offsetof(struct flow_entry, fe_key) are
 * both multiples of 16, so adding or reordering members can break the
 * build.
 */
255 struct flow_entry {
256 	/**** Common Group ****/
257 	os_refcnt_t             fe_refcnt;
258 	struct flow_key         fe_key;
259 	uint32_t                fe_flags;       /* FLOWENTF_* */
260 	uint32_t                fe_key_hash;
261 	struct cuckoo_node      fe_cnode;
262 
263 	uuid_t                  fe_uuid __sk_aligned(8);
264 	nexus_port_t            fe_nx_port;
265 	uint32_t                fe_laddr_gencnt;
266 	uint32_t                fe_want_nonviable;
267 	uint32_t                fe_want_withdraw;
268 	uint8_t                 fe_transport_protocol;
269 
270 	/**** Rx Group ****/
271 	/*
272 	 * If multiple threads end up working on the same flow entry, the one
273 	 * that reaches rx_flow_batch_packets first will be responsible for
274 	 * sending up all the packets from different RX completion queues.
275 	 * fe_rx_worker_tid marks its thread ID. Other threads only enqueue their
276 	 * packets into fe_rx_pktq but do not call fe_rx_process on the flow entry.
277 	 */
278 	uint16_t                fe_rx_frag_count;
279 	uint32_t                fe_rx_pktq_bytes;
280 	decl_lck_mtx_data(, fe_rx_pktq_lock);
281 	struct pktq             fe_rx_pktq;
282 	TAILQ_ENTRY(flow_entry) fe_rx_link;
283 	flow_rx_action_t        fe_rx_process;
284 	uint64_t                fe_rx_worker_tid;
285 
286 	/*
287 	 * largest allocated packet size.
288 	 * used by:
289 	 *  - mbuf batch allocation logic during RX aggregation and netif copy.
290 	 *  - packet allocation logic during RX aggregation.
291 	 */
292 	uint32_t                fe_rx_largest_size;
293 
294 	/**** Tx Group ****/
295 	bool                    fe_tx_is_cont_frag;
296 	uint32_t                fe_tx_frag_id;
297 	struct pktq             fe_tx_pktq;
298 	TAILQ_ENTRY(flow_entry) fe_tx_link;
299 	flow_tx_action_t        fe_tx_process;
300 
301 	uuid_t                  fe_eproc_uuid __sk_aligned(8);
302 	flowadv_idx_t           fe_adv_idx;
303 	kern_packet_svc_class_t fe_svc_class;
304 	uint32_t                fe_policy_id;   /* policy id matched to flow */
305 	uint32_t                fe_skip_policy_id; /* skip policy id matched to flow */
306 
307 	/**** Misc Group ****/
308 	struct nx_flowswitch *  const fe_fsw;
309 	struct ns_token         *fe_port_reservation;
310 	struct protons_token    *fe_proto_reservation;
311 	void                    *fe_ipsec_reservation;
312 
313 	struct flow_track       fe_ltrack;      /* local endpoint state */
314 	struct flow_track       fe_rtrack;      /* remote endpoint state */
315 
316 	/*
317 	 * Flow stats are kept externally stand-alone, refcnt'ed by various
318 	 * users (e.g. flow_entry, necp_client_flow, etc.)
319 	 */
320 	struct flow_stats       *fe_stats;
321 	struct flow_route       *fe_route;
322 
323 	RB_ENTRY(flow_entry)    fe_id_link;     /* linkage in flow_entry_id_tree */
324 
325 	TAILQ_ENTRY(flow_entry) fe_linger_link;
326 	uint64_t                fe_linger_expire; /* expiration deadline */
327 	uint32_t                fe_linger_wait;   /* linger time (seconds) */
328 
329 	pid_t                   fe_pid;
330 	pid_t                   fe_epid;
331 	char                    fe_proc_name[FLOW_PROCESS_NAME_LENGTH];
332 	char                    fe_eproc_name[FLOW_PROCESS_NAME_LENGTH];
333 
334 	uint32_t                fe_flowid; /* globally unique flow ID */
335 
336 	/* Logical link related information */
337 	struct netif_qset      *fe_qset;
338 	uint64_t                fe_qset_id;
339 	flow_qset_select_t      fe_qset_select;
340 	uint32_t                fe_tr_genid;
341 
342 	/* Parent child information */
343 	decl_lck_rw_data(, fe_child_list_lock);
344 	struct flow_entry_list          fe_child_list;
345 	TAILQ_ENTRY(flow_entry)         fe_child_link;
346 #if DEVELOPMENT || DEBUG
347 	int16_t                         fe_child_count;
348 #endif // DEVELOPMENT || DEBUG
	/* fe_demux_pattern_count is the element count of fe_demux_patterns */
349 	uint8_t                         fe_demux_pattern_count;
350 	struct kern_flow_demux_pattern  *__counted_by(fe_demux_pattern_count)fe_demux_patterns;
351 	uint8_t                         *__sized_by_or_null(FLOW_DEMUX_MAX_LEN) fe_demux_pkt_data;
352 
353 	TAILQ_ENTRY(flow_entry) fe_rxstrc_link;
354 };
356 
/*
 * Valid values for fe_flags.
 *
 * Bits 0x00000008, 0x00000400, 0x00000800, 0x00200000-0x00800000 and
 * 0x20000000 are currently unassigned.
 */
#define FLOWENTF_INITED                 0x00000001 /* {src,dst} states initialized */
#define FLOWENTF_AOP_OFFLOAD            0x00000002 /* AOP Offload flow */
#define FLOWENTF_RX_STEERING            0x00000004 /* RX flow steering configured */
#define FLOWENTF_TRACK                  0x00000010 /* enable state tracking */
#define FLOWENTF_CONNECTED              0x00000020 /* connected mode */
#define FLOWENTF_LISTENER               0x00000040 /* listener mode */
#define FLOWENTF_RXSTRC_PENDING         0x00000080 /* Rx steering rule cleanup pending */
#define FLOWENTF_QOS_MARKING            0x00000100 /* flow can have qos marking */
#define FLOWENTF_LOW_LATENCY            0x00000200 /* low latency flow */
#define FLOWENTF_WAIT_CLOSE             0x00001000 /* defer free after close */
#define FLOWENTF_CLOSE_NOTIFY           0x00002000 /* notify NECP upon tear down */
#define FLOWENTF_EXTRL_PORT             0x00004000 /* port reservation is held externally */
#define FLOWENTF_EXTRL_PROTO            0x00008000 /* proto reservation is held externally */
#define FLOWENTF_EXTRL_FLOWID           0x00010000 /* flowid reservation is held externally */
#define FLOWENTF_CHILD                  0x00020000 /* child flow */
#define FLOWENTF_PARENT                 0x00040000 /* parent flow */
#define FLOWENTF_NOWAKEFROMSLEEP        0x00080000 /* don't wake for this flow */
#define FLOWENTF_CONNECTION_IDLE        0x00100000 /* connection is idle */
#define FLOWENTF_ABORTED                0x01000000 /* has sent RST to peer */
#define FLOWENTF_NONVIABLE              0x02000000 /* disabled; awaiting tear down */
#define FLOWENTF_WITHDRAWN              0x04000000 /* flow has been withdrawn */
#define FLOWENTF_TORN_DOWN              0x08000000 /* torn down and awaiting destroy */
#define FLOWENTF_HALF_CLOSED            0x10000000 /* flow is half closed */
#define FLOWENTF_DESTROYED              0x40000000 /* not in RB trees anymore */
#define FLOWENTF_LINGERING              0x80000000 /* destroyed and in linger list */

/*
 * Bit-name table for fe_flags in "%b" format: the first character is the
 * output base (\020 = hexadecimal), followed by one entry per flag made
 * of a 1-based bit number (as an octal escape) and the name to print.
 * Every FLOWENTF_* flag defined above must have an entry here; keep the
 * two lists in sync when adding a flag.
 */
#define FLOWENTF_BITS                                                   \
    "\020\01INITED\02AOP_OFFLOAD\03RX_STEERING\05TRACK\06CONNECTED"     \
    "\07LISTENER\010RXSTRC_PENDING\011QOS_MARKING"                      \
    "\012LOW_LATENCY\015WAIT_CLOSE\016CLOSE_NOTIFY\017EXT_PORT"         \
    "\020EXT_PROTO\021EXT_FLOWID\022CHILD\023PARENT"                    \
    "\024NOWAKEFROMSLEEP\025CONNECTION_IDLE"                            \
    "\031ABORTED\032NONVIABLE\033WITHDRAWN"                             \
    "\034TORN_DOWN\035HALF_CLOSED\037DESTROYED\40LINGERING"
390 
391 TAILQ_HEAD(flow_entry_linger_head, flow_entry);
392 
/*
 * List element (linked through fed_link) that carries a flow's UUID along
 * with two boolean requests.  The anonymous union exposes the same 16
 * UUID bytes as two 64-bit words, four 32-bit words, or a uuid_t.
 * NOTE(review): presumably queued for flows that have gone away so that
 * the nonviable / close-notify work can be done later -- confirm.
 */
392 struct flow_entry_dead {
393 	LIST_ENTRY(flow_entry_dead)     fed_link;
394 
395 	boolean_t               fed_want_nonviable;
396 	boolean_t               fed_want_clonotify;
397 
398 	/* rule (flow) UUID */
399 	union {
400 		uint64_t        fed_uuid_64[2];
401 		uint32_t        fed_uuid_32[4];
402 		uuid_t          fed_uuid;
403 	} __sk_aligned(8);
404 };
406 
407 TAILQ_HEAD(flow_entry_rxstrc_head, flow_entry);
408 
409 /*
410  * Minimum refcnt for a flow route entry to be considered as idle.
411  */
412 #define FLOW_ROUTE_MINREF       2       /* for the 2 RB trees */
413 
/*
 * A flow route.  Each object can be linked into two red-black trees at
 * once: flow_route_tree through fr_link and flow_route_id_tree through
 * fr_id_link (FLOW_ROUTE_MINREF accounts for these two trees).  The
 * back-pointers at the end of the structure record the flow manager and
 * the two buckets involved.
 */
413 struct flow_route {
414 	RB_ENTRY(flow_route)    fr_link;        /* linkage in flow_route_tree */
415 	RB_ENTRY(flow_route)    fr_id_link;     /* linkage in flow_route_id_tree */
416 
417 	/*
418 	 * fr_laddr represents the local address that the system chooses
419 	 * for the foreign destination in fr_faddr.  The flow entry that
420 	 * is referring to this flow route object may choose a different
421 	 * local address if it wishes.
422 	 *
423 	 * fr_gaddr represents the gateway address to reach the final
424 	 * foreign destination fr_faddr, valid only if the destination is
425 	 * not directly attached (FLOWRTF_GATEWAY is set).
426 	 *
427 	 * The use of sockaddr for storage is for convenience; the port
428 	 * value is not applicable for this object, as this is shared
429 	 * among flow entries.
430 	 */
431 	union sockaddr_in_4_6   fr_laddr;       /* local IP address */
432 	union sockaddr_in_4_6   fr_faddr;       /* remote IP address */
433 #define fr_af                   fr_faddr.sa.sa_family
434 	union sockaddr_in_4_6   fr_gaddr;       /* gateway IP address */
435 
436 	struct flow_llhdr       fr_llhdr;
437 #define fr_eth_padded           fr_llhdr.flh_eth_padded
438 #define fr_eth                  fr_llhdr.flh_eth
439 
440 	/*
441 	 * In flow_route_tree, we use the destination address as key.
442 	 * To speed up searches, we initialize fr_addr_key to the address
443 	 * portion of fr_faddr depending on the address family.
444 	 */
445 	void                    *fr_addr_key;
446 
447 	/* flow route UUID */
448 	uuid_t                  fr_uuid __sk_aligned(8);
449 
450 	/*
451 	 * fr_usecnt is updated atomically; incremented when a flow entry
452 	 * refers to this object and decremented otherwise.  Periodically,
453 	 * the flowswitch instance garbage collects flow_route objects
454 	 * that aren't being referred to by any flow entries.
455 	 *
456 	 * fr_expire is set when fr_usecnt reaches its minimum count, and
457 	 * is cleared when it goes above the minimum count.
458 	 *
459 	 * The spin lock fr_reflock is used to serialize both.
460 	 */
461 	decl_lck_spin_data(, fr_reflock);
462 	uint64_t                fr_expire;
463 	volatile uint32_t       fr_usecnt;
464 
465 	uint32_t                fr_flags;       /* FLOWRTF_* */
466 	uint32_t                fr_laddr_gencnt; /* local IP gencnt */
467 	uint32_t                fr_addr_len;     /* sizeof {in,in6}_addr */
468 
469 	volatile uint32_t       fr_want_configure;
470 	volatile uint32_t       fr_want_probe;
471 
472 	/* lock to serialize resolver */
473 	decl_lck_mtx_data(, fr_lock);
474 
475 	/*
476 	 * fr_rt_dst is the route to final destination, and along with
477 	 * fr_rt_evhdlr_tag, they are used in route event registration.
478 	 *
479 	 * fr_rt_gw is valid only if FLOWRTF_GATEWAY is set.
480 	 */
481 	eventhandler_tag        fr_rt_evhdlr_tag;
482 	struct rtentry          *fr_rt_dst;
483 	struct rtentry          *fr_rt_gw;
484 
485 	/* nexus UUID */
486 	uuid_t                  fr_nx_uuid __sk_aligned(8);
487 
488 	const struct flow_mgr   *fr_mgr;
489 	const struct flow_route_bucket  *fr_frb;
490 	const struct flow_route_id_bucket *fr_frib;
491 };
493 
494 /* valid values for fr_flags */
495 #define FLOWRTF_ATTACHED        0x00000001 /* attached to RB trees */
496 #define FLOWRTF_ONLINK          0x00000010 /* dst directly on the link */
497 #define FLOWRTF_GATEWAY         0x00000020 /* gw IP address is valid */
498 #define FLOWRTF_RESOLVED        0x00000040 /* flow route is resolved */
499 #define FLOWRTF_HAS_LLINFO      0x00000080 /* has dst link-layer address */
500 #define FLOWRTF_DELETED         0x00000100 /* route has been deleted */
501 #define FLOWRTF_DST_LL_MCAST    0x00000200 /* dst is link layer multicast */
502 #define FLOWRTF_DST_LL_BCAST    0x00000400 /* dst is link layer broadcast */
503 #define FLOWRTF_STABLE_ADDR     0x00000800 /* local address prefers stable */
504 
/*
 * Helpers for flow_route::fr_lock (the mutex documented in the structure
 * as serializing the resolver).  Each macro forwards to the matching
 * lck_mtx_*() call or LCK_MTX_ASSERT on that mutex.
 */
504 #define FR_LOCK(_fr)                    \
505 	lck_mtx_lock(&(_fr)->fr_lock)
506 #define FR_TRY_LOCK(_fr)                \
507 	lck_mtx_try_lock(&(_fr)->fr_lock)
508 #define FR_LOCK_ASSERT_HELD(_fr)        \
509 	LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_OWNED)
510 #define FR_LOCK_ASSERT_NOTHELD(_fr)     \
511 	LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_NOTOWNED)
512 #define FR_UNLOCK(_fr)                  \
513 	lck_mtx_unlock(&(_fr)->fr_lock)
515 
/*
 * Copy a new destination Ethernet address into the flow route's cached
 * Ethernet header and recompute the link-layer destination flags:
 * FLOWRTF_DST_LL_MCAST and FLOWRTF_DST_LL_BCAST are both cleared, then at
 * most one is set again.  For an address that ETHER_IS_MULTICAST() accepts,
 * BCAST is chosen when it compares equal to etherbroadcastaddr and MCAST
 * otherwise.
 *
 * Both macro arguments are expanded more than once, so pass expressions
 * without side effects.
 */
#define FLOWRT_UPD_ETH_DST(_fr, _addr)  do {                               \
	bcopy((_addr), (_fr)->fr_eth.ether_dhost, ETHER_ADDR_LEN);         \
	(_fr)->fr_flags &= ~(FLOWRTF_DST_LL_MCAST | FLOWRTF_DST_LL_BCAST); \
	if (ETHER_IS_MULTICAST(_addr)) {                                   \
	        if (_ether_cmp(etherbroadcastaddr, (_addr)) != 0) {        \
	                (_fr)->fr_flags |= FLOWRTF_DST_LL_MCAST;           \
	        } else {                                                   \
	                (_fr)->fr_flags |= FLOWRTF_DST_LL_BCAST;           \
	        }                                                          \
	}                                                                  \
} while (0)
526 
527 RB_HEAD(flow_route_tree, flow_route);
528 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_tree, flow_route,
529     fr_link, fr_cmp);
530 
/*
 * One bucket of the flow_route_tree: the tree head plus the reader/writer
 * lock that the FRB_*() macros operate on.
 */
530 struct flow_route_bucket {
531 	decl_lck_rw_data(, frb_lock);           /* taken via FRB_*LOCK() */
532 	struct flow_route_tree  frb_head;       /* RB tree of flow_route (fr_link) */
533 	const uint32_t          frb_idx;
534 };
536 
/*
 * Reader/writer lock helpers for flow_route_bucket::frb_lock.  The W and
 * R variants map to the exclusive and shared lck_rw_*() calls, the
 * *TO*LOCK variants to the exclusive<->shared conversion calls, and the
 * *TRYLOCK variants to the try-lock calls.  FRB_UNLOCK() goes through
 * lck_rw_done() instead of a mode-specific unlock.  The *_ASSERT_HELD()
 * forms expand to LCK_RW_ASSERT with the EXCLUSIVE, SHARED or HELD check.
 */
536 #define FRB_WLOCK(_frb)                 \
537 	lck_rw_lock_exclusive(&(_frb)->frb_lock)
538 #define FRB_WLOCKTORLOCK(_frb)          \
539 	lck_rw_lock_exclusive_to_shared(&(_frb)->frb_lock)
540 #define FRB_WTRYLOCK(_frb)              \
541 	lck_rw_try_lock_exclusive(&(_frb)->frb_lock)
542 #define FRB_WUNLOCK(_frb)               \
543 	lck_rw_unlock_exclusive(&(_frb)->frb_lock)
544 #define FRB_RLOCK(_frb)                 \
545 	lck_rw_lock_shared(&(_frb)->frb_lock)
546 #define FRB_RLOCKTOWLOCK(_frb)          \
547 	lck_rw_lock_shared_to_exclusive(&(_frb)->frb_lock)
548 #define FRB_RTRYLOCK(_frb)              \
549 	lck_rw_try_lock_shared(&(_frb)->frb_lock)
550 #define FRB_RUNLOCK(_frb)               \
551 	lck_rw_unlock_shared(&(_frb)->frb_lock)
552 #define FRB_UNLOCK(_frb)                \
553 	lck_rw_done(&(_frb)->frb_lock)
554 #define FRB_WLOCK_ASSERT_HELD(_frb)     \
555 	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_EXCLUSIVE)
556 #define FRB_RLOCK_ASSERT_HELD(_frb)     \
557 	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_SHARED)
558 #define FRB_LOCK_ASSERT_HELD(_frb)      \
559 	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_HELD)
561 
562 RB_HEAD(flow_route_id_tree, flow_route);
563 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_id_tree, flow_route,
564     fr_id_link, fr_id_cmp);
565 
/*
 * One bucket of the flow_route_id_tree: the tree head plus the
 * reader/writer lock that the FRIB_*() macros operate on.
 */
565 struct flow_route_id_bucket {
566 	decl_lck_rw_data(, frib_lock);          /* taken via FRIB_*LOCK() */
567 	struct flow_route_id_tree       frib_head; /* RB tree of flow_route (fr_id_link) */
568 	const uint32_t                  frib_idx;
569 };
571 
/*
 * Reader/writer lock helpers for flow_route_id_bucket::frib_lock; they
 * mirror the FRB_*() set one for one.  FRIB_UNLOCK() goes through
 * lck_rw_done() instead of a mode-specific unlock, and the
 * *_ASSERT_HELD() forms expand to LCK_RW_ASSERT with the EXCLUSIVE,
 * SHARED or HELD check.
 */
571 #define FRIB_WLOCK(_frib)               \
572 	lck_rw_lock_exclusive(&(_frib)->frib_lock)
573 #define FRIB_WLOCKTORLOCK(_frib)        \
574 	lck_rw_lock_exclusive_to_shared(&(_frib)->frib_lock)
575 #define FRIB_WTRYLOCK(_frib)            \
576 	lck_rw_try_lock_exclusive(&(_frib)->frib_lock)
577 #define FRIB_WUNLOCK(_frib)             \
578 	lck_rw_unlock_exclusive(&(_frib)->frib_lock)
579 #define FRIB_RLOCK(_frib)               \
580 	lck_rw_lock_shared(&(_frib)->frib_lock)
581 #define FRIB_RLOCKTOWLOCK(_frib)        \
582 	lck_rw_lock_shared_to_exclusive(&(_frib)->frib_lock)
583 #define FRIB_RTRYLOCK(_frib)            \
584 	lck_rw_try_lock_shared(&(_frib)->frib_lock)
585 #define FRIB_RUNLOCK(_frib)             \
586 	lck_rw_unlock_shared(&(_frib)->frib_lock)
587 #define FRIB_UNLOCK(_frib)              \
588 	lck_rw_done(&(_frib)->frib_lock)
589 #define FRIB_WLOCK_ASSERT_HELD(_frib)   \
590 	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_EXCLUSIVE)
591 #define FRIB_RLOCK_ASSERT_HELD(_frib)   \
592 	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_SHARED)
593 #define FRIB_LOCK_ASSERT_HELD(_frib)    \
594 	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_HELD)
596 
/*
 * Flow manager context.  It holds the flow table (a cuckoo hashtable),
 * per-mask flow counters, and three bucket arrays: flow owner buckets,
 * flow route buckets and flow route ID buckets.  Each array is reached
 * through a void pointer whose bounds annotation (__sized_by) names the
 * matching *_tot_sz field.
 *
 * NOTE(review): the *_tot_sz fields are commented as "allocated size of
 * each" bucket, yet they are the __sized_by bound of the whole array
 * pointer, which suggests they hold the total allocation size -- confirm
 * and fix the field comments if so.
 */
596 struct flow_mgr {
597 	char            fm_name[IFNAMSIZ];
598 	uuid_t          fm_uuid;
599 	RB_ENTRY(flow_mgr) fm_link;
600 
601 	struct cuckoo_hashtable *fm_flow_table;
602 	size_t   fm_flow_hash_count[FKMASK_IDX_MAX]; /* # of flows with mask */
603 	uint16_t fm_flow_hash_masks[FKMASK_IDX_MAX];
604 
605 	void      *__sized_by(fm_owner_bucket_tot_sz) fm_owner_buckets;     /* cache-aligned fob */
606 	size_t    fm_owner_buckets_cnt;  /* total # of fobs */
607 	size_t    fm_owner_bucket_sz;    /* size of each fob */
608 	size_t    fm_owner_bucket_tot_sz; /* allocated size of each fob */
609 
610 	void      *__sized_by(fm_route_bucket_tot_sz) fm_route_buckets;     /* cache-aligned frb */
611 	size_t    fm_route_buckets_cnt;  /* total # of frb */
612 	size_t    fm_route_bucket_sz;    /* size of each frb */
613 	size_t    fm_route_bucket_tot_sz; /* allocated size of each frb */
614 
615 	void      *__sized_by(fm_route_id_bucket_tot_sz) fm_route_id_buckets;    /* cache-aligned frib */
616 	size_t    fm_route_id_buckets_cnt; /* total # of frib */
617 	size_t    fm_route_id_bucket_sz;   /* size of each frib */
618 	size_t    fm_route_id_bucket_tot_sz; /* allocated size of each frib */
619 };
621 
622 /*
623  * this func compare match with key;
624  * return values:
625  * 0 as long as @key(exact) matches what @match(wildcard) wants to match on.
626  * 1 when it doesn't match
627  */
628 static inline int
flow_key_cmp(const struct flow_key * match,const struct flow_key * key)629 flow_key_cmp(const struct flow_key *match, const struct flow_key *key)
630 {
631 #define FK_CMP(field, mask)     \
632 	if ((match->fk_mask & mask) != 0) {     \
633 	        if ((key->fk_mask & mask) == 0) {       \
634 	                return 1;       \
635 	        }       \
636 	        int d = memcmp(&match->field, &key->field, sizeof(match->field));       \
637 	        if (d != 0) {   \
638 	                return d;       \
639 	        }       \
640 	}
641 
642 	FK_CMP(fk_ipver, FKMASK_IPVER);
643 	FK_CMP(fk_proto, FKMASK_PROTO);
644 	FK_CMP(fk_src, FKMASK_SRC);
645 	FK_CMP(fk_dst, FKMASK_DST);
646 	FK_CMP(fk_sport, FKMASK_SPORT);
647 	FK_CMP(fk_dport, FKMASK_DPORT);
648 
649 	return 0;
650 }
651 
652 /*
653  * Similar to flow_key_cmp() except using memory compare with mask,
654  * done with SIMD instructions, if available for the platform.
655  */
656 static inline int
flow_key_cmp_mask(const struct flow_key * match,const struct flow_key * key,const struct flow_key * mask)657 flow_key_cmp_mask(const struct flow_key *match,
658     const struct flow_key *key, const struct flow_key *mask)
659 {
660 	static_assert(FLOW_KEY_LEN == 48);
661 	static_assert(FLOW_KEY_LEN == sizeof(struct flow_key));
662 	static_assert((sizeof(struct flow_entry) % 16) == 0);
663 	static_assert((offsetof(struct flow_entry, fe_key) % 16) == 0);
664 
665 	/* local variables are __bidi_indexable with -fbounds-safety */
666 	const struct flow_key *match_idx = match;
667 	const struct flow_key *key_idx = key;
668 	const struct flow_key *mask_idx = mask;
669 
670 	return sk_memcmp_mask_48B((const uint8_t *)match_idx,
671 	           (const uint8_t *)key_idx, (const uint8_t *)mask_idx);
672 }
673 
674 static inline uint32_t
flow_key_hash(const struct flow_key * key)675 flow_key_hash(const struct flow_key *key)
676 {
677 	uint32_t hash = FK_HASH_SEED;
678 #define FK_HASH(field, mask)    \
679 	if ((key->fk_mask & mask) != 0) {       \
680 	        hash = net_flowhash(&key->field, sizeof(key->field), hash);     \
681 	}
682 
683 	FK_HASH(fk_ipver, FKMASK_IPVER);
684 	FK_HASH(fk_proto, FKMASK_PROTO);
685 	FK_HASH(fk_src, FKMASK_SRC);
686 	FK_HASH(fk_dst, FKMASK_DST);
687 	FK_HASH(fk_sport, FKMASK_SPORT);
688 	FK_HASH(fk_dport, FKMASK_DPORT);
689 
690 	return hash;
691 }
692 
693 __attribute__((always_inline))
694 static inline void
flow_key_unpack(const struct flow_key * key,union sockaddr_in_4_6 * laddr,union sockaddr_in_4_6 * faddr,uint8_t * protocol)695 flow_key_unpack(const struct flow_key *key, union sockaddr_in_4_6 *laddr,
696     union sockaddr_in_4_6 *faddr, uint8_t *protocol)
697 {
698 	*protocol = key->fk_proto;
699 	if (key->fk_ipver == IPVERSION) {
700 		laddr->sa.sa_family = AF_INET;
701 		laddr->sin.sin_addr = key->fk_src4;
702 		laddr->sin.sin_port = key->fk_sport;
703 		faddr->sa.sa_family = AF_INET;
704 		faddr->sin.sin_addr = key->fk_dst4;
705 		faddr->sin.sin_port = key->fk_dport;
706 	} else if (key->fk_ipver == IPV6_VERSION) {
707 		laddr->sa.sa_family = AF_INET6;
708 		laddr->sin6.sin6_addr = key->fk_src6;
709 		laddr->sin6.sin6_port = key->fk_sport;
710 		faddr->sa.sa_family = AF_INET6;
711 		faddr->sin6.sin6_addr = key->fk_dst6;
712 		faddr->sin6.sin6_port = key->fk_dport;
713 	}
714 }
715 
716 __attribute__((always_inline))
717 static inline int
flow_req2key(struct nx_flow_req * req,struct flow_key * key)718 flow_req2key(struct nx_flow_req *req, struct flow_key *key)
719 {
720 	FLOW_KEY_CLEAR(key);
721 
722 	if (req->nfr_saddr.sa.sa_family == AF_INET) {
723 		key->fk_ipver = IPVERSION;
724 		key->fk_proto = req->nfr_ip_protocol;
725 		key->fk_mask |= FKMASK_PROTO;
726 		if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
727 			key->fk_src4 = req->nfr_saddr.sin.sin_addr;
728 			key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
729 		}
730 		if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
731 			key->fk_dst4 = req->nfr_daddr.sin.sin_addr;
732 			key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
733 		}
734 		if (sk_sa_has_port(SA(&req->nfr_saddr))) {
735 			key->fk_sport = req->nfr_saddr.sin.sin_port;
736 			key->fk_mask |= FKMASK_SPORT;
737 		}
738 		if (sk_sa_has_port(SA(&req->nfr_daddr))) {
739 			key->fk_dport = req->nfr_daddr.sin.sin_port;
740 			key->fk_mask |= FKMASK_DPORT;
741 		}
742 	} else if (req->nfr_saddr.sa.sa_family == AF_INET6) {
743 		key->fk_ipver = IPV6_VERSION;
744 		key->fk_proto = req->nfr_ip_protocol;
745 		key->fk_mask |= FKMASK_PROTO;
746 		if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
747 			key->fk_src6 = req->nfr_saddr.sin6.sin6_addr;
748 			key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
749 		}
750 		if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
751 			key->fk_dst6 = req->nfr_daddr.sin6.sin6_addr;
752 			key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
753 		}
754 		if (sk_sa_has_port(SA(&req->nfr_saddr))) {
755 			key->fk_sport = req->nfr_saddr.sin6.sin6_port;
756 			key->fk_mask |= FKMASK_SPORT;
757 		}
758 		if (sk_sa_has_port(SA(&req->nfr_daddr))) {
759 			key->fk_dport = req->nfr_daddr.sin6.sin6_port;
760 			key->fk_mask |= FKMASK_DPORT;
761 		}
762 	} else {
763 		SK_ERR("unknown AF %d", req->nfr_saddr.sa.sa_family);
764 		return ENOTSUP;
765 	}
766 
767 	switch (key->fk_mask) {
768 	case FKMASK_5TUPLE:
769 	case FKMASK_4TUPLE:
770 	case FKMASK_3TUPLE:
771 	case FKMASK_2TUPLE:
772 	case FKMASK_IPFLOW3:
773 	case FKMASK_IPFLOW2:
774 	case FKMASK_IPFLOW1:
775 		break;
776 	default:
777 		SK_ERR("unknown flow key mask 0x%04x", key->fk_mask);
778 		return ENOTSUP;
779 	}
780 
781 	return 0;
782 }
783 
784 __attribute__((always_inline))
785 static inline void
flow_pkt2key(struct __kern_packet * pkt,boolean_t input,struct flow_key * key)786 flow_pkt2key(struct __kern_packet *pkt, boolean_t input,
787     struct flow_key *key)
788 {
789 	struct __flow *flow = pkt->pkt_flow;
790 
791 	FLOW_KEY_CLEAR(key);
792 
793 	if (__improbable((pkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) == 0)) {
794 		return;
795 	}
796 
797 	ASSERT(flow->flow_l3._l3_ip_ver != 0);
798 
799 	key->fk_ipver = flow->flow_l3._l3_ip_ver;
800 	key->fk_proto = flow->flow_ip_proto;
801 	if (input) {
802 		if (flow->flow_ip_ver == IPVERSION) {
803 			key->fk_src4 = flow->flow_ipv4_dst;
804 			key->fk_sport = flow->flow_tcp_dst;
805 			key->fk_dst4 = flow->flow_ipv4_src;
806 			key->fk_dport = flow->flow_tcp_src;
807 		} else {
808 			key->fk_src6 = flow->flow_ipv6_dst;
809 			key->fk_sport = flow->flow_tcp_dst;
810 			key->fk_dst6 = flow->flow_ipv6_src;
811 			key->fk_dport = flow->flow_tcp_src;
812 		}
813 	} else {
814 		if (flow->flow_ip_ver == IPVERSION) {
815 			key->fk_src4 = flow->flow_ipv4_src;
816 			key->fk_sport = flow->flow_tcp_src;
817 			key->fk_dst4 = flow->flow_ipv4_dst;
818 			key->fk_dport = flow->flow_tcp_dst;
819 		} else {
820 			key->fk_src6 = flow->flow_ipv6_src;
821 			key->fk_sport = flow->flow_tcp_src;
822 			key->fk_dst6 = flow->flow_ipv6_dst;
823 			key->fk_dport = flow->flow_tcp_dst;
824 		}
825 	}
826 }
827 
828 __attribute__((always_inline))
829 static inline int
flow_ip_cmp(const void * a0,const void * b0,size_t alen)830 flow_ip_cmp(const void *a0, const void *b0, size_t alen)
831 {
832 	struct flow_ip_addr *a = __DECONST(struct flow_ip_addr *, a0),
833 	    *b = __DECONST(struct flow_ip_addr *, b0);
834 
835 	switch (alen) {
836 	case sizeof(struct in_addr):
837 		if (a->_addr32[0] > b->_addr32[0]) {
838 			return 1;
839 		}
840 		if (a->_addr32[0] < b->_addr32[0]) {
841 			return -1;
842 		}
843 		break;
844 
845 	case sizeof(struct in6_addr):
846 		if (a->_addr64[1] > b->_addr64[1]) {
847 			return 1;
848 		}
849 		if (a->_addr64[1] < b->_addr64[1]) {
850 			return -1;
851 		}
852 		if (a->_addr64[0] > b->_addr64[0]) {
853 			return 1;
854 		}
855 		if (a->_addr64[0] < b->_addr64[0]) {
856 			return -1;
857 		}
858 		break;
859 
860 	default:
861 		VERIFY(0);
862 		/* NOTREACHED */
863 		__builtin_unreachable();
864 	}
865 	return 0;
866 }
867 
868 __attribute__((always_inline))
869 static inline struct flow_owner_bucket *
flow_mgr_get_fob_at_idx(struct flow_mgr * fm,uint32_t idx)870 flow_mgr_get_fob_at_idx(struct flow_mgr *fm, uint32_t idx)
871 {
872 	char *buckets = fm->fm_owner_buckets;
873 	void *bucket = buckets + (idx * fm->fm_owner_bucket_sz);
874 	return bucket;
875 }
876 
877 __attribute__((always_inline))
878 static inline struct flow_route_bucket *
flow_mgr_get_frb_at_idx(struct flow_mgr * fm,uint32_t idx)879 flow_mgr_get_frb_at_idx(struct flow_mgr *fm, uint32_t idx)
880 {
881 	char *buckets = fm->fm_route_buckets;
882 	void *bucket = buckets + (idx * fm->fm_route_bucket_sz);
883 	return bucket;
884 }
885 
886 __attribute__((always_inline))
887 static inline struct flow_route_id_bucket *
flow_mgr_get_frib_at_idx(struct flow_mgr * fm,uint32_t idx)888 flow_mgr_get_frib_at_idx(struct flow_mgr *fm, uint32_t idx)
889 {
890 	char *buckets = fm->fm_route_id_buckets;
891 	void *bucket = buckets + (idx * fm->fm_route_id_bucket_sz);
892 	return bucket;
893 }
894 
895 __attribute__((always_inline))
896 static inline uint32_t
flow_mgr_get_fob_idx(struct flow_mgr * fm,struct flow_owner_bucket * bkt)897 flow_mgr_get_fob_idx(struct flow_mgr *fm,
898     struct flow_owner_bucket *bkt)
899 {
900 	ASSERT(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) %
901 	    fm->fm_owner_bucket_sz == 0);
902 	return (uint32_t)(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) /
903 	       fm->fm_owner_bucket_sz);
904 }
905 
906 __attribute__((always_inline))
907 static inline size_t
flow_mgr_get_num_flows(struct flow_mgr * mgr)908 flow_mgr_get_num_flows(struct flow_mgr *mgr)
909 {
910 	ASSERT(mgr->fm_flow_table != NULL);
911 	return cuckoo_hashtable_entries(mgr->fm_flow_table);
912 }
913 
/*
 * Globals defined elsewhere in the flow code.  Each sk_*_size value is
 * declared next to the skmem cache of the same prefix.
 * NOTE(review): "fo"/"fe" presumably stand for flow owner / flow entry,
 * as in the prototypes below -- confirm against the definitions.
 */
extern unsigned int sk_fo_size;
extern struct skmem_cache *sk_fo_cache;

extern unsigned int sk_fe_size;
extern struct skmem_cache *sk_fe_cache;

extern unsigned int sk_fab_size;
extern struct skmem_cache *sk_fab_cache;

extern uint32_t flow_seed;

/* Caches for flow routes and flow statistics objects. */
extern struct skmem_cache *flow_route_cache;
extern struct skmem_cache *flow_stats_cache;
927 
928 __BEGIN_DECLS
929 
930 typedef void (*flow_route_ctor_fn_t)(void *arg, struct flow_route *);
931 typedef int (*flow_route_resolve_fn_t)(void *arg, struct flow_route *,
932     struct __kern_packet *);
933 
934 extern int flow_init(void);
935 extern void flow_fini(void);
936 
937 extern void flow_mgr_init(void);
938 extern void flow_mgr_fini(void);
939 extern struct flow_mgr *flow_mgr_find_lock(uuid_t);
940 extern void flow_mgr_unlock(void);
941 extern struct flow_mgr * flow_mgr_create(size_t, size_t, size_t, size_t);
942 extern void flow_mgr_destroy(struct flow_mgr *);
943 extern void flow_mgr_terminate(struct flow_mgr *);
944 extern int flow_mgr_flow_add(struct kern_nexus *nx, struct flow_mgr *fm,
945     struct flow_owner *fo, struct ifnet *ifp, struct nx_flow_req *req,
946     flow_route_ctor_fn_t fr_ctor, flow_route_resolve_fn_t fr_resolve, void *fr_arg);
947 extern struct flow_owner_bucket *flow_mgr_get_fob_by_pid(
948 	struct flow_mgr *, pid_t);
949 extern struct flow_entry *flow_mgr_get_fe_by_uuid_rlock(
950 	struct flow_mgr *, uuid_t);
951 extern struct flow_route_bucket *flow_mgr_get_frb_by_addr(
952 	struct flow_mgr *, union sockaddr_in_4_6 *);
953 extern struct flow_route_id_bucket *flow_mgr_get_frib_by_uuid(
954 	struct flow_mgr *, uuid_t);
955 extern int flow_mgr_flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask);
956 extern int flow_mgr_flow_hash_mask_del(struct flow_mgr *fm, uint32_t mask);
957 
958 extern struct flow_entry * fe_alloc(boolean_t can_block);
959 
960 extern int flow_namespace_create(union sockaddr_in_4_6 *, uint8_t protocol,
961     netns_token *, uint32_t, struct ns_flow_info *);
962 extern void flow_namespace_half_close(netns_token *token);
963 extern void flow_namespace_withdraw(netns_token *);
964 extern void flow_namespace_destroy(netns_token *);
965 
966 extern struct flow_owner_bucket *__sized_by(*tot_sz)
967 flow_owner_buckets_alloc(size_t, size_t *, size_t * tot_sz);
968 extern void flow_owner_buckets_free(struct flow_owner_bucket *, size_t);
969 extern void flow_owner_bucket_init(struct flow_owner_bucket *);
970 extern void flow_owner_bucket_destroy(struct flow_owner_bucket *);
971 extern void flow_owner_bucket_purge_all(struct flow_owner_bucket *);
972 extern void flow_owner_attach_nexus_port(struct flow_mgr *, boolean_t,
973     pid_t, nexus_port_t);
974 extern uint32_t flow_owner_detach_nexus_port(struct flow_mgr *,
975     boolean_t, pid_t, nexus_port_t, boolean_t);
976 extern struct flow_owner *flow_owner_alloc(struct flow_owner_bucket *,
977     struct proc *, nexus_port_t, bool, bool, struct nx_flowswitch*,
978     struct nexus_adapter *, void *, bool);
979 extern void flow_owner_free(struct flow_owner_bucket *, struct flow_owner *);
980 extern struct flow_entry *flow_owner_create_entry(struct flow_owner *,
981     struct nx_flow_req *, boolean_t, uint32_t, boolean_t,
982     struct flow_route *, int *);
983 extern int flow_owner_destroy_entry(struct flow_owner *, uuid_t, bool, void *);
984 extern struct flow_owner *flow_owner_find_by_pid(struct flow_owner_bucket *,
985     pid_t, void *, bool);
986 extern int flow_owner_flowadv_index_alloc(struct flow_owner *, flowadv_idx_t *);
987 extern void flow_owner_flowadv_index_free(struct flow_owner *, flowadv_idx_t);
988 extern uint32_t flow_owner_activate_nexus_port(struct flow_mgr *,
989     boolean_t, pid_t, nexus_port_t, struct nexus_adapter *,
990     na_activate_mode_t);
991 
992 extern struct flow_entry *flow_mgr_find_fe_by_key(struct flow_mgr *,
993     struct flow_key *);
994 extern struct flow_entry * flow_mgr_find_conflicting_fe(struct flow_mgr *fm,
995     struct flow_key *fe_key);
996 extern void flow_mgr_foreach_flow(struct flow_mgr *fm,
997     void (^flow_handler)(struct flow_entry *fe));
998 extern struct flow_entry *flow_entry_find_by_uuid(struct flow_owner *,
999     uuid_t);
1000 extern struct flow_entry * flow_entry_alloc(struct flow_owner *fo,
1001     struct nx_flow_req *req, int *perr);
1002 extern void flow_entry_teardown(struct flow_owner *, struct flow_entry *);
1003 extern void flow_entry_destroy(struct flow_owner *, struct flow_entry *, bool,
1004     void *);
1005 extern int flow_entry_add_rx_steering_rule(struct nx_flowswitch *fsw,
1006     struct flow_entry *fe);
1007 extern void flow_entry_rx_steering_rule_cleanup(struct nx_flowswitch *,
1008     struct flow_entry *);
1009 extern void flow_entry_retain(struct flow_entry *fe);
1010 extern void flow_entry_release(struct flow_entry **pfe);
1011 extern uint32_t flow_entry_refcnt(struct flow_entry *fe);
1012 extern bool rx_flow_demux_match(struct nx_flowswitch *, struct flow_entry *, struct __kern_packet *);
1013 extern struct flow_entry *rx_lookup_child_flow(struct nx_flowswitch *fsw,
1014     struct flow_entry *, struct __kern_packet *);
1015 extern struct flow_entry *tx_lookup_child_flow(struct flow_entry *, uuid_t);
1016 
1017 extern struct flow_entry_dead *flow_entry_dead_alloc(zalloc_flags_t);
1018 extern void flow_entry_dead_free(struct flow_entry_dead *);
1019 
1020 extern void flow_entry_stats_get(struct flow_entry *, struct sk_stats_flow *);
1021 extern void fe_stats_update(struct flow_entry *);
1022 
1023 extern int flow_pkt_classify(struct __kern_packet *pkt, struct ifnet *ifp,
1024     sa_family_t af, bool input);
1025 
1026 extern void flow_track_stats(struct flow_entry *, uint64_t, uint64_t,
1027     bool, bool);
1028 extern int flow_pkt_track(struct flow_entry *, struct __kern_packet *, bool);
1029 extern boolean_t flow_track_tcp_want_abort(struct flow_entry *);
1030 extern void flow_track_abort_tcp( struct flow_entry *fe,
1031     struct __kern_packet *in_pkt, struct __kern_packet *rst_pkt);
1032 extern void flow_track_abort_quic(struct flow_entry *fe,
1033     uint8_t *__counted_by(QUIC_STATELESS_RESET_TOKEN_SIZE)token);
1034 
1035 extern void fsw_host_rx_cb(struct nx_flowswitch *fsw, struct pktq *pktq);
1036 extern void fsw_host_rx_enqueue_mbq(struct nx_flowswitch *fsw, struct pktq *pktq,
1037     struct mbufq *host_mq);
1038 extern void fsw_host_sendup(struct ifnet *ifp, struct mbufq *host_mq);
1039 
1040 extern void flow_rx_agg_tcp(struct nx_flowswitch *fsw, struct flow_entry *fe,
1041     struct pktq *rx_pkts, uint32_t rx_bytes, struct mbufq *host_mq,
1042     uint32_t flags);
1043 
1044 extern void flow_route_init(void);
1045 extern void flow_route_fini(void);
1046 extern struct flow_route_bucket *__sized_by(*tot_sz)
1047 flow_route_buckets_alloc(size_t, size_t *, size_t * tot_sz);
1048 extern void flow_route_buckets_free(struct flow_route_bucket *, size_t);
1049 extern void flow_route_bucket_init(struct flow_route_bucket *);
1050 extern void flow_route_bucket_destroy(struct flow_route_bucket *);
1051 extern void flow_route_bucket_purge_all(struct flow_route_bucket *);
1052 extern struct flow_route_id_bucket *__sized_by(*tot_sz)
1053 flow_route_id_buckets_alloc(size_t, size_t *, size_t * tot_sz);
1054 extern void flow_route_id_buckets_free(struct flow_route_id_bucket *, size_t);
1055 extern void flow_route_id_bucket_init(struct flow_route_id_bucket *);
1056 extern void flow_route_id_bucket_destroy(struct flow_route_id_bucket *);
1057 
1058 extern int flow_route_select_laddr(union sockaddr_in_4_6 *,
1059     union sockaddr_in_4_6 *, struct ifnet *, struct rtentry *, uint32_t *, int);
1060 extern int flow_route_find(struct kern_nexus *, struct flow_mgr *,
1061     struct ifnet *, struct nx_flow_req *, flow_route_ctor_fn_t,
1062     flow_route_resolve_fn_t, void *, struct flow_route **);
1063 extern int flow_route_configure(struct flow_route *, struct ifnet *, struct nx_flow_req *);
1064 extern void flow_route_retain(struct flow_route *);
1065 extern void flow_route_release(struct flow_route *);
1066 extern uint32_t flow_route_prune(struct flow_mgr *, struct ifnet *,
1067     uint32_t *);
1068 extern void flow_route_cleanup(struct flow_route *);
1069 extern boolean_t flow_route_laddr_validate(union sockaddr_in_4_6 *,
1070     struct ifnet *, uint32_t *);
1071 extern boolean_t flow_route_key_validate(struct flow_key *, struct ifnet *,
1072     uint32_t *);
1073 extern void flow_qset_select_dynamic(struct nx_flowswitch *,
1074     struct flow_entry *, boolean_t);
1075 extern void flow_stats_init(void);
1076 extern void flow_stats_fini(void);
1077 extern struct flow_stats *flow_stats_alloc(boolean_t cansleep);
1078 
#if SK_LOG
/*
 * String formatters for flow keys and flow entries, available only when
 * SK_LOG is enabled.  Each takes a destination buffer of dsz bytes; the
 * *_DBGBUF_SIZE constants are declared alongside them.
 * NOTE(review): presumably the intended buffer sizes for callers --
 * confirm against the call sites.
 */
#define FLOWKEY_DBGBUF_SIZE     256
#define FLOWENTRY_DBGBUF_SIZE   512
extern char *fk2str(const struct flow_key *fk,
    char *__counted_by(dsz) dst, size_t dsz);
extern char *fe2str(const struct flow_entry *fe,
    char *__counted_by(dsz) dst, size_t dsz);
#endif /* SK_LOG */
1085 __END_DECLS
1086 #endif /* BSD_KERNEL_PRIVATE */
1087 #endif /* !_SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_ */
1088