xref: /xnu-11215.41.3/bsd/skywalk/nexus/flowswitch/flow/flow_var.h (revision 33de042d024d46de5ff4e89f2471de6608e37fa4)
1 /*
2  * Copyright (c) 2016-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*
30  * Once a packet is classified, it goes through checks to see if there
31  * is a matching flow entry in the flow table.  The key used to search
32  * the entry is composed of the fields contained in struct flow_ptrs.
33  *
34  * Flow entry insertion and deletion to the flow table, on behalf of
35  * the owning client process, requires the use of the rule ID (UUID)
36  * as the search key.
37  *
38  * Because of the above, each flow entry simultaneously exists in two
39  * respective trees: flow_entry_tree and flow_entry_id_tree.
40  *
41  * Using a single RW lock to protect the two trees is simple, but the
42  * data path performance is impacted during flow insertion and deletion,
43  * especially as the number of client processes and flows grow.
44  *
45  * To solve that, we deploy the following scheme:
46  *
47  * Given that the flow_entry_tree is searched on a per-packet basis,
48  * we break it down into a series of trees, each one contained within
49  * a flow_bucket structure.  The hash from flow_ptrs determines the
50  * index of the flow_bucket to search the flow_entry_tree from.
51  *
52  * The flow_entry_id_tree is searched on each flow insertion and
53  * deletion, and similarly we break it down into a series of trees,
54  * each contained within a flow_owner_bucket structure. We use the
55  * client process ID (pid_t) to determine the bucket index.
56  *
57  * Each flow_bucket and flow_owner_bucket structure is dynamically
58  * created, and is aligned on the CPU cache boundary to avoid false
59  * sharing, especially given that each bucket has its own lock.
60  * The number of those buckets is determined by the client module at
61  * the time the flow manager context is initialized.
62  */
63 
64 #ifndef _SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_
65 #define _SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_
66 
67 #ifdef BSD_KERNEL_PRIVATE
68 #include <skywalk/core/skywalk_var.h>
69 #include <skywalk/lib/cuckoo_hashtable.h>
70 #include <skywalk/namespace/netns.h>
71 #include <skywalk/namespace/protons.h>
72 #include <skywalk/packet/packet_var.h>
73 #include <net/flowhash.h>
74 #include <netinet/ip.h>
75 #include <netinet/in_stat.h>
76 #include <netinet/ip6.h>
77 #include <sys/eventhandler.h>
78 
79 RB_HEAD(flow_owner_tree, flow_owner);
80 
81 struct flow_owner_bucket {
82 	decl_lck_mtx_data(, fob_lock);
83 	struct flow_owner_tree  fob_owner_head;
84 	uint16_t                fob_busy_flags;
85 	uint16_t                fob_open_waiters;
86 	uint16_t                fob_close_waiters;
87 	uint16_t                fob_dtor_waiters;
88 	const size_t            fob_idx;
89 };
90 
91 #define FOBF_OPEN_BUSY          0x1     /* flow open monitor */
92 #define FOBF_CLOSE_BUSY         0x2     /* flow close monitor */
93 #define FOBF_DEAD               0x4     /* no longer usable */
94 
/*
 * Locking helpers for struct flow_owner_bucket; each one forwards to the
 * corresponding lck_mtx primitive on the bucket's fob_lock.
 */
#define FOB_LOCK(_fob)                  lck_mtx_lock(&(_fob)->fob_lock)
#define FOB_LOCK_SPIN(_fob)             lck_mtx_lock_spin(&(_fob)->fob_lock)
#define FOB_LOCK_CONVERT(_fob)          lck_mtx_convert_spin(&(_fob)->fob_lock)
#define FOB_TRY_LOCK(_fob)              lck_mtx_try_lock(&(_fob)->fob_lock)
#define FOB_LOCK_ASSERT_HELD(_fob)      \
	LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_OWNED)
#define FOB_LOCK_ASSERT_NOTHELD(_fob)   \
	LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_NOTOWNED)
#define FOB_UNLOCK(_fob)                lck_mtx_unlock(&(_fob)->fob_lock)
109 
110 RB_HEAD(flow_entry_id_tree, flow_entry);
111 
112 #define FLOW_PROCESS_NAME_LENGTH        24
113 
114 struct flow_owner {
115 	RB_ENTRY(flow_owner)    fo_link;
116 	struct flow_entry_id_tree fo_flow_entry_id_head;
117 	const struct flow_owner_bucket *fo_bucket;
118 	void                    *fo_context;
119 	pid_t                   fo_pid;
120 	bool                    fo_nx_port_pid_bound;
121 	bool                    fo_nx_port_destroyed;
122 	bool                    fo_low_latency;
123 	nexus_port_t            fo_nx_port;
124 	uuid_t                  fo_key;
125 
126 	struct nexus_adapter *  const fo_nx_port_na;
127 	struct nx_flowswitch *  const fo_fsw;
128 
129 	/*
130 	 * Array of bitmaps to manage the flow advisory table indices.
131 	 * Currently we are restricting a flow owner to a single nexus
132 	 * port, so this structure is effectively managing the flow advisory
133 	 * indices for a port.
134 	 */
135 	bitmap_t                *__counted_by(fo_num_flowadv_bmaps)fo_flowadv_bmap;
136 	uint32_t                fo_flowadv_max;
137 	uint32_t                fo_num_flowadv;
138 	uint32_t                fo_num_flowadv_bmaps;
139 
140 	/* for debugging */
141 	char                    fo_name[FLOW_PROCESS_NAME_LENGTH];
142 };
143 
144 #define FO_BUCKET(_fo)  \
145 	__DECONST(struct flow_owner_bucket *, (_fo)->fo_bucket)
146 
147 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_owner_tree, flow_owner,
148     fo_link, fo_cmp);
149 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_entry_id_tree, flow_entry,
150     fe_id_link, fe_id_cmp);
151 
/*
 * Flow tracking states.  These are the FT_STATE_* values referred to by
 * the fse_state member of struct flow_track, which is a uint8_t; every
 * value is therefore spelled out explicitly and FT_STATE_MAX caps the range
 * at 255.
 */
typedef enum {
	/*
	 * TCP states.
	 */
	FT_STATE_CLOSED         = 0,    /* closed */
	FT_STATE_LISTEN         = 1,    /* listening for connection */
	FT_STATE_SYN_SENT       = 2,    /* active, have sent SYN */
	FT_STATE_SYN_RECEIVED   = 3,    /* have sent and rcvd SYN */
	FT_STATE_ESTABLISHED    = 4,    /* established */
	FT_STATE_CLOSE_WAIT     = 5,    /* rcvd FIN, waiting close */
	FT_STATE_FIN_WAIT_1     = 6,    /* have sent FIN */
	FT_STATE_CLOSING        = 7,    /* exchanged FINs, waiting FIN|ACK */
	FT_STATE_LAST_ACK       = 8,    /* rcvd FIN, closed, waiting FIN|ACK */
	FT_STATE_FIN_WAIT_2     = 9,    /* closed, FIN is ACK'd */
	FT_STATE_TIME_WAIT      = 10,   /* quiet wait after close */

	/*
	 * UDP states.
	 */
	FT_STATE_NO_TRAFFIC     = 20,   /* no packet observed */
	FT_STATE_SINGLE         = 21,   /* single packet */
	FT_STATE_MULTIPLE       = 22,   /* multiple packets */

	FT_STATE_MAX            = 255
} flow_track_state_t;
177 
/* ACK round-trip-time tracking state embedded in struct flow_track. */
struct flow_track_rtt {
	uint64_t        frtt_timestamp; /* tracked segment timestamp */
	uint64_t        frtt_last;      /* previous net_uptime(rate limiting) */
	uint32_t        frtt_seg_begin; /* tracked segment begin SEQ */
	uint32_t        frtt_seg_end;   /* tracked segment end SEQ */
	uint32_t        frtt_usec;      /* avg RTT in usec */
};

/* sample ACK RTT every 2 sec */
#define FLOWTRACK_RTT_SAMPLE_INTERVAL   2

/*
 * Per-endpoint tracking state; struct flow_entry embeds one of these for
 * the local endpoint and one for the remote endpoint.
 */
struct flow_track {
	/*
	 * TCP specific tracking info.
	 */
	uint32_t fse_seqlo;     /* max sequence number sent */
	uint32_t fse_seqhi;     /* max the other end ACKd + win */
	uint32_t fse_seqlast;   /* last sequence number (FIN) */
	uint16_t fse_max_win;   /* largest window (pre scaling) */
	uint16_t fse_mss;       /* maximum segment size option */
	uint8_t fse_state;      /* active state level (FT_STATE_*) */
	uint8_t fse_wscale;     /* window scaling factor */
	uint16_t fse_flags;     /* FLOWSTATEF_* */
	uint32_t fse_syn_ts;    /* SYN timestamp */
	uint32_t fse_syn_cnt;   /* # of SYNs per second */

	struct flow_track_rtt   fse_rtt;        /* ACK RTT tracking */
#define fse_rtt_usec    fse_rtt.frtt_usec       /* shorthand for the avg RTT */
} __sk_aligned(8);

/* valid values for fse_flags */
#define FLOWSTATEF_WSCALE       0x1     /* fse_wscale is valid */
209 
210 struct flow_llhdr {
211 	uint32_t                flh_gencnt;     /* link-layer address gencnt */
212 
213 	const uint8_t           flh_off;
214 	const uint8_t           flh_len;
215 	uint16_t                flh_pad;        /* for future */
216 
217 	union _flh_u {
218 		uint64_t        _buf[2];
219 		struct {
220 			uint16_t _eth_pad;
221 			struct ether_header _eth;
222 		} _eth_padded;
223 	}  __sk_aligned(8)      _flh;
224 #define flh_eth_padded          _flh._eth_padded
225 #define flh_eth                 _flh._eth_padded._eth
226 };
227 
/*
 * Queue-set selection mode kept in flow_entry::fe_qset_select.  Values are
 * pinned explicitly so that inserting a new mode cannot silently renumber
 * the existing ones.
 */
typedef enum {
	FE_QSET_SELECT_NONE     = 0,
	FE_QSET_SELECT_FIXED    = 1,
	FE_QSET_SELECT_DYNAMIC  = 2
} flow_qset_select_t;
233 
234 extern kern_allocation_name_t skmem_tag_flow_demux;
235 typedef int (*flow_demux_memcmp_mask_t)(const uint8_t *src1, const uint8_t *src2,
236     const uint8_t *byte_mask);
237 
238 struct kern_flow_demux_pattern {
239 	struct flow_demux_pattern  fdp_demux_pattern;
240 	flow_demux_memcmp_mask_t   fdp_memcmp_mask;
241 };
242 
243 #define MAX_PKT_DEMUX_LIMIT        1000
244 
245 TAILQ_HEAD(flow_entry_list, flow_entry);
246 
247 #define FLOW_PROC_FLAG_GSO        0x0001
248 typedef void (*flow_tx_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe,
249     uint32_t flags);
250 
251 #define FLOW_PROC_FLAG_FRAGMENTS  0x0001
252 typedef void (*flow_rx_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe,
253     struct pktq *pkts, uint32_t rx_bytes, uint32_t flags);
254 
255 struct flow_entry {
256 	/**** Common Group ****/
257 	os_refcnt_t             fe_refcnt;
258 	struct flow_key         fe_key;
259 	uint32_t                fe_flags;
260 	uint32_t                fe_key_hash;
261 	struct cuckoo_node      fe_cnode;
262 
263 	uuid_t                  fe_uuid __sk_aligned(8);
264 	nexus_port_t            fe_nx_port;
265 	uint32_t                fe_laddr_gencnt;
266 	uint32_t                fe_want_nonviable;
267 	uint32_t                fe_want_withdraw;
268 	uint8_t                 fe_transport_protocol;
269 
270 	/**** Rx Group ****/
271 	/*
272 	 * If multiple threads end up working on the same flow entry, the one
273 	 * that reaches rx_flow_batch_packets first will be responsible for
274 	 * sending up all the packets from different RX completion queues.
275 	 * fe_rx_worker_tid marks its thread ID. Other threads only enqueues their
276 	 * packets into fe_rx_pktq but do not call fe_rx_process on the flow entry.
277 	 */
278 	uint16_t                fe_rx_frag_count;
279 	uint32_t                fe_rx_pktq_bytes;
280 	decl_lck_mtx_data(, fe_rx_pktq_lock);
281 	struct pktq             fe_rx_pktq;
282 	TAILQ_ENTRY(flow_entry) fe_rx_link;
283 	flow_rx_action_t        fe_rx_process;
284 	uint64_t                fe_rx_worker_tid;
285 
286 	/*
287 	 * largest allocated packet size.
288 	 * used by:
289 	 *  - mbuf batch allocation logic during RX aggregtion and netif copy.
290 	 *  - packet allocation logic during RX aggregation.
291 	 */
292 	uint32_t                fe_rx_largest_size;
293 
294 	/**** Tx Group ****/
295 	bool                    fe_tx_is_cont_frag;
296 	uint32_t                fe_tx_frag_id;
297 	struct pktq             fe_tx_pktq;
298 	TAILQ_ENTRY(flow_entry) fe_tx_link;
299 	flow_tx_action_t        fe_tx_process;
300 
301 	uuid_t                  fe_eproc_uuid __sk_aligned(8);
302 	flowadv_idx_t           fe_adv_idx;
303 	kern_packet_svc_class_t fe_svc_class;
304 	uint32_t                fe_policy_id;   /* policy id matched to flow */
305 	uint32_t                fe_skip_policy_id; /* skip policy id matched to flow */
306 
307 	/**** Misc Group ****/
308 	struct nx_flowswitch *  const fe_fsw;
309 	struct ns_token         *fe_port_reservation;
310 	struct protons_token    *fe_proto_reservation;
311 	void                    *fe_ipsec_reservation;
312 
313 	struct flow_track       fe_ltrack;      /* local endpoint state */
314 	struct flow_track       fe_rtrack;      /* remote endpoint state */
315 
316 	/*
317 	 * Flow stats are kept externally stand-alone, refcnt'ed by various
318 	 * users (e.g. flow_entry, necp_client_flow, etc.)
319 	 */
320 	struct flow_stats       *fe_stats;
321 	struct flow_route       *fe_route;
322 
323 	RB_ENTRY(flow_entry)    fe_id_link;
324 
325 	TAILQ_ENTRY(flow_entry) fe_linger_link;
326 	uint64_t                fe_linger_expire; /* expiration deadline */
327 	uint32_t                fe_linger_wait;   /* linger time (seconds) */
328 
329 	pid_t                   fe_pid;
330 	pid_t                   fe_epid;
331 	char                    fe_proc_name[FLOW_PROCESS_NAME_LENGTH];
332 	char                    fe_eproc_name[FLOW_PROCESS_NAME_LENGTH];
333 
334 	uint32_t                fe_flowid; /* globally unique flow ID */
335 
336 	/* Logical link related information */
337 	struct netif_qset      *fe_qset;
338 	uint64_t                fe_qset_id;
339 	flow_qset_select_t      fe_qset_select;
340 	uint32_t                fe_tr_genid;
341 
342 	/* Parent child information */
343 	decl_lck_rw_data(, fe_child_list_lock);
344 	struct flow_entry_list          fe_child_list;
345 	TAILQ_ENTRY(flow_entry)         fe_child_link;
346 #if DEVELOPMENT || DEBUG
347 	int16_t                         fe_child_count;
348 #endif // DEVELOPMENT || DEBUG
349 	uint8_t                         fe_demux_pattern_count;
350 	struct kern_flow_demux_pattern  *__counted_by(fe_demux_pattern_count)fe_demux_patterns;
351 	uint8_t                         *__sized_by_or_null(FLOW_DEMUX_MAX_LEN) fe_demux_pkt_data;
352 };
353 
/* valid values for fe_flags */
#define FLOWENTF_INITED                 0x00000001 /* {src,dst} states initialized */
#define FLOWENTF_TRACK                  0x00000010 /* enable state tracking */
#define FLOWENTF_CONNECTED              0x00000020 /* connected mode */
#define FLOWENTF_LISTENER               0x00000040 /* listener mode */
#define FLOWENTF_QOS_MARKING            0x00000100 /* flow can have qos marking */
#define FLOWENTF_LOW_LATENCY            0x00000200 /* low latency flow */
#define FLOWENTF_WAIT_CLOSE             0x00001000 /* defer free after close */
#define FLOWENTF_CLOSE_NOTIFY           0x00002000 /* notify NECP upon tear down */
#define FLOWENTF_EXTRL_PORT             0x00004000 /* port reservation is held externally */
#define FLOWENTF_EXTRL_PROTO            0x00008000 /* proto reservation is held externally */
#define FLOWENTF_EXTRL_FLOWID           0x00010000 /* flowid reservation is held externally */
#define FLOWENTF_CHILD                  0x00020000 /* child flow */
#define FLOWENTF_PARENT                 0x00040000 /* parent flow */
#define FLOWENTF_NOWAKEFROMSLEEP        0x00080000 /* don't wake for this flow */
#define FLOWENTF_ABORTED                0x01000000 /* has sent RST to peer */
#define FLOWENTF_NONVIABLE              0x02000000 /* disabled; awaiting tear down */
#define FLOWENTF_WITHDRAWN              0x04000000 /* flow has been withdrawn */
#define FLOWENTF_TORN_DOWN              0x08000000 /* torn down and awaiting destroy */
#define FLOWENTF_HALF_CLOSED            0x10000000 /* flow is half closed */
#define FLOWENTF_DESTROYED              0x40000000 /* not in RB trees anymore */
#define FLOWENTF_LINGERING              0x80000000 /* destroyed and in linger list */

/*
 * Bit-name table for decoding fe_flags.  The leading \020 selects
 * hexadecimal output; each following entry is a 1-based bit number
 * (written as an octal escape) followed by the name printed when that bit
 * is set.  Keep it in step with the FLOWENTF_* definitions above: every
 * defined flag has an entry, including CHILD (bit 18), PARENT (bit 19) and
 * NOWAKEFROMSLEEP (bit 20).
 */
#define FLOWENTF_BITS                                            \
    "\020\01INITED\05TRACK\06CONNECTED\07LISTENER\011QOS_MARKING" \
    "\012LOW_LATENCY\015WAIT_CLOSE\016CLOSE_NOTIFY\017EXT_PORT"  \
    "\020EXT_PROTO\021EXT_FLOWID\022CHILD\023PARENT"             \
    "\024NOWAKEFROMSLEEP\031ABORTED\032NONVIABLE\033WITHDRAWN"   \
    "\034TORN_DOWN\035HALF_CLOSED\037DESTROYED\40LINGERING"
382 
383 TAILQ_HEAD(flow_entry_linger_head, flow_entry);
384 
385 struct flow_entry_dead {
386 	LIST_ENTRY(flow_entry_dead)     fed_link;
387 
388 	boolean_t               fed_want_nonviable;
389 	boolean_t               fed_want_clonotify;
390 
391 	/* rule (flow) UUID */
392 	union {
393 		uint64_t        fed_uuid_64[2];
394 		uint32_t        fed_uuid_32[4];
395 		uuid_t          fed_uuid;
396 	} __sk_aligned(8);
397 };
398 
399 /*
400  * Minimum refcnt for a flow route entry to be considered as idle.
401  */
402 #define FLOW_ROUTE_MINREF       2       /* for the 2 RB trees */
403 
404 struct flow_route {
405 	RB_ENTRY(flow_route)    fr_link;
406 	RB_ENTRY(flow_route)    fr_id_link;
407 
408 	/*
409 	 * fr_laddr represents the local address that the system chooses
410 	 * for the foreign destination in fr_faddr.  The flow entry that
411 	 * is referring to this flow route object may choose a different
412 	 * local address if it wishes.
413 	 *
414 	 * fr_gaddr represents the gateway address to reach the final
415 	 * foreign destination fr_faddr, valid only if the destination is
416 	 * not directly attached (FLOWRTF_GATEWAY is set).
417 	 *
418 	 * The use of sockaddr for storage is for convenience; the port
419 	 * value is not applicable for this object, as this is shared
420 	 * among flow entries.
421 	 */
422 	union sockaddr_in_4_6   fr_laddr;       /* local IP address */
423 	union sockaddr_in_4_6   fr_faddr;       /* remote IP address */
424 #define fr_af                   fr_faddr.sa.sa_family
425 	union sockaddr_in_4_6   fr_gaddr;       /* gateway IP address */
426 
427 	struct flow_llhdr       fr_llhdr;
428 #define fr_eth_padded           fr_llhdr.flh_eth_padded
429 #define fr_eth                  fr_llhdr.flh_eth
430 
431 	/*
432 	 * In flow_route_tree, we use the destination address as key.
433 	 * To speed up searches, we initialize fr_addr_key to the address
434 	 * portion of fr_faddr depending on the address family.
435 	 */
436 	void                    *fr_addr_key;
437 
438 	/* flow route UUID */
439 	uuid_t                  fr_uuid __sk_aligned(8);
440 
441 	/*
442 	 * fr_usecnt is updated atomically; incremented when a flow entry
443 	 * refers to this object and decremented otherwise.  Periodically,
444 	 * the flowswitch instance garbage collects flow_route objects
445 	 * that aren't being referred to by any flow entries.
446 	 *
447 	 * fr_expire is set when fr_usecnt reaches its minimum count, and
448 	 * is cleared when it goes above the minimum count.
449 	 *
450 	 * The spin lock fr_reflock is used to serialize both.
451 	 */
452 	decl_lck_spin_data(, fr_reflock);
453 	uint64_t                fr_expire;
454 	volatile uint32_t       fr_usecnt;
455 
456 	uint32_t                fr_flags;
457 	uint32_t                fr_laddr_gencnt; /* local IP gencnt */
458 	uint32_t                fr_addr_len;     /* sizeof {in,in6}_addr */
459 
460 	volatile uint32_t       fr_want_configure;
461 	volatile uint32_t       fr_want_probe;
462 
463 	/* lock to serialize resolver */
464 	decl_lck_mtx_data(, fr_lock);
465 
466 	/*
467 	 * fr_rt_dst is the route to final destination, and along with
468 	 * fr_rt_evhdlr_tag, they are used in route event registration.
469 	 *
470 	 * fr_rt_gw is valid only if FLOWRTF_GATEWAY is set.
471 	 */
472 	eventhandler_tag        fr_rt_evhdlr_tag;
473 	struct rtentry          *fr_rt_dst;
474 	struct rtentry          *fr_rt_gw;
475 
476 	/* nexus UUID */
477 	uuid_t                  fr_nx_uuid __sk_aligned(8);
478 
479 	const struct flow_mgr   *fr_mgr;
480 	const struct flow_route_bucket  *fr_frb;
481 	const struct flow_route_id_bucket *fr_frib;
482 };
483 
/* valid values for fr_flags */
#define FLOWRTF_ATTACHED        0x00000001 /* attached to RB trees */
#define FLOWRTF_ONLINK          0x00000010 /* dst directly on the link */
#define FLOWRTF_GATEWAY         0x00000020 /* gw IP address is valid */
#define FLOWRTF_RESOLVED        0x00000040 /* flow route is resolved */
#define FLOWRTF_HAS_LLINFO      0x00000080 /* has dst link-layer address */
#define FLOWRTF_DELETED         0x00000100 /* route has been deleted */
#define FLOWRTF_DST_LL_MCAST    0x00000200 /* dst is link layer multicast */
#define FLOWRTF_DST_LL_BCAST    0x00000400 /* dst is link layer broadcast */
#define FLOWRTF_STABLE_ADDR     0x00000800 /* local address prefers stable */

/*
 * Helpers for fr_lock, the mutex that serializes the resolver of a
 * struct flow_route.
 */
#define FR_LOCK(_fr)                    lck_mtx_lock(&(_fr)->fr_lock)
#define FR_TRY_LOCK(_fr)                lck_mtx_try_lock(&(_fr)->fr_lock)
#define FR_LOCK_ASSERT_HELD(_fr)        \
	LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_OWNED)
#define FR_LOCK_ASSERT_NOTHELD(_fr)     \
	LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_NOTOWNED)
#define FR_UNLOCK(_fr)                  lck_mtx_unlock(&(_fr)->fr_lock)
505 
/*
 * Copy the Ethernet address _addr into the destination field of the flow
 * route's cached Ethernet header, then recompute the two link-layer
 * destination flags: both are cleared, and for a multicast address exactly
 * one is set again (BCAST when it equals etherbroadcastaddr, MCAST
 * otherwise).  Both arguments are evaluated more than once.
 */
#define FLOWRT_UPD_ETH_DST(_fr, _addr)  do {                            \
	bcopy((_addr), (_fr)->fr_eth.ether_dhost, ETHER_ADDR_LEN);      \
	(_fr)->fr_flags &= ~(FLOWRTF_DST_LL_MCAST|FLOWRTF_DST_LL_BCAST);\
	if (ETHER_IS_MULTICAST(_addr)) {                                \
	        (_fr)->fr_flags |=                                      \
	            (_ether_cmp(etherbroadcastaddr, (_addr)) == 0) ?    \
	            FLOWRTF_DST_LL_BCAST : FLOWRTF_DST_LL_MCAST;        \
	}                                                               \
} while (0)
516 
517 RB_HEAD(flow_route_tree, flow_route);
518 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_tree, flow_route,
519     fr_link, fr_cmp);
520 
521 struct flow_route_bucket {
522 	decl_lck_rw_data(, frb_lock);
523 	struct flow_route_tree  frb_head;
524 	const uint32_t          frb_idx;
525 };
526 
527 #define FRB_WLOCK(_frb)                 \
528 	lck_rw_lock_exclusive(&(_frb)->frb_lock)
529 #define FRB_WLOCKTORLOCK(_frb)          \
530 	lck_rw_lock_exclusive_to_shared(&(_frb)->frb_lock)
531 #define FRB_WTRYLOCK(_frb)              \
532 	lck_rw_try_lock_exclusive(&(_frb)->frb_lock)
533 #define FRB_WUNLOCK(_frb)               \
534 	lck_rw_unlock_exclusive(&(_frb)->frb_lock)
535 #define FRB_RLOCK(_frb)                 \
536 	lck_rw_lock_shared(&(_frb)->frb_lock)
537 #define FRB_RLOCKTOWLOCK(_frb)          \
538 	lck_rw_lock_shared_to_exclusive(&(_frb)->frb_lock)
539 #define FRB_RTRYLOCK(_frb)              \
540 	lck_rw_try_lock_shared(&(_frb)->frb_lock)
541 #define FRB_RUNLOCK(_frb)               \
542 	lck_rw_unlock_shared(&(_frb)->frb_lock)
543 #define FRB_UNLOCK(_frb)                \
544 	lck_rw_done(&(_frb)->frb_lock)
545 #define FRB_WLOCK_ASSERT_HELD(_frb)     \
546 	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_EXCLUSIVE)
547 #define FRB_RLOCK_ASSERT_HELD(_frb)     \
548 	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_SHARED)
549 #define FRB_LOCK_ASSERT_HELD(_frb)      \
550 	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_HELD)
551 
552 RB_HEAD(flow_route_id_tree, flow_route);
553 RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_id_tree, flow_route,
554     fr_id_link, fr_id_cmp);
555 
556 struct flow_route_id_bucket {
557 	decl_lck_rw_data(, frib_lock);
558 	struct flow_route_id_tree       frib_head;
559 	const uint32_t                  frib_idx;
560 };
561 
562 #define FRIB_WLOCK(_frib)               \
563 	lck_rw_lock_exclusive(&(_frib)->frib_lock)
564 #define FRIB_WLOCKTORLOCK(_frib)        \
565 	lck_rw_lock_exclusive_to_shared(&(_frib)->frib_lock)
566 #define FRIB_WTRYLOCK(_frib)            \
567 	lck_rw_try_lock_exclusive(&(_frib)->frib_lock)
568 #define FRIB_WUNLOCK(_frib)             \
569 	lck_rw_unlock_exclusive(&(_frib)->frib_lock)
570 #define FRIB_RLOCK(_frib)               \
571 	lck_rw_lock_shared(&(_frib)->frib_lock)
572 #define FRIB_RLOCKTOWLOCK(_frib)        \
573 	lck_rw_lock_shared_to_exclusive(&(_frib)->frib_lock)
574 #define FRIB_RTRYLOCK(_frib)            \
575 	lck_rw_try_lock_shared(&(_frib)->frib_lock)
576 #define FRIB_RUNLOCK(_frib)             \
577 	lck_rw_unlock_shared(&(_frib)->frib_lock)
578 #define FRIB_UNLOCK(_frib)              \
579 	lck_rw_done(&(_frib)->frib_lock)
580 #define FRIB_WLOCK_ASSERT_HELD(_frib)   \
581 	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_EXCLUSIVE)
582 #define FRIB_RLOCK_ASSERT_HELD(_frib)   \
583 	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_SHARED)
584 #define FRIB_LOCK_ASSERT_HELD(_frib)    \
585 	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_HELD)
586 
587 struct flow_mgr {
588 	char            fm_name[IFNAMSIZ];
589 	uuid_t          fm_uuid;
590 	RB_ENTRY(flow_mgr) fm_link;
591 
592 	struct cuckoo_hashtable *fm_flow_table;
593 	size_t   fm_flow_hash_count[FKMASK_IDX_MAX]; /* # of flows with mask */
594 	uint16_t fm_flow_hash_masks[FKMASK_IDX_MAX];
595 
596 	void      *__sized_by(fm_owner_bucket_tot_sz) fm_owner_buckets;     /* cache-aligned fob */
597 	size_t    fm_owner_buckets_cnt;  /* total # of fobs */
598 	size_t    fm_owner_bucket_sz;    /* size of each fob */
599 	size_t    fm_owner_bucket_tot_sz; /* allocated size of each fob */
600 
601 	void      *__sized_by(fm_route_bucket_tot_sz) fm_route_buckets;     /* cache-aligned frb */
602 	size_t    fm_route_buckets_cnt;  /* total # of frb */
603 	size_t    fm_route_bucket_sz;    /* size of each frb */
604 	size_t    fm_route_bucket_tot_sz; /* allocated size of each frb */
605 
606 	void      *__sized_by(fm_route_id_bucket_tot_sz) fm_route_id_buckets;    /* cache-aligned frib */
607 	size_t    fm_route_id_buckets_cnt; /* total # of frib */
608 	size_t    fm_route_id_bucket_sz;   /* size of each frib */
609 	size_t    fm_route_id_bucket_tot_sz; /* allocated size of each frib */
610 };
611 
612 /*
613  * this func compare match with key;
614  * return values:
615  * 0 as long as @key(exact) matches what @match(wildcard) wants to match on.
616  * 1 when it doesn't match
617  */
618 static inline int
flow_key_cmp(const struct flow_key * match,const struct flow_key * key)619 flow_key_cmp(const struct flow_key *match, const struct flow_key *key)
620 {
621 #define FK_CMP(field, mask)     \
622 	if ((match->fk_mask & mask) != 0) {     \
623 	        if ((key->fk_mask & mask) == 0) {       \
624 	                return 1;       \
625 	        }       \
626 	        int d = memcmp(&match->field, &key->field, sizeof(match->field));       \
627 	        if (d != 0) {   \
628 	                return d;       \
629 	        }       \
630 	}
631 
632 	FK_CMP(fk_ipver, FKMASK_IPVER);
633 	FK_CMP(fk_proto, FKMASK_PROTO);
634 	FK_CMP(fk_src, FKMASK_SRC);
635 	FK_CMP(fk_dst, FKMASK_DST);
636 	FK_CMP(fk_sport, FKMASK_SPORT);
637 	FK_CMP(fk_dport, FKMASK_DPORT);
638 
639 	return 0;
640 }
641 
642 /*
643  * Similar to flow_key_cmp() except using memory compare with mask,
644  * done with SIMD instructions, if available for the platform.
645  */
646 static inline int
flow_key_cmp_mask(const struct flow_key * match,const struct flow_key * key,const struct flow_key * mask)647 flow_key_cmp_mask(const struct flow_key *match,
648     const struct flow_key *key, const struct flow_key *mask)
649 {
650 	_CASSERT(FLOW_KEY_LEN == 48);
651 	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));
652 	_CASSERT((sizeof(struct flow_entry) % 16) == 0);
653 	_CASSERT((offsetof(struct flow_entry, fe_key) % 16) == 0);
654 
655 	/* local variables are __bidi_indexable with -fbounds-safety */
656 	const struct flow_key *match_idx = match;
657 	const struct flow_key *key_idx = key;
658 	const struct flow_key *mask_idx = mask;
659 
660 	return sk_memcmp_mask_48B((const uint8_t *)match_idx,
661 	           (const uint8_t *)key_idx, (const uint8_t *)mask_idx);
662 }
663 
664 static inline uint32_t
flow_key_hash(const struct flow_key * key)665 flow_key_hash(const struct flow_key *key)
666 {
667 	uint32_t hash = FK_HASH_SEED;
668 #define FK_HASH(field, mask)    \
669 	if ((key->fk_mask & mask) != 0) {       \
670 	        hash = net_flowhash(&key->field, sizeof(key->field), hash);     \
671 	}
672 
673 	FK_HASH(fk_ipver, FKMASK_IPVER);
674 	FK_HASH(fk_proto, FKMASK_PROTO);
675 	FK_HASH(fk_src, FKMASK_SRC);
676 	FK_HASH(fk_dst, FKMASK_DST);
677 	FK_HASH(fk_sport, FKMASK_SPORT);
678 	FK_HASH(fk_dport, FKMASK_DPORT);
679 
680 	return hash;
681 }
682 
683 __attribute__((always_inline))
684 static inline void
flow_key_unpack(const struct flow_key * key,union sockaddr_in_4_6 * laddr,union sockaddr_in_4_6 * faddr,uint8_t * protocol)685 flow_key_unpack(const struct flow_key *key, union sockaddr_in_4_6 *laddr,
686     union sockaddr_in_4_6 *faddr, uint8_t *protocol)
687 {
688 	*protocol = key->fk_proto;
689 	if (key->fk_ipver == IPVERSION) {
690 		laddr->sa.sa_family = AF_INET;
691 		laddr->sin.sin_addr = key->fk_src4;
692 		laddr->sin.sin_port = key->fk_sport;
693 		faddr->sa.sa_family = AF_INET;
694 		faddr->sin.sin_addr = key->fk_dst4;
695 		faddr->sin.sin_port = key->fk_dport;
696 	} else if (key->fk_ipver == IPV6_VERSION) {
697 		laddr->sa.sa_family = AF_INET6;
698 		laddr->sin6.sin6_addr = key->fk_src6;
699 		laddr->sin6.sin6_port = key->fk_sport;
700 		faddr->sa.sa_family = AF_INET6;
701 		faddr->sin6.sin6_addr = key->fk_dst6;
702 		faddr->sin6.sin6_port = key->fk_dport;
703 	}
704 }
705 
706 __attribute__((always_inline))
707 static inline int
flow_req2key(struct nx_flow_req * req,struct flow_key * key)708 flow_req2key(struct nx_flow_req *req, struct flow_key *key)
709 {
710 	FLOW_KEY_CLEAR(key);
711 
712 	if (req->nfr_saddr.sa.sa_family == AF_INET) {
713 		key->fk_ipver = IPVERSION;
714 		key->fk_proto = req->nfr_ip_protocol;
715 		key->fk_mask |= FKMASK_PROTO;
716 		if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
717 			key->fk_src4 = req->nfr_saddr.sin.sin_addr;
718 			key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
719 		}
720 		if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
721 			key->fk_dst4 = req->nfr_daddr.sin.sin_addr;
722 			key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
723 		}
724 		if (sk_sa_has_port(SA(&req->nfr_saddr))) {
725 			key->fk_sport = req->nfr_saddr.sin.sin_port;
726 			key->fk_mask |= FKMASK_SPORT;
727 		}
728 		if (sk_sa_has_port(SA(&req->nfr_daddr))) {
729 			key->fk_dport = req->nfr_daddr.sin.sin_port;
730 			key->fk_mask |= FKMASK_DPORT;
731 		}
732 	} else if (req->nfr_saddr.sa.sa_family == AF_INET6) {
733 		key->fk_ipver = IPV6_VERSION;
734 		key->fk_proto = req->nfr_ip_protocol;
735 		key->fk_mask |= FKMASK_PROTO;
736 		if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
737 			key->fk_src6 = req->nfr_saddr.sin6.sin6_addr;
738 			key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
739 		}
740 		if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
741 			key->fk_dst6 = req->nfr_daddr.sin6.sin6_addr;
742 			key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
743 		}
744 		if (sk_sa_has_port(SA(&req->nfr_saddr))) {
745 			key->fk_sport = req->nfr_saddr.sin6.sin6_port;
746 			key->fk_mask |= FKMASK_SPORT;
747 		}
748 		if (sk_sa_has_port(SA(&req->nfr_daddr))) {
749 			key->fk_dport = req->nfr_daddr.sin6.sin6_port;
750 			key->fk_mask |= FKMASK_DPORT;
751 		}
752 	} else {
753 		SK_ERR("unknown AF %d", req->nfr_saddr.sa.sa_family);
754 		return ENOTSUP;
755 	}
756 
757 	switch (key->fk_mask) {
758 	case FKMASK_5TUPLE:
759 	case FKMASK_4TUPLE:
760 	case FKMASK_3TUPLE:
761 	case FKMASK_2TUPLE:
762 	case FKMASK_IPFLOW3:
763 	case FKMASK_IPFLOW2:
764 	case FKMASK_IPFLOW1:
765 		break;
766 	default:
767 		SK_ERR("unknown flow key mask 0x%04x", key->fk_mask);
768 		return ENOTSUP;
769 	}
770 
771 	return 0;
772 }
773 
774 __attribute__((always_inline))
775 static inline void
flow_pkt2key(struct __kern_packet * pkt,boolean_t input,struct flow_key * key)776 flow_pkt2key(struct __kern_packet *pkt, boolean_t input,
777     struct flow_key *key)
778 {
779 	struct __flow *flow = pkt->pkt_flow;
780 
781 	FLOW_KEY_CLEAR(key);
782 
783 	if (__improbable((pkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) == 0)) {
784 		return;
785 	}
786 
787 	ASSERT(flow->flow_l3._l3_ip_ver != 0);
788 
789 	key->fk_ipver = flow->flow_l3._l3_ip_ver;
790 	key->fk_proto = flow->flow_ip_proto;
791 	if (input) {
792 		if (flow->flow_ip_ver == IPVERSION) {
793 			key->fk_src4 = flow->flow_ipv4_dst;
794 			key->fk_sport = flow->flow_tcp_dst;
795 			key->fk_dst4 = flow->flow_ipv4_src;
796 			key->fk_dport = flow->flow_tcp_src;
797 		} else {
798 			key->fk_src6 = flow->flow_ipv6_dst;
799 			key->fk_sport = flow->flow_tcp_dst;
800 			key->fk_dst6 = flow->flow_ipv6_src;
801 			key->fk_dport = flow->flow_tcp_src;
802 		}
803 	} else {
804 		if (flow->flow_ip_ver == IPVERSION) {
805 			key->fk_src4 = flow->flow_ipv4_src;
806 			key->fk_sport = flow->flow_tcp_src;
807 			key->fk_dst4 = flow->flow_ipv4_dst;
808 			key->fk_dport = flow->flow_tcp_dst;
809 		} else {
810 			key->fk_src6 = flow->flow_ipv6_src;
811 			key->fk_sport = flow->flow_tcp_src;
812 			key->fk_dst6 = flow->flow_ipv6_dst;
813 			key->fk_dport = flow->flow_tcp_dst;
814 		}
815 	}
816 }
817 
818 __attribute__((always_inline))
819 static inline int
flow_ip_cmp(const void * a0,const void * b0,size_t alen)820 flow_ip_cmp(const void *a0, const void *b0, size_t alen)
821 {
822 	struct flow_ip_addr *a = __DECONST(struct flow_ip_addr *, a0),
823 	    *b = __DECONST(struct flow_ip_addr *, b0);
824 
825 	switch (alen) {
826 	case sizeof(struct in_addr):
827 		if (a->_addr32[0] > b->_addr32[0]) {
828 			return 1;
829 		}
830 		if (a->_addr32[0] < b->_addr32[0]) {
831 			return -1;
832 		}
833 		break;
834 
835 	case sizeof(struct in6_addr):
836 		if (a->_addr64[1] > b->_addr64[1]) {
837 			return 1;
838 		}
839 		if (a->_addr64[1] < b->_addr64[1]) {
840 			return -1;
841 		}
842 		if (a->_addr64[0] > b->_addr64[0]) {
843 			return 1;
844 		}
845 		if (a->_addr64[0] < b->_addr64[0]) {
846 			return -1;
847 		}
848 		break;
849 
850 	default:
851 		VERIFY(0);
852 		/* NOTREACHED */
853 		__builtin_unreachable();
854 	}
855 	return 0;
856 }
857 
858 __attribute__((always_inline))
859 static inline struct flow_owner_bucket *
flow_mgr_get_fob_at_idx(struct flow_mgr * fm,uint32_t idx)860 flow_mgr_get_fob_at_idx(struct flow_mgr *fm, uint32_t idx)
861 {
862 	char *buckets = fm->fm_owner_buckets;
863 	void *bucket = buckets + (idx * fm->fm_owner_bucket_sz);
864 	return bucket;
865 }
866 
867 __attribute__((always_inline))
868 static inline struct flow_route_bucket *
flow_mgr_get_frb_at_idx(struct flow_mgr * fm,uint32_t idx)869 flow_mgr_get_frb_at_idx(struct flow_mgr *fm, uint32_t idx)
870 {
871 	char *buckets = fm->fm_route_buckets;
872 	void *bucket = buckets + (idx * fm->fm_route_bucket_sz);
873 	return bucket;
874 }
875 
876 __attribute__((always_inline))
877 static inline struct flow_route_id_bucket *
flow_mgr_get_frib_at_idx(struct flow_mgr * fm,uint32_t idx)878 flow_mgr_get_frib_at_idx(struct flow_mgr *fm, uint32_t idx)
879 {
880 	char *buckets = fm->fm_route_id_buckets;
881 	void *bucket = buckets + (idx * fm->fm_route_id_bucket_sz);
882 	return bucket;
883 }
884 
885 __attribute__((always_inline))
886 static inline uint32_t
flow_mgr_get_fob_idx(struct flow_mgr * fm,struct flow_owner_bucket * bkt)887 flow_mgr_get_fob_idx(struct flow_mgr *fm,
888     struct flow_owner_bucket *bkt)
889 {
890 	ASSERT(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) %
891 	    fm->fm_owner_bucket_sz == 0);
892 	return (uint32_t)(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) /
893 	       fm->fm_owner_bucket_sz);
894 }
895 
896 __attribute__((always_inline))
897 static inline size_t
flow_mgr_get_num_flows(struct flow_mgr * mgr)898 flow_mgr_get_num_flows(struct flow_mgr *mgr)
899 {
900 	ASSERT(mgr->fm_flow_table != NULL);
901 	return cuckoo_hashtable_entries(mgr->fm_flow_table);
902 }
903 
/*
 * Globals shared by the flow code; declared here, defined elsewhere.
 * NOTE(review): each sk_*_size presumably holds the object size used by
 * the sk_*_cache of the same prefix -- confirm against the definitions.
 */
extern unsigned int sk_fo_size;
extern unsigned int sk_fe_size;
extern unsigned int sk_fab_size;

extern struct skmem_cache *sk_fo_cache;
extern struct skmem_cache *sk_fe_cache;
extern struct skmem_cache *sk_fab_cache;
extern struct skmem_cache *flow_route_cache;
extern struct skmem_cache *flow_stats_cache;

extern uint32_t flow_seed;
917 
918 __BEGIN_DECLS
919 
920 typedef void (*flow_route_ctor_fn_t)(void *arg, struct flow_route *);
921 typedef int (*flow_route_resolve_fn_t)(void *arg, struct flow_route *,
922     struct __kern_packet *);
923 
924 extern int flow_init(void);
925 extern void flow_fini(void);
926 
927 extern void flow_mgr_init(void);
928 extern void flow_mgr_fini(void);
929 extern struct flow_mgr *flow_mgr_find_lock(uuid_t);
930 extern void flow_mgr_unlock(void);
931 extern struct flow_mgr * flow_mgr_create(size_t, size_t, size_t, size_t);
932 extern void flow_mgr_destroy(struct flow_mgr *);
933 extern void flow_mgr_terminate(struct flow_mgr *);
934 extern int flow_mgr_flow_add(struct kern_nexus *nx, struct flow_mgr *fm,
935     struct flow_owner *fo, struct ifnet *ifp, struct nx_flow_req *req,
936     flow_route_ctor_fn_t fr_ctor, flow_route_resolve_fn_t fr_resolve, void *fr_arg);
937 extern struct flow_owner_bucket *flow_mgr_get_fob_by_pid(
938 	struct flow_mgr *, pid_t);
939 extern struct flow_entry *flow_mgr_get_fe_by_uuid_rlock(
940 	struct flow_mgr *, uuid_t);
941 extern struct flow_route_bucket *flow_mgr_get_frb_by_addr(
942 	struct flow_mgr *, union sockaddr_in_4_6 *);
943 extern struct flow_route_id_bucket *flow_mgr_get_frib_by_uuid(
944 	struct flow_mgr *, uuid_t);
945 extern int flow_mgr_flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask);
946 extern int flow_mgr_flow_hash_mask_del(struct flow_mgr *fm, uint32_t mask);
947 
948 extern struct flow_entry * fe_alloc(boolean_t can_block);
949 
950 extern int flow_namespace_create(union sockaddr_in_4_6 *, uint8_t protocol,
951     netns_token *, uint16_t, struct ns_flow_info *);
952 extern void flow_namespace_half_close(netns_token *token);
953 extern void flow_namespace_withdraw(netns_token *);
954 extern void flow_namespace_destroy(netns_token *);
955 
956 extern struct flow_owner_bucket *__sized_by(*tot_sz)
957 flow_owner_buckets_alloc(size_t, size_t *, size_t * tot_sz);
958 extern void flow_owner_buckets_free(struct flow_owner_bucket *, size_t);
959 extern void flow_owner_bucket_init(struct flow_owner_bucket *);
960 extern void flow_owner_bucket_destroy(struct flow_owner_bucket *);
961 extern void flow_owner_bucket_purge_all(struct flow_owner_bucket *);
962 extern void flow_owner_attach_nexus_port(struct flow_mgr *, boolean_t,
963     pid_t, nexus_port_t);
964 extern uint32_t flow_owner_detach_nexus_port(struct flow_mgr *,
965     boolean_t, pid_t, nexus_port_t, boolean_t);
966 extern struct flow_owner *flow_owner_alloc(struct flow_owner_bucket *,
967     struct proc *, nexus_port_t, bool, bool, struct nx_flowswitch*,
968     struct nexus_adapter *, void *, bool);
969 extern void flow_owner_free(struct flow_owner_bucket *, struct flow_owner *);
970 extern struct flow_entry *flow_owner_create_entry(struct flow_owner *,
971     struct nx_flow_req *, boolean_t, uint32_t, boolean_t,
972     struct flow_route *, int *);
973 extern int flow_owner_destroy_entry(struct flow_owner *, uuid_t, bool, void *);
974 extern struct flow_owner *flow_owner_find_by_pid(struct flow_owner_bucket *,
975     pid_t, void *, bool);
976 extern int flow_owner_flowadv_index_alloc(struct flow_owner *, flowadv_idx_t *);
977 extern void flow_owner_flowadv_index_free(struct flow_owner *, flowadv_idx_t);
978 extern uint32_t flow_owner_activate_nexus_port(struct flow_mgr *,
979     boolean_t, pid_t, nexus_port_t, struct nexus_adapter *,
980     na_activate_mode_t);
981 
982 extern struct flow_entry *flow_mgr_find_fe_by_key(struct flow_mgr *,
983     struct flow_key *);
984 extern struct flow_entry * flow_mgr_find_conflicting_fe(struct flow_mgr *fm,
985     struct flow_key *fe_key);
986 extern void flow_mgr_foreach_flow(struct flow_mgr *fm,
987     void (^flow_handler)(struct flow_entry *fe));
988 extern struct flow_entry *flow_entry_find_by_uuid(struct flow_owner *,
989     uuid_t);
990 extern struct flow_entry * flow_entry_alloc(struct flow_owner *fo,
991     struct nx_flow_req *req, int *perr);
992 extern void flow_entry_teardown(struct flow_owner *, struct flow_entry *);
993 extern void flow_entry_destroy(struct flow_owner *, struct flow_entry *, bool,
994     void *);
995 extern void flow_entry_retain(struct flow_entry *fe);
996 extern void flow_entry_release(struct flow_entry **pfe);
997 extern uint32_t flow_entry_refcnt(struct flow_entry *fe);
998 extern bool rx_flow_demux_match(struct nx_flowswitch *, struct flow_entry *, struct __kern_packet *);
999 extern struct flow_entry *rx_lookup_child_flow(struct nx_flowswitch *fsw,
1000     struct flow_entry *, struct __kern_packet *);
1001 extern struct flow_entry *tx_lookup_child_flow(struct flow_entry *, uuid_t);
1002 
1003 extern struct flow_entry_dead *flow_entry_dead_alloc(zalloc_flags_t);
1004 extern void flow_entry_dead_free(struct flow_entry_dead *);
1005 
1006 extern void flow_entry_stats_get(struct flow_entry *, struct sk_stats_flow *);
1007 
1008 extern int flow_pkt_classify(struct __kern_packet *pkt, struct ifnet *ifp,
1009     sa_family_t af, bool input);
1010 
1011 extern void flow_track_stats(struct flow_entry *, uint64_t, uint64_t,
1012     bool, bool);
1013 extern int flow_pkt_track(struct flow_entry *, struct __kern_packet *, bool);
1014 extern boolean_t flow_track_tcp_want_abort(struct flow_entry *);
1015 extern void flow_track_abort_tcp( struct flow_entry *fe,
1016     struct __kern_packet *in_pkt, struct __kern_packet *rst_pkt);
1017 extern void flow_track_abort_quic(struct flow_entry *fe,
1018     uint8_t *__counted_by(QUIC_STATELESS_RESET_TOKEN_SIZE)token);
1019 
1020 extern void fsw_host_rx(struct nx_flowswitch *, struct pktq *);
1021 extern void fsw_host_sendup(struct ifnet *, struct mbuf *, struct mbuf *,
1022     uint32_t, uint32_t);
1023 
1024 extern void flow_rx_agg_tcp(struct nx_flowswitch *fsw, struct flow_entry *fe,
1025     struct pktq *rx_pkts, uint32_t rx_bytes, uint32_t flags);
1026 
1027 extern void flow_route_init(void);
1028 extern void flow_route_fini(void);
1029 extern struct flow_route_bucket *__sized_by(*tot_sz)
1030 flow_route_buckets_alloc(size_t, size_t *, size_t * tot_sz);
1031 extern void flow_route_buckets_free(struct flow_route_bucket *, size_t);
1032 extern void flow_route_bucket_init(struct flow_route_bucket *);
1033 extern void flow_route_bucket_destroy(struct flow_route_bucket *);
1034 extern void flow_route_bucket_purge_all(struct flow_route_bucket *);
1035 extern struct flow_route_id_bucket *__sized_by(*tot_sz)
1036 flow_route_id_buckets_alloc(size_t, size_t *, size_t * tot_sz);
1037 extern void flow_route_id_buckets_free(struct flow_route_id_bucket *, size_t);
1038 extern void flow_route_id_bucket_init(struct flow_route_id_bucket *);
1039 extern void flow_route_id_bucket_destroy(struct flow_route_id_bucket *);
1040 
1041 extern int flow_route_select_laddr(union sockaddr_in_4_6 *,
1042     union sockaddr_in_4_6 *, struct ifnet *, struct rtentry *, uint32_t *, int);
1043 extern int flow_route_find(struct kern_nexus *, struct flow_mgr *,
1044     struct ifnet *, struct nx_flow_req *, flow_route_ctor_fn_t,
1045     flow_route_resolve_fn_t, void *, struct flow_route **);
1046 extern int flow_route_configure(struct flow_route *, struct ifnet *, struct nx_flow_req *);
1047 extern void flow_route_retain(struct flow_route *);
1048 extern void flow_route_release(struct flow_route *);
1049 extern uint32_t flow_route_prune(struct flow_mgr *, struct ifnet *,
1050     uint32_t *);
1051 extern void flow_route_cleanup(struct flow_route *);
1052 extern boolean_t flow_route_laddr_validate(union sockaddr_in_4_6 *,
1053     struct ifnet *, uint32_t *);
1054 extern boolean_t flow_route_key_validate(struct flow_key *, struct ifnet *,
1055     uint32_t *);
1056 extern void flow_qset_select_dynamic(struct nx_flowswitch *,
1057     struct flow_entry *, boolean_t);
1058 extern void flow_stats_init(void);
1059 extern void flow_stats_fini(void);
1060 extern struct flow_stats *flow_stats_alloc(boolean_t cansleep);
1061 
1062 #if SK_LOG
1063 #define FLOWKEY_DBGBUF_SIZE   256
1064 #define FLOWENTRY_DBGBUF_SIZE   512
1065 extern char *fk_as_string(const struct flow_key *fk, char *__counted_by(dsz)dst, size_t dsz);
1066 extern char *fe_as_string(const struct flow_entry *fe, char *__counted_by(dsz)dst, size_t dsz);
1067 #endif /* SK_LOG */
1068 __END_DECLS
1069 #endif /* BSD_KERNEL_PRIVATE */
1070 #endif /* !_SKYWALK_NEXUS_FLOWSIWTCH_FLOW_FLOWVAR_H_ */
1071