xref: /xnu-10002.61.3/bsd/skywalk/nexus/flowswitch/flow/flow_entry.c (revision 0f4c859e951fba394238ab619495c4e1d54d0f34)
/*
 * Copyright (c) 2016-2022 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <skywalk/os_skywalk_private.h>

#include <dev/random/randomdev.h>
#include <net/flowhash.h>
#include <netkey/key.h>

#include <skywalk/nexus/flowswitch/fsw_var.h>
#include <skywalk/nexus/flowswitch/flow/flow_var.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/namespace/flowidns.h>

struct flow_entry *fe_alloc(boolean_t);
static void fe_free(struct flow_entry *);
static int fe_id_cmp(const struct flow_entry *, const struct flow_entry *);
static void fe_stats_init(struct flow_entry *);
static void fe_stats_update(struct flow_entry *);

RB_GENERATE_PREV(flow_entry_id_tree, flow_entry, fe_id_link, fe_id_cmp);

os_refgrp_decl(static, flow_entry_refgrp, "flow_entry", NULL);

KALLOC_TYPE_DECLARE(sk_fed_zone);

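/*
 * Flow key masks of increasing specificity, used when matching packets
 * against flow entries.  In each mask below, an all-ones field takes
 * part in the comparison and a zero field is a wildcard: fk_mask_2tuple
 * matches on protocol and source port only, while fk_mask_5tuple
 * matches on IP version, protocol, both ports and both addresses.
 */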
const struct flow_key fk_mask_2tuple
__sk_aligned(16) =
{
	.fk_mask = FKMASK_2TUPLE,
	.fk_ipver = 0,
	.fk_proto = 0xff,
	.fk_sport = 0xffff,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0,
	.fk_src._addr64[1] = 0,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_3tuple
__sk_aligned(16) =
{
	.fk_mask = FKMASK_3TUPLE,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0xffff,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_4tuple
__sk_aligned(16) =
{
	.fk_mask = FKMASK_4TUPLE,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0xffff,
	.fk_dport = 0xffff,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_5tuple
__sk_aligned(16) =
{
	.fk_mask = FKMASK_5TUPLE,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0xffff,
	.fk_dport = 0xffff,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0xffffffffffffffffULL,
	.fk_dst._addr64[1] = 0xffffffffffffffffULL,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_ipflow1
__sk_aligned(16) =
{
	.fk_mask = FKMASK_IPFLOW1,
	.fk_ipver = 0,
	.fk_proto = 0xff,
	.fk_sport = 0,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0,
	.fk_src._addr64[1] = 0,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_ipflow2
__sk_aligned(16) =
{
	.fk_mask = FKMASK_IPFLOW2,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0,
	.fk_dst._addr64[1] = 0,
	.fk_pad[0] = 0,
};

const struct flow_key fk_mask_ipflow3
__sk_aligned(16) =
{
	.fk_mask = FKMASK_IPFLOW3,
	.fk_ipver = 0xff,
	.fk_proto = 0xff,
	.fk_sport = 0,
	.fk_dport = 0,
	.fk_src._addr64[0] = 0xffffffffffffffffULL,
	.fk_src._addr64[1] = 0xffffffffffffffffULL,
	.fk_dst._addr64[0] = 0xffffffffffffffffULL,
	.fk_dst._addr64[1] = 0xffffffffffffffffULL,
	.fk_pad[0] = 0,
};

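/*
 * Look up a flow owner in the given bucket by (pid, context,
 * low_latency).  The bucket lock must be held; no reference is taken
 * on the returned owner.
 */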
struct flow_owner *
flow_owner_find_by_pid(struct flow_owner_bucket *fob, pid_t pid, void *context,
    bool low_latency)
{
	struct flow_owner find = { .fo_context = context, .fo_pid = pid,
		                   .fo_low_latency = low_latency};

	ASSERT(low_latency == true || low_latency == false);
	FOB_LOCK_ASSERT_HELD(fob);
	return RB_FIND(flow_owner_tree, &fob->fob_owner_head, &find);
}

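/*
 * Look up a flow entry by UUID in the owner's id tree.  The owner's
 * bucket lock must be held.  On success the entry is returned with a
 * reference added; the caller is responsible for flow_entry_release().
 */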
struct flow_entry *
flow_entry_find_by_uuid(struct flow_owner *fo, uuid_t uuid)
{
	struct flow_entry find, *fe = NULL;
	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

	uuid_copy(find.fe_uuid, uuid);
	fe = RB_FIND(flow_entry_id_tree, &fo->fo_flow_entry_id_head, &find);
	if (fe != NULL) {
		flow_entry_retain(fe);
	}

	return fe;
}

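/*
 * Allocate a flowswitch-domain flow ID for this entry by handing its
 * addresses, ports, address family and protocol to the flow ID
 * namespace.
 */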
static uint32_t
flow_entry_calc_flowid(struct flow_entry *fe)
{
	uint32_t flowid;
	struct flowidns_flow_key fk;

	bzero(&fk, sizeof(fk));
	_CASSERT(sizeof(fe->fe_key.fk_src) == sizeof(fk.ffk_laddr));
	_CASSERT(sizeof(fe->fe_key.fk_dst) == sizeof(fk.ffk_raddr));
	bcopy(&fe->fe_key.fk_src, &fk.ffk_laddr, sizeof(fk.ffk_laddr));
	bcopy(&fe->fe_key.fk_dst, &fk.ffk_raddr, sizeof(fk.ffk_raddr));

	fk.ffk_lport = fe->fe_key.fk_sport;
	fk.ffk_rport = fe->fe_key.fk_dport;
	fk.ffk_af = (fe->fe_key.fk_ipver == 4) ? AF_INET : AF_INET6;
	fk.ffk_proto = fe->fe_key.fk_proto;

	flowidns_allocate_flowid(FLOWIDNS_DOMAIN_FLOWSWITCH, &fk, &flowid);
	return flowid;
}

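/*
 * Link a child flow entry onto its parent's child list, taking a
 * reference on the child.  Fails if the parent has gone nonviable or
 * if a child with the same UUID is already present; nonviable children
 * encountered during the walk are unlinked and released.
 */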
static bool
flow_entry_add_child(struct flow_entry *parent_fe, struct flow_entry *child_fe)
{
	SK_LOG_VAR(char dbgbuf[FLOWENTRY_DBGBUF_SIZE]);
	ASSERT(parent_fe->fe_flags & FLOWENTF_PARENT);

	lck_rw_lock_exclusive(&parent_fe->fe_child_list_lock);

	if (parent_fe->fe_flags & FLOWENTF_NONVIABLE) {
		SK_ERR("child entry add failed, parent fe \"%s\" nonviable 0x%llx "
		    "flags 0x%b %s(%d)", fe_as_string(parent_fe,
		    dbgbuf, sizeof(dbgbuf)), SK_KVA(parent_fe), parent_fe->fe_flags,
		    FLOWENTF_BITS, parent_fe->fe_proc_name,
		    parent_fe->fe_pid);
		lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);
		return false;
	}

	struct flow_entry *fe, *tfe;
	TAILQ_FOREACH_SAFE(fe, &parent_fe->fe_child_list, fe_child_link, tfe) {
		if (!fe_id_cmp(fe, child_fe)) {
			lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);
			SK_ERR("child entry \"%s\" already exists at fe 0x%llx "
			    "flags 0x%b %s(%d)", fe_as_string(fe,
			    dbgbuf, sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags,
			    FLOWENTF_BITS, fe->fe_proc_name,
			    fe->fe_pid);
			return false;
		}

		if (fe->fe_flags & FLOWENTF_NONVIABLE) {
			TAILQ_REMOVE(&parent_fe->fe_child_list, fe, fe_child_link);
			ASSERT(--parent_fe->fe_child_count >= 0);
			flow_entry_release(&fe);
		}
	}

	flow_entry_retain(child_fe);
	TAILQ_INSERT_TAIL(&parent_fe->fe_child_list, child_fe, fe_child_link);
	ASSERT(++parent_fe->fe_child_count > 0);

	lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);

	return true;
}

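/*
 * Unlink and release every child of a parent flow entry.  Children
 * that are still viable are first flagged for nonviability so that the
 * reaper thread will clean up their state; the reaper is kicked once
 * at the end if any child was newly flagged.
 */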
static void
flow_entry_remove_all_children(struct flow_entry *parent_fe, struct nx_flowswitch *fsw)
{
	bool sched_reaper_thread = false;

	ASSERT(parent_fe->fe_flags & FLOWENTF_PARENT);

	lck_rw_lock_exclusive(&parent_fe->fe_child_list_lock);

	struct flow_entry *fe, *tfe;
	TAILQ_FOREACH_SAFE(fe, &parent_fe->fe_child_list, fe_child_link, tfe) {
		if (!(fe->fe_flags & FLOWENTF_NONVIABLE)) {
			/*
			 * fsw_pending_nonviable is a hint for the reaper
			 * thread; since setting fe_want_nonviable and
			 * incrementing the fsw_pending_nonviable counter
			 * is not atomic, let the increment happen first,
			 * and the thread losing the CAS does the decrement.
			 */
			os_atomic_inc(&fsw->fsw_pending_nonviable, relaxed);
			if (os_atomic_cmpxchg(&fe->fe_want_nonviable, 0, 1, acq_rel)) {
				sched_reaper_thread = true;
			} else {
				os_atomic_dec(&fsw->fsw_pending_nonviable, relaxed);
			}
		}

		TAILQ_REMOVE(&parent_fe->fe_child_list, fe, fe_child_link);
		ASSERT(--parent_fe->fe_child_count >= 0);
		flow_entry_release(&fe);
	}

	lck_rw_unlock_exclusive(&parent_fe->fe_child_list_lock);

	if (sched_reaper_thread) {
		fsw_reap_sched(fsw);
	}
}

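/*
 * Copy the demux patterns from the flow request into a child flow
 * entry, and pre-select an optimized mask-compare routine for the
 * common 16- and 32-byte pattern lengths.  Patterns longer than 32
 * bytes are not supported.
 */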
static void
flow_entry_set_demux_patterns(struct flow_entry *fe, struct nx_flow_req *req)
{
	ASSERT(fe->fe_flags & FLOWENTF_CHILD);
	ASSERT(req->nfr_flow_demux_count > 0);

	fe->fe_demux_patterns = sk_alloc_type_array(struct kern_flow_demux_pattern, req->nfr_flow_demux_count,
	    Z_WAITOK | Z_NOFAIL, skmem_tag_flow_demux);

	for (int i = 0; i < req->nfr_flow_demux_count; i++) {
		bcopy(&req->nfr_flow_demux_patterns[i], &fe->fe_demux_patterns[i].fdp_demux_pattern,
		    sizeof(struct flow_demux_pattern));

		fe->fe_demux_patterns[i].fdp_memcmp_mask = NULL;
		if (req->nfr_flow_demux_patterns[i].fdp_len == 16) {
			fe->fe_demux_patterns[i].fdp_memcmp_mask = sk_memcmp_mask_16B;
		} else if (req->nfr_flow_demux_patterns[i].fdp_len == 32) {
			fe->fe_demux_patterns[i].fdp_memcmp_mask = sk_memcmp_mask_32B;
		} else if (req->nfr_flow_demux_patterns[i].fdp_len > 32) {
			VERIFY(0);
		}
	}

	fe->fe_demux_pattern_count = req->nfr_flow_demux_count;
}

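/*
 * Translate a flow key into an inet traffic descriptor, copying over
 * only the fields present in the key's mask.  The descriptor is
 * marked for both inbound and outbound traffic.
 */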
static int
convert_flowkey_to_inet_td(struct flow_key *key,
    struct ifnet_traffic_descriptor_inet *td)
{
	if ((key->fk_mask & FKMASK_IPVER) != 0) {
		td->inet_ipver = key->fk_ipver;
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_IPVER;
	}
	if ((key->fk_mask & FKMASK_PROTO) != 0) {
		td->inet_proto = key->fk_proto;
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_PROTO;
	}
	if ((key->fk_mask & FKMASK_SRC) != 0) {
		if (td->inet_ipver == IPVERSION) {
			bcopy(&key->fk_src4, &td->inet_laddr.iia_v4addr,
			    sizeof(key->fk_src4));
		} else {
			bcopy(&key->fk_src6, &td->inet_laddr,
			    sizeof(key->fk_src6));
		}
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_LADDR;
	}
	if ((key->fk_mask & FKMASK_DST) != 0) {
		if (td->inet_ipver == IPVERSION) {
			bcopy(&key->fk_dst4, &td->inet_raddr.iia_v4addr,
			    sizeof(key->fk_dst4));
		} else {
			bcopy(&key->fk_dst6, &td->inet_raddr,
			    sizeof(key->fk_dst6));
		}
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_RADDR;
	}
	if ((key->fk_mask & FKMASK_SPORT) != 0) {
		td->inet_lport = key->fk_sport;
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_LPORT;
	}
	if ((key->fk_mask & FKMASK_DPORT) != 0) {
		td->inet_rport = key->fk_dport;
		td->inet_mask |= IFNET_TRAFFIC_DESCRIPTOR_INET_RPORT;
	}
	td->inet_common.itd_type = IFNET_TRAFFIC_DESCRIPTOR_TYPE_INET;
	td->inet_common.itd_len = sizeof(*td);
	td->inet_common.itd_flags = IFNET_TRAFFIC_DESCRIPTOR_FLAG_INBOUND |
	    IFNET_TRAFFIC_DESCRIPTOR_FLAG_OUTBOUND;
	return 0;
}

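/*
 * Re-evaluate the netif queue set assignment for a flow against the
 * interface's current traffic rules.  If skip_if_no_change is set and
 * the rule generation ID has not moved, the existing assignment is
 * kept; otherwise any previous qset reference is dropped and a new one
 * is looked up from the matching rule, if any.
 */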
void
flow_qset_select_dynamic(struct nx_flowswitch *fsw, struct flow_entry *fe,
    boolean_t skip_if_no_change)
{
	struct ifnet_traffic_descriptor_inet td;
	struct ifnet *ifp;
	uint64_t qset_id;
	struct nx_netif *nif;
	boolean_t changed;
	int err;

	ifp = fsw->fsw_ifp;
	changed = ifnet_sync_traffic_rule_genid(ifp, &fe->fe_tr_genid);
	if (!changed && skip_if_no_change) {
		return;
	}
	if (fe->fe_qset != NULL) {
		nx_netif_qset_release(&fe->fe_qset);
		ASSERT(fe->fe_qset == NULL);
	}
	if (ifp->if_traffic_rule_count == 0) {
		DTRACE_SKYWALK2(no__rules, struct nx_flowswitch *, fsw,
		    struct flow_entry *, fe);
		return;
	}
	err = convert_flowkey_to_inet_td(&fe->fe_key, &td);
	ASSERT(err == 0);
	err = nxctl_inet_traffic_rule_find_qset_id(ifp->if_xname, &td, &qset_id);
	if (err != 0) {
		DTRACE_SKYWALK3(qset__id__not__found,
		    struct nx_flowswitch *, fsw,
		    struct flow_entry *, fe,
		    struct ifnet_traffic_descriptor_inet *, &td);
		return;
	}
	DTRACE_SKYWALK4(qset__id__found, struct nx_flowswitch *, fsw,
	    struct flow_entry *, fe, struct ifnet_traffic_descriptor_inet *,
	    &td, uint64_t, qset_id);
	nif = NX_NETIF_PRIVATE(fsw->fsw_dev_ch->ch_na->na_nx);
	ASSERT(fe->fe_qset == NULL);
	fe->fe_qset = nx_netif_find_qset(nif, qset_id);
}

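/*
 * Allocate and initialize a flow entry from a flow request.  Handles
 * conflict detection against existing flows, parent/child linkage,
 * port/protocol/ipsec reservations, flow advisory index setup, flow ID
 * assignment and insertion into the flow table (regular flows) or the
 * parent's child list (child flows).  Returns NULL and sets *perr on
 * failure.
 */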
/* writer-lock must be owned for memory management functions */
struct flow_entry *
flow_entry_alloc(struct flow_owner *fo, struct nx_flow_req *req, int *perr)
{
	SK_LOG_VAR(char dbgbuf[FLOWENTRY_DBGBUF_SIZE]);
	nexus_port_t nx_port = req->nfr_nx_port;
	struct flow_entry *fe = NULL;
	struct flow_entry *parent_fe = NULL;
	flowadv_idx_t fadv_idx = FLOWADV_IDX_NONE;
	struct nexus_adapter *dev_na;
	struct nx_netif *nif;
	int err;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
	ASSERT(nx_port != NEXUS_PORT_ANY);
	ASSERT(!fo->fo_nx_port_destroyed);

	*perr = 0;

	struct flow_key key __sk_aligned(16);
	err = flow_req2key(req, &key);
	if (__improbable(err != 0)) {
		SK_ERR("invalid request (err %d)", err);
		goto done;
	}

	struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
	fe = flow_mgr_find_conflicting_fe(fm, &key);
	if (fe != NULL) {
		if ((fe->fe_flags & FLOWENTF_PARENT) &&
		    uuid_compare(fe->fe_uuid, req->nfr_parent_flow_uuid) == 0) {
			parent_fe = fe;
			fe = NULL;
		} else {
			SK_ERR("entry \"%s\" already exists at fe 0x%llx "
			    "flags 0x%b %s(%d)", fe_as_string(fe,
			    dbgbuf, sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags,
			    FLOWENTF_BITS, fe->fe_proc_name,
			    fe->fe_pid);
			/* don't return it */
			flow_entry_release(&fe);
			err = EEXIST;
			goto done;
		}
	} else if (!uuid_is_null(req->nfr_parent_flow_uuid)) {
		uuid_string_t uuid_str;
		sk_uuid_unparse(req->nfr_parent_flow_uuid, uuid_str);
		SK_ERR("parent entry \"%s\" does not exist", uuid_str);
		err = ENOENT;
		goto done;
	}

	if ((req->nfr_flags & NXFLOWREQF_FLOWADV) &&
	    (flow_owner_flowadv_index_alloc(fo, &fadv_idx) != 0)) {
		SK_ERR("failed to alloc flowadv index for flow %s",
		    sk_uuid_unparse(req->nfr_flow_uuid, dbgbuf));
		/* XXX: what is the most appropriate error code? */
		err = ENOSPC;
		goto done;
	}

	fe = fe_alloc(TRUE);
	if (__improbable(fe == NULL)) {
		err = ENOMEM;
		goto done;
	}

	fe->fe_key = key;
	if (req->nfr_route != NULL) {
		fe->fe_laddr_gencnt = req->nfr_route->fr_laddr_gencnt;
	} else {
		fe->fe_laddr_gencnt = req->nfr_saddr_gencnt;
	}

	if (__improbable(req->nfr_flags & NXFLOWREQF_LISTENER)) {
		/* mark this as listener mode */
		os_atomic_or(&fe->fe_flags, FLOWENTF_LISTENER, relaxed);
	} else {
		ASSERT((fe->fe_key.fk_ipver == IPVERSION &&
		    fe->fe_key.fk_src4.s_addr != INADDR_ANY) ||
		    (fe->fe_key.fk_ipver == IPV6_VERSION &&
		    !IN6_IS_ADDR_UNSPECIFIED(&fe->fe_key.fk_src6)));

		/* mark this as connected mode */
		os_atomic_or(&fe->fe_flags, FLOWENTF_CONNECTED, relaxed);
	}

	if (req->nfr_flags & NXFLOWREQF_NOWAKEFROMSLEEP) {
		fe->fe_flags |= FLOWENTF_NOWAKEFROMSLEEP;
	}
	fe->fe_port_reservation = req->nfr_port_reservation;
	req->nfr_port_reservation = NULL;
	if (req->nfr_flags & NXFLOWREQF_EXT_PORT_RSV) {
		fe->fe_flags |= FLOWENTF_EXTRL_PORT;
	}
	fe->fe_proto_reservation = req->nfr_proto_reservation;
	req->nfr_proto_reservation = NULL;
	if (req->nfr_flags & NXFLOWREQF_EXT_PROTO_RSV) {
		fe->fe_flags |= FLOWENTF_EXTRL_PROTO;
	}
	fe->fe_ipsec_reservation = req->nfr_ipsec_reservation;
	req->nfr_ipsec_reservation = NULL;

	fe->fe_tx_process = dp_flow_tx_process;
	fe->fe_rx_process = dp_flow_rx_process;

	dev_na = fo->fo_fsw->fsw_dev_ch->ch_na;
	nif = NX_NETIF_PRIVATE(dev_na->na_nx);
	if (NX_LLINK_PROV(nif->nif_nx) &&
	    (fe->fe_key.fk_mask & (FKMASK_IPVER | FKMASK_PROTO | FKMASK_DST)) ==
	    (FKMASK_IPVER | FKMASK_PROTO | FKMASK_DST)) {
		if (req->nfr_qset_id != 0) {
			fe->fe_qset_select = FE_QSET_SELECT_FIXED;
			fe->fe_qset_id = req->nfr_qset_id;
			fe->fe_qset = nx_netif_find_qset(nif, req->nfr_qset_id);
		} else {
			fe->fe_qset_select = FE_QSET_SELECT_DYNAMIC;
			fe->fe_qset_id = 0;
			flow_qset_select_dynamic(fo->fo_fsw, fe, FALSE);
		}
	} else {
		fe->fe_qset_select = FE_QSET_SELECT_NONE;
	}
	if (req->nfr_flags & NXFLOWREQF_LOW_LATENCY) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_LOW_LATENCY, relaxed);
	}

	fe->fe_transport_protocol = req->nfr_transport_protocol;
	if (NX_FSW_TCP_RX_AGG_ENABLED() &&
	    (fo->fo_fsw->fsw_nx->nx_prov->nxprov_params->nxp_max_frags > 1) &&
	    (fe->fe_key.fk_proto == IPPROTO_TCP) &&
	    (fe->fe_key.fk_mask == FKMASK_5TUPLE)) {
		fe->fe_rx_process = flow_rx_agg_tcp;
	}
	uuid_copy(fe->fe_uuid, req->nfr_flow_uuid);
	if ((req->nfr_flags & NXFLOWREQF_LISTENER) == 0 &&
	    (req->nfr_flags & NXFLOWREQF_TRACK) != 0) {
		switch (req->nfr_ip_protocol) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
			os_atomic_or(&fe->fe_flags, FLOWENTF_TRACK, relaxed);
			break;
		default:
			break;
		}
	}

	if (req->nfr_flags & NXFLOWREQF_QOS_MARKING) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_QOS_MARKING, relaxed);
	}

	if (req->nfr_flags & NXFLOWREQF_PARENT) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_PARENT, relaxed);
		TAILQ_INIT(&fe->fe_child_list);
		lck_rw_init(&fe->fe_child_list_lock, &nexus_lock_group, &nexus_lock_attr);
	}

	if (req->nfr_route != NULL) {
		fe->fe_route = req->nfr_route;
		req->nfr_route = NULL;
	}

	fe->fe_nx_port = nx_port;
	fe->fe_adv_idx = fadv_idx;

	if (req->nfr_inp_flowhash != 0) {
		/*
		 * BSD flow, use the inpcb flow hash value
		 */
		fe->fe_flowid = req->nfr_inp_flowhash;
		fe->fe_flags |= FLOWENTF_EXTRL_FLOWID;
	} else {
		fe->fe_flowid = flow_entry_calc_flowid(fe);
	}

	if (fe->fe_adv_idx != FLOWADV_IDX_NONE && fo->fo_nx_port_na != NULL) {
		na_flowadv_entry_alloc(fo->fo_nx_port_na, fe->fe_uuid,
		    fe->fe_adv_idx, fe->fe_flowid);
	}

	if (KPKT_VALID_SVC(req->nfr_svc_class)) {
		fe->fe_svc_class = (kern_packet_svc_class_t)req->nfr_svc_class;
	} else {
		fe->fe_svc_class = KPKT_SC_BE;
	}

	uuid_copy(fe->fe_eproc_uuid, req->nfr_euuid);
	fe->fe_policy_id = req->nfr_policy_id;
	fe->fe_skip_policy_id = req->nfr_skip_policy_id;

	err = flow_mgr_flow_hash_mask_add(fm, fe->fe_key.fk_mask);
	ASSERT(err == 0);

	if (parent_fe != NULL) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_CHILD, relaxed);
		flow_entry_set_demux_patterns(fe, req);
		fe->fe_demux_pkt_data = sk_alloc_data(FLOW_DEMUX_MAX_LEN, Z_WAITOK | Z_NOFAIL, skmem_tag_flow_demux);
		if (!flow_entry_add_child(parent_fe, fe)) {
			goto done;
		}
	} else {
		fe->fe_key_hash = flow_key_hash(&fe->fe_key);
		err = cuckoo_hashtable_add_with_hash(fm->fm_flow_table, &fe->fe_cnode,
		    fe->fe_key_hash);
		if (err != 0) {
			SK_ERR("flow table add failed (err %d)", err);
			flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
			goto done;
		}
	}

	RB_INSERT(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
	flow_entry_retain(fe);  /* one refcnt in id_tree */

	*(struct nx_flowswitch **)(uintptr_t)&fe->fe_fsw = fo->fo_fsw;
	fe->fe_pid = fo->fo_pid;
	if (req->nfr_epid != -1 && req->nfr_epid != fo->fo_pid) {
		fe->fe_epid = req->nfr_epid;
		proc_name(fe->fe_epid, fe->fe_eproc_name,
		    sizeof(fe->fe_eproc_name));
	} else {
		fe->fe_epid = -1;
	}

	(void) snprintf(fe->fe_proc_name, sizeof(fe->fe_proc_name), "%s",
	    fo->fo_name);

	fe_stats_init(fe);
	flow_stats_retain(fe->fe_stats);
	req->nfr_flow_stats = fe->fe_stats;

#if SK_LOG
	SK_DF(SK_VERB_FLOW, "allocated entry \"%s\" fe 0x%llx flags 0x%b "
	    "[fo 0x%llx ]", fe_as_string(fe, dbgbuf,
	    sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS,
	    SK_KVA(fo));
#endif /* SK_LOG */

done:
	if (parent_fe != NULL) {
		flow_entry_release(&parent_fe);
	}
	if (err != 0) {
		if (fadv_idx != FLOWADV_IDX_NONE) {
			flow_owner_flowadv_index_free(fo, fadv_idx);
		}
		if (fe != NULL) {
			flow_entry_release(&fe);
		}
	}
	*perr = err;
	return fe;
}

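/*
 * Tear down an active flow entry: resolve any pending nonviability
 * request, withdraw the local port namespace reservation, free the
 * flow advisory entry, and mark all child flows nonviable.  May be
 * called more than once; the FLOWENTF_TORN_DOWN flag keeps the
 * one-time work idempotent.
 */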
void
flow_entry_teardown(struct flow_owner *fo, struct flow_entry *fe)
{
#if SK_LOG
	char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
	SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b [fo 0x%llx] "
	    "non_via %d withdrawn %d", fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
	    SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS, SK_KVA(fo),
	    fe->fe_want_nonviable, fe->fe_want_withdraw);
#endif /* SK_LOG */
	struct nx_flowswitch *fsw = fo->fo_fsw;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

	ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
	ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
	ASSERT(fsw != NULL);

	if (os_atomic_cmpxchg(&fe->fe_want_nonviable, 1, 0, acq_rel)) {
		ASSERT(fsw->fsw_pending_nonviable != 0);
		os_atomic_dec(&fsw->fsw_pending_nonviable, relaxed);
		os_atomic_or(&fe->fe_flags, FLOWENTF_NONVIABLE, relaxed);
	}

	/* always withdraw namespace during tear down */
	if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
	    !(fe->fe_flags & FLOWENTF_WITHDRAWN)) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_WITHDRAWN, relaxed);
		os_atomic_store(&fe->fe_want_withdraw, 0, release);
		/* local port is now inactive; not eligible for offload */
		flow_namespace_withdraw(&fe->fe_port_reservation);
	}

	/* we may get here multiple times, so check */
	if (!(fe->fe_flags & FLOWENTF_TORN_DOWN)) {
		os_atomic_or(&fe->fe_flags, FLOWENTF_TORN_DOWN, relaxed);
		if (fe->fe_adv_idx != FLOWADV_IDX_NONE) {
			if (fo->fo_nx_port_na != NULL) {
				na_flowadv_entry_free(fo->fo_nx_port_na,
				    fe->fe_uuid, fe->fe_adv_idx, fe->fe_flowid);
			}
			flow_owner_flowadv_index_free(fo, fe->fe_adv_idx);
			fe->fe_adv_idx = FLOWADV_IDX_NONE;
		}
	}
	ASSERT(fe->fe_adv_idx == FLOWADV_IDX_NONE);
	ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);

	/* mark child flows as nonviable */
	if (fe->fe_flags & FLOWENTF_PARENT) {
		flow_entry_remove_all_children(fe, fsw);
	}
}

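/*
 * Destroy a flow entry: tear it down, remove it from the flow table
 * (regular and parent flows) and the owner's id tree, then either
 * release it immediately or, for flows that want a graceful close,
 * hand it to the linger list for deferred reaping.  QUIC flows may
 * additionally be aborted with the supplied close parameters.
 */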
void
flow_entry_destroy(struct flow_owner *fo, struct flow_entry *fe, bool nolinger,
    void *close_params)
{
	struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
	int err;

	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

	/*
	 * regular flow: one in flow_table, one in id_tree, one here
	 * child flow: one in id_tree, one here
	 */
	ASSERT(flow_entry_refcnt(fe) > 2 ||
	    ((fe->fe_flags & FLOWENTF_CHILD) && flow_entry_refcnt(fe) > 1));

	flow_entry_teardown(fo, fe);

	err = flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
	ASSERT(err == 0);

	/* only regular or parent flows have entries in flow_table */
	if (__probable(!(fe->fe_flags & FLOWENTF_CHILD))) {
		uint32_t hash;
		hash = flow_key_hash(&fe->fe_key);
		cuckoo_hashtable_del(fm->fm_flow_table, &fe->fe_cnode, hash);
	}

	RB_REMOVE(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
	struct flow_entry *tfe = fe;
	flow_entry_release(&tfe);

	ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
	os_atomic_or(&fe->fe_flags, FLOWENTF_DESTROYED, relaxed);

	if (fe->fe_transport_protocol == IPPROTO_QUIC) {
		if (!nolinger && close_params != NULL) {
			flow_track_abort_quic(fe, close_params);
		}
		flow_entry_release(&fe);
	} else if (nolinger || !(fe->fe_flags & FLOWENTF_WAIT_CLOSE)) {
		flow_entry_release(&fe);
	} else {
		fsw_linger_insert(fe);
	}
}

uint32_t
flow_entry_refcnt(struct flow_entry *fe)
{
	return os_ref_get_count(&fe->fe_refcnt);
}

void
flow_entry_retain(struct flow_entry *fe)
{
	os_ref_retain(&fe->fe_refcnt);
}

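/*
 * Drop the caller's reference on a flow entry and NULL out the
 * caller's pointer; the entry is freed once the last reference goes
 * away.  Illustrative usage, pairing with the retaining lookup above:
 *
 *	struct flow_entry *fe = flow_entry_find_by_uuid(fo, uuid);
 *	if (fe != NULL) {
 *		(use fe)
 *		flow_entry_release(&fe);	(fe is now NULL)
 *	}
 */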
void
flow_entry_release(struct flow_entry **pfe)
{
	struct flow_entry *fe = *pfe;
	ASSERT(fe != NULL);
	*pfe = NULL;    /* caller loses its reference */
#if SK_LOG
	if (__improbable(sk_verbose != 0)) {
		char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
		SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b",
		    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)), SK_KVA(fe),
		    fe->fe_flags, FLOWENTF_BITS);
	}
#endif /* SK_LOG */

	if (__improbable(os_ref_release(&fe->fe_refcnt) == 0)) {
		fe->fe_nx_port = NEXUS_PORT_ANY;
		if (fe->fe_route != NULL) {
			flow_route_release(fe->fe_route);
			fe->fe_route = NULL;
		}
		if (fe->fe_qset != NULL) {
			nx_netif_qset_release(&fe->fe_qset);
			ASSERT(fe->fe_qset == NULL);
		}
		if (fe->fe_demux_patterns != NULL) {
			sk_free_type_array(struct kern_flow_demux_pattern,
			    fe->fe_demux_pattern_count, fe->fe_demux_patterns);
			fe->fe_demux_patterns = NULL;
			fe->fe_demux_pattern_count = 0;
		}
		if (fe->fe_demux_pkt_data != NULL) {
			sk_free_data(fe->fe_demux_pkt_data, FLOW_DEMUX_MAX_LEN);
			fe->fe_demux_pkt_data = NULL;
		}
		fe_free(fe);
	}
}

struct flow_entry_dead *
flow_entry_dead_alloc(zalloc_flags_t how)
{
	struct flow_entry_dead *fed;

	fed = zalloc_flags(sk_fed_zone, how | Z_ZERO);
	if (fed != NULL) {
		SK_DF(SK_VERB_MEM, "fed 0x%llx ALLOC", SK_KVA(fed));
	}
	return fed;
}

void
flow_entry_dead_free(struct flow_entry_dead *fed)
{
	SK_DF(SK_VERB_MEM, "fed 0x%llx FREE", SK_KVA(fed));
	zfree(sk_fed_zone, fed);
}

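/*
 * Populate the per-flow statistics record with the entry's identity
 * (nexus/flow UUIDs, interface, pids, process names, key, protocol and
 * service class) and its initial flag state.
 */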
static void
fe_stats_init(struct flow_entry *fe)
{
	struct nx_flowswitch *fsw = fe->fe_fsw;
	struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;

	ASSERT(fe->fe_stats != NULL);
	ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);

	bzero(sf, sizeof(*sf));
	uuid_copy(sf->sf_nx_uuid, fsw->fsw_nx->nx_uuid);
	uuid_copy(sf->sf_uuid, fe->fe_uuid);
	(void) strlcpy(sf->sf_if_name, fsw->fsw_flow_mgr->fm_name, IFNAMSIZ);
	sf->sf_if_index = fsw->fsw_ifp->if_index;
	sf->sf_pid = fe->fe_pid;
	sf->sf_epid = fe->fe_epid;
	(void) snprintf(sf->sf_proc_name, sizeof(sf->sf_proc_name), "%s",
	    fe->fe_proc_name);
	(void) snprintf(sf->sf_eproc_name, sizeof(sf->sf_eproc_name), "%s",
	    fe->fe_eproc_name);

	sf->sf_nx_port = fe->fe_nx_port;
	sf->sf_key = fe->fe_key;
	sf->sf_protocol = fe->fe_transport_protocol;
	sf->sf_svc_class = (packet_svc_class_t)fe->fe_svc_class;
	sf->sf_adv_idx = fe->fe_adv_idx;

	if (fe->fe_flags & FLOWENTF_TRACK) {
		sf->sf_flags |= SFLOWF_TRACK;
	}
	if (fe->fe_flags & FLOWENTF_LISTENER) {
		sf->sf_flags |= SFLOWF_LISTENER;
	}
	if (fe->fe_route != NULL && fe->fe_route->fr_flags & FLOWRTF_ONLINK) {
		sf->sf_flags |= SFLOWF_ONLINK;
	}

	fe_stats_update(fe);
}

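/*
 * Refresh the mutable parts of the statistics record from the entry's
 * current flags and TCP tracking state.  Note that of the flag bits
 * only SFLOWF_NOWAKEFROMSLEEP is ever cleared here; the others are
 * sticky once set.
 */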
static void
fe_stats_update(struct flow_entry *fe)
{
	struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;

	ASSERT(fe->fe_stats != NULL);
	ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);

	if (fe->fe_flags & FLOWENTF_CONNECTED) {
		sf->sf_flags |= SFLOWF_CONNECTED;
	}
	if (fe->fe_flags & FLOWENTF_QOS_MARKING) {
		sf->sf_flags |= SFLOWF_QOS_MARKING;
	}
	if (fe->fe_flags & FLOWENTF_WAIT_CLOSE) {
		sf->sf_flags |= SFLOWF_WAIT_CLOSE;
	}
	if (fe->fe_flags & FLOWENTF_CLOSE_NOTIFY) {
		sf->sf_flags |= SFLOWF_CLOSE_NOTIFY;
	}
	if (fe->fe_flags & FLOWENTF_ABORTED) {
		sf->sf_flags |= SFLOWF_ABORTED;
	}
	if (fe->fe_flags & FLOWENTF_NONVIABLE) {
		sf->sf_flags |= SFLOWF_NONVIABLE;
	}
	if (fe->fe_flags & FLOWENTF_WITHDRAWN) {
		sf->sf_flags |= SFLOWF_WITHDRAWN;
	}
	if (fe->fe_flags & FLOWENTF_TORN_DOWN) {
		sf->sf_flags |= SFLOWF_TORN_DOWN;
	}
	if (fe->fe_flags & FLOWENTF_DESTROYED) {
		sf->sf_flags |= SFLOWF_DESTROYED;
	}
	if (fe->fe_flags & FLOWENTF_LINGERING) {
		sf->sf_flags |= SFLOWF_LINGERING;
	}
	if (fe->fe_flags & FLOWENTF_LOW_LATENCY) {
		sf->sf_flags |= SFLOWF_LOW_LATENCY;
	}
	if (fe->fe_flags & FLOWENTF_PARENT) {
		sf->sf_flags |= SFLOWF_PARENT;
	}
	if (fe->fe_flags & FLOWENTF_CHILD) {
		sf->sf_flags |= SFLOWF_CHILD;
	}
	if (fe->fe_flags & FLOWENTF_NOWAKEFROMSLEEP) {
		sf->sf_flags |= SFLOWF_NOWAKEFROMSLEEP;
	} else {
		sf->sf_flags &= ~SFLOWF_NOWAKEFROMSLEEP;
	}

	sf->sf_bucket_idx = SFLOW_BUCKET_NONE;

	sf->sf_ltrack.sft_state = fe->fe_ltrack.fse_state;
	sf->sf_ltrack.sft_seq = fe->fe_ltrack.fse_seqlo;
	sf->sf_ltrack.sft_max_win = fe->fe_ltrack.fse_max_win;
	sf->sf_ltrack.sft_wscale = fe->fe_ltrack.fse_wscale;
	sf->sf_rtrack.sft_state = fe->fe_rtrack.fse_state;
	sf->sf_rtrack.sft_seq = fe->fe_rtrack.fse_seqlo;
	sf->sf_rtrack.sft_max_win = fe->fe_rtrack.fse_max_win;
}

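/*
 * Snapshot the flow's statistics into the caller-supplied buffer,
 * refreshing the record first.
 */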
void
flow_entry_stats_get(struct flow_entry *fe, struct sk_stats_flow *sf)
{
	_CASSERT(sizeof(fe->fe_stats->fs_stats) == sizeof(*sf));

	fe_stats_update(fe);
	bcopy(&fe->fe_stats->fs_stats, sf, sizeof(*sf));
}

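/*
 * Allocate a zeroed flow entry (and its stats buffer) from the
 * per-flow-entry cache, optionally blocking.  The entry is returned
 * with one reference held and its Rx/Tx packet queues initialized.
 */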
struct flow_entry *
fe_alloc(boolean_t can_block)
{
	struct flow_entry *fe;

	_CASSERT((offsetof(struct flow_entry, fe_key) % 16) == 0);

	fe = skmem_cache_alloc(sk_fe_cache,
	    can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
	if (fe == NULL) {
		return NULL;
	}

	/*
	 * fe_key is 16-byte aligned, which requires fe to begin on a
	 * 16-byte boundary as well.  This alignment is specified at
	 * sk_fe_cache creation time, and we assert it here.
	 */
	ASSERT(IS_P2ALIGNED(fe, 16));
	bzero(fe, sk_fe_size);

	fe->fe_stats = flow_stats_alloc(can_block);
	if (fe->fe_stats == NULL) {
		skmem_cache_free(sk_fe_cache, fe);
		return NULL;
	}

	SK_DF(SK_VERB_MEM, "fe 0x%llx ALLOC", SK_KVA(fe));

	os_ref_init(&fe->fe_refcnt, &flow_entry_refgrp);

	KPKTQ_INIT(&fe->fe_rx_pktq);
	KPKTQ_INIT(&fe->fe_tx_pktq);

	return fe;
}

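/*
 * Final teardown once the last reference is gone: release the stats
 * buffer and any remaining port, protocol, ipsec and flow ID
 * reservations that are owned by the entry, then return it to the
 * cache.  Externally owned reservations (FLOWENTF_EXTRL_*) are left
 * for their owners to release.
 */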
static void
fe_free(struct flow_entry *fe)
{
	ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);
	ASSERT(fe->fe_flags & FLOWENTF_DESTROYED);
	ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
	ASSERT(fe->fe_route == NULL);

	ASSERT(fe->fe_stats != NULL);
	flow_stats_release(fe->fe_stats);
	fe->fe_stats = NULL;

	/*
	 * The namespace reservation is released only when the very last
	 * reference to the flow goes away.
	 */
	if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
	    NETNS_TOKEN_VALID(&fe->fe_port_reservation)) {
		flow_namespace_destroy(&fe->fe_port_reservation);
		ASSERT(!NETNS_TOKEN_VALID(&fe->fe_port_reservation));
	}
	fe->fe_port_reservation = NULL;

	if (!(fe->fe_flags & FLOWENTF_EXTRL_PROTO) &&
	    protons_token_is_valid(fe->fe_proto_reservation)) {
		protons_release(&fe->fe_proto_reservation);
	}
	fe->fe_proto_reservation = NULL;

	if (key_custom_ipsec_token_is_valid(fe->fe_ipsec_reservation)) {
		key_release_custom_ipsec(&fe->fe_ipsec_reservation);
	}
	fe->fe_ipsec_reservation = NULL;

	if (!(fe->fe_flags & FLOWENTF_EXTRL_FLOWID) && (fe->fe_flowid != 0)) {
		flowidns_release_flowid(fe->fe_flowid);
		fe->fe_flowid = 0;
	}

	skmem_cache_free(sk_fe_cache, fe);
}

static __inline__ int
fe_id_cmp(const struct flow_entry *a, const struct flow_entry *b)
{
	return uuid_compare(a->fe_uuid, b->fe_uuid);
}

#if SK_LOG
SK_NO_INLINE_ATTRIBUTE
char *
fk_as_string(const struct flow_key *fk, char *dst, size_t dsz)
{
	int af;
	char src_s[MAX_IPv6_STR_LEN];
	char dst_s[MAX_IPv6_STR_LEN];

	af = fk->fk_ipver == 4 ? AF_INET : AF_INET6;

	(void) inet_ntop(af, &fk->fk_src, src_s, sizeof(src_s));
	(void) inet_ntop(af, &fk->fk_dst, dst_s, sizeof(dst_s));
	(void) snprintf(dst, dsz,
	    "ipver=%u,src=%s,dst=%s,proto=0x%02u,sport=%u,dport=%u "
	    "mask=%08x,hash=%08x",
	    fk->fk_ipver, src_s, dst_s, fk->fk_proto, ntohs(fk->fk_sport),
	    ntohs(fk->fk_dport), fk->fk_mask, flow_key_hash(fk));

	return dst;
}

SK_NO_INLINE_ATTRIBUTE
char *
fe_as_string(const struct flow_entry *fe, char *dst, size_t dsz)
{
	char keybuf[FLOWKEY_DBGBUF_SIZE]; /* just for debug messages */
	uuid_string_t uuidstr;

	fk_as_string(&fe->fe_key, keybuf, sizeof(keybuf));

	(void) snprintf(dst, dsz,
	    "fe 0x%llx proc %s nx_port %d flow_uuid %s %s tp_proto=0x%02u",
	    SK_KVA(fe), fe->fe_proc_name, (int)fe->fe_nx_port,
	    sk_uuid_unparse(fe->fe_uuid, uuidstr),
	    keybuf, fe->fe_transport_protocol);

	return dst;
}
#endif /* SK_LOG */