xref: /xnu-8020.121.3/bsd/skywalk/nexus/flowswitch/flow/flow_entry.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1 /*
2  * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 #include <skywalk/os_skywalk_private.h>
30 
31 #include <dev/random/randomdev.h>
32 #include <net/flowhash.h>
33 #include <netkey/key.h>
34 
35 #include <skywalk/nexus/flowswitch/fsw_var.h>
36 #include <skywalk/nexus/flowswitch/flow/flow_var.h>
37 #include <skywalk/nexus/netif/nx_netif.h>
38 
39 struct flow_entry *fe_alloc(boolean_t);
40 static void fe_free(struct flow_entry *);
41 static int fe_id_cmp(const struct flow_entry *, const struct flow_entry *);
42 static void fe_stats_init(struct flow_entry *);
43 static void fe_stats_update(struct flow_entry *);
44 
45 RB_GENERATE_PREV(flow_entry_id_tree, flow_entry, fe_id_link, fe_id_cmp);
46 
47 os_refgrp_decl(static, flow_entry_refgrp, "flow_entry", NULL);
48 
49 extern struct zone *sk_fed_zone;
50 
51 const struct flow_key fk_mask_2tuple
52 __sk_aligned(16) =
53 {
54 	.fk_mask = FKMASK_2TUPLE,
55 	.fk_ipver = 0,
56 	.fk_proto = 0xff,
57 	.fk_sport = 0xffff,
58 	.fk_dport = 0,
59 	.fk_src._addr64[0] = 0,
60 	.fk_src._addr64[1] = 0,
61 	.fk_dst._addr64[0] = 0,
62 	.fk_dst._addr64[1] = 0,
63 	.fk_pad[0] = 0,
64 };
65 
66 const struct flow_key fk_mask_3tuple
67 __sk_aligned(16) =
68 {
69 	.fk_mask = FKMASK_3TUPLE,
70 	.fk_ipver = 0xff,
71 	.fk_proto = 0xff,
72 	.fk_sport = 0xffff,
73 	.fk_dport = 0,
74 	.fk_src._addr64[0] = 0xffffffffffffffffULL,
75 	.fk_src._addr64[1] = 0xffffffffffffffffULL,
76 	.fk_dst._addr64[0] = 0,
77 	.fk_dst._addr64[1] = 0,
78 	.fk_pad[0] = 0,
79 };
80 
81 const struct flow_key fk_mask_4tuple
82 __sk_aligned(16) =
83 {
84 	.fk_mask = FKMASK_4TUPLE,
85 	.fk_ipver = 0xff,
86 	.fk_proto = 0xff,
87 	.fk_sport = 0xffff,
88 	.fk_dport = 0xffff,
89 	.fk_src._addr64[0] = 0xffffffffffffffffULL,
90 	.fk_src._addr64[1] = 0xffffffffffffffffULL,
91 	.fk_dst._addr64[0] = 0,
92 	.fk_dst._addr64[1] = 0,
93 	.fk_pad[0] = 0,
94 };
95 
96 const struct flow_key fk_mask_5tuple
97 __sk_aligned(16) =
98 {
99 	.fk_mask = FKMASK_5TUPLE,
100 	.fk_ipver = 0xff,
101 	.fk_proto = 0xff,
102 	.fk_sport = 0xffff,
103 	.fk_dport = 0xffff,
104 	.fk_src._addr64[0] = 0xffffffffffffffffULL,
105 	.fk_src._addr64[1] = 0xffffffffffffffffULL,
106 	.fk_dst._addr64[0] = 0xffffffffffffffffULL,
107 	.fk_dst._addr64[1] = 0xffffffffffffffffULL,
108 	.fk_pad[0] = 0,
109 };
110 
111 const struct flow_key fk_mask_ipflow1
112 __sk_aligned(16) =
113 {
114 	.fk_mask = FKMASK_IPFLOW1,
115 	.fk_ipver = 0,
116 	.fk_proto = 0xff,
117 	.fk_sport = 0,
118 	.fk_dport = 0,
119 	.fk_src._addr64[0] = 0,
120 	.fk_src._addr64[1] = 0,
121 	.fk_dst._addr64[0] = 0,
122 	.fk_dst._addr64[1] = 0,
123 	.fk_pad[0] = 0,
124 };
125 
126 const struct flow_key fk_mask_ipflow2
127 __sk_aligned(16) =
128 {
129 	.fk_mask = FKMASK_IPFLOW2,
130 	.fk_ipver = 0xff,
131 	.fk_proto = 0xff,
132 	.fk_sport = 0,
133 	.fk_dport = 0,
134 	.fk_src._addr64[0] = 0xffffffffffffffffULL,
135 	.fk_src._addr64[1] = 0xffffffffffffffffULL,
136 	.fk_dst._addr64[0] = 0,
137 	.fk_dst._addr64[1] = 0,
138 	.fk_pad[0] = 0,
139 };
140 
141 const struct flow_key fk_mask_ipflow3
142 __sk_aligned(16) =
143 {
144 	.fk_mask = FKMASK_IPFLOW3,
145 	.fk_ipver = 0xff,
146 	.fk_proto = 0xff,
147 	.fk_sport = 0,
148 	.fk_dport = 0,
149 	.fk_src._addr64[0] = 0xffffffffffffffffULL,
150 	.fk_src._addr64[1] = 0xffffffffffffffffULL,
151 	.fk_dst._addr64[0] = 0xffffffffffffffffULL,
152 	.fk_dst._addr64[1] = 0xffffffffffffffffULL,
153 	.fk_pad[0] = 0,
154 };
155 
156 struct flow_owner *
flow_owner_find_by_pid(struct flow_owner_bucket * fob,pid_t pid,void * context,bool low_latency)157 flow_owner_find_by_pid(struct flow_owner_bucket *fob, pid_t pid, void *context,
158     bool low_latency)
159 {
160 	struct flow_owner find = { .fo_context = context, .fo_pid = pid,
161 		                   .fo_low_latency = low_latency};
162 
163 	ASSERT(low_latency == true || low_latency == false);
164 	FOB_LOCK_ASSERT_HELD(fob);
165 	return RB_FIND(flow_owner_tree, &fob->fob_owner_head, &find);
166 }
167 
168 struct flow_entry *
flow_entry_find_by_uuid(struct flow_owner * fo,uuid_t uuid)169 flow_entry_find_by_uuid(struct flow_owner *fo, uuid_t uuid)
170 {
171 	struct flow_entry find, *fe = NULL;
172 	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
173 
174 	uuid_copy(find.fe_uuid, uuid);
175 	fe = RB_FIND(flow_entry_id_tree, &fo->fo_flow_entry_id_head, &find);
176 	if (fe != NULL) {
177 		flow_entry_retain(fe);
178 	}
179 
180 	return fe;
181 }
182 
183 /* writer-lock must be owned for memory management functions */
184 struct flow_entry *
flow_entry_alloc(struct flow_owner * fo,struct nx_flow_req * req,int * perr)185 flow_entry_alloc(struct flow_owner *fo, struct nx_flow_req *req, int *perr)
186 {
187 	SK_LOG_VAR(char dbgbuf[FLOWENTRY_DBGBUF_SIZE]);
188 	nexus_port_t nx_port = req->nfr_nx_port;
189 	struct flow_entry *fe = NULL;
190 	flowadv_idx_t fadv_idx = FLOWADV_IDX_NONE;
191 	struct nexus_adapter *dev_na;
192 	struct nx_netif *nif;
193 	int err;
194 
195 	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
196 	ASSERT(nx_port != NEXUS_PORT_ANY);
197 	ASSERT(!fo->fo_nx_port_destroyed);
198 
199 	*perr = 0;
200 
201 	struct flow_key key __sk_aligned(16);
202 	err = flow_req2key(req, &key);
203 	if (__improbable(err != 0)) {
204 		SK_ERR("invalid request (err %d)", err);
205 		goto done;
206 	}
207 
208 	struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
209 	fe = flow_mgr_find_conflicting_fe(fm, &key);
210 	if (fe != NULL) {
211 		SK_ERR("entry \"%s\" already exists at fe 0x%llx "
212 		    "flags 0x%b %s(%d)", fe_as_string(fe,
213 		    dbgbuf, sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags,
214 		    FLOWENTF_BITS, fe->fe_proc_name,
215 		    fe->fe_pid);
216 		/* don't return it */
217 		flow_entry_release(&fe);
218 		err = EEXIST;
219 		goto done;
220 	}
221 
222 	if ((req->nfr_flags & NXFLOWREQF_FLOWADV) &&
223 	    (flow_owner_flowadv_index_alloc(fo, &fadv_idx) != 0)) {
224 		SK_ERR("failed to alloc flowadv index for flow %s",
225 		    sk_uuid_unparse(req->nfr_flow_uuid, dbgbuf));
226 		/* XXX: what is the most appropriate error code ? */
227 		err = ENOSPC;
228 		goto done;
229 	}
230 
231 	fe = fe_alloc(TRUE);
232 	if (__improbable(fe == NULL)) {
233 		err = ENOMEM;
234 		goto done;
235 	}
236 
237 	fe->fe_key = key;
238 	if (req->nfr_route != NULL) {
239 		fe->fe_laddr_gencnt = req->nfr_route->fr_laddr_gencnt;
240 	} else {
241 		fe->fe_laddr_gencnt = req->nfr_saddr_gencnt;
242 	}
243 
244 	if (__improbable(req->nfr_flags & NXFLOWREQF_LISTENER)) {
245 		/* mark this as listener mode */
246 		atomic_bitset_32(&fe->fe_flags, FLOWENTF_LISTENER);
247 	} else {
248 		ASSERT((fe->fe_key.fk_ipver == IPVERSION &&
249 		    fe->fe_key.fk_src4.s_addr != INADDR_ANY) ||
250 		    (fe->fe_key.fk_ipver == IPV6_VERSION &&
251 		    !IN6_IS_ADDR_UNSPECIFIED(&fe->fe_key.fk_src6)));
252 
253 		/* mark this as connected mode */
254 		atomic_bitset_32(&fe->fe_flags, FLOWENTF_CONNECTED);
255 	}
256 
257 	fe->fe_port_reservation = req->nfr_port_reservation;
258 	req->nfr_port_reservation = NULL;
259 	if (req->nfr_flags & NXFLOWREQF_EXT_PORT_RSV) {
260 		fe->fe_flags |= FLOWENTF_EXTRL_PORT;
261 	}
262 	fe->fe_proto_reservation = req->nfr_proto_reservation;
263 	req->nfr_proto_reservation = NULL;
264 	if (req->nfr_flags & NXFLOWREQF_EXT_PROTO_RSV) {
265 		fe->fe_flags |= FLOWENTF_EXTRL_PROTO;
266 	}
267 	fe->fe_ipsec_reservation = req->nfr_ipsec_reservation;
268 	req->nfr_ipsec_reservation = NULL;
269 
270 	fe->fe_tx_process = dp_flow_tx_process;
271 	fe->fe_rx_process = dp_flow_rx_process;
272 
273 	if (nx_port == FSW_VP_HOST) {
274 		fe->fe_rx_process = fsw_host_rx;
275 	}
276 
277 	dev_na = fo->fo_fsw->fsw_dev_ch->ch_na;
278 	nif = NX_NETIF_PRIVATE(dev_na->na_nx);
279 	if (NETIF_LLINK_ENABLED(nif)) {
280 		fe->fe_qset = nx_netif_find_qset(nif, req->nfr_qset_id);
281 	}
282 	if (req->nfr_flags & NXFLOWREQF_LOW_LATENCY) {
283 		atomic_bitset_32(&fe->fe_flags, FLOWENTF_LOW_LATENCY);
284 	}
285 
286 	fe->fe_transport_protocol = req->nfr_transport_protocol;
287 	if (sk_fsw_rx_agg_tcp &&
288 	    (fo->fo_fsw->fsw_nx->nx_prov->nxprov_params->nxp_max_frags > 1) &&
289 	    (fe->fe_key.fk_proto == IPPROTO_TCP) &&
290 	    (fe->fe_key.fk_mask == FKMASK_5TUPLE)) {
291 		fe->fe_rx_process = flow_rx_agg_tcp;
292 	}
293 	uuid_copy(fe->fe_uuid, req->nfr_flow_uuid);
294 	if ((req->nfr_flags & NXFLOWREQF_LISTENER) == 0 &&
295 	    (req->nfr_flags & NXFLOWREQF_TRACK) != 0) {
296 		switch (req->nfr_ip_protocol) {
297 		case IPPROTO_TCP:
298 		case IPPROTO_UDP:
299 			atomic_bitset_32(&fe->fe_flags, FLOWENTF_TRACK);
300 			break;
301 		default:
302 			break;
303 		}
304 	}
305 
306 	if (req->nfr_flags & NXFLOWREQF_QOS_MARKING) {
307 		atomic_bitset_32(&fe->fe_flags, FLOWENTF_QOS_MARKING);
308 	}
309 
310 	if (req->nfr_route != NULL) {
311 		fe->fe_route = req->nfr_route;
312 		req->nfr_route = NULL;
313 	}
314 
315 	fe->fe_nx_port = nx_port;
316 	fe->fe_adv_idx = fadv_idx;
317 
318 	if (fe->fe_adv_idx != FLOWADV_IDX_NONE && fo->fo_nx_port_na != NULL) {
319 		na_flowadv_entry_alloc(fo->fo_nx_port_na, fe->fe_uuid,
320 		    fe->fe_adv_idx);
321 	}
322 
323 	if (KPKT_VALID_SVC(req->nfr_svc_class)) {
324 		fe->fe_svc_class = (kern_packet_svc_class_t)req->nfr_svc_class;
325 	} else {
326 		fe->fe_svc_class = KPKT_SC_BE;
327 	}
328 
329 	uuid_copy(fe->fe_eproc_uuid, req->nfr_euuid);
330 	fe->fe_policy_id = req->nfr_policy_id;
331 	fe->fe_inp_flowhash = req->nfr_inp_flowhash;
332 
333 	err = flow_mgr_flow_hash_mask_add(fm, fe->fe_key.fk_mask);
334 	ASSERT(err == 0);
335 
336 	fe->fe_key_hash = flow_key_hash(&fe->fe_key);
337 	err = cuckoo_hashtable_add_with_hash(fm->fm_flow_table, &fe->fe_cnode,
338 	    fe->fe_key_hash);
339 	if (err != 0) {
340 		SK_ERR("flow table add failed (err %d)", err);
341 		flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
342 		goto done;
343 	}
344 
345 	RB_INSERT(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
346 	flow_entry_retain(fe);  /* one refcnt in id_tree */
347 
348 	*(struct nx_flowswitch **)(uintptr_t)&fe->fe_fsw = fo->fo_fsw;
349 	fe->fe_pid = fo->fo_pid;
350 	if (req->nfr_epid != -1 && req->nfr_epid != fo->fo_pid) {
351 		fe->fe_epid = req->nfr_epid;
352 		proc_name(fe->fe_epid, fe->fe_eproc_name,
353 		    sizeof(fe->fe_eproc_name));
354 	} else {
355 		fe->fe_epid = -1;
356 	}
357 
358 	(void) snprintf(fe->fe_proc_name, sizeof(fe->fe_proc_name), "%s",
359 	    fo->fo_name);
360 
361 	fe_stats_init(fe);
362 	flow_stats_retain(fe->fe_stats);
363 	req->nfr_flow_stats = fe->fe_stats;
364 
365 #if SK_LOG
366 	SK_DF(SK_VERB_FLOW, "allocated entry \"%s\" fe 0x%llx flags 0x%b "
367 	    "[fo 0x%llx ]", fe_as_string(fe, dbgbuf,
368 	    sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS,
369 	    SK_KVA(fo));
370 #endif /* SK_LOG */
371 
372 done:
373 	if (err != 0) {
374 		if (fadv_idx != FLOWADV_IDX_NONE) {
375 			flow_owner_flowadv_index_free(fo, fadv_idx);
376 		}
377 		if (fe != NULL) {
378 			flow_entry_release(&fe);
379 		}
380 	}
381 	*perr = err;
382 	return fe;
383 }
384 
385 void
flow_entry_teardown(struct flow_owner * fo,struct flow_entry * fe)386 flow_entry_teardown(struct flow_owner *fo, struct flow_entry *fe)
387 {
388 #if SK_LOG
389 	char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
390 	SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b [fo 0x%llx] "
391 	    "non_via %d withdrawn %d", fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
392 	    SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS, SK_KVA(fo),
393 	    fe->fe_want_nonviable, fe->fe_want_withdraw);
394 #endif /* SK_LOG */
395 	struct nx_flowswitch *fsw = fo->fo_fsw;
396 
397 	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
398 
399 	ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
400 	ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
401 	ASSERT(fsw != NULL);
402 
403 	if (atomic_test_set_32(&fe->fe_want_nonviable, 1, 0)) {
404 		ASSERT(fsw->fsw_pending_nonviable != 0);
405 		atomic_add_32(&fsw->fsw_pending_nonviable, -1);
406 		atomic_bitset_32(&fe->fe_flags, FLOWENTF_NONVIABLE);
407 	}
408 
409 	/* always withdraw namespace during tear down */
410 	if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
411 	    !(fe->fe_flags & FLOWENTF_WITHDRAWN)) {
412 		atomic_bitset_32(&fe->fe_flags, FLOWENTF_WITHDRAWN);
413 		atomic_set_32(&fe->fe_want_withdraw, 0);
414 		/* local port is now inactive; not eligible for offload */
415 		flow_namespace_withdraw(&fe->fe_port_reservation);
416 	}
417 
418 	/* we may get here multiple times, so check */
419 	if (!(fe->fe_flags & FLOWENTF_TORN_DOWN)) {
420 		atomic_bitset_32(&fe->fe_flags, FLOWENTF_TORN_DOWN);
421 		if (fe->fe_adv_idx != FLOWADV_IDX_NONE) {
422 			if (fo->fo_nx_port_na != NULL) {
423 				na_flowadv_entry_free(fo->fo_nx_port_na,
424 				    fe->fe_uuid, fe->fe_adv_idx);
425 			}
426 			flow_owner_flowadv_index_free(fo, fe->fe_adv_idx);
427 			fe->fe_adv_idx = FLOWADV_IDX_NONE;
428 		}
429 	}
430 	ASSERT(fe->fe_adv_idx == FLOWADV_IDX_NONE);
431 	ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);
432 }
433 
434 void
flow_entry_destroy(struct flow_owner * fo,struct flow_entry * fe,bool nolinger,void * close_params)435 flow_entry_destroy(struct flow_owner *fo, struct flow_entry *fe, bool nolinger,
436     void *close_params)
437 {
438 	struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
439 	int err;
440 
441 	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
442 
443 	/* one in flow_table, one in id_tree, one here */
444 	ASSERT(flow_entry_refcnt(fe) > 2);
445 
446 	flow_entry_teardown(fo, fe);
447 
448 	err = flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
449 	ASSERT(err == 0);
450 
451 	uint32_t hash;
452 	hash = flow_key_hash(&fe->fe_key);
453 	cuckoo_hashtable_del(fm->fm_flow_table, &fe->fe_cnode, hash);
454 
455 	RB_REMOVE(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
456 	struct flow_entry *tfe = fe;
457 	flow_entry_release(&tfe);
458 
459 	ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
460 	atomic_bitset_32(&fe->fe_flags, FLOWENTF_DESTROYED);
461 
462 	if (fe->fe_transport_protocol == IPPROTO_QUIC) {
463 		if (!nolinger && close_params != NULL) {
464 			fsw_flow_abort_quic(fe, close_params);
465 		}
466 		flow_entry_release(&fe);
467 	} else if (nolinger || !(fe->fe_flags & FLOWENTF_WAIT_CLOSE)) {
468 		flow_entry_release(&fe);
469 	} else {
470 		fsw_linger_insert(fe);
471 	}
472 }
473 
474 uint32_t
flow_entry_refcnt(struct flow_entry * fe)475 flow_entry_refcnt(struct flow_entry *fe)
476 {
477 	return os_ref_get_count(&fe->fe_refcnt);
478 }
479 
480 void
flow_entry_retain(struct flow_entry * fe)481 flow_entry_retain(struct flow_entry *fe)
482 {
483 	os_ref_retain(&fe->fe_refcnt);
484 }
485 
486 void
flow_entry_release(struct flow_entry ** pfe)487 flow_entry_release(struct flow_entry **pfe)
488 {
489 	struct flow_entry *fe = *pfe;
490 	ASSERT(fe != NULL);
491 	*pfe = NULL;    /* caller lose reference */
492 #if SK_LOG
493 	if (__improbable(sk_verbose != 0)) {
494 		char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
495 		SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b",
496 		    fe_as_string(fe, dbgbuf, sizeof(dbgbuf)), SK_KVA(fe),
497 		    fe->fe_flags, FLOWENTF_BITS);
498 	}
499 #endif /* SK_LOG */
500 
501 	if (__improbable(os_ref_release(&fe->fe_refcnt) == 0)) {
502 		fe->fe_nx_port = NEXUS_PORT_ANY;
503 		if (fe->fe_route != NULL) {
504 			flow_route_release(fe->fe_route);
505 			fe->fe_route = NULL;
506 		}
507 		if (fe->fe_qset != NULL) {
508 			nx_netif_qset_release(&fe->fe_qset);
509 			ASSERT(fe->fe_qset == NULL);
510 		}
511 		fe_free(fe);
512 	}
513 }
514 
515 struct flow_entry_dead *
flow_entry_dead_alloc(zalloc_flags_t how)516 flow_entry_dead_alloc(zalloc_flags_t how)
517 {
518 	struct flow_entry_dead *fed;
519 
520 	fed = zalloc_flags(sk_fed_zone, how | Z_ZERO);
521 	if (fed != NULL) {
522 		SK_DF(SK_VERB_MEM, "fed 0x%llx ALLOC", SK_KVA(fed));
523 	}
524 	return fed;
525 }
526 
527 void
flow_entry_dead_free(struct flow_entry_dead * fed)528 flow_entry_dead_free(struct flow_entry_dead *fed)
529 {
530 	SK_DF(SK_VERB_MEM, "fed 0x%llx FREE", SK_KVA(fed));
531 	zfree(sk_fed_zone, fed);
532 }
533 
534 static void
fe_stats_init(struct flow_entry * fe)535 fe_stats_init(struct flow_entry *fe)
536 {
537 	struct nx_flowswitch *fsw = fe->fe_fsw;
538 	struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;
539 
540 	ASSERT(fe->fe_stats != NULL);
541 	ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);
542 
543 	bzero(sf, sizeof(*sf));
544 	uuid_copy(sf->sf_nx_uuid, fsw->fsw_nx->nx_uuid);
545 	(void) strlcpy(sf->sf_if_name, fsw->fsw_flow_mgr->fm_name, IFNAMSIZ);
546 	sf->sf_if_index = fsw->fsw_ifp->if_index;
547 	sf->sf_pid = fe->fe_pid;
548 	sf->sf_epid = fe->fe_epid;
549 	(void) snprintf(sf->sf_proc_name, sizeof(sf->sf_proc_name), "%s",
550 	    fe->fe_proc_name);
551 	(void) snprintf(sf->sf_eproc_name, sizeof(sf->sf_eproc_name), "%s",
552 	    fe->fe_eproc_name);
553 
554 	sf->sf_nx_port = fe->fe_nx_port;
555 	sf->sf_key = fe->fe_key;
556 	sf->sf_protocol = fe->fe_transport_protocol;
557 	sf->sf_svc_class = fe->fe_svc_class;
558 	sf->sf_adv_idx = fe->fe_adv_idx;
559 
560 	if (fe->fe_flags & FLOWENTF_TRACK) {
561 		sf->sf_flags |= SFLOWF_TRACK;
562 	}
563 	if (fe->fe_flags & FLOWENTF_LISTENER) {
564 		sf->sf_flags |= SFLOWF_LISTENER;
565 	}
566 	if (fe->fe_route != NULL && fe->fe_route->fr_flags & FLOWRTF_ONLINK) {
567 		sf->sf_flags |= SFLOWF_ONLINK;
568 	}
569 
570 	fe_stats_update(fe);
571 }
572 
573 static void
fe_stats_update(struct flow_entry * fe)574 fe_stats_update(struct flow_entry *fe)
575 {
576 	struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;
577 
578 	ASSERT(fe->fe_stats != NULL);
579 	ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);
580 
581 	if (fe->fe_flags & FLOWENTF_CONNECTED) {
582 		sf->sf_flags |= SFLOWF_CONNECTED;
583 	}
584 	if (fe->fe_flags & FLOWENTF_QOS_MARKING) {
585 		sf->sf_flags |= SFLOWF_QOS_MARKING;
586 	}
587 	if (fe->fe_flags & FLOWENTF_WAIT_CLOSE) {
588 		sf->sf_flags |= SFLOWF_WAIT_CLOSE;
589 	}
590 	if (fe->fe_flags & FLOWENTF_CLOSE_NOTIFY) {
591 		sf->sf_flags |= SFLOWF_CLOSE_NOTIFY;
592 	}
593 	if (fe->fe_flags & FLOWENTF_ABORTED) {
594 		sf->sf_flags |= SFLOWF_ABORTED;
595 	}
596 	if (fe->fe_flags & FLOWENTF_NONVIABLE) {
597 		sf->sf_flags |= SFLOWF_NONVIABLE;
598 	}
599 	if (fe->fe_flags & FLOWENTF_WITHDRAWN) {
600 		sf->sf_flags |= SFLOWF_WITHDRAWN;
601 	}
602 	if (fe->fe_flags & FLOWENTF_TORN_DOWN) {
603 		sf->sf_flags |= SFLOWF_TORN_DOWN;
604 	}
605 	if (fe->fe_flags & FLOWENTF_DESTROYED) {
606 		sf->sf_flags |= SFLOWF_DESTROYED;
607 	}
608 	if (fe->fe_flags & FLOWENTF_LINGERING) {
609 		sf->sf_flags |= SFLOWF_LINGERING;
610 	}
611 	if (fe->fe_flags & FLOWENTF_LOW_LATENCY) {
612 		sf->sf_flags |= SFLOWF_LOW_LATENCY;
613 	}
614 
615 	sf->sf_bucket_idx = SFLOW_BUCKET_NONE;
616 
617 	sf->sf_ltrack.sft_state = fe->fe_ltrack.fse_state;
618 	sf->sf_ltrack.sft_seq = fe->fe_ltrack.fse_seqlo;
619 	sf->sf_ltrack.sft_max_win = fe->fe_ltrack.fse_max_win;
620 	sf->sf_ltrack.sft_wscale = fe->fe_ltrack.fse_wscale;
621 	sf->sf_rtrack.sft_state = fe->fe_rtrack.fse_state;
622 	sf->sf_rtrack.sft_seq = fe->fe_rtrack.fse_seqlo;
623 	sf->sf_rtrack.sft_max_win = fe->fe_rtrack.fse_max_win;
624 }
625 
626 void
flow_entry_stats_get(struct flow_entry * fe,struct sk_stats_flow * sf)627 flow_entry_stats_get(struct flow_entry *fe, struct sk_stats_flow *sf)
628 {
629 	_CASSERT(sizeof(fe->fe_stats->fs_stats) == sizeof(*sf));
630 
631 	fe_stats_update(fe);
632 	bcopy(&fe->fe_stats->fs_stats, sf, sizeof(*sf));
633 }
634 
635 struct flow_entry *
fe_alloc(boolean_t can_block)636 fe_alloc(boolean_t can_block)
637 {
638 	struct flow_entry *fe;
639 
640 	_CASSERT((offsetof(struct flow_entry, fe_key) % 16) == 0);
641 
642 	fe = skmem_cache_alloc(sk_fe_cache,
643 	    can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
644 	if (fe == NULL) {
645 		return NULL;
646 	}
647 
648 	/*
649 	 * fe_key is 16-bytes aligned which requires fe to begin on
650 	 * a 16-bytes boundary as well.  This alignment is specified
651 	 * at sk_fe_cache creation time and we assert here.
652 	 */
653 	ASSERT(IS_P2ALIGNED(fe, 16));
654 	bzero(fe, sk_fe_size);
655 
656 	fe->fe_stats = flow_stats_alloc(can_block);
657 	if (fe->fe_stats == NULL) {
658 		skmem_cache_free(sk_fe_cache, fe);
659 		return NULL;
660 	}
661 
662 	SK_DF(SK_VERB_MEM, "fe 0x%llx ALLOC", SK_KVA(fe));
663 
664 	os_ref_init(&fe->fe_refcnt, &flow_entry_refgrp);
665 
666 	KPKTQ_INIT(&fe->fe_rx_pktq);
667 	KPKTQ_INIT(&fe->fe_tx_pktq);
668 
669 	return fe;
670 }
671 
672 static void
fe_free(struct flow_entry * fe)673 fe_free(struct flow_entry *fe)
674 {
675 	ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);
676 	ASSERT(fe->fe_flags & FLOWENTF_DESTROYED);
677 	ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
678 	ASSERT(fe->fe_route == NULL);
679 
680 	ASSERT(fe->fe_stats != NULL);
681 	flow_stats_release(fe->fe_stats);
682 	fe->fe_stats = NULL;
683 
684 	/* only at very last existence of flow releases namespace reservation */
685 	if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
686 	    NETNS_TOKEN_VALID(&fe->fe_port_reservation)) {
687 		flow_namespace_destroy(&fe->fe_port_reservation);
688 		ASSERT(!NETNS_TOKEN_VALID(&fe->fe_port_reservation));
689 	}
690 	fe->fe_port_reservation = NULL;
691 
692 	if (!(fe->fe_flags & FLOWENTF_EXTRL_PROTO) &&
693 	    protons_token_is_valid(fe->fe_proto_reservation)) {
694 		protons_release(&fe->fe_proto_reservation);
695 	}
696 	fe->fe_proto_reservation = NULL;
697 
698 	if (key_custom_ipsec_token_is_valid(fe->fe_ipsec_reservation)) {
699 		key_release_custom_ipsec(&fe->fe_ipsec_reservation);
700 	}
701 	fe->fe_ipsec_reservation = NULL;
702 
703 	skmem_cache_free(sk_fe_cache, fe);
704 }
705 
706 static __inline__ int
fe_id_cmp(const struct flow_entry * a,const struct flow_entry * b)707 fe_id_cmp(const struct flow_entry *a, const struct flow_entry *b)
708 {
709 	return uuid_compare(a->fe_uuid, b->fe_uuid);
710 }
711 
712 #if SK_LOG
713 SK_NO_INLINE_ATTRIBUTE
714 char *
fk_as_string(const struct flow_key * fk,char * dst,size_t dsz)715 fk_as_string(const struct flow_key *fk, char *dst, size_t dsz)
716 {
717 	int af;
718 	char src_s[MAX_IPv6_STR_LEN];
719 	char dst_s[MAX_IPv6_STR_LEN];
720 
721 	af = fk->fk_ipver == 4 ? AF_INET : AF_INET6;
722 
723 	(void) inet_ntop(af, &fk->fk_src, src_s, sizeof(src_s));
724 	(void) inet_ntop(af, &fk->fk_dst, dst_s, sizeof(dst_s));
725 	(void) snprintf(dst, dsz,
726 	    "ipver=%u,src=%s,dst=%s,proto=0x%02u,sport=%u,dport=%u "
727 	    "mask=%08x,hash=%08x",
728 	    fk->fk_ipver, src_s, dst_s, fk->fk_proto, ntohs(fk->fk_sport),
729 	    ntohs(fk->fk_dport), fk->fk_mask, flow_key_hash(fk));
730 
731 	return dst;
732 }
733 
734 SK_NO_INLINE_ATTRIBUTE
735 char *
fe_as_string(const struct flow_entry * fe,char * dst,size_t dsz)736 fe_as_string(const struct flow_entry *fe, char *dst, size_t dsz)
737 {
738 	char keybuf[FLOWKEY_DBGBUF_SIZE]; /* just for debug message */
739 	uuid_string_t uuidstr;
740 
741 	fk_as_string(&fe->fe_key, keybuf, sizeof(keybuf));
742 
743 	(void) snprintf(dst, dsz,
744 	    "fe 0x%llx proc %s nx_port %d flow_uuid %s %s tp_proto=0x%02u",
745 	    SK_KVA(fe), fe->fe_proc_name, (int)fe->fe_nx_port,
746 	    sk_uuid_unparse(fe->fe_uuid, uuidstr),
747 	    keybuf, fe->fe_transport_protocol);
748 
749 	return dst;
750 }
751 #endif /* SK_LOG */
752