1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30
31 #include <dev/random/randomdev.h>
32 #include <net/flowhash.h>
33 #include <netkey/key.h>
34
35 #include <skywalk/nexus/flowswitch/fsw_var.h>
36 #include <skywalk/nexus/flowswitch/flow/flow_var.h>
37 #include <skywalk/nexus/netif/nx_netif.h>
38
39 struct flow_entry *fe_alloc(boolean_t);
40 static void fe_free(struct flow_entry *);
41 static int fe_id_cmp(const struct flow_entry *, const struct flow_entry *);
42 static void fe_stats_init(struct flow_entry *);
43 static void fe_stats_update(struct flow_entry *);
44
45 RB_GENERATE_PREV(flow_entry_id_tree, flow_entry, fe_id_link, fe_id_cmp);
46
47 os_refgrp_decl(static, flow_entry_refgrp, "flow_entry", NULL);
48
49 extern struct zone *sk_fed_zone;
50
51 const struct flow_key fk_mask_2tuple
52 __sk_aligned(16) =
53 {
54 .fk_mask = FKMASK_2TUPLE,
55 .fk_ipver = 0,
56 .fk_proto = 0xff,
57 .fk_sport = 0xffff,
58 .fk_dport = 0,
59 .fk_src._addr64[0] = 0,
60 .fk_src._addr64[1] = 0,
61 .fk_dst._addr64[0] = 0,
62 .fk_dst._addr64[1] = 0,
63 .fk_pad[0] = 0,
64 };
65
66 const struct flow_key fk_mask_3tuple
67 __sk_aligned(16) =
68 {
69 .fk_mask = FKMASK_3TUPLE,
70 .fk_ipver = 0xff,
71 .fk_proto = 0xff,
72 .fk_sport = 0xffff,
73 .fk_dport = 0,
74 .fk_src._addr64[0] = 0xffffffffffffffffULL,
75 .fk_src._addr64[1] = 0xffffffffffffffffULL,
76 .fk_dst._addr64[0] = 0,
77 .fk_dst._addr64[1] = 0,
78 .fk_pad[0] = 0,
79 };
80
81 const struct flow_key fk_mask_4tuple
82 __sk_aligned(16) =
83 {
84 .fk_mask = FKMASK_4TUPLE,
85 .fk_ipver = 0xff,
86 .fk_proto = 0xff,
87 .fk_sport = 0xffff,
88 .fk_dport = 0xffff,
89 .fk_src._addr64[0] = 0xffffffffffffffffULL,
90 .fk_src._addr64[1] = 0xffffffffffffffffULL,
91 .fk_dst._addr64[0] = 0,
92 .fk_dst._addr64[1] = 0,
93 .fk_pad[0] = 0,
94 };
95
96 const struct flow_key fk_mask_5tuple
97 __sk_aligned(16) =
98 {
99 .fk_mask = FKMASK_5TUPLE,
100 .fk_ipver = 0xff,
101 .fk_proto = 0xff,
102 .fk_sport = 0xffff,
103 .fk_dport = 0xffff,
104 .fk_src._addr64[0] = 0xffffffffffffffffULL,
105 .fk_src._addr64[1] = 0xffffffffffffffffULL,
106 .fk_dst._addr64[0] = 0xffffffffffffffffULL,
107 .fk_dst._addr64[1] = 0xffffffffffffffffULL,
108 .fk_pad[0] = 0,
109 };
110
111 const struct flow_key fk_mask_ipflow1
112 __sk_aligned(16) =
113 {
114 .fk_mask = FKMASK_IPFLOW1,
115 .fk_ipver = 0,
116 .fk_proto = 0xff,
117 .fk_sport = 0,
118 .fk_dport = 0,
119 .fk_src._addr64[0] = 0,
120 .fk_src._addr64[1] = 0,
121 .fk_dst._addr64[0] = 0,
122 .fk_dst._addr64[1] = 0,
123 .fk_pad[0] = 0,
124 };
125
126 const struct flow_key fk_mask_ipflow2
127 __sk_aligned(16) =
128 {
129 .fk_mask = FKMASK_IPFLOW2,
130 .fk_ipver = 0xff,
131 .fk_proto = 0xff,
132 .fk_sport = 0,
133 .fk_dport = 0,
134 .fk_src._addr64[0] = 0xffffffffffffffffULL,
135 .fk_src._addr64[1] = 0xffffffffffffffffULL,
136 .fk_dst._addr64[0] = 0,
137 .fk_dst._addr64[1] = 0,
138 .fk_pad[0] = 0,
139 };
140
141 const struct flow_key fk_mask_ipflow3
142 __sk_aligned(16) =
143 {
144 .fk_mask = FKMASK_IPFLOW3,
145 .fk_ipver = 0xff,
146 .fk_proto = 0xff,
147 .fk_sport = 0,
148 .fk_dport = 0,
149 .fk_src._addr64[0] = 0xffffffffffffffffULL,
150 .fk_src._addr64[1] = 0xffffffffffffffffULL,
151 .fk_dst._addr64[0] = 0xffffffffffffffffULL,
152 .fk_dst._addr64[1] = 0xffffffffffffffffULL,
153 .fk_pad[0] = 0,
154 };
155
156 struct flow_owner *
flow_owner_find_by_pid(struct flow_owner_bucket * fob,pid_t pid,void * context,bool low_latency)157 flow_owner_find_by_pid(struct flow_owner_bucket *fob, pid_t pid, void *context,
158 bool low_latency)
159 {
160 struct flow_owner find = { .fo_context = context, .fo_pid = pid,
161 .fo_low_latency = low_latency};
162
163 ASSERT(low_latency == true || low_latency == false);
164 FOB_LOCK_ASSERT_HELD(fob);
165 return RB_FIND(flow_owner_tree, &fob->fob_owner_head, &find);
166 }
167
168 struct flow_entry *
flow_entry_find_by_uuid(struct flow_owner * fo,uuid_t uuid)169 flow_entry_find_by_uuid(struct flow_owner *fo, uuid_t uuid)
170 {
171 struct flow_entry find, *fe = NULL;
172 FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
173
174 uuid_copy(find.fe_uuid, uuid);
175 fe = RB_FIND(flow_entry_id_tree, &fo->fo_flow_entry_id_head, &find);
176 if (fe != NULL) {
177 flow_entry_retain(fe);
178 }
179
180 return fe;
181 }
182
183 /* writer-lock must be owned for memory management functions */
184 struct flow_entry *
flow_entry_alloc(struct flow_owner * fo,struct nx_flow_req * req,int * perr)185 flow_entry_alloc(struct flow_owner *fo, struct nx_flow_req *req, int *perr)
186 {
187 #if SK_LOG
188 char dbgbuf[FLOWENTRY_DBGBUF_SIZE]; /* just for debug message */
189 #endif /* SK_LOG */
190 nexus_port_t nx_port = req->nfr_nx_port;
191 struct flow_entry *fe = NULL;
192 flowadv_idx_t fadv_idx = FLOWADV_IDX_NONE;
193 struct nexus_adapter *dev_na;
194 struct nx_netif *nif;
195 int err;
196
197 FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
198 ASSERT(nx_port != NEXUS_PORT_ANY);
199 ASSERT(!fo->fo_nx_port_destroyed);
200
201 *perr = 0;
202
203 struct flow_key key __sk_aligned(16);
204 err = flow_req2key(req, &key);
205 if (__improbable(err != 0)) {
206 SK_ERR("invalid request (err %d)", err);
207 goto done;
208 }
209
210 struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
211 fe = flow_mgr_find_conflicting_fe(fm, &key);
212 if (fe != NULL) {
213 SK_ERR("entry \"%s\" already exists at fe 0x%llx "
214 "flags 0x%b %s(%d)", fe_as_string(fe,
215 dbgbuf, sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags,
216 FLOWENTF_BITS, fe->fe_proc_name,
217 fe->fe_pid);
218 /* don't return it */
219 flow_entry_release(&fe);
220 err = EEXIST;
221 goto done;
222 }
223
224 if ((req->nfr_flags & NXFLOWREQF_FLOWADV) &&
225 (flow_owner_flowadv_index_alloc(fo, &fadv_idx) != 0)) {
226 SK_ERR("failed to alloc flowadv index for flow %s",
227 sk_uuid_unparse(req->nfr_flow_uuid, dbgbuf));
228 /* XXX: what is the most appropriate error code ? */
229 err = ENOSPC;
230 goto done;
231 }
232
233 fe = fe_alloc(TRUE);
234 if (__improbable(fe == NULL)) {
235 err = ENOMEM;
236 goto done;
237 }
238
239 fe->fe_key = key;
240 if (req->nfr_route != NULL) {
241 fe->fe_laddr_gencnt = req->nfr_route->fr_laddr_gencnt;
242 } else {
243 fe->fe_laddr_gencnt = req->nfr_saddr_gencnt;
244 }
245
246 if (__improbable(req->nfr_flags & NXFLOWREQF_LISTENER)) {
247 /* mark this as listener mode */
248 atomic_bitset_32(&fe->fe_flags, FLOWENTF_LISTENER);
249 } else {
250 ASSERT((fe->fe_key.fk_ipver == IPVERSION &&
251 fe->fe_key.fk_src4.s_addr != INADDR_ANY) ||
252 (fe->fe_key.fk_ipver == IPV6_VERSION &&
253 !IN6_IS_ADDR_UNSPECIFIED(&fe->fe_key.fk_src6)));
254
255 /* mark this as connected mode */
256 atomic_bitset_32(&fe->fe_flags, FLOWENTF_CONNECTED);
257 }
258
259 fe->fe_port_reservation = req->nfr_port_reservation;
260 req->nfr_port_reservation = NULL;
261 if (req->nfr_flags & NXFLOWREQF_EXT_PORT_RSV) {
262 fe->fe_flags |= FLOWENTF_EXTRL_PORT;
263 }
264 fe->fe_proto_reservation = req->nfr_proto_reservation;
265 req->nfr_proto_reservation = NULL;
266 if (req->nfr_flags & NXFLOWREQF_EXT_PROTO_RSV) {
267 fe->fe_flags |= FLOWENTF_EXTRL_PROTO;
268 }
269 fe->fe_ipsec_reservation = req->nfr_ipsec_reservation;
270 req->nfr_ipsec_reservation = NULL;
271
272 fe->fe_tx_process = dp_flow_tx_process;
273 fe->fe_rx_process = dp_flow_rx_process;
274
275 if (nx_port == FSW_VP_HOST) {
276 fe->fe_rx_process = fsw_host_rx;
277 }
278
279 dev_na = fo->fo_fsw->fsw_dev_ch->ch_na;
280 nif = NX_NETIF_PRIVATE(dev_na->na_nx);
281 if (NETIF_LLINK_ENABLED(nif)) {
282 fe->fe_qset = nx_netif_find_qset(nif, req->nfr_qset_id);
283 }
284 if (req->nfr_flags & NXFLOWREQF_LOW_LATENCY) {
285 atomic_bitset_32(&fe->fe_flags, FLOWENTF_LOW_LATENCY);
286 }
287
288 fe->fe_transport_protocol = req->nfr_transport_protocol;
289 if (sk_fsw_rx_agg_tcp &&
290 (fo->fo_fsw->fsw_nx->nx_prov->nxprov_params->nxp_max_frags > 1) &&
291 (fe->fe_key.fk_proto == IPPROTO_TCP) &&
292 (fe->fe_key.fk_mask == FKMASK_5TUPLE)) {
293 fe->fe_rx_process = flow_rx_agg_tcp;
294 }
295 uuid_copy(fe->fe_uuid, req->nfr_flow_uuid);
296 if ((req->nfr_flags & NXFLOWREQF_LISTENER) == 0 &&
297 (req->nfr_flags & NXFLOWREQF_TRACK) != 0) {
298 switch (req->nfr_ip_protocol) {
299 case IPPROTO_TCP:
300 case IPPROTO_UDP:
301 atomic_bitset_32(&fe->fe_flags, FLOWENTF_TRACK);
302 break;
303 default:
304 break;
305 }
306 }
307
308 if (req->nfr_flags & NXFLOWREQF_QOS_MARKING) {
309 atomic_bitset_32(&fe->fe_flags, FLOWENTF_QOS_MARKING);
310 }
311
312 if (req->nfr_route != NULL) {
313 fe->fe_route = req->nfr_route;
314 req->nfr_route = NULL;
315 }
316
317 fe->fe_nx_port = nx_port;
318 fe->fe_adv_idx = fadv_idx;
319
320 if (fe->fe_adv_idx != FLOWADV_IDX_NONE && fo->fo_nx_port_na != NULL) {
321 na_flowadv_entry_alloc(fo->fo_nx_port_na, fe->fe_uuid,
322 fe->fe_adv_idx);
323 }
324
325 if (KPKT_VALID_SVC(req->nfr_svc_class)) {
326 fe->fe_svc_class = (kern_packet_svc_class_t)req->nfr_svc_class;
327 } else {
328 fe->fe_svc_class = KPKT_SC_BE;
329 }
330
331 uuid_copy(fe->fe_eproc_uuid, req->nfr_euuid);
332 fe->fe_policy_id = req->nfr_policy_id;
333 fe->fe_inp_flowhash = req->nfr_inp_flowhash;
334
335 err = flow_mgr_flow_hash_mask_add(fm, fe->fe_key.fk_mask);
336 ASSERT(err == 0);
337
338 fe->fe_key_hash = flow_key_hash(&fe->fe_key);
339 err = cuckoo_hashtable_add_with_hash(fm->fm_flow_table, &fe->fe_cnode,
340 fe->fe_key_hash);
341 if (err != 0) {
342 SK_ERR("flow table add failed (err %d)", err);
343 flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
344 goto done;
345 }
346
347 RB_INSERT(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
348 flow_entry_retain(fe); /* one refcnt in id_tree */
349
350 *(struct nx_flowswitch **)(uintptr_t)&fe->fe_fsw = fo->fo_fsw;
351 fe->fe_pid = fo->fo_pid;
352 if (req->nfr_epid != -1 && req->nfr_epid != fo->fo_pid) {
353 fe->fe_epid = req->nfr_epid;
354 proc_name(fe->fe_epid, fe->fe_eproc_name,
355 sizeof(fe->fe_eproc_name));
356 } else {
357 fe->fe_epid = -1;
358 }
359
360 (void) snprintf(fe->fe_proc_name, sizeof(fe->fe_proc_name), "%s",
361 fo->fo_name);
362
363 fe_stats_init(fe);
364 flow_stats_retain(fe->fe_stats);
365 req->nfr_flow_stats = fe->fe_stats;
366
367 #if SK_LOG
368 SK_DF(SK_VERB_FLOW, "allocated entry \"%s\" fe 0x%llx flags 0x%b "
369 "[fo 0x%llx ]", fe_as_string(fe, dbgbuf,
370 sizeof(dbgbuf)), SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS,
371 SK_KVA(fo));
372 #endif /* SK_LOG */
373
374 done:
375 if (err != 0) {
376 if (fadv_idx != FLOWADV_IDX_NONE) {
377 flow_owner_flowadv_index_free(fo, fadv_idx);
378 }
379 if (fe != NULL) {
380 flow_entry_release(&fe);
381 }
382 }
383 *perr = err;
384 return fe;
385 }
386
387 void
flow_entry_teardown(struct flow_owner * fo,struct flow_entry * fe)388 flow_entry_teardown(struct flow_owner *fo, struct flow_entry *fe)
389 {
390 #if SK_LOG
391 char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
392 SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b [fo 0x%llx] "
393 "non_via %d withdrawn %d", fe_as_string(fe, dbgbuf, sizeof(dbgbuf)),
394 SK_KVA(fe), fe->fe_flags, FLOWENTF_BITS, SK_KVA(fo),
395 fe->fe_want_nonviable, fe->fe_want_withdraw);
396 #endif /* SK_LOG */
397 struct nx_flowswitch *fsw = fo->fo_fsw;
398
399 FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
400
401 ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
402 ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
403 ASSERT(fsw != NULL);
404
405 if (atomic_test_set_32(&fe->fe_want_nonviable, 1, 0)) {
406 ASSERT(fsw->fsw_pending_nonviable != 0);
407 atomic_add_32(&fsw->fsw_pending_nonviable, -1);
408 atomic_bitset_32(&fe->fe_flags, FLOWENTF_NONVIABLE);
409 }
410
411 /* always withdraw namespace during tear down */
412 if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
413 !(fe->fe_flags & FLOWENTF_WITHDRAWN)) {
414 atomic_bitset_32(&fe->fe_flags, FLOWENTF_WITHDRAWN);
415 atomic_set_32(&fe->fe_want_withdraw, 0);
416 /* local port is now inactive; not eligible for offload */
417 flow_namespace_withdraw(&fe->fe_port_reservation);
418 }
419
420 /* we may get here multiple times, so check */
421 if (!(fe->fe_flags & FLOWENTF_TORN_DOWN)) {
422 atomic_bitset_32(&fe->fe_flags, FLOWENTF_TORN_DOWN);
423 if (fe->fe_adv_idx != FLOWADV_IDX_NONE) {
424 if (fo->fo_nx_port_na != NULL) {
425 na_flowadv_entry_free(fo->fo_nx_port_na,
426 fe->fe_uuid, fe->fe_adv_idx);
427 }
428 flow_owner_flowadv_index_free(fo, fe->fe_adv_idx);
429 fe->fe_adv_idx = FLOWADV_IDX_NONE;
430 }
431 }
432 ASSERT(fe->fe_adv_idx == FLOWADV_IDX_NONE);
433 ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);
434 }
435
436 void
flow_entry_destroy(struct flow_owner * fo,struct flow_entry * fe,bool nolinger,void * close_params)437 flow_entry_destroy(struct flow_owner *fo, struct flow_entry *fe, bool nolinger,
438 void *close_params)
439 {
440 struct flow_mgr *fm = fo->fo_fsw->fsw_flow_mgr;
441 int err;
442
443 FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
444
445 /* one in flow_table, one in id_tree, one here */
446 ASSERT(flow_entry_refcnt(fe) > 2);
447
448 flow_entry_teardown(fo, fe);
449
450 err = flow_mgr_flow_hash_mask_del(fm, fe->fe_key.fk_mask);
451 ASSERT(err == 0);
452
453 uint32_t hash;
454 hash = flow_key_hash(&fe->fe_key);
455 cuckoo_hashtable_del(fm->fm_flow_table, &fe->fe_cnode, hash);
456
457 RB_REMOVE(flow_entry_id_tree, &fo->fo_flow_entry_id_head, fe);
458 struct flow_entry *tfe = fe;
459 flow_entry_release(&tfe);
460
461 ASSERT(!(fe->fe_flags & FLOWENTF_DESTROYED));
462 atomic_bitset_32(&fe->fe_flags, FLOWENTF_DESTROYED);
463
464 if (fe->fe_transport_protocol == IPPROTO_QUIC) {
465 if (!nolinger && close_params != NULL) {
466 fsw_flow_abort_quic(fe, close_params);
467 }
468 flow_entry_release(&fe);
469 } else if (nolinger || !(fe->fe_flags & FLOWENTF_WAIT_CLOSE)) {
470 flow_entry_release(&fe);
471 } else {
472 fsw_linger_insert(fe);
473 }
474 }
475
476 uint32_t
flow_entry_refcnt(struct flow_entry * fe)477 flow_entry_refcnt(struct flow_entry *fe)
478 {
479 return os_ref_get_count(&fe->fe_refcnt);
480 }
481
482 void
flow_entry_retain(struct flow_entry * fe)483 flow_entry_retain(struct flow_entry *fe)
484 {
485 os_ref_retain(&fe->fe_refcnt);
486 }
487
488 void
flow_entry_release(struct flow_entry ** pfe)489 flow_entry_release(struct flow_entry **pfe)
490 {
491 struct flow_entry *fe = *pfe;
492 ASSERT(fe != NULL);
493 *pfe = NULL; /* caller lose reference */
494 #if SK_LOG
495 if (__improbable(sk_verbose != 0)) {
496 char dbgbuf[FLOWENTRY_DBGBUF_SIZE];
497 SK_DF(SK_VERB_FLOW, "entry \"%s\" fe 0x%llx flags 0x%b",
498 fe_as_string(fe, dbgbuf, sizeof(dbgbuf)), SK_KVA(fe),
499 fe->fe_flags, FLOWENTF_BITS);
500 }
501 #endif /* SK_LOG */
502
503 if (__improbable(os_ref_release(&fe->fe_refcnt) == 0)) {
504 fe->fe_nx_port = NEXUS_PORT_ANY;
505 if (fe->fe_route != NULL) {
506 flow_route_release(fe->fe_route);
507 fe->fe_route = NULL;
508 }
509 if (fe->fe_qset != NULL) {
510 nx_netif_qset_release(&fe->fe_qset);
511 ASSERT(fe->fe_qset == NULL);
512 }
513 fe_free(fe);
514 }
515 }
516
517 struct flow_entry_dead *
flow_entry_dead_alloc(zalloc_flags_t how)518 flow_entry_dead_alloc(zalloc_flags_t how)
519 {
520 struct flow_entry_dead *fed;
521
522 fed = zalloc_flags(sk_fed_zone, how | Z_ZERO);
523 if (fed != NULL) {
524 SK_DF(SK_VERB_MEM, "fed 0x%llx ALLOC", SK_KVA(fed));
525 }
526 return fed;
527 }
528
529 void
flow_entry_dead_free(struct flow_entry_dead * fed)530 flow_entry_dead_free(struct flow_entry_dead *fed)
531 {
532 SK_DF(SK_VERB_MEM, "fed 0x%llx FREE", SK_KVA(fed));
533 zfree(sk_fed_zone, fed);
534 }
535
536 static void
fe_stats_init(struct flow_entry * fe)537 fe_stats_init(struct flow_entry *fe)
538 {
539 struct nx_flowswitch *fsw = fe->fe_fsw;
540 struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;
541
542 ASSERT(fe->fe_stats != NULL);
543 ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);
544
545 bzero(sf, sizeof(*sf));
546 uuid_copy(sf->sf_nx_uuid, fsw->fsw_nx->nx_uuid);
547 (void) strlcpy(sf->sf_if_name, fsw->fsw_flow_mgr->fm_name, IFNAMSIZ);
548 sf->sf_if_index = fsw->fsw_ifp->if_index;
549 sf->sf_pid = fe->fe_pid;
550 sf->sf_epid = fe->fe_epid;
551 (void) snprintf(sf->sf_proc_name, sizeof(sf->sf_proc_name), "%s",
552 fe->fe_proc_name);
553 (void) snprintf(sf->sf_eproc_name, sizeof(sf->sf_eproc_name), "%s",
554 fe->fe_eproc_name);
555
556 sf->sf_nx_port = fe->fe_nx_port;
557 sf->sf_key = fe->fe_key;
558 sf->sf_protocol = fe->fe_transport_protocol;
559 sf->sf_svc_class = fe->fe_svc_class;
560 sf->sf_adv_idx = fe->fe_adv_idx;
561
562 if (fe->fe_flags & FLOWENTF_TRACK) {
563 sf->sf_flags |= SFLOWF_TRACK;
564 }
565 if (fe->fe_flags & FLOWENTF_LISTENER) {
566 sf->sf_flags |= SFLOWF_LISTENER;
567 }
568 if (fe->fe_route != NULL && fe->fe_route->fr_flags & FLOWRTF_ONLINK) {
569 sf->sf_flags |= SFLOWF_ONLINK;
570 }
571
572 fe_stats_update(fe);
573 }
574
575 static void
fe_stats_update(struct flow_entry * fe)576 fe_stats_update(struct flow_entry *fe)
577 {
578 struct sk_stats_flow *sf = &fe->fe_stats->fs_stats;
579
580 ASSERT(fe->fe_stats != NULL);
581 ASSERT(os_ref_get_count(&fe->fe_stats->fs_refcnt) >= 1);
582
583 if (fe->fe_flags & FLOWENTF_CONNECTED) {
584 sf->sf_flags |= SFLOWF_CONNECTED;
585 }
586 if (fe->fe_flags & FLOWENTF_QOS_MARKING) {
587 sf->sf_flags |= SFLOWF_QOS_MARKING;
588 }
589 if (fe->fe_flags & FLOWENTF_WAIT_CLOSE) {
590 sf->sf_flags |= SFLOWF_WAIT_CLOSE;
591 }
592 if (fe->fe_flags & FLOWENTF_CLOSE_NOTIFY) {
593 sf->sf_flags |= SFLOWF_CLOSE_NOTIFY;
594 }
595 if (fe->fe_flags & FLOWENTF_ABORTED) {
596 sf->sf_flags |= SFLOWF_ABORTED;
597 }
598 if (fe->fe_flags & FLOWENTF_NONVIABLE) {
599 sf->sf_flags |= SFLOWF_NONVIABLE;
600 }
601 if (fe->fe_flags & FLOWENTF_WITHDRAWN) {
602 sf->sf_flags |= SFLOWF_WITHDRAWN;
603 }
604 if (fe->fe_flags & FLOWENTF_TORN_DOWN) {
605 sf->sf_flags |= SFLOWF_TORN_DOWN;
606 }
607 if (fe->fe_flags & FLOWENTF_DESTROYED) {
608 sf->sf_flags |= SFLOWF_DESTROYED;
609 }
610 if (fe->fe_flags & FLOWENTF_LINGERING) {
611 sf->sf_flags |= SFLOWF_LINGERING;
612 }
613 if (fe->fe_flags & FLOWENTF_LOW_LATENCY) {
614 sf->sf_flags |= SFLOWF_LOW_LATENCY;
615 }
616
617 sf->sf_bucket_idx = SFLOW_BUCKET_NONE;
618
619 sf->sf_ltrack.sft_state = fe->fe_ltrack.fse_state;
620 sf->sf_ltrack.sft_seq = fe->fe_ltrack.fse_seqlo;
621 sf->sf_ltrack.sft_max_win = fe->fe_ltrack.fse_max_win;
622 sf->sf_ltrack.sft_wscale = fe->fe_ltrack.fse_wscale;
623 sf->sf_rtrack.sft_state = fe->fe_rtrack.fse_state;
624 sf->sf_rtrack.sft_seq = fe->fe_rtrack.fse_seqlo;
625 sf->sf_rtrack.sft_max_win = fe->fe_rtrack.fse_max_win;
626 }
627
628 void
flow_entry_stats_get(struct flow_entry * fe,struct sk_stats_flow * sf)629 flow_entry_stats_get(struct flow_entry *fe, struct sk_stats_flow *sf)
630 {
631 _CASSERT(sizeof(fe->fe_stats->fs_stats) == sizeof(*sf));
632
633 fe_stats_update(fe);
634 bcopy(&fe->fe_stats->fs_stats, sf, sizeof(*sf));
635 }
636
637 struct flow_entry *
fe_alloc(boolean_t can_block)638 fe_alloc(boolean_t can_block)
639 {
640 struct flow_entry *fe;
641
642 _CASSERT((offsetof(struct flow_entry, fe_key) % 16) == 0);
643
644 fe = skmem_cache_alloc(sk_fe_cache,
645 can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
646 if (fe == NULL) {
647 return NULL;
648 }
649
650 /*
651 * fe_key is 16-bytes aligned which requires fe to begin on
652 * a 16-bytes boundary as well. This alignment is specified
653 * at sk_fe_cache creation time and we assert here.
654 */
655 ASSERT(IS_P2ALIGNED(fe, 16));
656 bzero(fe, sk_fe_size);
657
658 fe->fe_stats = flow_stats_alloc(can_block);
659 if (fe->fe_stats == NULL) {
660 skmem_cache_free(sk_fe_cache, fe);
661 return NULL;
662 }
663
664 SK_DF(SK_VERB_MEM, "fe 0x%llx ALLOC", SK_KVA(fe));
665
666 os_ref_init(&fe->fe_refcnt, &flow_entry_refgrp);
667
668 KPKTQ_INIT(&fe->fe_rx_pktq);
669 KPKTQ_INIT(&fe->fe_tx_pktq);
670
671 return fe;
672 }
673
674 static void
fe_free(struct flow_entry * fe)675 fe_free(struct flow_entry *fe)
676 {
677 ASSERT(fe->fe_flags & FLOWENTF_TORN_DOWN);
678 ASSERT(fe->fe_flags & FLOWENTF_DESTROYED);
679 ASSERT(!(fe->fe_flags & FLOWENTF_LINGERING));
680 ASSERT(fe->fe_route == NULL);
681
682 ASSERT(fe->fe_stats != NULL);
683 flow_stats_release(fe->fe_stats);
684 fe->fe_stats = NULL;
685
686 /* only at very last existence of flow releases namespace reservation */
687 if (!(fe->fe_flags & FLOWENTF_EXTRL_PORT) &&
688 NETNS_TOKEN_VALID(&fe->fe_port_reservation)) {
689 flow_namespace_destroy(&fe->fe_port_reservation);
690 ASSERT(!NETNS_TOKEN_VALID(&fe->fe_port_reservation));
691 }
692 fe->fe_port_reservation = NULL;
693
694 if (!(fe->fe_flags & FLOWENTF_EXTRL_PROTO) &&
695 protons_token_is_valid(fe->fe_proto_reservation)) {
696 protons_release(&fe->fe_proto_reservation);
697 }
698 fe->fe_proto_reservation = NULL;
699
700 if (key_custom_ipsec_token_is_valid(fe->fe_ipsec_reservation)) {
701 key_release_custom_ipsec(&fe->fe_ipsec_reservation);
702 }
703 fe->fe_ipsec_reservation = NULL;
704
705 skmem_cache_free(sk_fe_cache, fe);
706 }
707
708 static __inline__ int
fe_id_cmp(const struct flow_entry * a,const struct flow_entry * b)709 fe_id_cmp(const struct flow_entry *a, const struct flow_entry *b)
710 {
711 return uuid_compare(a->fe_uuid, b->fe_uuid);
712 }
713
714 #if SK_LOG
715 char *
fk_as_string(const struct flow_key * fk,char * dst,size_t dsz)716 fk_as_string(const struct flow_key *fk, char *dst, size_t dsz)
717 {
718 int af;
719 char src_s[MAX_IPv6_STR_LEN];
720 char dst_s[MAX_IPv6_STR_LEN];
721
722 af = fk->fk_ipver == 4 ? AF_INET : AF_INET6;
723
724 (void) inet_ntop(af, &fk->fk_src, src_s, sizeof(src_s));
725 (void) inet_ntop(af, &fk->fk_dst, dst_s, sizeof(dst_s));
726 (void) snprintf(dst, dsz,
727 "ipver=%u,src=%s,dst=%s,proto=0x%02u,sport=%u,dport=%u "
728 "mask=%08x,hash=%08x",
729 fk->fk_ipver, src_s, dst_s, fk->fk_proto, ntohs(fk->fk_sport),
730 ntohs(fk->fk_dport), fk->fk_mask, flow_key_hash(fk));
731
732 return dst;
733 }
734
735 char *
fe_as_string(const struct flow_entry * fe,char * dst,size_t dsz)736 fe_as_string(const struct flow_entry *fe, char *dst, size_t dsz)
737 {
738 char keybuf[FLOWKEY_DBGBUF_SIZE]; /* just for debug message */
739 uuid_string_t uuidstr;
740
741 fk_as_string(&fe->fe_key, keybuf, sizeof(keybuf));
742
743 (void) snprintf(dst, dsz,
744 "fe 0x%llx proc %s nx_port %d flow_uuid %s %s tp_proto=0x%02u",
745 SK_KVA(fe), fe->fe_proc_name, (int)fe->fe_nx_port,
746 sk_uuid_unparse(fe->fe_uuid, uuidstr),
747 keybuf, fe->fe_transport_protocol);
748
749 return dst;
750 }
751 #endif /* SK_LOG */
752