1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/os_skywalk.h>
31 #include <skywalk/nexus/flowswitch/fsw_var.h>
32 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
33 #include <netinet/in.h>
34 #include <netinet/in_var.h>
35 #include <netinet6/ip6_var.h>
36 #include <netkey/key.h>
37
38 #include <skywalk/nexus/flowswitch/flow/flow_var.h>
39
40 #if CONFIG_MACF
41 #include <security/mac_framework.h>
42 #endif /* CONFIG_MACF */
43
44 #include <net/net_api_stats.h>
45
46 #define SKMEM_TAG_FSW_FLOW_MGR "com.apple.skywalk.fsw.flow_mgr"
47 static SKMEM_TAG_DEFINE(skmem_tag_fsw_flow_mgr, SKMEM_TAG_FSW_FLOW_MGR);
48
49 static LCK_GRP_DECLARE(flow_mgr_lock_group, "sk_flow_mgr_lock");
50 static LCK_RW_DECLARE(flow_mgr_lock, &flow_mgr_lock_group);
51
52 static int fm_cmp(const struct flow_mgr *,
53 const struct flow_mgr *);
54
55 RB_HEAD(flow_mgr_tree, flow_mgr);
56 RB_PROTOTYPE_PREV(flow_mgr_tree, flow_mgr, fm_link, fm_cmp);
57 RB_GENERATE_PREV(flow_mgr_tree, flow_mgr, fm_link, fm_cmp);
58
59 /* protected by the global lock flow_mgr_lock */
60 static struct flow_mgr_tree flow_mgr_head;
61
62 static int __flow_mgr_inited = 0;
63
void
flow_mgr_init(void)
{
	/* one-time module init; must not be called more than once */
	ASSERT(!__flow_mgr_inited);

	/* empty tree of all flow managers, protected by flow_mgr_lock */
	RB_INIT(&flow_mgr_head);
	__flow_mgr_inited = 1;
}
72
73 void
flow_mgr_fini(void)74 flow_mgr_fini(void)
75 {
76 if (__flow_mgr_inited) {
77 VERIFY(RB_EMPTY(&flow_mgr_head));
78
79 __flow_mgr_inited = 0;
80 }
81 }
82
83 static int
__fe_cuckoo_cmp(struct cuckoo_node * node,void * key0)84 __fe_cuckoo_cmp(struct cuckoo_node *node, void *key0)
85 {
86 struct flow_entry *fe = container_of(node, struct flow_entry, fe_cnode);
87 struct flow_key *key = key0;
88 const struct flow_key *mask;
89
90 /*
91 * This can probably be made more efficient by having "mask" be
92 * set by the original caller at the time the key is initialized,
93 * though that needs to be done carefully to ensure there is no
94 * mismatch between fk_mask value and "mask" itself.
95 */
96 switch (key->fk_mask) {
97 case FKMASK_5TUPLE:
98 mask = &fk_mask_5tuple;
99 break;
100 case FKMASK_4TUPLE:
101 mask = &fk_mask_4tuple;
102 break;
103 case FKMASK_3TUPLE:
104 mask = &fk_mask_3tuple;
105 break;
106 case FKMASK_2TUPLE:
107 mask = &fk_mask_2tuple;
108 break;
109 case FKMASK_IPFLOW3:
110 mask = &fk_mask_ipflow3;
111 break;
112 case FKMASK_IPFLOW2:
113 mask = &fk_mask_ipflow2;
114 break;
115 case FKMASK_IPFLOW1:
116 mask = &fk_mask_ipflow1;
117 break;
118 default:
119 return flow_key_cmp(&fe->fe_key, key);
120 }
121
122 return flow_key_cmp_mask(&fe->fe_key, key, mask);
123 }
124
125 static void
__fe_cuckoo_retain(struct cuckoo_node * node)126 __fe_cuckoo_retain(struct cuckoo_node *node)
127 {
128 struct flow_entry *fe = container_of(node, struct flow_entry, fe_cnode);
129 return flow_entry_retain(fe);
130 }
131
132 static void
__fe_cuckoo_release(struct cuckoo_node * node)133 __fe_cuckoo_release(struct cuckoo_node *node)
134 {
135 #pragma unused(node)
136 struct flow_entry *fe = container_of(node, struct flow_entry, fe_cnode);
137 flow_entry_release(&fe);
138 }
139
/*
 * Allocate and fully construct a flow manager: the flow-entry cuckoo
 * hash table plus three cache-aligned bucket arrays (flow owners,
 * flow routes, flow route IDs), then link it into the global
 * flow_mgr tree.  Returns NULL if any allocation fails.
 *
 * NOTE(review): the failure paths call flow_mgr_destroy() while
 * fm_uuid is still null and fm was never RB_INSERTed; verify that
 * destroy tolerates a partially-constructed fm (its ASSERT and
 * RB_REMOVE appear to assume full construction).
 */
struct flow_mgr *
flow_mgr_create(size_t fe_cnt, size_t fob_cnt,
    size_t frb_cnt, size_t frib_cnt)
{
	struct flow_mgr *fm = NULL;
	size_t fob_sz, frb_sz, frib_sz;
	size_t fob_tot_sz, frb_tot_sz, frib_tot_sz;
	uint32_t i;

	/* caller needs to ensure {fb,frb}_cnt is a power of two */
	ASSERT(frb_cnt != 0 && ((frb_cnt & (frb_cnt - 1)) == 0));
	ASSERT(fob_cnt != 0);
	ASSERT(frib_cnt != 0);

	/* Z_WAITOK | Z_NOFAIL: this allocation cannot return NULL */
	fm = sk_alloc_type(struct flow_mgr, Z_WAITOK | Z_NOFAIL, skmem_tag_fsw_flow_mgr);

	/* flow table callbacks manage flow_entry refcounts on insert/remove */
	struct cuckoo_hashtable_params p = {
		.cht_capacity = fe_cnt,
		.cht_obj_cmp = __fe_cuckoo_cmp,
		.cht_obj_retain = __fe_cuckoo_retain,
		.cht_obj_release = __fe_cuckoo_release,
	};
	fm->fm_flow_table = cuckoo_hashtable_create(&p);
	if (fm->fm_flow_table == NULL) {
		flow_mgr_destroy(fm);
		return NULL;
	}

	/*
	 * flow_owner_bucket cache-aligned objects.
	 */
	fm->fm_owner_buckets = flow_owner_buckets_alloc(fob_cnt, &fob_sz, &fob_tot_sz);
	if (fm->fm_owner_buckets == NULL) {
		flow_mgr_destroy(fm);
		return NULL;
	}
	/* const overrides: write-once initialization of const-declared fields */
	*(size_t *)(uintptr_t)&fm->fm_owner_buckets_cnt = fob_cnt;
	*(size_t *)(uintptr_t)&fm->fm_owner_bucket_sz = fob_sz;
	*(size_t *)(uintptr_t)&fm->fm_owner_bucket_tot_sz = fob_tot_sz;

	/*
	 * flow_route_bucket cache-aligned objects.
	 */
	fm->fm_route_buckets = flow_route_buckets_alloc(frb_cnt, &frb_sz, &frb_tot_sz);
	if (fm->fm_route_buckets == NULL) {
		flow_mgr_destroy(fm);
		return NULL;
	}
	/* const overrides */
	*(size_t *)(uintptr_t)&fm->fm_route_buckets_cnt = frb_cnt;
	*(size_t *)(uintptr_t)&fm->fm_route_bucket_sz = frb_sz;
	*(size_t *)(uintptr_t)&fm->fm_route_bucket_tot_sz = frb_tot_sz;

	/*
	 * flow_route_id_bucket cache-aligned objects.
	 */
	fm->fm_route_id_buckets =
	    flow_route_id_buckets_alloc(frib_cnt, &frib_sz, &frib_tot_sz);
	if (fm->fm_route_id_buckets == NULL) {
		flow_mgr_destroy(fm);
		return NULL;
	}
	/* const overrides */
	*(size_t *)(uintptr_t)&fm->fm_route_id_buckets_cnt = frib_cnt;
	*(size_t *)(uintptr_t)&fm->fm_route_id_bucket_sz = frib_sz;
	*(size_t *)(uintptr_t)&fm->fm_route_id_bucket_tot_sz = frib_tot_sz;

	/* construct flow_owner_buckets */
	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		struct flow_owner_bucket *fob = flow_mgr_get_fob_at_idx(fm, i);
		flow_owner_bucket_init(fob);
		/* const override */
		*(size_t *)(uintptr_t)&fob->fob_idx = i;
	}

	/* construct flow_route_buckets */
	for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
		struct flow_route_bucket *frb = flow_mgr_get_frb_at_idx(fm, i);
		flow_route_bucket_init(frb);
		/* const override */
		*(size_t *)(uintptr_t)&frb->frb_idx = i;
	}

	/* construct flow_route_id_buckets */
	for (i = 0; i < fm->fm_route_id_buckets_cnt; i++) {
		struct flow_route_id_bucket *frib =
		    flow_mgr_get_frib_at_idx(fm, i);
		flow_route_id_bucket_init(frib);
		/* const override */
		*(size_t *)(uintptr_t)&frib->frib_idx = i;
	}

	/* the UUID identifies this fm in the global tree (see fm_cmp()) */
	uuid_generate_random(fm->fm_uuid);

	lck_rw_lock_exclusive(&flow_mgr_lock);
	RB_INSERT(flow_mgr_tree, &flow_mgr_head, fm);
#if DEBUG
	struct flow_mgr find;
	uuid_copy(find.fm_uuid, fm->fm_uuid);
	/* make sure our tree compare routine is sane */
	ASSERT(RB_FIND(flow_mgr_tree,
	    &flow_mgr_head, &find) == fm);
#endif /* DEBUG */
	lck_rw_done(&flow_mgr_lock);

	/* lookup mask precedence: most- to least-specific flow key */
	fm->fm_flow_hash_masks[0] = FKMASK_5TUPLE;
	fm->fm_flow_hash_masks[1] = FKMASK_4TUPLE;
	fm->fm_flow_hash_masks[2] = FKMASK_3TUPLE;
	fm->fm_flow_hash_masks[3] = FKMASK_2TUPLE;
	fm->fm_flow_hash_masks[4] = FKMASK_IPFLOW3;
	fm->fm_flow_hash_masks[5] = FKMASK_IPFLOW2;
	fm->fm_flow_hash_masks[6] = FKMASK_IPFLOW1;

	/* no flows yet: zero the per-mask flow counters */
	memset(&fm->fm_flow_hash_count, 0, sizeof(fm->fm_flow_hash_count));

	return fm;
}
258
/*
 * Tear down a flow manager: free the flow table and the three bucket
 * arrays (each bucket is destroyed individually first), reset the
 * const-declared counters/sizes, then unlink fm from the global tree
 * and free it.  Buckets are expected to already be empty; see
 * flow_mgr_terminate().
 */
void
flow_mgr_destroy(struct flow_mgr *fm)
{
	uint32_t i;

	lck_rw_lock_exclusive(&flow_mgr_lock);
	/*
	 * NOTE(review): flow_mgr_create() calls this on its failure
	 * paths before the UUID is generated and before RB_INSERT;
	 * this ASSERT and the unconditional RB_REMOVE below appear to
	 * assume a fully-constructed fm -- confirm those paths.
	 */
	ASSERT(!uuid_is_null(fm->fm_uuid));

	if (fm->fm_flow_table != NULL) {
		cuckoo_hashtable_free(fm->fm_flow_table);
	}

	if (fm->fm_owner_buckets != NULL) {
		for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
			struct flow_owner_bucket *fob =
			    flow_mgr_get_fob_at_idx(fm, i);
			ASSERT(fob->fob_idx == i);
			flow_owner_bucket_destroy(fob);
		}
		flow_owner_buckets_free(fm->fm_owner_buckets,
		    fm->fm_owner_bucket_tot_sz);
		fm->fm_owner_buckets = NULL;
		/*
		 * const overrides to reset the fields.
		 * NOTE(review): create() wrote these fields through
		 * (size_t *) casts, but they are reset here through
		 * (uint32_t *); confirm the declared field width
		 * matches (a mismatch would be a partial store on
		 * LP64).
		 */
		*(uint32_t *)(uintptr_t)&fm->fm_owner_buckets_cnt = 0;
		*(uint32_t *)(uintptr_t)&fm->fm_owner_bucket_sz = 0;
		*(uint32_t *)(uintptr_t)&fm->fm_owner_bucket_tot_sz = 0;
	}
	ASSERT(fm->fm_owner_buckets_cnt == 0);
	ASSERT(fm->fm_owner_bucket_sz == 0);
	ASSERT(fm->fm_owner_bucket_tot_sz == 0);

	if (fm->fm_route_buckets != NULL) {
		for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
			struct flow_route_bucket *frb =
			    flow_mgr_get_frb_at_idx(fm, i);
			ASSERT(frb->frb_idx == i);
			flow_route_bucket_destroy(frb);
		}
		flow_route_buckets_free(fm->fm_route_buckets,
		    fm->fm_route_bucket_tot_sz);
		fm->fm_route_buckets = NULL;
		/* const overrides (see width note above) */
		*(uint32_t *)(uintptr_t)&fm->fm_route_buckets_cnt = 0;
		*(uint32_t *)(uintptr_t)&fm->fm_route_bucket_sz = 0;
		*(uint32_t *)(uintptr_t)&fm->fm_route_bucket_tot_sz = 0;
	}
	ASSERT(fm->fm_route_buckets_cnt == 0);
	ASSERT(fm->fm_route_bucket_sz == 0);
	ASSERT(fm->fm_route_bucket_tot_sz == 0);

	if (fm->fm_route_id_buckets != NULL) {
		for (i = 0; i < fm->fm_route_id_buckets_cnt; i++) {
			struct flow_route_id_bucket *frib =
			    flow_mgr_get_frib_at_idx(fm, i);
			ASSERT(frib->frib_idx == i);
			flow_route_id_bucket_destroy(frib);
		}
		flow_route_id_buckets_free(fm->fm_route_id_buckets,
		    fm->fm_route_id_bucket_tot_sz);
		fm->fm_route_id_buckets = NULL;
		/* const overrides (see width note above) */
		*(uint32_t *)(uintptr_t)&fm->fm_route_id_buckets_cnt = 0;
		*(uint32_t *)(uintptr_t)&fm->fm_route_id_bucket_sz = 0;
		*(uint32_t *)(uintptr_t)&fm->fm_route_id_bucket_tot_sz = 0;
	}
	ASSERT(fm->fm_route_id_buckets_cnt == 0);
	ASSERT(fm->fm_route_id_bucket_sz == 0);
	ASSERT(fm->fm_route_id_bucket_tot_sz == 0);

	/* unlink from the global tree before freeing */
	uuid_clear(fm->fm_uuid);
	RB_REMOVE(flow_mgr_tree, &flow_mgr_head, fm);
	lck_rw_done(&flow_mgr_lock);

	sk_free_type(struct flow_mgr, fm);
}
331
/*
 * Quiesce a flow manager prior to destruction: purge every flow
 * entry and every flow route.  Each phase acquires ALL bucket locks
 * first, purges, then releases -- so no new state can be created in
 * any bucket while a purge is in flight.
 */
void
flow_mgr_terminate(struct flow_mgr *fm)
{
	uint32_t i;

	/*
	 * Purge all flow entries.
	 */
	/* phase 1: lock every owner bucket and mark it dead */
	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		struct flow_owner_bucket *fob =
		    flow_mgr_get_fob_at_idx(fm, i);
		FOB_LOCK(fob);
		fob->fob_busy_flags |= FOBF_DEAD;
	}
	/* phase 2: purge with all bucket locks held */
	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		struct flow_owner_bucket *fob =
		    flow_mgr_get_fob_at_idx(fm, i);
		SK_DF(SK_VERB_FLOW, "purging fob 0x%llx [%u]", SK_KVA(fob), i);
		flow_owner_bucket_purge_all(fob);
	}

	/* phase 3: release the owner bucket locks */
	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		FOB_UNLOCK(flow_mgr_get_fob_at_idx(fm, i));
	}

	/*
	 * Purge all flow routes.
	 */
	/* lock order: all route buckets, then all route-id buckets */
	for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
		struct flow_route_bucket *frb =
		    flow_mgr_get_frb_at_idx(fm, i);
		FRB_WLOCK(frb);
	}
	for (i = 0; i < fm->fm_route_id_buckets_cnt; i++) {
		FRIB_WLOCK(flow_mgr_get_frib_at_idx(fm, i));
	}

	for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
		struct flow_route_bucket *frb =
		    flow_mgr_get_frb_at_idx(fm, i);
		SK_DF(SK_VERB_FLOW, "purging frb 0x%llx [%u]", SK_KVA(frb), i);
		flow_route_bucket_purge_all(frb);
	}

	/* unlock in reverse of acquisition order */
	for (i = 0; i < fm->fm_route_id_buckets_cnt; i++) {
		FRIB_WUNLOCK(flow_mgr_get_frib_at_idx(fm, i));
	}
	for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
		FRB_WUNLOCK(flow_mgr_get_frb_at_idx(fm, i));
	}
}
383
384 void
flow_mgr_setup_host_flow(struct flow_mgr * fm,struct nx_flowswitch * fsw)385 flow_mgr_setup_host_flow(struct flow_mgr *fm, struct nx_flowswitch *fsw)
386 {
387 struct flow_entry *host_fe = fe_alloc(true);
388 host_fe->fe_key.fk_mask = 0;
389 host_fe->fe_nx_port = FSW_VP_HOST;
390 *(struct nx_flowswitch **)(uintptr_t)&host_fe->fe_fsw = fsw;
391 host_fe->fe_svc_class = KPKT_SC_BE;
392 host_fe->fe_pid = proc_getpid(kernproc);
393 host_fe->fe_rx_process = fsw_host_rx;
394 (void) snprintf(host_fe->fe_proc_name, sizeof(host_fe->fe_proc_name),
395 "%s", proc_name_address(kernproc));
396 flow_entry_retain(host_fe);
397 fm->fm_host_fe = host_fe;
398 KPKTQ_INIT(&host_fe->fe_rx_pktq);
399 KPKTQ_INIT(&host_fe->fe_rx_pktq);
400 }
401
402 void
flow_mgr_teardown_host_flow(struct flow_mgr * fm)403 flow_mgr_teardown_host_flow(struct flow_mgr *fm)
404 {
405 flow_entry_release(&fm->fm_host_fe);
406 }
407
408 /*
409 * Must be matched with a call to flow_mgr_unlock(). Upon success will
410 * return the flow manager address of the specified UUID, and will acquire
411 * the global flow_mgr_lock as reader. The caller is then expected to release
412 * the lock.
413 */
414 struct flow_mgr *
flow_mgr_find_lock(uuid_t uuid)415 flow_mgr_find_lock(uuid_t uuid)
416 {
417 struct flow_mgr *fm, find;
418
419 uuid_copy(find.fm_uuid, uuid);
420
421 lck_rw_lock_shared(&flow_mgr_lock);
422
423 fm = RB_FIND(flow_mgr_tree, &flow_mgr_head, &find);
424 if (fm == NULL) {
425 lck_rw_done(&flow_mgr_lock);
426 return NULL;
427 }
428
429 /* caller is expected to call flow_mgr_unlock() when done */
430 LCK_RW_ASSERT(&flow_mgr_lock, LCK_RW_ASSERT_SHARED);
431 return fm;
432 }
433
434 /*
435 * Must be matched with a successful call to flow_mgr_find_lock().
436 */
void
flow_mgr_unlock(void)
{
	/* release the reader hold acquired by flow_mgr_find_lock() */
	lck_rw_done(&flow_mgr_lock);
}
442
443 static inline int
fm_cmp(const struct flow_mgr * a,const struct flow_mgr * b)444 fm_cmp(const struct flow_mgr *a, const struct flow_mgr *b)
445 {
446 return uuid_compare(a->fm_uuid, b->fm_uuid);
447 }
448
449 static void
flow_mgr_clear_embedded_scope_id(struct sockaddr_in6 * addr)450 flow_mgr_clear_embedded_scope_id(struct sockaddr_in6 *addr)
451 {
452 struct in6_addr *in6;
453 in6 = &addr->sin6_addr;
454 if (in6_embedded_scope && IN6_IS_SCOPE_EMBED(in6)) {
455 addr->sin6_scope_id = ntohs(in6->s6_addr16[1]);
456 in6->s6_addr16[1] = 0;
457 }
458 }
459
460 #if CONFIG_MACF
461 static bool
flow_req_check_mac_allowed(struct nx_flow_req * req)462 flow_req_check_mac_allowed(struct nx_flow_req *req)
463 {
464 int socktype;
465 switch (req->nfr_ip_protocol) {
466 case IPPROTO_TCP:
467 socktype = SOCK_STREAM;
468 break;
469
470 case IPPROTO_UDP:
471 socktype = SOCK_DGRAM;
472 break;
473
474 default:
475 /* Custom IP protocol, which is treated as IP diagram type */
476 socktype = SOCK_DGRAM;
477 return 0;
478 }
479
480 if (req->nfr_flags & NXFLOWREQF_LISTENER) {
481 return mac_skywalk_flow_check_listen(req->nfr_proc, NULL,
482 &req->nfr_saddr.sa, socktype, req->nfr_ip_protocol);
483 } else {
484 return mac_skywalk_flow_check_connect(req->nfr_proc, NULL,
485 &req->nfr_daddr.sa, socktype, req->nfr_ip_protocol);
486 }
487 }
488 #endif /* CONFIG_MACF */
489
490 static bool
flow_req_needs_netns_reservation(struct nx_flow_req * req)491 flow_req_needs_netns_reservation(struct nx_flow_req *req)
492 {
493 uint8_t proto = req->nfr_ip_protocol;
494 return proto == IPPROTO_TCP || proto == IPPROTO_UDP;
495 }
496
497 static bool
flow_req_needs_protons_reservation(struct nx_flow_req * req)498 flow_req_needs_protons_reservation(struct nx_flow_req *req)
499 {
500 uint8_t proto = req->nfr_ip_protocol;
501 return proto != IPPROTO_TCP && proto != IPPROTO_UDP &&
502 proto != IPPROTO_ESP && proto != IPPROTO_AH;
503 }
504
505 static bool
flow_req_needs_ipsec_reservation(struct nx_flow_req * req)506 flow_req_needs_ipsec_reservation(struct nx_flow_req *req)
507 {
508 uint8_t proto = req->nfr_ip_protocol;
509 return proto == IPPROTO_ESP || proto == IPPROTO_AH;
510 }
511
512 static void
flow_set_port_info(struct ns_flow_info * nfi,struct nx_flow_req * req)513 flow_set_port_info(struct ns_flow_info *nfi, struct nx_flow_req *req)
514 {
515 union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
516 union sockaddr_in_4_6 *daddr = &req->nfr_daddr;
517
518 bzero(nfi, sizeof(struct ns_flow_info));
519
520 nfi->nfi_ifp = req->nfr_ifp;
521
522 nfi->nfi_laddr = *saddr;
523 nfi->nfi_faddr = *daddr;
524
525 nfi->nfi_protocol = req->nfr_ip_protocol;
526
527 uuid_copy(nfi->nfi_flow_uuid, req->nfr_flow_uuid);
528 ASSERT(!uuid_is_null(nfi->nfi_flow_uuid));
529
530 nfi->nfi_owner_pid = req->nfr_pid;
531 if (req->nfr_epid != -1) {
532 nfi->nfi_effective_pid = req->nfr_epid;
533 proc_name(req->nfr_epid, nfi->nfi_effective_name,
534 sizeof(nfi->nfi_effective_name));
535 } else {
536 nfi->nfi_effective_pid = -1;
537 }
538
539 proc_name(req->nfr_pid, nfi->nfi_owner_name,
540 sizeof(nfi->nfi_owner_name));
541 }
542
543 static int
flow_req_prepare_namespace(struct nx_flow_req * req)544 flow_req_prepare_namespace(struct nx_flow_req *req)
545 {
546 SK_LOG_VAR(char src_s[MAX_IPv6_STR_LEN]);
547 int err = 0;
548
549 if (flow_req_needs_netns_reservation(req)) {
550 if (!NETNS_TOKEN_VALID(&req->nfr_port_reservation)) {
551 union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
552 struct ns_flow_info nfi;
553 netns_token ns_token;
554 flow_set_port_info(&nfi, req);
555 err = flow_namespace_create(saddr,
556 req->nfr_ip_protocol, &ns_token,
557 req->nfr_flags, &nfi);
558 if (err != 0) {
559 SK_ERR("netns for %s.%u failed",
560 sk_sa_ntop(SA(saddr), src_s, sizeof(src_s)),
561 sk_sa_get_port(SA(saddr)));
562 goto fail;
563 }
564 req->nfr_port_reservation = ns_token;
565 req->nfr_flags &= ~NXFLOWREQF_EXT_PORT_RSV;
566 } else {
567 /* Validate PID associated with provided reservation */
568 struct ns_flow_info nfi = {};
569 err = netns_get_flow_info(&req->nfr_port_reservation,
570 &nfi);
571 /* flow info could be NULL for socket flow */
572 if (!err && (req->nfr_pid != nfi.nfi_owner_pid ||
573 (req->nfr_epid != -1 && nfi.nfi_effective_pid !=
574 req->nfr_epid))) {
575 SK_ERR("netns flow info mismatch, "
576 "req_(e)pid %d(%d), nfr_(e)pid %d(%d)",
577 req->nfr_pid, req->nfr_epid,
578 nfi.nfi_owner_pid, nfi.nfi_effective_pid);
579 err = EPERM;
580 goto fail;
581 }
582 req->nfr_flags |= NXFLOWREQF_EXT_PORT_RSV;
583 }
584 }
585
586 if (flow_req_needs_ipsec_reservation(req)) {
587 union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
588 union sockaddr_in_4_6 *daddr = &req->nfr_daddr;
589 void *ipsec_token = NULL;
590 ASSERT(req->nfr_ipsec_reservation == NULL);
591 err = key_reserve_custom_ipsec(&ipsec_token, saddr,
592 daddr, req->nfr_ip_protocol);
593 if (err != 0) {
594 SK_ERR("custom ipsec %u reserve %s failed",
595 req->nfr_ip_protocol,
596 sk_sa_ntop(SA(saddr), src_s, sizeof(src_s)));
597 goto fail;
598 }
599 req->nfr_ipsec_reservation = ipsec_token;
600 }
601
602 if (flow_req_needs_protons_reservation(req)) {
603 struct protons_token *ns_token = NULL;
604 if (!protons_token_is_valid(req->nfr_proto_reservation)) {
605 err = protons_reserve(&ns_token, req->nfr_pid,
606 req->nfr_epid, req->nfr_ip_protocol);
607 if (err != 0) {
608 SK_ERR("protocol %u namespace failed",
609 req->nfr_ip_protocol);
610 goto fail;
611 }
612 req->nfr_flags &= ~NXFLOWREQF_EXT_PROTO_RSV;
613 req->nfr_proto_reservation = ns_token;
614 } else {
615 /* Validate PID associated with provided reservation */
616 if (!protons_token_has_matching_pid(req->nfr_proto_reservation,
617 req->nfr_pid, req->nfr_epid)) {
618 SK_ERR("protons token pid mismatch");
619 err = EPERM;
620 goto fail;
621 }
622 req->nfr_flags |= NXFLOWREQF_EXT_PROTO_RSV;
623 }
624 }
625
626 return 0;
627
628 fail:
629 VERIFY(err != 0);
630 SK_ERR("perparation failed (err %d)", err);
631 return err;
632 }
633
634 static int
flow_req_prepare(struct nx_flow_req * req,struct kern_nexus * nx,struct flow_mgr * fm,struct ifnet * ifp,flow_route_ctor_fn_t fr_ctor,flow_route_resolve_fn_t fr_resolve,void * fr_arg)635 flow_req_prepare(struct nx_flow_req *req, struct kern_nexus *nx,
636 struct flow_mgr *fm, struct ifnet *ifp, flow_route_ctor_fn_t fr_ctor,
637 flow_route_resolve_fn_t fr_resolve, void *fr_arg)
638 {
639 int err = 0;
640 union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
641 union sockaddr_in_4_6 *daddr = &req->nfr_daddr;
642 uint8_t protocol = req->nfr_ip_protocol;
643
644 sa_family_t saf, daf, xaf, af;
645
646 saf = SA(saddr)->sa_family;
647 daf = SA(daddr)->sa_family;
648 xaf = saf ^ daf;
649 if (xaf != 0 && xaf != saf && xaf != daf) {
650 SK_ERR("invalid saddr af %d daddr af %d", saf, daf);
651 return EINVAL;
652 }
653 af = (xaf == 0) ? saf : xaf;
654
655 bool has_saddr = false, has_daddr = false;
656 bool has_sport = false, has_dport = false;
657 uint16_t sport, dport;
658 uint8_t sa_len;
659 switch (af) {
660 case AF_INET:
661 sa_len = sizeof(struct sockaddr_in);
662 has_saddr = (SIN(saddr)->sin_addr.s_addr != INADDR_ANY);
663 has_daddr = (SIN(daddr)->sin_addr.s_addr != INADDR_ANY);
664 sport = SIN(saddr)->sin_port;
665 dport = SIN(daddr)->sin_port;
666 has_sport = (sport != 0);
667 has_dport = (dport != 0);
668
669 if ((has_saddr && SIN(saddr)->sin_len != sa_len) ||
670 (has_daddr && SIN(daddr)->sin_len != sa_len)) {
671 SK_ERR("sin_len invalid");
672 err = EINVAL;
673 goto fail;
674 }
675 if ((has_saddr && IN_MULTICAST(ntohl(SIN(saddr)->sin_addr.s_addr))) ||
676 (has_daddr && IN_MULTICAST(ntohl(SIN(daddr)->sin_addr.s_addr)))) {
677 SK_ERR("multicast flow not yet supported");
678 err = EADDRNOTAVAIL;
679 goto fail;
680 }
681 if (__probable(protocol == IPPROTO_TCP)) {
682 INC_ATOMIC_INT64_LIM(
683 net_api_stats.nas_nx_flow_inet6_stream_total);
684 } else {
685 INC_ATOMIC_INT64_LIM(
686 net_api_stats.nas_nx_flow_inet6_dgram_total);
687 }
688 break;
689
690 case AF_INET6:
691 sa_len = sizeof(struct sockaddr_in6);
692 has_saddr = !IN6_IS_ADDR_UNSPECIFIED(&SIN6(saddr)->sin6_addr);
693 has_daddr = !IN6_IS_ADDR_UNSPECIFIED(&SIN6(daddr)->sin6_addr);
694 sport = SIN6(saddr)->sin6_port;
695 dport = SIN6(daddr)->sin6_port;
696 has_sport = (sport != 0);
697 has_dport = (dport != 0);
698 if ((has_saddr && SIN6(saddr)->sin6_len != sa_len) ||
699 (has_daddr && SIN6(daddr)->sin6_len != sa_len)) {
700 SK_ERR("sin_len invalid");
701 err = EINVAL;
702 goto fail;
703 }
704 /* clear embedded scope if link-local src */
705 if (has_saddr) {
706 flow_mgr_clear_embedded_scope_id(SIN6(saddr));
707 if (!in6_embedded_scope && IN6_IS_SCOPE_EMBED(&SIN6(saddr)->sin6_addr)) {
708 SIN6(saddr)->sin6_scope_id = ifp->if_index;
709 }
710 }
711 if (has_daddr) {
712 flow_mgr_clear_embedded_scope_id(SIN6(daddr));
713 if (!in6_embedded_scope && IN6_IS_SCOPE_EMBED(&SIN6(daddr)->sin6_addr)) {
714 SIN6(daddr)->sin6_scope_id = ifp->if_index;
715 }
716 }
717 if ((has_saddr && IN6_IS_ADDR_MULTICAST(&SIN6(saddr)->sin6_addr)) ||
718 (has_daddr && IN6_IS_ADDR_MULTICAST(&SIN6(daddr)->sin6_addr))) {
719 SK_ERR("multicast flow not yet supported");
720 err = EADDRNOTAVAIL;
721 goto fail;
722 }
723 if (__probable(protocol == IPPROTO_TCP)) {
724 INC_ATOMIC_INT64_LIM(
725 net_api_stats.nas_nx_flow_inet_stream_total);
726 } else {
727 INC_ATOMIC_INT64_LIM(
728 net_api_stats.nas_nx_flow_inet_dgram_total);
729 }
730 break;
731
732 default:
733 SK_ERR("unknown address families saf %d daf %d", saf, daf);
734 err = EINVAL;
735 goto fail;
736 }
737
738 SA(saddr)->sa_family = SA(daddr)->sa_family = af;
739 SA(saddr)->sa_len = SA(daddr)->sa_len = sa_len;
740
741 if (__improbable(has_saddr && !flow_route_laddr_validate(saddr, ifp,
742 &req->nfr_saddr_gencnt))) {
743 SK_LOG_VAR(char src_s[MAX_IPv6_STR_LEN]);
744 SK_ERR("src address %s is not valid",
745 sk_sa_ntop(SA(saddr), src_s, sizeof(src_s)));
746 err = EADDRNOTAVAIL;
747 goto fail;
748 }
749
750 bool is_tcp_udp = (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP);
751 if (!is_tcp_udp) {
752 if (has_sport || has_dport) {
753 SK_ERR("non-zero port for IP flow");
754 return EINVAL;
755 }
756 } else {
757 /* dst:dport as connected, 0:0 as listener, but not partial */
758 if (has_daddr != has_dport) {
759 err = EINVAL;
760 SK_ERR("invalid dst/dport for TCP/UDP (err %d)", err);
761 goto fail;
762 }
763 }
764
765 if (!has_daddr && !has_dport) {
766 req->nfr_flags |= NXFLOWREQF_LISTENER;
767 }
768
769 if (req->nfr_transport_protocol == 0) {
770 req->nfr_transport_protocol = req->nfr_ip_protocol;
771 }
772
773 req->nfr_ifp = ifp;
774
775 #if CONFIG_MACF
776 err = flow_req_check_mac_allowed(req);
777 if (err != 0) {
778 SK_ERR("flow req failed MAC check");
779 goto fail;
780 }
781 #endif /* CONFIG_MACF */
782
783 /* setup flow route and prepare saddr if needed */
784 if (__probable(has_daddr || has_dport)) {
785 struct flow_route *fr = NULL;
786 err = flow_route_find(nx, fm, ifp, req, fr_ctor,
787 fr_resolve, fr_arg, &fr);
788 if (__improbable(err != 0)) {
789 SK_ERR("flow route lookup failed");
790 ASSERT(fr == NULL);
791 goto fail;
792 }
793 ASSERT(fr != NULL);
794 /* Pick up the default source address from flow route. */
795 if (!has_saddr) {
796 *saddr = fr->fr_laddr;
797 SIN(saddr)->sin_port = sport;
798 }
799 req->nfr_route = fr;
800 fr = NULL;
801 }
802
803 err = flow_req_prepare_namespace(req);
804 if (err != 0) {
805 goto fail;
806 }
807
808 return 0;
809
810 fail:
811 VERIFY(err != 0);
812 if (req->nfr_route != NULL) {
813 flow_route_release(req->nfr_route);
814 req->nfr_route = NULL;
815 }
816 SK_ERR("preparation failed (err %d)", err);
817 return err;
818 }
819
/*
 * Release the namespace reservations held by a flow request, but
 * only those this module acquired itself: reservations marked as
 * externally supplied (EXT_PORT_RSV / EXT_PROTO_RSV) stay with
 * their external owner.
 */
static void
flow_req_cleanup(struct nx_flow_req *req)
{
	/* port (netns) reservation, unless externally owned */
	if (NETNS_TOKEN_VALID(&req->nfr_port_reservation) &&
	    !(req->nfr_flags & NXFLOWREQF_EXT_PORT_RSV)) {
		netns_release(&req->nfr_port_reservation);
	}

	/* protocol namespace reservation, unless externally owned */
	if (protons_token_is_valid(req->nfr_proto_reservation) &&
	    !(req->nfr_flags & NXFLOWREQF_EXT_PROTO_RSV)) {
		protons_release(&req->nfr_proto_reservation);
	}

	/* custom IPsec reservation (always internally acquired) */
	if (key_custom_ipsec_token_is_valid(req->nfr_ipsec_reservation)) {
		key_release_custom_ipsec(&req->nfr_ipsec_reservation);
	}
}
837
838 #if SK_LOG
839 /* Hoisted out of line to reduce kernel stack footprint */
840 SK_LOG_ATTRIBUTE
841 static void
flow_req_dump(char * desc,struct nx_flow_req * req)842 flow_req_dump(char *desc, struct nx_flow_req *req)
843 {
844 if (!(sk_verbose & SK_VERB_FLOW)) {
845 return;
846 }
847
848 union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
849 union sockaddr_in_4_6 *daddr = &req->nfr_daddr;
850 uint8_t protocol = req->nfr_ip_protocol;
851 char src_s[MAX_IPv6_STR_LEN];
852 char dst_s[MAX_IPv6_STR_LEN];
853 uint8_t sipver = 0, dipver = 0;
854 uint16_t sport = 0, dport = 0;
855 uuid_string_t uuid_s;
856
857 // unsanitized req, treat source and destination AF separately
858 if (saddr->sa.sa_family == AF_INET) {
859 sipver = IPVERSION;
860 (void) inet_ntop(AF_INET, &SIN(saddr)->sin_addr, src_s,
861 sizeof(src_s));
862 sport = ntohs(saddr->sin.sin_port);
863 } else if (saddr->sa.sa_family == AF_INET6) {
864 sipver = IPV6_VERSION;
865 (void) inet_ntop(AF_INET6, &SIN6(saddr)->sin6_addr, src_s,
866 sizeof(src_s));
867 sport = ntohs(saddr->sin6.sin6_port);
868 } else {
869 sipver = 0;
870 strlcpy(src_s, "INV", sizeof(src_s));
871 }
872 if (daddr->sa.sa_family == AF_INET) {
873 dipver = IPVERSION;
874 (void) inet_ntop(AF_INET, &SIN(daddr)->sin_addr, dst_s,
875 sizeof(dst_s));
876 dport = ntohs(daddr->sin.sin_port);
877 } else if (daddr->sa.sa_family == AF_INET6) {
878 dipver = IPV6_VERSION;
879 (void) inet_ntop(AF_INET6, &SIN6(saddr)->sin6_addr, dst_s,
880 sizeof(dst_s));
881 dport = ntohs(daddr->sin6.sin6_port);
882 } else {
883 dipver = 0;
884 strlcpy(dst_s, "INV", sizeof(src_s));
885 }
886
887 SK_DF(SK_VERB_FLOW,
888 "%s %s sipver=%u,dipver=%u,src=%s,dst=%s,proto=%d,sport=%u,dport=%d"
889 " nx_port=%u,flags 0x%b", desc, sk_uuid_unparse(req->nfr_flow_uuid,
890 uuid_s), sipver, dipver, src_s, dst_s, protocol, sport, dport,
891 req->nfr_nx_port, req->nfr_flags, NXFLOWREQF_BITS);
892 }
893 #else
894 #define flow_req_dump(str, req) do { ((void)0); } while (0)
895 #endif /* SK_LOG */
896
897 /*
898 * Upon success, returns a non-NULL fb that is (writer) locked.
899 */
int
flow_mgr_flow_add(struct kern_nexus *nx, struct flow_mgr *fm,
    struct flow_owner *fo, struct ifnet *ifp, struct nx_flow_req *req,
    flow_route_ctor_fn_t fr_ctor, flow_route_resolve_fn_t fr_resolve,
    void *fr_arg)
{
	struct flow_entry *fe;
	int err = 0;

	ASSERT(ifp != NULL);
	ASSERT(fr_ctor != NULL && fr_resolve != NULL);
	/* caller must hold the owner's bucket lock */
	FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));

	flow_req_dump("req", req);

	/* ASIS requests skip validation/canonicalization entirely */
	if (!(req->nfr_flags & NXFLOWREQF_ASIS)) {
		err = flow_req_prepare(req, nx, fm, ifp, fr_ctor, fr_resolve, fr_arg);
		if (err != 0) {
			/*
			 * NOTE(review): this path returns without the
			 * flow_req_cleanup() run by the fail: label;
			 * presumably flow_req_prepare() released
			 * everything it acquired -- verify.
			 */
			SK_ERR("flow req preparation failure (err %d)", err);
			return err;
		}
	}

	/*
	 * Add entry in flowswitch table; upon success, flow entry adds a
	 * retain count on the flow route (we'll always need to release the
	 * refcnt from flow_route_find), and the local address:port of the
	 * flow entry will be set.
	 */
	fe = flow_entry_alloc(fo, req, &err);
	if (__improbable(fe == NULL)) {
		ASSERT(err != 0);
		goto fail;
	}

	/* a source-port flow must hold a netns token unless ASIS */
	VERIFY(NETNS_TOKEN_VALID(&fe->fe_port_reservation) ||
	    !(fe->fe_key.fk_mask & FKMASK_SPORT) ||
	    req->nfr_flags & NXFLOWREQF_ASIS);
	/* exactly one of: flowadv requested, or no flowadv index set */
	VERIFY((req->nfr_flags & NXFLOWREQF_FLOWADV) ^
	    (req->nfr_flowadv_idx == FLOWADV_IDX_NONE));
	req->nfr_flowadv_idx = fe->fe_adv_idx;

	flow_req_dump("added ", req);

	/* drop the local ref from flow_entry_alloc(); table holds its own */
	if (fe != NULL) {
		flow_entry_release(&fe);
	}

	/* report scope IDs for link-local addresses back to the caller */
	struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
	if (req->nfr_saddr.sa.sa_family == AF_INET6 &&
	    IN6_IS_SCOPE_EMBED(&req->nfr_saddr.sin6.sin6_addr)) {
		req->nfr_saddr.sin6.sin6_scope_id = ifnet_index(
			fsw->fsw_ifp);
	}
	if (req->nfr_daddr.sa.sa_family == AF_INET6 &&
	    IN6_IS_SCOPE_EMBED(&req->nfr_daddr.sin6.sin6_addr)) {
		req->nfr_daddr.sin6.sin6_scope_id = ifnet_index(
			fsw->fsw_ifp);
	}

	return 0;

fail:
	VERIFY(err != 0);
	/* release any reservations taken during preparation */
	flow_req_cleanup(req);

	return err;
}
968
969 struct flow_owner_bucket *
flow_mgr_get_fob_by_pid(struct flow_mgr * fm,pid_t pid)970 flow_mgr_get_fob_by_pid(struct flow_mgr *fm, pid_t pid)
971 {
972 return flow_mgr_get_fob_at_idx(fm,
973 (pid % fm->fm_owner_buckets_cnt));
974 }
975
/*
 * Linear scan of every flow owner in every owner bucket for a flow
 * entry with the given UUID; returns the entry, or NULL if no match.
 *
 * NOTE(review): despite the "_rlock" suffix, the bucket lock is
 * dropped before returning; the entry's lifetime presumably rests on
 * a reference taken by flow_entry_find_by_uuid() -- verify.
 */
struct flow_entry *
flow_mgr_get_fe_by_uuid_rlock(struct flow_mgr *fm, uuid_t uuid)
{
	uint32_t i;
	struct flow_owner_bucket *fob;
	struct flow_owner *fo;
	struct flow_entry *fe;

	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		fob = flow_mgr_get_fob_at_idx(fm, i);
		/* spin variant: we expect to hold each bucket briefly */
		FOB_LOCK_SPIN(fob);
		RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
			fe = flow_entry_find_by_uuid(fo, uuid);
			if (fe != NULL) {
				/* convert spin hold before full unlock */
				FOB_LOCK_CONVERT(fob);
				FOB_UNLOCK(fob);
				return fe;
			}
		}
		FOB_UNLOCK(fob);
	}
	return NULL;
}
999
/*
 * Hash a destination address (IPv4 or IPv6) onto a flow route
 * bucket.  Uses a Jenkins-style 3-word mix seeded with flow_seed;
 * the final value is masked by the bucket count, which must be a
 * power of two (asserted in flow_mgr_create()).
 */
struct flow_route_bucket *
flow_mgr_get_frb_by_addr(struct flow_mgr *fm,
    union sockaddr_in_4_6 *daddr)
{
	/* 0x9e3779b9: the golden-ratio constant used by this mix */
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = flow_seed;

	switch (SA(daddr)->sa_family) {
	case AF_INET: {
		/*
		 * Spread the 4 IPv4 address bytes across a and b
		 * (last byte into b, the rest shifted into a).
		 */
		uint8_t *p = (uint8_t *)&SIN(daddr)->sin_addr.s_addr;
		b += ((uint32_t)p[3]);
		a += ((uint32_t)p[2]) << 24;
		a += ((uint32_t)p[1]) << 16;
		a += ((uint32_t)p[0]) << 8;
		break;
	}

	case AF_INET6: {
		/* fold all four 32-bit words of the IPv6 address in */
		b += SIN6(daddr)->sin6_addr.s6_addr32[3];
		a += SIN6(daddr)->sin6_addr.s6_addr32[2];
		a += SIN6(daddr)->sin6_addr.s6_addr32[1];
		a += SIN6(daddr)->sin6_addr.s6_addr32[0];
		break;
	}

	default:
		/* callers must pass a sanitized INET/INET6 sockaddr */
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* mix */
	a -= b; a -= c; a ^= (c >> 13);
	b -= c; b -= a; b ^= (a << 8);
	c -= a; c -= b; c ^= (b >> 13);
	a -= b; a -= c; a ^= (c >> 12);
	b -= c; b -= a; b ^= (a << 16);
	c -= a; c -= b; c ^= (b >> 5);
	a -= b; a -= c; a ^= (c >> 3);
	b -= c; b -= a; b ^= (a << 10);
	c -= a; c -= b; c ^= (b >> 15);

	/* power-of-two bucket count: mask instead of modulo */
	c &= (fm->fm_route_buckets_cnt - 1);

	return flow_mgr_get_frb_at_idx(fm, c);
}
1045
1046 struct flow_route_id_bucket *
flow_mgr_get_frib_by_uuid(struct flow_mgr * fm,uuid_t fr_uuid)1047 flow_mgr_get_frib_by_uuid(struct flow_mgr *fm, uuid_t fr_uuid)
1048 {
1049 union {
1050 uuid_t uuid __sk_aligned(8);
1051 uint64_t u64[2];
1052 } u;
1053 uint64_t key;
1054
1055 _CASSERT(sizeof(u.uuid) == sizeof(u.u64));
1056 uuid_copy(u.uuid, fr_uuid);
1057
1058 /* XOR fold UUID down to 4-bytes */
1059 key = (u.u64[0] ^ u.u64[1]);
1060 key = ((key >> 32) ^ (key & 0xffffffff));
1061
1062 /* add some offset to get more entropy */
1063 return flow_mgr_get_frib_at_idx(fm,
1064 ((uint32_t)key % fm->fm_route_id_buckets_cnt));
1065 }
1066
1067 static int
flow_hash_mask_add(struct flow_mgr * fm,uint32_t mask,int32_t v)1068 flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask, int32_t v)
1069 {
1070 for (uint32_t i = 0; i < FKMASK_IDX_MAX; i++) {
1071 if (fm->fm_flow_hash_masks[i] == mask) {
1072 atomic_add_32(&fm->fm_flow_hash_count[i], v);
1073 return 0;
1074 }
1075 }
1076 SK_ERR("unkown hash mask 0x%x", mask);
1077 return ENOTSUP;
1078 }
1079
/*
 * Record that one more flow in `fm' uses hash mask `mask'.
 * Returns 0 on success, ENOTSUP if the mask is not registered.
 */
int
flow_mgr_flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask)
{
	return flow_hash_mask_add(fm, mask, 1);
}
1085
/*
 * Record that one fewer flow in `fm' uses hash mask `mask'.
 * Returns 0 on success, ENOTSUP if the mask is not registered.
 */
int
flow_mgr_flow_hash_mask_del(struct flow_mgr *fm, uint32_t mask)
{
	return flow_hash_mask_add(fm, mask, -1);
}
1091
#if SK_LOG
/* Log the lookup key before a flow table search (SK_LOG builds only). */
SK_NO_INLINE_ATTRIBUTE
static void
__flow_mgr_find_fe_by_key_prelog(struct flow_key *key)
{
	SK_LOG_VAR(char dbgbuf[FLOWENTRY_DBGBUF_SIZE]);
	SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP, "key %s",
	    fk_as_string(key, dbgbuf, sizeof(dbgbuf)));
}

/* Log the outcome of a flow table search (SK_LOG builds only). */
SK_NO_INLINE_ATTRIBUTE
static void
__flow_mgr_find_fe_by_key_epilog(struct flow_entry *fe)
{
	SK_LOG_VAR(char dbgbuf[FLOWENTRY_DBGBUF_SIZE]);
	if (fe != NULL) {
		SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP, "fe 0x%llx \"%s\"",
		    SK_KVA(fe), fe_as_string(fe, dbgbuf, sizeof(dbgbuf)));
	} else {
		SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP, "fe not found");
	}
}
#else
/* non-SK_LOG builds: the pre/epilog hooks compile away to nothing */
#define __flow_mgr_find_fe_by_key_prelog(key) do { ((void)0); } while (0)
#define __flow_mgr_find_fe_by_key_epilog(fe) do { ((void)0); } while (0)
#endif /* SK_LOG */
1118
1119 struct flow_entry *
flow_mgr_find_fe_by_key(struct flow_mgr * fm,struct flow_key * key)1120 flow_mgr_find_fe_by_key(struct flow_mgr *fm, struct flow_key *key)
1121 {
1122 struct cuckoo_node *node = NULL;
1123 struct flow_entry *fe = NULL;
1124 uint32_t hash = 0;
1125 uint16_t saved_mask = key->fk_mask;
1126
1127 __flow_mgr_find_fe_by_key_prelog(key);
1128
1129 for (int i = 0; i < FKMASK_IDX_MAX; i++) {
1130 size_t count = fm->fm_flow_hash_count[i];
1131 uint16_t mask = fm->fm_flow_hash_masks[i];
1132 if (count == 0 || mask == 0) {
1133 SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP,
1134 "[%d] mask=%08x count=%zu skiped",
1135 i, mask, count);
1136 continue;
1137 }
1138 key->fk_mask = mask;
1139 hash = flow_key_hash(key);
1140 node = cuckoo_hashtable_find_with_hash(fm->fm_flow_table, key, hash);
1141 SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP,
1142 "[%d] mask=%08x hash %08x node 0x%llx", i, mask, hash,
1143 SK_KVA(node));
1144 if (node != NULL) {
1145 fe = container_of(node, struct flow_entry, fe_cnode);
1146 /* v4 only listener fe shouldn't get v6 connection */
1147 if (__improbable(fe->fe_key.fk_mask == FKMASK_2TUPLE &&
1148 fe->fe_key.fk_ipver == IPVERSION &&
1149 key->fk_ipver == IPV6_VERSION)) {
1150 flow_entry_release(&fe);
1151 ASSERT(fe == NULL);
1152 SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP,
1153 "\tskip v4 only fe");
1154 continue;
1155 }
1156 break;
1157 }
1158 }
1159
1160 key->fk_mask = saved_mask;
1161
1162 __flow_mgr_find_fe_by_key_epilog(fe);
1163
1164 return fe;
1165 }
1166
1167 struct flow_entry *
flow_mgr_find_conflicting_fe(struct flow_mgr * fm,struct flow_key * key)1168 flow_mgr_find_conflicting_fe(struct flow_mgr *fm, struct flow_key *key)
1169 {
1170 struct cuckoo_node *node = NULL;
1171 struct flow_entry *fe = NULL;
1172 uint32_t hash = 0;
1173
1174 hash = flow_key_hash(key);
1175 node = cuckoo_hashtable_find_with_hash(fm->fm_flow_table, key, hash);
1176 if (node != NULL) {
1177 fe = container_of(node, struct flow_entry, fe_cnode);
1178 return fe;
1179 }
1180
1181 /* listener flow confliction will be checked at netns reservation */
1182 return fe;
1183 }
1184
/*
 * Invoke `flow_handler' on every flow entry currently in the flow
 * table, by walking the cuckoo hashtable and converting each node
 * back to its containing flow entry.
 */
void
flow_mgr_foreach_flow(struct flow_mgr *fm,
    void (^flow_handler)(struct flow_entry *fe))
{
	cuckoo_hashtable_foreach(fm->fm_flow_table,
	    ^(struct cuckoo_node *node, uint32_t hv) {
#pragma unused(hv)
		struct flow_entry *fe;
		fe = container_of(node, struct flow_entry, fe_cnode);
		flow_handler(fe);
	}
	    );
}
1198
1199 struct flow_entry *
flow_mgr_get_host_fe(struct flow_mgr * fm)1200 flow_mgr_get_host_fe(struct flow_mgr *fm)
1201 {
1202 struct flow_entry *fe;
1203 fe = fm->fm_host_fe;
1204 flow_entry_retain(fe);
1205 return fe;
1206 }
1207