1 /*
2 * Copyright (c) 2015-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <skywalk/os_skywalk_private.h>
30 #include <skywalk/os_skywalk.h>
31 #include <skywalk/nexus/flowswitch/fsw_var.h>
32 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
33 #include <netinet/in.h>
34 #include <netinet/in_var.h>
35 #include <netinet6/ip6_var.h>
36 #include <netkey/key.h>
37
38 #include <skywalk/nexus/flowswitch/flow/flow_var.h>
39
40 #if CONFIG_MACF
41 #include <security/mac_framework.h>
42 #endif /* CONFIG_MACF */
43
44 #include <net/net_api_stats.h>
45
46 #define SKMEM_TAG_FSW_FLOW_MGR "com.apple.skywalk.fsw.flow_mgr"
47 static kern_allocation_name_t skmem_tag_fsw_flow_mgr;
48
49 static LCK_GRP_DECLARE(flow_mgr_lock_group, "sk_flow_mgr_lock");
50 static LCK_RW_DECLARE(flow_mgr_lock, &flow_mgr_lock_group);
51
52 static int fm_cmp(const struct flow_mgr *,
53 const struct flow_mgr *);
54
55 RB_HEAD(flow_mgr_tree, flow_mgr);
56 RB_PROTOTYPE_PREV(flow_mgr_tree, flow_mgr, fm_link, fm_cmp);
57 RB_GENERATE_PREV(flow_mgr_tree, flow_mgr, fm_link, fm_cmp);
58
59 /* protected by the global lock flow_mgr_lock */
60 static struct flow_mgr_tree flow_mgr_head;
61
62 static int __flow_mgr_inited = 0;
63
/*
 * One-time module initialization: allocate the kernel allocation tag
 * used to account flow_mgr allocations and reset the global RB-tree
 * of flow managers.  Must be called exactly once, before any
 * flow_mgr_create().
 */
void
flow_mgr_init(void)
{
	ASSERT(!__flow_mgr_inited);

	/* tag passed to sk_alloc_type() when allocating flow_mgr objects */
	ASSERT(skmem_tag_fsw_flow_mgr == NULL);
	skmem_tag_fsw_flow_mgr =
	    kern_allocation_name_allocate(SKMEM_TAG_FSW_FLOW_MGR, 0);
	ASSERT(skmem_tag_fsw_flow_mgr != NULL);

	/* global tree of managers, protected by flow_mgr_lock */
	RB_INIT(&flow_mgr_head);
	__flow_mgr_inited = 1;
}
77
78 void
flow_mgr_fini(void)79 flow_mgr_fini(void)
80 {
81 if (__flow_mgr_inited) {
82 VERIFY(RB_EMPTY(&flow_mgr_head));
83
84 if (skmem_tag_fsw_flow_mgr != NULL) {
85 kern_allocation_name_release(skmem_tag_fsw_flow_mgr);
86 skmem_tag_fsw_flow_mgr = NULL;
87 }
88
89 __flow_mgr_inited = 0;
90 }
91 }
92
93 static int
__fe_cuckoo_cmp(struct cuckoo_node * node,void * key0)94 __fe_cuckoo_cmp(struct cuckoo_node *node, void *key0)
95 {
96 struct flow_entry *fe = container_of(node, struct flow_entry, fe_cnode);
97 struct flow_key *key = key0;
98 const struct flow_key *mask;
99
100 /*
101 * This can probably be made more efficient by having "mask" be
102 * set by the original caller at the time the key is initialized,
103 * though that needs to be done carefully to ensure there is no
104 * mismatch between fk_mask value and "mask" itself.
105 */
106 switch (key->fk_mask) {
107 case FKMASK_5TUPLE:
108 mask = &fk_mask_5tuple;
109 break;
110 case FKMASK_4TUPLE:
111 mask = &fk_mask_4tuple;
112 break;
113 case FKMASK_3TUPLE:
114 mask = &fk_mask_3tuple;
115 break;
116 case FKMASK_2TUPLE:
117 mask = &fk_mask_2tuple;
118 break;
119 case FKMASK_IPFLOW3:
120 mask = &fk_mask_ipflow3;
121 break;
122 case FKMASK_IPFLOW2:
123 mask = &fk_mask_ipflow2;
124 break;
125 case FKMASK_IPFLOW1:
126 mask = &fk_mask_ipflow1;
127 break;
128 default:
129 return flow_key_cmp(&fe->fe_key, key);
130 }
131
132 return flow_key_cmp_mask(&fe->fe_key, key, mask);
133 }
134
135 static void
__fe_cuckoo_retain(struct cuckoo_node * node)136 __fe_cuckoo_retain(struct cuckoo_node *node)
137 {
138 struct flow_entry *fe = container_of(node, struct flow_entry, fe_cnode);
139 return flow_entry_retain(fe);
140 }
141
142 static void
__fe_cuckoo_release(struct cuckoo_node * node)143 __fe_cuckoo_release(struct cuckoo_node *node)
144 {
145 #pragma unused(node)
146 struct flow_entry *fe = container_of(node, struct flow_entry, fe_cnode);
147 flow_entry_release(&fe);
148 }
149
/*
 * Allocate and initialize a flow manager: the cuckoo flow table plus
 * the owner, route and route-id bucket arrays; then assign a UUID and
 * insert the manager into the global UUID-keyed RB-tree.
 *
 * fe_cnt   - capacity hint for the cuckoo flow table.
 * fob_cnt  - number of flow owner buckets (non-zero).
 * frb_cnt  - number of flow route buckets; must be a power of two
 *            because flow_mgr_get_frb_by_addr() masks its hash with
 *            (frb_cnt - 1).
 * frib_cnt - number of flow route-id buckets (non-zero).
 *
 * Returns the new manager, or NULL if any table allocation fails.
 * NOTE(review): the failure paths call flow_mgr_destroy() before
 * fm_uuid is generated and before the RB_INSERT — destroy ASSERTs a
 * non-null UUID and RB_REMOVEs the node; confirm destroy tolerates a
 * never-inserted manager on DEBUG builds.
 */
struct flow_mgr *
flow_mgr_create(size_t fe_cnt, size_t fob_cnt,
    size_t frb_cnt, size_t frib_cnt)
{
	struct flow_mgr *fm = NULL;
	size_t fob_sz, frb_sz, frib_sz;
	size_t fob_tot_sz, frb_tot_sz, frib_tot_sz;
	uint32_t i;

	/* caller needs to ensure frb_cnt is a power of two (hash mask) */
	ASSERT(frb_cnt != 0 && ((frb_cnt & (frb_cnt - 1)) == 0));
	ASSERT(fob_cnt != 0);
	ASSERT(frib_cnt != 0);

	/* Z_NOFAIL: allocation cannot return NULL */
	fm = sk_alloc_type(struct flow_mgr, Z_WAITOK | Z_NOFAIL, skmem_tag_fsw_flow_mgr);

	/* flow table keyed by flow_key, with per-entry refcounting hooks */
	struct cuckoo_hashtable_params p = {
		.cht_capacity = fe_cnt,
		.cht_obj_cmp = __fe_cuckoo_cmp,
		.cht_obj_retain = __fe_cuckoo_retain,
		.cht_obj_release = __fe_cuckoo_release,
	};
	fm->fm_flow_table = cuckoo_hashtable_create(&p);
	if (fm->fm_flow_table == NULL) {
		flow_mgr_destroy(fm);
		return NULL;
	}

	/*
	 * flow_owner_bucket cache-aligned objects.
	 */
	fm->fm_owner_buckets = flow_owner_buckets_alloc(fob_cnt, &fob_sz, &fob_tot_sz);
	if (fm->fm_owner_buckets == NULL) {
		flow_mgr_destroy(fm);
		return NULL;
	}
	/* const overrides: fields are declared const; write through casts */
	*(size_t *)(uintptr_t)&fm->fm_owner_buckets_cnt = fob_cnt;
	*(size_t *)(uintptr_t)&fm->fm_owner_bucket_sz = fob_sz;
	*(size_t *)(uintptr_t)&fm->fm_owner_bucket_tot_sz = fob_tot_sz;

	/*
	 * flow_route_bucket cache-aligned objects.
	 */
	fm->fm_route_buckets = flow_route_buckets_alloc(frb_cnt, &frb_sz, &frb_tot_sz);
	if (fm->fm_route_buckets == NULL) {
		flow_mgr_destroy(fm);
		return NULL;
	}
	/* const overrides */
	*(size_t *)(uintptr_t)&fm->fm_route_buckets_cnt = frb_cnt;
	*(size_t *)(uintptr_t)&fm->fm_route_bucket_sz = frb_sz;
	*(size_t *)(uintptr_t)&fm->fm_route_bucket_tot_sz = frb_tot_sz;

	/*
	 * flow_route_id_bucket cache-aligned objects.
	 */
	fm->fm_route_id_buckets =
	    flow_route_id_buckets_alloc(frib_cnt, &frib_sz, &frib_tot_sz);
	if (fm->fm_route_id_buckets == NULL) {
		flow_mgr_destroy(fm);
		return NULL;
	}
	/* const overrides */
	*(size_t *)(uintptr_t)&fm->fm_route_id_buckets_cnt = frib_cnt;
	*(size_t *)(uintptr_t)&fm->fm_route_id_bucket_sz = frib_sz;
	*(size_t *)(uintptr_t)&fm->fm_route_id_bucket_tot_sz = frib_tot_sz;

	/* construct flow_owner_buckets; each remembers its own index */
	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		struct flow_owner_bucket *fob = flow_mgr_get_fob_at_idx(fm, i);
		flow_owner_bucket_init(fob);
		/* const override */
		*(size_t *)(uintptr_t)&fob->fob_idx = i;
	}

	/* construct flow_route_buckets */
	for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
		struct flow_route_bucket *frb = flow_mgr_get_frb_at_idx(fm, i);
		flow_route_bucket_init(frb);
		/* const override */
		*(size_t *)(uintptr_t)&frb->frb_idx = i;
	}

	/* construct flow_route_id_buckets */
	for (i = 0; i < fm->fm_route_id_buckets_cnt; i++) {
		struct flow_route_id_bucket *frib =
		    flow_mgr_get_frib_at_idx(fm, i);
		flow_route_id_bucket_init(frib);
		/* const override */
		*(size_t *)(uintptr_t)&frib->frib_idx = i;
	}

	/* identity used as key in the global RB-tree */
	uuid_generate_random(fm->fm_uuid);

	lck_rw_lock_exclusive(&flow_mgr_lock);
	RB_INSERT(flow_mgr_tree, &flow_mgr_head, fm);
#if DEBUG
	struct flow_mgr find;
	uuid_copy(find.fm_uuid, fm->fm_uuid);
	/* make sure our tree compare routine is sane */
	ASSERT(RB_FIND(flow_mgr_tree,
	    &flow_mgr_head, &find) == fm);
#endif /* DEBUG */
	lck_rw_done(&flow_mgr_lock);

	/* lookup masks tried in order, most-specific first */
	fm->fm_flow_hash_masks[0] = FKMASK_5TUPLE;
	fm->fm_flow_hash_masks[1] = FKMASK_4TUPLE;
	fm->fm_flow_hash_masks[2] = FKMASK_3TUPLE;
	fm->fm_flow_hash_masks[3] = FKMASK_2TUPLE;
	fm->fm_flow_hash_masks[4] = FKMASK_IPFLOW3;
	fm->fm_flow_hash_masks[5] = FKMASK_IPFLOW2;
	fm->fm_flow_hash_masks[6] = FKMASK_IPFLOW1;

	memset(&fm->fm_flow_hash_count, 0, sizeof(fm->fm_flow_hash_count));

	return fm;
}
268
269 void
flow_mgr_destroy(struct flow_mgr * fm)270 flow_mgr_destroy(struct flow_mgr *fm)
271 {
272 uint32_t i;
273
274 lck_rw_lock_exclusive(&flow_mgr_lock);
275 ASSERT(!uuid_is_null(fm->fm_uuid));
276
277 if (fm->fm_flow_table != NULL) {
278 cuckoo_hashtable_free(fm->fm_flow_table);
279 }
280
281 if (fm->fm_owner_buckets != NULL) {
282 for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
283 struct flow_owner_bucket *fob =
284 flow_mgr_get_fob_at_idx(fm, i);
285 ASSERT(fob->fob_idx == i);
286 flow_owner_bucket_destroy(fob);
287 }
288 flow_owner_buckets_free(fm->fm_owner_buckets,
289 fm->fm_owner_bucket_tot_sz);
290 fm->fm_owner_buckets = NULL;
291 *(uint32_t *)(uintptr_t)&fm->fm_owner_buckets_cnt = 0;
292 *(uint32_t *)(uintptr_t)&fm->fm_owner_bucket_sz = 0;
293 *(uint32_t *)(uintptr_t)&fm->fm_owner_bucket_tot_sz = 0;
294 }
295 ASSERT(fm->fm_owner_buckets_cnt == 0);
296 ASSERT(fm->fm_owner_bucket_sz == 0);
297 ASSERT(fm->fm_owner_bucket_tot_sz == 0);
298
299 if (fm->fm_route_buckets != NULL) {
300 for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
301 struct flow_route_bucket *frb =
302 flow_mgr_get_frb_at_idx(fm, i);
303 ASSERT(frb->frb_idx == i);
304 flow_route_bucket_destroy(frb);
305 }
306 flow_route_buckets_free(fm->fm_route_buckets,
307 fm->fm_route_bucket_tot_sz);
308 fm->fm_route_buckets = NULL;
309 *(uint32_t *)(uintptr_t)&fm->fm_route_buckets_cnt = 0;
310 *(uint32_t *)(uintptr_t)&fm->fm_route_bucket_sz = 0;
311 *(uint32_t *)(uintptr_t)&fm->fm_route_bucket_tot_sz = 0;
312 }
313 ASSERT(fm->fm_route_buckets_cnt == 0);
314 ASSERT(fm->fm_route_bucket_sz == 0);
315 ASSERT(fm->fm_route_bucket_tot_sz == 0);
316
317 if (fm->fm_route_id_buckets != NULL) {
318 for (i = 0; i < fm->fm_route_id_buckets_cnt; i++) {
319 struct flow_route_id_bucket *frib =
320 flow_mgr_get_frib_at_idx(fm, i);
321 ASSERT(frib->frib_idx == i);
322 flow_route_id_bucket_destroy(frib);
323 }
324 flow_route_id_buckets_free(fm->fm_route_id_buckets,
325 fm->fm_route_id_bucket_tot_sz);
326 fm->fm_route_id_buckets = NULL;
327 *(uint32_t *)(uintptr_t)&fm->fm_route_id_buckets_cnt = 0;
328 *(uint32_t *)(uintptr_t)&fm->fm_route_id_bucket_sz = 0;
329 *(uint32_t *)(uintptr_t)&fm->fm_route_id_bucket_tot_sz = 0;
330 }
331 ASSERT(fm->fm_route_id_buckets_cnt == 0);
332 ASSERT(fm->fm_route_id_bucket_sz == 0);
333 ASSERT(fm->fm_route_id_bucket_tot_sz == 0);
334
335 uuid_clear(fm->fm_uuid);
336 RB_REMOVE(flow_mgr_tree, &flow_mgr_head, fm);
337 lck_rw_done(&flow_mgr_lock);
338
339 sk_free_type(struct flow_mgr, fm);
340 }
341
/*
 * Quiesce a flow manager before destruction: purge every flow entry
 * and every flow route.
 *
 * Lock discipline: ALL owner buckets are locked (and marked FOBF_DEAD)
 * before any purge begins, and unlocked only after every bucket has
 * been purged; likewise all route buckets and route-id buckets are
 * write-locked up front.  Unlocks happen in the reverse order of the
 * acquisitions (frib before frb).
 */
void
flow_mgr_terminate(struct flow_mgr *fm)
{
	uint32_t i;

	/*
	 * Purge all flow entries.
	 */
	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		struct flow_owner_bucket *fob =
		    flow_mgr_get_fob_at_idx(fm, i);
		FOB_LOCK(fob);
		/* mark dead so no new flows can be added while we purge */
		fob->fob_busy_flags |= FOBF_DEAD;
	}
	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		struct flow_owner_bucket *fob =
		    flow_mgr_get_fob_at_idx(fm, i);
		SK_DF(SK_VERB_FLOW, "purging fob 0x%llx [%u]", SK_KVA(fob), i);
		flow_owner_bucket_purge_all(fob);
	}

	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		FOB_UNLOCK(flow_mgr_get_fob_at_idx(fm, i));
	}

	/*
	 * Purge all flow routes.
	 */
	for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
		struct flow_route_bucket *frb =
		    flow_mgr_get_frb_at_idx(fm, i);
		FRB_WLOCK(frb);
	}
	for (i = 0; i < fm->fm_route_id_buckets_cnt; i++) {
		FRIB_WLOCK(flow_mgr_get_frib_at_idx(fm, i));
	}

	for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
		struct flow_route_bucket *frb =
		    flow_mgr_get_frb_at_idx(fm, i);
		SK_DF(SK_VERB_FLOW, "purging frb 0x%llx [%u]", SK_KVA(frb), i);
		flow_route_bucket_purge_all(frb);
	}

	/* release in reverse order of acquisition */
	for (i = 0; i < fm->fm_route_id_buckets_cnt; i++) {
		FRIB_WUNLOCK(flow_mgr_get_frib_at_idx(fm, i));
	}
	for (i = 0; i < fm->fm_route_buckets_cnt; i++) {
		FRB_WUNLOCK(flow_mgr_get_frb_at_idx(fm, i));
	}
}
393
394 void
flow_mgr_setup_host_flow(struct flow_mgr * fm,struct nx_flowswitch * fsw)395 flow_mgr_setup_host_flow(struct flow_mgr *fm, struct nx_flowswitch *fsw)
396 {
397 struct flow_entry *host_fe = fe_alloc(true);
398 host_fe->fe_key.fk_mask = 0;
399 host_fe->fe_nx_port = FSW_VP_HOST;
400 *(struct nx_flowswitch **)(uintptr_t)&host_fe->fe_fsw = fsw;
401 host_fe->fe_svc_class = KPKT_SC_BE;
402 host_fe->fe_pid = proc_getpid(kernproc);
403 host_fe->fe_rx_process = fsw_host_rx;
404 (void) snprintf(host_fe->fe_proc_name, sizeof(host_fe->fe_proc_name),
405 "%s", proc_name_address(kernproc));
406 flow_entry_retain(host_fe);
407 fm->fm_host_fe = host_fe;
408 KPKTQ_INIT(&host_fe->fe_rx_pktq);
409 KPKTQ_INIT(&host_fe->fe_rx_pktq);
410 }
411
/*
 * Drop the flow manager's reference on the host flow entry taken in
 * flow_mgr_setup_host_flow().  flow_entry_release() takes the field
 * by reference — presumably it also clears fm_host_fe; confirm against
 * its definition.
 */
void
flow_mgr_teardown_host_flow(struct flow_mgr *fm)
{
	flow_entry_release(&fm->fm_host_fe);
}
417
418 /*
419 * Must be matched with a call to flow_mgr_unlock(). Upon success will
420 * return the flow manager address of the specified UUID, and will acquire
421 * the global flow_mgr_lock as reader. The caller is then expected to release
422 * the lock.
423 */
424 struct flow_mgr *
flow_mgr_find_lock(uuid_t uuid)425 flow_mgr_find_lock(uuid_t uuid)
426 {
427 struct flow_mgr *fm, find;
428
429 uuid_copy(find.fm_uuid, uuid);
430
431 lck_rw_lock_shared(&flow_mgr_lock);
432
433 fm = RB_FIND(flow_mgr_tree, &flow_mgr_head, &find);
434 if (fm == NULL) {
435 lck_rw_done(&flow_mgr_lock);
436 return NULL;
437 }
438
439 /* caller is expected to call flow_mgr_unlock() when done */
440 LCK_RW_ASSERT(&flow_mgr_lock, LCK_RW_ASSERT_SHARED);
441 return fm;
442 }
443
444 /*
445 * Must be matched with a successful call to flow_mgr_find_lock().
446 */
void
flow_mgr_unlock(void)
{
	/* drop the shared hold taken by a successful flow_mgr_find_lock() */
	lck_rw_done(&flow_mgr_lock);
}
452
/*
 * RB-tree comparator: total order of flow managers by UUID; used by
 * RB_FIND/RB_INSERT/RB_REMOVE on flow_mgr_head.
 */
static inline int
fm_cmp(const struct flow_mgr *a, const struct flow_mgr *b)
{
	return uuid_compare(a->fm_uuid, b->fm_uuid);
}
458
459 static void
flow_mgr_clear_embedded_scope_id(struct sockaddr_in6 * addr)460 flow_mgr_clear_embedded_scope_id(struct sockaddr_in6 *addr)
461 {
462 struct in6_addr *in6;
463 in6 = &addr->sin6_addr;
464 if (in6_embedded_scope && IN6_IS_SCOPE_EMBED(in6)) {
465 addr->sin6_scope_id = ntohs(in6->s6_addr16[1]);
466 in6->s6_addr16[1] = 0;
467 }
468 }
469
#if CONFIG_MACF
/*
 * Ask the MAC framework whether this flow request is permitted:
 * listener requests are checked against the source address, connected
 * requests against the destination.  Returns 0 if allowed, otherwise
 * the error from the MAC check.
 *
 * Fix: this was declared bool, which collapsed the MAC framework's
 * error code to 0/1 before the caller compared it against 0; return
 * int so the real error propagates.  Also dropped the dead store to
 * socktype in the default case.
 */
static int
flow_req_check_mac_allowed(struct nx_flow_req *req)
{
	int socktype;
	switch (req->nfr_ip_protocol) {
	case IPPROTO_TCP:
		socktype = SOCK_STREAM;
		break;

	case IPPROTO_UDP:
		socktype = SOCK_DGRAM;
		break;

	default:
		/* Custom IP protocol (datagram-like); no MAC check applies */
		return 0;
	}

	if (req->nfr_flags & NXFLOWREQF_LISTENER) {
		return mac_skywalk_flow_check_listen(req->nfr_proc, NULL,
		    &req->nfr_saddr.sa, socktype, req->nfr_ip_protocol);
	} else {
		return mac_skywalk_flow_check_connect(req->nfr_proc, NULL,
		    &req->nfr_daddr.sa, socktype, req->nfr_ip_protocol);
	}
}
#endif /* CONFIG_MACF */
499
500 static bool
flow_req_needs_netns_reservation(struct nx_flow_req * req)501 flow_req_needs_netns_reservation(struct nx_flow_req *req)
502 {
503 uint8_t proto = req->nfr_ip_protocol;
504 return proto == IPPROTO_TCP || proto == IPPROTO_UDP;
505 }
506
507 static bool
flow_req_needs_protons_reservation(struct nx_flow_req * req)508 flow_req_needs_protons_reservation(struct nx_flow_req *req)
509 {
510 uint8_t proto = req->nfr_ip_protocol;
511 return proto != IPPROTO_TCP && proto != IPPROTO_UDP &&
512 proto != IPPROTO_ESP && proto != IPPROTO_AH;
513 }
514
515 static bool
flow_req_needs_ipsec_reservation(struct nx_flow_req * req)516 flow_req_needs_ipsec_reservation(struct nx_flow_req *req)
517 {
518 uint8_t proto = req->nfr_ip_protocol;
519 return proto == IPPROTO_ESP || proto == IPPROTO_AH;
520 }
521
522 static void
flow_set_port_info(struct ns_flow_info * nfi,struct nx_flow_req * req)523 flow_set_port_info(struct ns_flow_info *nfi, struct nx_flow_req *req)
524 {
525 union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
526 union sockaddr_in_4_6 *daddr = &req->nfr_daddr;
527
528 bzero(nfi, sizeof(struct ns_flow_info));
529
530 nfi->nfi_ifp = req->nfr_ifp;
531
532 nfi->nfi_laddr = *saddr;
533 nfi->nfi_faddr = *daddr;
534
535 nfi->nfi_protocol = req->nfr_ip_protocol;
536
537 uuid_copy(nfi->nfi_flow_uuid, req->nfr_flow_uuid);
538 ASSERT(!uuid_is_null(nfi->nfi_flow_uuid));
539
540 nfi->nfi_owner_pid = req->nfr_pid;
541 if (req->nfr_epid != -1) {
542 nfi->nfi_effective_pid = req->nfr_epid;
543 proc_name(req->nfr_epid, nfi->nfi_effective_name,
544 sizeof(nfi->nfi_effective_name));
545 } else {
546 nfi->nfi_effective_pid = -1;
547 }
548
549 proc_name(req->nfr_pid, nfi->nfi_owner_name,
550 sizeof(nfi->nfi_owner_name));
551 }
552
553 static int
flow_req_prepare_namespace(struct nx_flow_req * req)554 flow_req_prepare_namespace(struct nx_flow_req *req)
555 {
556 #if SK_LOG
557 char src_s[MAX_IPv6_STR_LEN];
558 #endif /* SK_LOG */
559 int err = 0;
560
561 if (flow_req_needs_netns_reservation(req)) {
562 if (!NETNS_TOKEN_VALID(&req->nfr_port_reservation)) {
563 union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
564 struct ns_flow_info nfi;
565 netns_token ns_token;
566 flow_set_port_info(&nfi, req);
567 err = flow_namespace_create(saddr,
568 req->nfr_ip_protocol, &ns_token,
569 req->nfr_flags & NXFLOWREQF_LISTENER, &nfi);
570 if (err != 0) {
571 SK_ERR("netns for %s.%u failed",
572 sk_sa_ntop(SA(saddr), src_s, sizeof(src_s)),
573 sk_sa_get_port(SA(saddr)));
574 goto fail;
575 }
576 req->nfr_port_reservation = ns_token;
577 req->nfr_flags &= ~NXFLOWREQF_EXT_PORT_RSV;
578 } else {
579 /* Validate PID associated with provided reservation */
580 struct ns_flow_info nfi = {};
581 err = netns_get_flow_info(&req->nfr_port_reservation,
582 &nfi);
583 /* flow info could be NULL for socket flow */
584 if (!err && (req->nfr_pid != nfi.nfi_owner_pid ||
585 (req->nfr_epid != -1 && nfi.nfi_effective_pid !=
586 req->nfr_epid))) {
587 SK_ERR("netns flow info mismatch, "
588 "req_(e)pid %d(%d), nfr_(e)pid %d(%d)",
589 req->nfr_pid, req->nfr_epid,
590 nfi.nfi_owner_pid, nfi.nfi_effective_pid);
591 err = EPERM;
592 goto fail;
593 }
594 req->nfr_flags |= NXFLOWREQF_EXT_PORT_RSV;
595 }
596 }
597
598 if (flow_req_needs_ipsec_reservation(req)) {
599 union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
600 union sockaddr_in_4_6 *daddr = &req->nfr_daddr;
601 void *ipsec_token = NULL;
602 ASSERT(req->nfr_ipsec_reservation == NULL);
603 err = key_reserve_custom_ipsec(&ipsec_token, saddr,
604 daddr, req->nfr_ip_protocol);
605 if (err != 0) {
606 SK_ERR("custom ipsec %u reserve %s failed",
607 req->nfr_ip_protocol,
608 sk_sa_ntop(SA(saddr), src_s, sizeof(src_s)));
609 goto fail;
610 }
611 req->nfr_ipsec_reservation = ipsec_token;
612 }
613
614 if (flow_req_needs_protons_reservation(req)) {
615 struct protons_token *ns_token = NULL;
616 if (!protons_token_is_valid(req->nfr_proto_reservation)) {
617 err = protons_reserve(&ns_token, req->nfr_pid,
618 req->nfr_epid, req->nfr_ip_protocol);
619 if (err != 0) {
620 SK_ERR("protocol %u namespace failed",
621 req->nfr_ip_protocol);
622 goto fail;
623 }
624 req->nfr_flags &= ~NXFLOWREQF_EXT_PROTO_RSV;
625 req->nfr_proto_reservation = ns_token;
626 } else {
627 /* Validate PID associated with provided reservation */
628 if (!protons_token_has_matching_pid(req->nfr_proto_reservation,
629 req->nfr_pid, req->nfr_epid)) {
630 SK_ERR("protons token pid mismatch");
631 err = EPERM;
632 goto fail;
633 }
634 req->nfr_flags |= NXFLOWREQF_EXT_PROTO_RSV;
635 }
636 }
637
638 return 0;
639
640 fail:
641 VERIFY(err != 0);
642 SK_ERR("perparation failed (err %d)", err);
643 return err;
644 }
645
646 static int
flow_req_prepare(struct nx_flow_req * req,struct kern_nexus * nx,struct flow_mgr * fm,struct ifnet * ifp,flow_route_ctor_fn_t fr_ctor,flow_route_resolve_fn_t fr_resolve,void * fr_arg)647 flow_req_prepare(struct nx_flow_req *req, struct kern_nexus *nx,
648 struct flow_mgr *fm, struct ifnet *ifp, flow_route_ctor_fn_t fr_ctor,
649 flow_route_resolve_fn_t fr_resolve, void *fr_arg)
650 {
651 int err = 0;
652 union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
653 union sockaddr_in_4_6 *daddr = &req->nfr_daddr;
654 uint8_t protocol = req->nfr_ip_protocol;
655
656 sa_family_t saf, daf, xaf, af;
657
658 saf = SA(saddr)->sa_family;
659 daf = SA(daddr)->sa_family;
660 xaf = saf ^ daf;
661 if (xaf != 0 && xaf != saf && xaf != daf) {
662 SK_ERR("invalid saddr af %d daddr af %d", saf, daf);
663 return EINVAL;
664 }
665 af = (xaf == 0) ? saf : xaf;
666
667 bool has_saddr = false, has_daddr = false;
668 bool has_sport = false, has_dport = false;
669 uint16_t sport, dport;
670 uint8_t sa_len;
671 switch (af) {
672 case AF_INET:
673 sa_len = sizeof(struct sockaddr_in);
674 has_saddr = (SIN(saddr)->sin_addr.s_addr != INADDR_ANY);
675 has_daddr = (SIN(daddr)->sin_addr.s_addr != INADDR_ANY);
676 sport = SIN(saddr)->sin_port;
677 dport = SIN(daddr)->sin_port;
678 has_sport = (sport != 0);
679 has_dport = (dport != 0);
680
681 if ((has_saddr && SIN(saddr)->sin_len != sa_len) ||
682 (has_daddr && SIN(daddr)->sin_len != sa_len)) {
683 SK_ERR("sin_len invalid");
684 err = EINVAL;
685 goto fail;
686 }
687 if ((has_saddr && IN_MULTICAST(ntohl(SIN(saddr)->sin_addr.s_addr))) ||
688 (has_daddr && IN_MULTICAST(ntohl(SIN(daddr)->sin_addr.s_addr)))) {
689 SK_ERR("multicast flow not yet supported");
690 err = EADDRNOTAVAIL;
691 goto fail;
692 }
693 if (__probable(protocol == IPPROTO_TCP)) {
694 INC_ATOMIC_INT64_LIM(
695 net_api_stats.nas_nx_flow_inet6_stream_total);
696 } else {
697 INC_ATOMIC_INT64_LIM(
698 net_api_stats.nas_nx_flow_inet6_dgram_total);
699 }
700 break;
701
702 case AF_INET6:
703 sa_len = sizeof(struct sockaddr_in6);
704 has_saddr = !IN6_IS_ADDR_UNSPECIFIED(&SIN6(saddr)->sin6_addr);
705 has_daddr = !IN6_IS_ADDR_UNSPECIFIED(&SIN6(daddr)->sin6_addr);
706 sport = SIN6(saddr)->sin6_port;
707 dport = SIN6(daddr)->sin6_port;
708 has_sport = (sport != 0);
709 has_dport = (dport != 0);
710 if ((has_saddr && SIN6(saddr)->sin6_len != sa_len) ||
711 (has_daddr && SIN6(daddr)->sin6_len != sa_len)) {
712 SK_ERR("sin_len invalid");
713 err = EINVAL;
714 goto fail;
715 }
716 /* clear embedded scope if link-local src */
717 if (has_saddr) {
718 flow_mgr_clear_embedded_scope_id(SIN6(saddr));
719 if (!in6_embedded_scope && IN6_IS_SCOPE_EMBED(&SIN6(saddr)->sin6_addr)) {
720 SIN6(saddr)->sin6_scope_id = ifp->if_index;
721 }
722 }
723 if (has_daddr) {
724 flow_mgr_clear_embedded_scope_id(SIN6(daddr));
725 if (!in6_embedded_scope && IN6_IS_SCOPE_EMBED(&SIN6(daddr)->sin6_addr)) {
726 SIN6(daddr)->sin6_scope_id = ifp->if_index;
727 }
728 }
729 if ((has_saddr && IN6_IS_ADDR_MULTICAST(&SIN6(saddr)->sin6_addr)) ||
730 (has_daddr && IN6_IS_ADDR_MULTICAST(&SIN6(daddr)->sin6_addr))) {
731 SK_ERR("multicast flow not yet supported");
732 err = EADDRNOTAVAIL;
733 goto fail;
734 }
735 if (__probable(protocol == IPPROTO_TCP)) {
736 INC_ATOMIC_INT64_LIM(
737 net_api_stats.nas_nx_flow_inet_stream_total);
738 } else {
739 INC_ATOMIC_INT64_LIM(
740 net_api_stats.nas_nx_flow_inet_dgram_total);
741 }
742 break;
743
744 default:
745 SK_ERR("unknown address families saf %d daf %d", saf, daf);
746 err = EINVAL;
747 goto fail;
748 }
749
750 SA(saddr)->sa_family = SA(daddr)->sa_family = af;
751 SA(saddr)->sa_len = SA(daddr)->sa_len = sa_len;
752
753 if (__improbable(has_saddr && !flow_route_laddr_validate(saddr, ifp,
754 &req->nfr_saddr_gencnt))) {
755 #if SK_LOG
756 char src_s[MAX_IPv6_STR_LEN];
757 #endif /* SK_LOG */
758 SK_ERR("src address %s is not valid",
759 sk_sa_ntop(SA(saddr), src_s, sizeof(src_s)));
760 err = EADDRNOTAVAIL;
761 goto fail;
762 }
763
764 bool is_tcp_udp = (protocol == IPPROTO_TCP || protocol == IPPROTO_UDP);
765 if (!is_tcp_udp) {
766 if (has_sport || has_dport) {
767 SK_ERR("non-zero port for IP flow");
768 return EINVAL;
769 }
770 } else {
771 /* dst:dport as connected, 0:0 as listener, but not partial */
772 if (has_daddr != has_dport) {
773 err = EINVAL;
774 SK_ERR("invalid dst/dport for TCP/UDP (err %d)", err);
775 goto fail;
776 }
777 }
778
779 if (!has_daddr && !has_dport) {
780 req->nfr_flags |= NXFLOWREQF_LISTENER;
781 }
782
783 if (req->nfr_transport_protocol == 0) {
784 req->nfr_transport_protocol = req->nfr_ip_protocol;
785 }
786
787 req->nfr_ifp = ifp;
788
789 #if CONFIG_MACF
790 err = flow_req_check_mac_allowed(req);
791 if (err != 0) {
792 SK_ERR("flow req failed MAC check");
793 goto fail;
794 }
795 #endif /* CONFIG_MACF */
796
797 /* setup flow route and prepare saddr if needed */
798 if (__probable(has_daddr || has_dport)) {
799 struct flow_route *fr = NULL;
800 err = flow_route_find(nx, fm, ifp, req, fr_ctor,
801 fr_resolve, fr_arg, &fr);
802 if (__improbable(err != 0)) {
803 SK_ERR("flow route lookup failed");
804 ASSERT(fr == NULL);
805 goto fail;
806 }
807 ASSERT(fr != NULL);
808 /* Pick up the default source address from flow route. */
809 if (!has_saddr) {
810 *saddr = fr->fr_laddr;
811 SIN(saddr)->sin_port = sport;
812 }
813 req->nfr_route = fr;
814 fr = NULL;
815 }
816
817 err = flow_req_prepare_namespace(req);
818 if (err != 0) {
819 goto fail;
820 }
821
822 return 0;
823
824 fail:
825 VERIFY(err != 0);
826 if (req->nfr_route != NULL) {
827 flow_route_release(req->nfr_route);
828 req->nfr_route = NULL;
829 }
830 SK_ERR("preparation failed (err %d)", err);
831 return err;
832 }
833
/*
 * Release the reservations acquired by flow_req_prepare_namespace()
 * that are owned by this request.  Reservations supplied externally
 * (marked with the EXT_*_RSV flags) are left for their owners to
 * release.
 */
static void
flow_req_cleanup(struct nx_flow_req *req)
{
	/* netns port reservation, unless the caller provided it */
	if (NETNS_TOKEN_VALID(&req->nfr_port_reservation) &&
	    !(req->nfr_flags & NXFLOWREQF_EXT_PORT_RSV)) {
		netns_release(&req->nfr_port_reservation);
	}

	/* protocol-namespace reservation, unless the caller provided it */
	if (protons_token_is_valid(req->nfr_proto_reservation) &&
	    !(req->nfr_flags & NXFLOWREQF_EXT_PROTO_RSV)) {
		protons_release(&req->nfr_proto_reservation);
	}

	/* custom IPsec reservation is always owned by the request */
	if (key_custom_ipsec_token_is_valid(req->nfr_ipsec_reservation)) {
		key_release_custom_ipsec(&req->nfr_ipsec_reservation);
	}
}
851
#if SK_LOG
/*
 * Debug-log the contents of a flow request (addresses, ports,
 * protocol, flow UUID, nexus port and flags).  Hoisted out of line to
 * reduce kernel stack footprint.  No-op unless SK_VERB_FLOW is set.
 *
 * Fixes: the AF_INET6 destination branch formatted the SOURCE address
 * (SIN6(saddr)) into dst_s, and the invalid-family fallback sized the
 * copy with sizeof(src_s); both now reference the destination buffers.
 */
SK_LOG_ATTRIBUTE
static void
flow_req_dump(char *desc, struct nx_flow_req *req)
{
	if (!(sk_verbose & SK_VERB_FLOW)) {
		return;
	}

	union sockaddr_in_4_6 *saddr = &req->nfr_saddr;
	union sockaddr_in_4_6 *daddr = &req->nfr_daddr;
	uint8_t protocol = req->nfr_ip_protocol;
	char src_s[MAX_IPv6_STR_LEN];
	char dst_s[MAX_IPv6_STR_LEN];
	uint8_t sipver = 0, dipver = 0;
	uint16_t sport = 0, dport = 0;
	uuid_string_t uuid_s;

	// unsanitized req, treat source and destination AF separately
	if (saddr->sa.sa_family == AF_INET) {
		sipver = IPVERSION;
		(void) inet_ntop(AF_INET, &SIN(saddr)->sin_addr, src_s,
		    sizeof(src_s));
		sport = ntohs(saddr->sin.sin_port);
	} else if (saddr->sa.sa_family == AF_INET6) {
		sipver = IPV6_VERSION;
		(void) inet_ntop(AF_INET6, &SIN6(saddr)->sin6_addr, src_s,
		    sizeof(src_s));
		sport = ntohs(saddr->sin6.sin6_port);
	} else {
		sipver = 0;
		strlcpy(src_s, "INV", sizeof(src_s));
	}
	if (daddr->sa.sa_family == AF_INET) {
		dipver = IPVERSION;
		(void) inet_ntop(AF_INET, &SIN(daddr)->sin_addr, dst_s,
		    sizeof(dst_s));
		dport = ntohs(daddr->sin.sin_port);
	} else if (daddr->sa.sa_family == AF_INET6) {
		dipver = IPV6_VERSION;
		(void) inet_ntop(AF_INET6, &SIN6(daddr)->sin6_addr, dst_s,
		    sizeof(dst_s));
		dport = ntohs(daddr->sin6.sin6_port);
	} else {
		dipver = 0;
		strlcpy(dst_s, "INV", sizeof(dst_s));
	}

	SK_DF(SK_VERB_FLOW,
	    "%s %s sipver=%u,dipver=%u,src=%s,dst=%s,proto=%d,sport=%u,dport=%d"
	    " nx_port=%u,flags 0x%b", desc, sk_uuid_unparse(req->nfr_flow_uuid,
	    uuid_s), sipver, dipver, src_s, dst_s, protocol, sport, dport,
	    req->nfr_nx_port, req->nfr_flags, NXFLOWREQF_BITS);
}
#else
#define flow_req_dump(str, req) do { ((void)0); } while (0)
#endif /* SK_LOG */
910
911 /*
912 * Upon success, returns a non-NULL fb that is (writer) locked.
913 */
914 int
flow_mgr_flow_add(struct kern_nexus * nx,struct flow_mgr * fm,struct flow_owner * fo,struct ifnet * ifp,struct nx_flow_req * req,flow_route_ctor_fn_t fr_ctor,flow_route_resolve_fn_t fr_resolve,void * fr_arg)915 flow_mgr_flow_add(struct kern_nexus *nx, struct flow_mgr *fm,
916 struct flow_owner *fo, struct ifnet *ifp, struct nx_flow_req *req,
917 flow_route_ctor_fn_t fr_ctor, flow_route_resolve_fn_t fr_resolve,
918 void *fr_arg)
919 {
920 struct flow_entry *fe;
921 int err = 0;
922
923 ASSERT(ifp != NULL);
924 ASSERT(fr_ctor != NULL && fr_resolve != NULL);
925 FOB_LOCK_ASSERT_HELD(FO_BUCKET(fo));
926
927 flow_req_dump("req", req);
928
929 if (!(req->nfr_flags & NXFLOWREQF_ASIS)) {
930 err = flow_req_prepare(req, nx, fm, ifp, fr_ctor, fr_resolve, fr_arg);
931 if (err != 0) {
932 SK_ERR("flow req preparation failure (err %d)", err);
933 return err;
934 }
935 }
936
937 /*
938 * Add entry in flowswitch table; upon success, flow entry adds a
939 * retain count on the flow route (we'll always need to release the
940 * refcnt from flow_route_find), and the local address:port of the
941 * flow entry will be set.
942 */
943 fe = flow_entry_alloc(fo, req, &err);
944 if (__improbable(fe == NULL)) {
945 ASSERT(err != 0);
946 goto fail;
947 }
948
949 VERIFY(NETNS_TOKEN_VALID(&fe->fe_port_reservation) ||
950 !(fe->fe_key.fk_mask & FKMASK_SPORT) ||
951 req->nfr_flags & NXFLOWREQF_ASIS);
952 VERIFY((req->nfr_flags & NXFLOWREQF_FLOWADV) ^
953 (req->nfr_flowadv_idx == FLOWADV_IDX_NONE));
954 req->nfr_flowadv_idx = fe->fe_adv_idx;
955
956 flow_req_dump("added ", req);
957
958 if (fe != NULL) {
959 flow_entry_release(&fe);
960 }
961
962 struct nx_flowswitch *fsw = NX_FSW_PRIVATE(nx);
963 if (req->nfr_saddr.sa.sa_family == AF_INET6 &&
964 IN6_IS_SCOPE_EMBED(&req->nfr_saddr.sin6.sin6_addr)) {
965 req->nfr_saddr.sin6.sin6_scope_id = ifnet_index(
966 fsw->fsw_ifp);
967 }
968 if (req->nfr_daddr.sa.sa_family == AF_INET6 &&
969 IN6_IS_SCOPE_EMBED(&req->nfr_daddr.sin6.sin6_addr)) {
970 req->nfr_daddr.sin6.sin6_scope_id = ifnet_index(
971 fsw->fsw_ifp);
972 }
973
974 return 0;
975
976 fail:
977 VERIFY(err != 0);
978 flow_req_cleanup(req);
979
980 return err;
981 }
982
983 struct flow_owner_bucket *
flow_mgr_get_fob_by_pid(struct flow_mgr * fm,pid_t pid)984 flow_mgr_get_fob_by_pid(struct flow_mgr *fm, pid_t pid)
985 {
986 return flow_mgr_get_fob_at_idx(fm,
987 (pid % fm->fm_owner_buckets_cnt));
988 }
989
/*
 * Linear scan for a flow entry by UUID across every owner bucket and
 * every owner within each bucket.  Each bucket is spin-locked for the
 * duration of its scan; on a hit the spin lock is converted to a full
 * lock before being dropped, and the entry is returned (presumably
 * retained by flow_entry_find_by_uuid() — confirm; despite the _rlock
 * suffix, no lock is held on return).  Returns NULL when not found.
 */
struct flow_entry *
flow_mgr_get_fe_by_uuid_rlock(struct flow_mgr *fm, uuid_t uuid)
{
	uint32_t i;
	struct flow_owner_bucket *fob;
	struct flow_owner *fo;
	struct flow_entry *fe;

	for (i = 0; i < fm->fm_owner_buckets_cnt; i++) {
		fob = flow_mgr_get_fob_at_idx(fm, i);
		FOB_LOCK_SPIN(fob);
		RB_FOREACH(fo, flow_owner_tree, &fob->fob_owner_head) {
			fe = flow_entry_find_by_uuid(fo, uuid);
			if (fe != NULL) {
				/* convert spin to full lock before unlocking */
				FOB_LOCK_CONVERT(fob);
				FOB_UNLOCK(fob);
				return fe;
			}
		}
		FOB_UNLOCK(fob);
	}
	return NULL;
}
1013
/*
 * Hash a destination address (IPv4 or IPv6) to its flow-route bucket.
 *
 * Uses a Jenkins-style 3-word mix seeded with flow_seed; the final
 * value is masked by (fm_route_buckets_cnt - 1), which assumes the
 * route-bucket count is a power of two -- TODO confirm against the
 * bucket allocation code.  Panics on any address family other than
 * AF_INET/AF_INET6.
 */
struct flow_route_bucket *
flow_mgr_get_frb_by_addr(struct flow_mgr *fm,
    union sockaddr_in_4_6 *daddr)
{
	/* golden-ratio constants per Jenkins' mixing scheme */
	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = flow_seed;

	switch (SA(daddr)->sa_family) {
	case AF_INET: {
		/* fold the 4 address bytes into "a" and "b" byte-wise */
		uint8_t *p = (uint8_t *)&SIN(daddr)->sin_addr.s_addr;
		b += ((uint32_t)p[3]);
		a += ((uint32_t)p[2]) << 24;
		a += ((uint32_t)p[1]) << 16;
		a += ((uint32_t)p[0]) << 8;
		break;
	}

	case AF_INET6: {
		/* fold all four 32-bit words of the IPv6 address */
		b += SIN6(daddr)->sin6_addr.s6_addr32[3];
		a += SIN6(daddr)->sin6_addr.s6_addr32[2];
		a += SIN6(daddr)->sin6_addr.s6_addr32[1];
		a += SIN6(daddr)->sin6_addr.s6_addr32[0];
		break;
	}

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}

	/* mix (order-sensitive; do not reorder these statements) */
	a -= b; a -= c; a ^= (c >> 13);
	b -= c; b -= a; b ^= (a << 8);
	c -= a; c -= b; c ^= (b >> 13);
	a -= b; a -= c; a ^= (c >> 12);
	b -= c; b -= a; b ^= (a << 16);
	c -= a; c -= b; c ^= (b >> 5);
	a -= b; a -= c; a ^= (c >> 3);
	b -= c; b -= a; b ^= (a << 10);
	c -= a; c -= b; c ^= (b >> 15);

	c &= (fm->fm_route_buckets_cnt - 1);

	return flow_mgr_get_frb_at_idx(fm, c);
}
1059
1060 struct flow_route_id_bucket *
flow_mgr_get_frib_by_uuid(struct flow_mgr * fm,uuid_t fr_uuid)1061 flow_mgr_get_frib_by_uuid(struct flow_mgr *fm, uuid_t fr_uuid)
1062 {
1063 union {
1064 uuid_t uuid __sk_aligned(8);
1065 uint64_t u64[2];
1066 } u;
1067 uint64_t key;
1068
1069 _CASSERT(sizeof(u.uuid) == sizeof(u.u64));
1070 uuid_copy(u.uuid, fr_uuid);
1071
1072 /* XOR fold UUID down to 4-bytes */
1073 key = (u.u64[0] ^ u.u64[1]);
1074 key = ((key >> 32) ^ (key & 0xffffffff));
1075
1076 /* add some offset to get more entropy */
1077 return flow_mgr_get_frib_at_idx(fm,
1078 ((uint32_t)key % fm->fm_route_id_buckets_cnt));
1079 }
1080
1081 static int
flow_hash_mask_add(struct flow_mgr * fm,uint32_t mask,int32_t v)1082 flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask, int32_t v)
1083 {
1084 for (uint32_t i = 0; i < FKMASK_IDX_MAX; i++) {
1085 if (fm->fm_flow_hash_masks[i] == mask) {
1086 atomic_add_32(&fm->fm_flow_hash_count[i], v);
1087 return 0;
1088 }
1089 }
1090 SK_ERR("unkown hash mask 0x%x", mask);
1091 return ENOTSUP;
1092 }
1093
/*
 * Bump the usage count for "mask"; returns 0 on success or ENOTSUP
 * if the mask is not one of the supported hash masks.
 */
int
flow_mgr_flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask)
{
	/* delta of +1: one more flow is using this mask */
	return flow_hash_mask_add(fm, mask, 1);
}
1099
/*
 * Drop the usage count for "mask"; returns 0 on success or ENOTSUP
 * if the mask is not one of the supported hash masks.
 */
int
flow_mgr_flow_hash_mask_del(struct flow_mgr *fm, uint32_t mask)
{
	/* delta of -1: one fewer flow is using this mask */
	return flow_hash_mask_add(fm, mask, -1);
}
1105
1106 struct flow_entry *
flow_mgr_find_fe_by_key(struct flow_mgr * fm,struct flow_key * key)1107 flow_mgr_find_fe_by_key(struct flow_mgr *fm, struct flow_key *key)
1108 {
1109 #if SK_LOG
1110 char dbgbuf[FLOWENTRY_DBGBUF_SIZE]; /* just for debug message */
1111 #endif /* SK_LOG */
1112 struct cuckoo_node *node = NULL;
1113 struct flow_entry *fe = NULL;
1114 uint32_t hash = 0;
1115 uint16_t saved_mask = key->fk_mask;
1116
1117 SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP, "key %s",
1118 fk_as_string(key, dbgbuf, sizeof(dbgbuf)));
1119
1120 for (int i = 0; i < FKMASK_IDX_MAX; i++) {
1121 size_t count = fm->fm_flow_hash_count[i];
1122 uint16_t mask = fm->fm_flow_hash_masks[i];
1123 if (count == 0 || mask == 0) {
1124 SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP,
1125 "[%d] mask=%08x count=%zu skiped",
1126 i, mask, count);
1127 continue;
1128 }
1129 key->fk_mask = mask;
1130 hash = flow_key_hash(key);
1131 node = cuckoo_hashtable_find_with_hash(fm->fm_flow_table, key, hash);
1132 SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP,
1133 "[%d] mask=%08x hash %08x node 0x%llx", i, mask, hash,
1134 SK_KVA(node));
1135 if (node != NULL) {
1136 fe = container_of(node, struct flow_entry, fe_cnode);
1137 /* v4 only listener fe shouldn't get v6 connection */
1138 if (__improbable(fe->fe_key.fk_mask == FKMASK_2TUPLE &&
1139 fe->fe_key.fk_ipver == IPVERSION &&
1140 key->fk_ipver == IPV6_VERSION)) {
1141 flow_entry_release(&fe);
1142 ASSERT(fe == NULL);
1143 SK_DF(SK_VERB_FLOW | SK_VERB_LOOKUP,
1144 "\tskip v4 only fe");
1145 continue;
1146 }
1147 break;
1148 }
1149 }
1150
1151 key->fk_mask = saved_mask;
1152
1153 return fe;
1154 }
1155
1156 struct flow_entry *
flow_mgr_find_conflicting_fe(struct flow_mgr * fm,struct flow_key * key)1157 flow_mgr_find_conflicting_fe(struct flow_mgr *fm, struct flow_key *key)
1158 {
1159 struct cuckoo_node *node = NULL;
1160 struct flow_entry *fe = NULL;
1161 uint32_t hash = 0;
1162
1163 hash = flow_key_hash(key);
1164 node = cuckoo_hashtable_find_with_hash(fm->fm_flow_table, key, hash);
1165 if (node != NULL) {
1166 fe = container_of(node, struct flow_entry, fe_cnode);
1167 return fe;
1168 }
1169
1170 /* listener flow confliction will be checked at netns reservation */
1171 return fe;
1172 }
1173
/*
 * Invoke "flow_handler" on every flow entry in the manager's flow
 * table.  Iteration is delegated to cuckoo_hashtable_foreach(); each
 * table node is converted back to its containing flow entry before
 * the handler runs.  Locking/visibility guarantees are those of
 * cuckoo_hashtable_foreach() -- NOTE(review): confirm whether the
 * handler may block or modify the table.
 */
void
flow_mgr_foreach_flow(struct flow_mgr *fm,
    void (^flow_handler)(struct flow_entry *fe))
{
	cuckoo_hashtable_foreach(fm->fm_flow_table,
	    ^(struct cuckoo_node *node, uint32_t hv) {
#pragma unused(hv)
		struct flow_entry *fe;
		fe = container_of(node, struct flow_entry, fe_cnode);
		flow_handler(fe);
	}
	    );
}
1187
1188 struct flow_entry *
flow_mgr_get_host_fe(struct flow_mgr * fm)1189 flow_mgr_get_host_fe(struct flow_mgr *fm)
1190 {
1191 struct flow_entry *fe;
1192 fe = fm->fm_host_fe;
1193 flow_entry_retain(fe);
1194 return fe;
1195 }
1196