1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <kern/assert.h>
30 #include <kern/locks.h>
31 #include <kern/zalloc.h>
32 #include <libkern/tree.h>
33 #include <sys/kernel.h>
34 #include <sys/sysctl.h>
35 #include <sys/bitstring.h>
36 #include <net/if.h>
37 #include <net/kpi_interface.h>
38 #include <net/restricted_in_port.h>
39
40 #include <netinet/in.h>
41 #include <netinet/in_pcb.h>
42 #include <netinet/tcp_fsm.h>
43 #include <netinet/tcp_var.h>
44
45 #include <netinet6/in6_var.h>
46 #include <string.h>
47
48 #include <skywalk/os_skywalk.h>
49 #include <skywalk/os_skywalk_private.h>
50 #include <skywalk/os_stats_private.h>
51 #include <skywalk/nexus/flowswitch/flow/flow_var.h>
52 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
53
54 #include <net/if_ports_used.h>
55
/*
 * Module initialization flag; starts out as 0.
 * NOTE(review): presumably updated by the netns init/teardown routines,
 * which are outside this view -- confirm.
 */
static int __netns_inited = 0;
57
58 /*
59 * Logging
60 */
61
/*
 * Map a protocol or an address length to the matching SK_VERB_NS_* log
 * category, a printable protocol name, or an address family.  Any protocol
 * other than IPPROTO_TCP is treated as UDP, and any length other than
 * sizeof (struct in_addr) is treated as IPv6.
 *
 * Macro arguments are parenthesized so that compound expressions (for
 * example "x & 0xff") expand with the intended precedence.
 */
#define NS_VERB_PROTO(proto)    (((proto) == IPPROTO_TCP) ? SK_VERB_NS_TCP : \
	                        SK_VERB_NS_UDP)
#define NS_VERB_IP(addr_len)    (((addr_len) == sizeof (struct in_addr)) ? \
	                        SK_VERB_NS_IPV4 : SK_VERB_NS_IPV6)
#define PROTO_STR(proto)        (((proto) == IPPROTO_TCP) ? "tcp" : "udp")
#define LEN_TO_AF(len)          ((((len) == sizeof (struct in_addr)) ? \
	                        AF_INET : AF_INET6))
/*
 * Locking
 * Netns is currently protected by a global mutex, NETNS_LOCK. This lock is
 * acquired at the entry of every kernel-facing function, and released at the
 * end. Data within netns_token structures is also protected under this lock.
 */

/* Acquire the global netns mutex with lck_mtx_lock(). */
#define NETNS_LOCK()                    \
	lck_mtx_lock(&netns_lock)
/* Acquire the global netns mutex with lck_mtx_lock_spin(). */
#define NETNS_LOCK_SPIN()               \
	lck_mtx_lock_spin(&netns_lock)
/*
 * Assert that the lock is held, then call lck_mtx_convert_spin() on it.
 * In this file it is invoked ahead of the cache/zone allocation and free
 * calls.
 */
#define NETNS_LOCK_CONVERT() do {       \
	NETNS_LOCK_ASSERT_HELD();       \
	lck_mtx_convert_spin(&netns_lock); \
} while (0)
/* Release the global netns mutex. */
#define NETNS_UNLOCK()                  \
	lck_mtx_unlock(&netns_lock)
/* Assert that the current thread owns the global netns mutex. */
#define NETNS_LOCK_ASSERT_HELD()        \
	LCK_MTX_ASSERT(&netns_lock, LCK_MTX_ASSERT_OWNED)
/* Assert that the current thread does not own the global netns mutex. */
#define NETNS_LOCK_ASSERT_NOTHELD()     \
	LCK_MTX_ASSERT(&netns_lock, LCK_MTX_ASSERT_NOTOWNED)

/* Lock group and the single global mutex used by the macros above. */
static LCK_GRP_DECLARE(netns_lock_group, "netns_lock");
static LCK_MTX_DECLARE(netns_lock, &netns_lock_group);
93
94 /*
95 * Internal data structures and parameters
96 */
97
/*
 * Local ports are kept track of by reference counts kept in a tree specific to
 * an <IP, protocol> tuple (see struct ns).
 *
 * Note: port numbers are stored in host byte order.
 */
struct ns_reservation {
	RB_ENTRY(ns_reservation) nsr_link;      /* linkage in ns_reservation_tree */
	uint32_t nsr_refs[NETNS_OWNER_MAX + 1]; /* reference count per owner */
	in_port_t nsr_port;                     /* reserved port; the tree key */
	bool nsr_reuseport:1;                   /* NETNS_REUSEPORT given at allocation */
};

/*
 * Reference count slot for the owner encoded in flags.  Expands to an
 * lvalue, so it can be incremented and decremented in place.
 */
#define NETNS_REF_COUNT(nsr, flags) \
	(nsr)->nsr_refs[((flags) & NETNS_OWNER_MASK)]

static inline int nsr_cmp(const struct ns_reservation *,
    const struct ns_reservation *);

/* Red-black tree of reservations, ordered by nsr_cmp() (i.e. by port). */
RB_HEAD(ns_reservation_tree, ns_reservation);
RB_PROTOTYPE(ns_reservation_tree, ns_reservation, nsr_link, nsr_cmp);
RB_GENERATE(ns_reservation_tree, ns_reservation, nsr_link, nsr_cmp);

static inline struct ns_reservation *ns_reservation_tree_find(
	struct ns_reservation_tree *, const in_port_t);
123
/*
 * A namespace keeps track of the local port numbers in use for a given
 * <IP, protocol> tuple. There are also global namespaces for each
 * protocol to accommodate INADDR_ANY behavior and diagnostics.
 */
struct ns {
	RB_ENTRY(ns)    ns_link;        /* linkage in netns_namespaces */

	/*
	 * Address pointer handed to flow_ip_cmp() by ns_cmp().  Namespaces
	 * created by _netns_get_ns() point this at their own ns_addr; lookup
	 * keys point it at the caller's address.
	 */
	void            *ns_addr_key;

	/* Address storage, viewable as raw words, IPv4 or IPv6 */
	union {
		uint32_t        ns_addr[4];
		struct in_addr  ns_inaddr;
		struct in6_addr ns_in6addr;
	};
	uint8_t         ns_addr_len;    /* sizeof in_addr or sizeof in6_addr */
	uint8_t         ns_proto;       /* IPPROTO_TCP or IPPROTO_UDP */

	/*
	 * Ephemeral port cursors; both are seeded with the same random port
	 * by netns_ns_alloc().
	 */
	in_port_t       ns_last_ephemeral_port_down;
	in_port_t       ns_last_ephemeral_port_up;

	/*
	 * Non-zero if netns_ns_cleanup() may free this namespace once it has
	 * no reservations; cleared for the global namespaces.
	 */
	uint8_t         ns_is_freeable;

	uint32_t        ns_n_reservations;      /* entries in ns_reservations */
	struct ns_reservation_tree ns_reservations;
};

/* Number of namespaces currently linked into netns_namespaces */
static uint32_t netns_n_namespaces;

static inline int ns_cmp(const struct ns *, const struct ns *);

/* Tree of all per-<IP, protocol> namespaces, ordered by ns_cmp() */
RB_HEAD(netns_namespaces_tree, ns) netns_namespaces =
    RB_INITIALIZER(netns_namespaces);
RB_PROTOTYPE_PREV(netns_namespaces_tree, ns, ns_link, ns_cmp);
RB_GENERATE_PREV(netns_namespaces_tree, ns, ns_link, ns_cmp);
159
/*
 * Declare pointers to global namespaces for each protocol.
 * All non-wildcard reservations will have an entry here.
 */
#define NETNS_N_GLOBAL          4
static struct ns *netns_global_non_wild[NETNS_N_GLOBAL];
static struct ns *netns_global_wild[NETNS_N_GLOBAL];
#define NETNS_ADDRLEN_V4 (sizeof(struct in_addr))
#define NETNS_ADDRLEN_V6 (sizeof(struct in6_addr))
/* Index components: bit 0 selects TCP/UDP, bit 1 selects IPv4/IPv6 */
#define NETNS_NS_TCP            0
#define NETNS_NS_UDP            1
#define NETNS_NS_V4             0
#define NETNS_NS_V6             2
/*
 * Index into the netns_global_* arrays for a <protocol, address length>
 * pair.  Any protocol other than IPPROTO_TCP selects the UDP slot and any
 * length other than NETNS_ADDRLEN_V4 selects the IPv6 slot.
 */
#define NETNS_NS_GLOBAL_IDX(proto, addrlen)     \
	((((proto) == IPPROTO_TCP) ? NETNS_NS_TCP : NETNS_NS_UDP) | \
	(((addrlen) == NETNS_ADDRLEN_V4) ? NETNS_NS_V4 : NETNS_NS_V6))

/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the number of UDP ephemeral ports held in reserve -- confirm against users.
 */
#define NETNS_NS_UDP_EPHEMERAL_RESERVE  4096
178
/*
 * Internal token structure
 *
 * Note: port numbers are stored in host byte order.
 */
struct ns_token {
	/* Reservation state */
	ifnet_t                 nt_ifp;         /* bound interface, or NULL */
	/* linkage on nt_ifp's if_netns_tokens, or on netns_unbound_tokens */
	SLIST_ENTRY(ns_token)   nt_ifp_link;
	SLIST_ENTRY(ns_token)   nt_all_link;    /* linkage on netns_all_tokens */
	uint32_t                nt_state;       /* NETNS_STATE_* */

	/* Reservation context */
	union {
		uint32_t        nt_addr[4];
		struct in_addr  nt_inaddr;
		struct in6_addr nt_in6addr;
	};
	uint8_t                 nt_addr_len;    /* sizeof in_addr or in6_addr */
	uint8_t                 nt_proto;       /* IPPROTO_TCP or IPPROTO_UDP */
	in_port_t               nt_port;        /* host byte order */
	uint32_t                nt_flags;       /* NETNS_* flags of the request */

	/* Optional information about the flow */
	struct ns_flow_info     *nt_flow_info;
};

/* Valid values for nt_state */
#define NETNS_STATE_HALFCLOSED  0x1     /* half closed */
#define NETNS_STATE_WITHDRAWN   0x2     /* withdrawn; not offloadable */

/*
 * Bit names for nt_state.
 * NOTE(review): looks like a "%b"-style bit description string -- confirm.
 */
#define NETNS_STATE_BITS        "\020\01HALFCLOSED\02WITHDRAWN"

/* List of tokens not bound to an ifnet */
SLIST_HEAD(, ns_token) netns_unbound_tokens = SLIST_HEAD_INITIALIZER(
	netns_unbound_tokens);

/* List of all tokens currently allocated in the system */
SLIST_HEAD(, ns_token) netns_all_tokens = SLIST_HEAD_INITIALIZER(
	netns_all_tokens);
219
/*
 * Memory management
 */
/* Zone backing struct ns; used by netns_ns_alloc() and netns_ns_free() */
static ZONE_DEFINE(netns_ns_zone, SKMEM_ZONE_PREFIX ".netns.ns",
    sizeof(struct ns), ZC_ZFREE_CLEARMEM);

/*
 * The three skmem caches below back ns_token, ns_flow_info and
 * ns_reservation objects.  The *_size variables are the byte counts used to
 * bzero() freshly allocated tokens and reservations.
 * NOTE(review): the caches and sizes are presumably set up by the init
 * routine outside this view -- confirm.
 */
#define NETNS_NS_TOKEN_ZONE_NAME        "netns.ns_token"
static unsigned int netns_ns_token_size; /* size of zone element */
static struct skmem_cache *netns_ns_token_cache; /* for ns_token */

#define NETNS_NS_FLOW_INFO_ZONE_NAME    "netns.ns_flow_info"
static unsigned int netns_ns_flow_info_size; /* size of zone element */
static struct skmem_cache *netns_ns_flow_info_cache; /* for ns_flow_info */

#define NETNS_NS_RESERVATION_ZONE_NAME  "netns.ns_reservation"
static unsigned int netns_ns_reservation_size; /* size of zone element */
static struct skmem_cache *netns_ns_reservation_cache; /* for ns_reservation */

/* Allocation and release helpers for reservations, namespaces and tokens */
static struct ns_reservation *netns_ns_reservation_alloc(in_port_t, uint32_t);
static void netns_ns_reservation_free(struct ns_reservation *);
static struct ns *netns_ns_alloc(zalloc_flags_t);
static void netns_ns_free(struct ns *);
static void netns_ns_cleanup(struct ns *);
static struct ns_token *netns_ns_token_alloc(boolean_t);
static void netns_ns_token_free(struct ns_token *);

/*
 * Utility/internal code
 */
static struct ns *_netns_get_ns(uint32_t *, uint8_t, uint8_t, bool);
static inline boolean_t _netns_is_wildcard_addr(const uint32_t *, uint8_t);
static int _netns_reserve_common(struct ns *, in_port_t, uint32_t);
static void _netns_release_common(struct ns *, in_port_t, uint32_t);
static inline void netns_clear_ifnet(struct ns_token *);
static int _netns_reserve_kpi_common(struct ns *, netns_token *, uint32_t *,
    uint8_t, uint8_t, in_port_t *, uint32_t, struct ns_flow_info *);
static void _netns_set_ifnet_internal(struct ns_token *, struct ifnet *);
257
258 static struct ns_reservation *
netns_ns_reservation_alloc(in_port_t port,uint32_t flags)259 netns_ns_reservation_alloc(in_port_t port, uint32_t flags)
260 {
261 struct ns_reservation *res;
262
263 VERIFY(port != 0);
264
265 res = skmem_cache_alloc(netns_ns_reservation_cache, SKMEM_SLEEP);
266 ASSERT(res != NULL);
267
268 bzero(res, netns_ns_reservation_size);
269 res->nsr_port = port;
270 res->nsr_reuseport = ((flags & NETNS_REUSEPORT) != 0);
271 return res;
272 }
273
274 static void
netns_ns_reservation_free(struct ns_reservation * res)275 netns_ns_reservation_free(struct ns_reservation *res)
276 {
277 skmem_cache_free(netns_ns_reservation_cache, res);
278 }
279
280 static struct ns *
netns_ns_alloc(zalloc_flags_t how)281 netns_ns_alloc(zalloc_flags_t how)
282 {
283 struct ns *namespace;
284 in_port_t first = (in_port_t)ipport_firstauto;
285 in_port_t last = (in_port_t)ipport_lastauto;
286 in_port_t rand_port;
287
288 namespace = zalloc_flags(netns_ns_zone, how | Z_ZERO);
289 if (namespace == NULL) {
290 return NULL;
291 }
292
293 namespace->ns_is_freeable = 1;
294
295 RB_INIT(&namespace->ns_reservations);
296
297 /*
298 * Randomize the initial ephemeral port starting point, just in case
299 * this namespace is for an ipv6 address which gets brought up and
300 * down often.
301 */
302 if (first == last) {
303 rand_port = first;
304 } else {
305 read_frandom(&rand_port, sizeof(rand_port));
306
307 if (first > last) {
308 rand_port = last + (rand_port % (first - last));
309 } else {
310 rand_port = first + (rand_port % (last - first));
311 }
312 }
313 namespace->ns_last_ephemeral_port_down = rand_port;
314 namespace->ns_last_ephemeral_port_up = rand_port;
315
316 return namespace;
317 }
318
319 static void
netns_ns_free(struct ns * namespace)320 netns_ns_free(struct ns *namespace)
321 {
322 struct ns_reservation *res;
323 struct ns_reservation *tmp_res;
324 #if SK_LOG
325 char tmp_ip_str[MAX_IPv6_STR_LEN];
326 #endif /* SK_LOG */
327
328 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
329 NS_VERB_PROTO(namespace->ns_proto),
330 "freeing %s ns for IP %s",
331 PROTO_STR(namespace->ns_proto),
332 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
333 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)));
334
335 RB_FOREACH_SAFE(res, ns_reservation_tree, &namespace->ns_reservations,
336 tmp_res) {
337 netns_ns_reservation_free(res);
338 namespace->ns_n_reservations--;
339 RB_REMOVE(ns_reservation_tree, &namespace->ns_reservations,
340 res);
341 }
342
343 VERIFY(RB_EMPTY(&namespace->ns_reservations));
344
345 if (netns_global_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
346 namespace->ns_addr_len)] == namespace) {
347 netns_global_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
348 namespace->ns_addr_len)] = NULL;
349 }
350 if (netns_global_non_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
351 namespace->ns_addr_len)] == namespace) {
352 netns_global_non_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
353 namespace->ns_addr_len)] = NULL;
354 }
355
356 zfree(netns_ns_zone, namespace);
357 }
358
359 static void
netns_ns_cleanup(struct ns * namespace)360 netns_ns_cleanup(struct ns *namespace)
361 {
362 if (namespace->ns_is_freeable &&
363 RB_EMPTY(&namespace->ns_reservations)) {
364 RB_REMOVE(netns_namespaces_tree, &netns_namespaces, namespace);
365 netns_n_namespaces--;
366 netns_ns_free(namespace);
367 }
368 }
369
370 static struct ns_token *
netns_ns_token_alloc(boolean_t with_nfi)371 netns_ns_token_alloc(boolean_t with_nfi)
372 {
373 struct ns_token *token;
374
375 NETNS_LOCK_ASSERT_HELD();
376 NETNS_LOCK_CONVERT();
377
378 token = skmem_cache_alloc(netns_ns_token_cache, SKMEM_SLEEP);
379 ASSERT(token != NULL);
380
381 bzero(token, netns_ns_token_size);
382
383 if (with_nfi) {
384 token->nt_flow_info = skmem_cache_alloc(netns_ns_flow_info_cache,
385 SKMEM_SLEEP);
386 ASSERT(token->nt_flow_info != NULL);
387 }
388 SLIST_INSERT_HEAD(&netns_all_tokens, token, nt_all_link);
389
390 return token;
391 }
392
393 static void
netns_ns_token_free(struct ns_token * token)394 netns_ns_token_free(struct ns_token *token)
395 {
396 NETNS_LOCK_ASSERT_HELD();
397 NETNS_LOCK_CONVERT();
398 SLIST_REMOVE(&netns_all_tokens, token, ns_token, nt_all_link);
399
400 if (token->nt_flow_info != NULL) {
401 skmem_cache_free(netns_ns_flow_info_cache, token->nt_flow_info);
402 }
403 skmem_cache_free(netns_ns_token_cache, token);
404 }
405
406 __attribute__((always_inline))
407 static inline int
nsr_cmp(const struct ns_reservation * nsr1,const struct ns_reservation * nsr2)408 nsr_cmp(const struct ns_reservation *nsr1, const struct ns_reservation *nsr2)
409 {
410 #define NSR_COMPARE(r1, r2) ((int)(r1)->nsr_port - (int)(r2)->nsr_port)
411 return NSR_COMPARE(nsr1, nsr2);
412 }
413
414 __attribute__((always_inline))
415 static inline int
ns_cmp(const struct ns * a,const struct ns * b)416 ns_cmp(const struct ns *a, const struct ns *b)
417 {
418 int d;
419
420 if ((d = (a->ns_addr_len - b->ns_addr_len)) != 0) {
421 return d;
422 }
423 if ((d = (a->ns_proto - b->ns_proto)) != 0) {
424 return d;
425 }
426 if ((d = flow_ip_cmp(a->ns_addr_key, b->ns_addr_key,
427 b->ns_addr_len)) != 0) {
428 return d;
429 }
430
431 return 0;
432 }
433
434 /*
435 * Common routine to look up a reservation.
436 *
437 * NOTE: Assumes the caller holds the NETNS global lock
438 */
439 __attribute__((always_inline))
440 static inline struct ns_reservation *
ns_reservation_tree_find(struct ns_reservation_tree * tree,const in_port_t port)441 ns_reservation_tree_find(struct ns_reservation_tree *tree, const in_port_t port)
442 {
443 struct ns_reservation res;
444 res.nsr_port = port;
445 return RB_FIND(ns_reservation_tree, tree, &res);
446 }
447
448 /*
449 * Retrieve the namespace for the supplied <address, protocol> tuple.
450 * If create is set and such a namespace doesn't already exist, one will be
451 * created.
452 */
453 static struct ns *
_netns_get_ns(uint32_t * addr,uint8_t addr_len,uint8_t proto,bool create)454 _netns_get_ns(uint32_t *addr, uint8_t addr_len, uint8_t proto, bool create)
455 {
456 struct ns *namespace = NULL;
457 struct ns find = {
458 .ns_addr_key = addr,
459 .ns_addr_len = addr_len,
460 .ns_proto = proto,
461 };
462 #if SK_LOG
463 char tmp_ip_str[MAX_IPv6_STR_LEN];
464 #endif /* SK_LOG */
465
466 VERIFY(addr_len == sizeof(struct in_addr) ||
467 addr_len == sizeof(struct in6_addr));
468
469 NETNS_LOCK_ASSERT_HELD();
470
471 namespace = RB_FIND(netns_namespaces_tree, &netns_namespaces, &find);
472
473 if (create && namespace == NULL) {
474 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
475 "allocating %s ns for IP %s",
476 PROTO_STR(proto), inet_ntop(LEN_TO_AF(addr_len), addr,
477 tmp_ip_str, sizeof(tmp_ip_str)));
478 NETNS_LOCK_CONVERT();
479 namespace = netns_ns_alloc(Z_WAITOK | Z_NOFAIL);
480 __builtin_assume(namespace != NULL);
481 memcpy(namespace->ns_addr, addr, addr_len);
482 namespace->ns_addr_key = &namespace->ns_addr;
483 namespace->ns_addr_len = addr_len;
484 namespace->ns_proto = proto;
485 RB_INSERT(netns_namespaces_tree, &netns_namespaces, namespace);
486 netns_n_namespaces++;
487
488 if (_netns_is_wildcard_addr(addr, addr_len) &&
489 netns_global_wild[NETNS_NS_GLOBAL_IDX(proto,
490 addr_len)] == NULL) {
491 netns_global_wild[NETNS_NS_GLOBAL_IDX(proto,
492 addr_len)] = namespace;
493 }
494 }
495
496 return namespace;
497 }
498
499 /*
500 * Return true if the supplied address is a wildcard (INADDR_ANY)
501 */
502 __attribute__((always_inline))
503 static boolean_t
_netns_is_wildcard_addr(const uint32_t * addr,uint8_t addr_len)504 _netns_is_wildcard_addr(const uint32_t *addr, uint8_t addr_len)
505 {
506 boolean_t wildcard;
507
508 switch (addr_len) {
509 case sizeof(struct in_addr):
510 wildcard = (addr[0] == 0);
511 break;
512
513 case sizeof(struct in6_addr):
514 wildcard = (addr[0] == 0 && addr[1] == 0 &&
515 addr[2] == 0 && addr[3] == 0);
516 break;
517
518 default:
519 wildcard = FALSE;
520 break;
521 }
522
523 return wildcard;
524 }
525
526 __attribute__((always_inline))
527 static boolean_t
_netns_is_port_used(struct ns * gns,struct ns_reservation * curr_res,in_port_t port)528 _netns_is_port_used(struct ns * gns, struct ns_reservation *curr_res, in_port_t port)
529 {
530 struct ns_reservation *res = NULL;
531
532 if (gns == NULL) {
533 return FALSE;
534 }
535
536 res = ns_reservation_tree_find(&gns->ns_reservations, port);
537 if (res != NULL && res != curr_res) {
538 if (!res->nsr_reuseport) {
539 return TRUE;
540 }
541 }
542
543 return FALSE;
544 }
545
/*
 * Internal shared code to reserve ports within a specific namespace.
 *
 * Note: port numbers are in host byte-order here.
 *
 * A reservation node for `port' is inserted into (or found in) the
 * namespace's tree, the request is checked against the existing
 * reservations, and on success the reference count of the owner encoded in
 * `flags' is incremented.  No collision checks are made when `namespace' is
 * the global non-wild namespace for its protocol/address family.
 *
 * Returns 0 on success, EADDRINUSE if the request conflicts with an existing
 * reservation, or ENOMEM if no reservation node could be obtained.  On
 * EADDRINUSE a node inserted by this call is removed and freed again, while
 * a node that existed beforehand is left untouched.
 */
static int
_netns_reserve_common(struct ns *namespace, in_port_t port, uint32_t flags)
{
	struct ns_reservation *res = NULL, *exist = NULL;
	uint8_t proto, addr_len;
	int err = 0;
#if SK_LOG
	char tmp_ip_str[MAX_IPv6_STR_LEN];
#endif /* SK_LOG */

	VERIFY(port != 0);
	proto = namespace->ns_proto;
	addr_len = namespace->ns_addr_len;
	NETNS_LOCK_CONVERT();
	res = netns_ns_reservation_alloc(port, flags);
	if (res == NULL) {
		SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
		    "ERROR %s:%s:%d // flags 0x%x // OUT OF MEMORY",
		    inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
		    namespace->ns_addr, tmp_ip_str,
		    sizeof(tmp_ip_str)), PROTO_STR(proto), port, flags);
		return ENOMEM;
	}
	/*
	 * If the tree already holds a node for this port, RB_INSERT() hands
	 * it back; drop the fresh allocation and work on the existing node.
	 */
	exist = RB_INSERT(ns_reservation_tree, &namespace->ns_reservations,
	    res);
	if (__probable(exist == NULL)) {
		namespace->ns_n_reservations++;
	} else {
		netns_ns_reservation_free(res);
		res = exist;
	}

	SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
	    "pre: %s:%s:%d // flags 0x%x // refs %d sky, %d ls, "
	    "%d bsd %d pf", inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
	    namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
	    PROTO_STR(proto), port, flags,
	    NETNS_REF_COUNT(res, NETNS_SKYWALK),
	    NETNS_REF_COUNT(res, NETNS_LISTENER),
	    NETNS_REF_COUNT(res, NETNS_BSD),
	    NETNS_REF_COUNT(res, NETNS_PF));

	/* Make reservation */
	/*
	 * Bypass collision detection for reservations in the global non-wild
	 * namespace. We use that namespace for reference counts only.
	 */
	if (namespace !=
	    netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto, addr_len)]) {
		struct ns_reservation *skres;
		boolean_t is_wild = _netns_is_wildcard_addr(namespace->ns_addr,
		    addr_len);
		struct ns *gns =
		    netns_global_wild[NETNS_NS_GLOBAL_IDX(proto, addr_len)];

		if (NETNS_IS_SKYWALK(flags)) {
			if ((!is_wild || exist != NULL) && gns != NULL &&
			    (skres = ns_reservation_tree_find(
				    &gns->ns_reservations, port)) != NULL &&
			    NETNS_REF_COUNT(skres, NETNS_LISTENER) == 0) {
				/*
				 * The mere existence of any non-skywalk
				 * listener wildcard entry for this
				 * protocol/port number means this must fail.
				 */
				SK_ERR("ADDRINUSE: Duplicate wildcard");
				err = EADDRINUSE;
				goto done;
			}

			if (is_wild) {
				gns = netns_global_non_wild[
					NETNS_NS_GLOBAL_IDX(proto, addr_len)];
				VERIFY(gns != NULL);

				if (_netns_is_port_used(netns_global_non_wild[
					    NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V4)], res, port) ||
				    _netns_is_port_used(netns_global_non_wild[
					    NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V6)], res, port)) {
					/*
					 * If Skywalk is trying to reserve a
					 * wildcard, then the mere existence of
					 * any entry in either v4/v6 non-wild
					 * namespace for this port means this
					 * must fail.
					 */
					SK_ERR("ADDRINUSE: Wildcard with non-wild.");
					err = EADDRINUSE;
					goto done;
				}
			}
		} else {
			/*
			 * Check if Skywalk has reserved a wildcard entry.
			 * Note that the arithmetic OR here is intentional.
			 */
			if ((!is_wild || exist != NULL) && gns != NULL &&
			    (skres = ns_reservation_tree_find(
				    &gns->ns_reservations, port)) != NULL &&
			    (NETNS_REF_COUNT(skres, NETNS_SKYWALK) |
			    NETNS_REF_COUNT(skres, NETNS_LISTENER)) != 0) {
				/*
				 * BSD is trying to reserve a proto/port for
				 * which Skywalk already has a wildcard
				 * reservation.
				 */
				SK_ERR("ADDRINUSE: BSD requesting Skywalk port");
				err = EADDRINUSE;
				goto done;
			}

			/*
			 * If BSD is trying to reserve a wildcard,
			 * ensure Skywalk has not already reserved
			 * a non-wildcard.
			 */
			if (is_wild) {
				gns = netns_global_non_wild[
					NETNS_NS_GLOBAL_IDX(proto, addr_len)];
				VERIFY(gns != NULL);

				/*
				 * Note that the arithmetic OR here is
				 * intentional.
				 */
				if ((skres = ns_reservation_tree_find(
					    &gns->ns_reservations, port)) != NULL &&
				    (NETNS_REF_COUNT(skres, NETNS_SKYWALK) |
				    NETNS_REF_COUNT(skres,
				    NETNS_LISTENER)) != 0) {
					SK_ERR("ADDRINUSE: BSD wildcard with non-wild.");
					err = EADDRINUSE;
					goto done;
				}
			}
		}

		/*
		 * Per-owner checks against the counts already held on this
		 * namespace's own node for the port.
		 */
		switch (flags & NETNS_OWNER_MASK) {
		case NETNS_SKYWALK:
			/* check collision w/ BSD */
			if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_PF) > 0) {
				SK_ERR("ERROR - Skywalk got ADDRINUSE (w/ BSD)");
				err = EADDRINUSE;
				goto done;
			}

			/* BEGIN CSTYLED */
			/*
			 * Scenarios with new Skywalk connected flow:
			 * 1. With existing Skywalk connected flow,
			 *      NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
			 *      NETNS_REF_COUNT(res, NETNS_SKYWALK) == 1
			 *    reject by failing the wild gns lookup below.
			 * 2. With existing Skywalk 3-tuple listener,
			 *      NETNS_REF_COUNT(res, NETNS_LISTENER) == 1
			 *    bypass the check below.
			 * 3. With existing Skywalk 2-tuple listener,
			 *      NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
			 *      NETNS_REF_COUNT(res, NETNS_SKYWALK) == 0
			 *    pass with successful wild gns lookup.
			 */
			/* END CSTYLED */
			if (NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
			    NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0) {
				/* check if covered by wild Skywalk listener */
				gns = netns_global_wild[
					NETNS_NS_GLOBAL_IDX(proto, addr_len)];
				if (gns != NULL &&
				    (skres = ns_reservation_tree_find(
					    &gns->ns_reservations, port)) != NULL &&
				    NETNS_REF_COUNT(skres, NETNS_LISTENER)
				    != 0) {
					err = 0;
					goto done;
				}
				if (addr_len == sizeof(struct in_addr)) {
					/* If address is IPv4, also check for wild IPv6 registration */
					gns = netns_global_wild[
						NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V6)];
					if (gns != NULL &&
					    (skres = ns_reservation_tree_find(
						    &gns->ns_reservations, port)) != NULL &&
					    NETNS_REF_COUNT(skres, NETNS_LISTENER)
					    != 0) {
						err = 0;
						goto done;
					}
				}
				SK_ERR("ERROR - Skywalk got ADDRINUSE (w/ SK connected flow)");
				err = EADDRINUSE;
			}
			/*
			 * XXX: Duplicate 5-tuple flows under a Skywalk
			 * listener are currently detected by flow manager,
			 * till we implement 5-tuple-aware netns.
			 */
			break;

		case NETNS_LISTENER:
			/*
			 * A listener conflicts with any BSD, PF or listener
			 * count on this node, and with a non-reuseport node
			 * for the port in any of the four global namespaces
			 * of this protocol.
			 */
			if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_PF) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_LISTENER) > 0 ||
			    _netns_is_port_used(netns_global_wild[
				    NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V4)], res, port) ||
			    _netns_is_port_used(netns_global_wild[
				    NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V6)], res, port) ||
			    _netns_is_port_used(netns_global_non_wild[
				    NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V4)], res, port) ||
			    _netns_is_port_used(netns_global_non_wild[
				    NETNS_NS_GLOBAL_IDX(proto, NETNS_ADDRLEN_V6)], res, port)) {
				SK_ERR("ERROR - Listener got ADDRINUSE");
				err = EADDRINUSE;
			}
			break;

		case NETNS_BSD:
		case NETNS_PF:
			if (NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_LISTENER) > 0) {
				SK_ERR("ERROR - %s got ADDRINUSE",
				    ((flags & NETNS_OWNER_MASK) == NETNS_PF) ?
				    "PF" : "BSD");
				err = EADDRINUSE;
			}
			break;

		default:
			panic("_netns_reserve_common: invalid owner 0x%x",
			    flags & NETNS_OWNER_MASK);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

done:
	ASSERT(res != NULL);
	if (__probable(err == 0)) {
		NETNS_REF_COUNT(res, flags)++;
		/* Check for wrap around */
		VERIFY(NETNS_REF_COUNT(res, flags) != 0);
		SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
		    NS_VERB_PROTO(namespace->ns_proto),
		    "post: %s:%s:%d err %d // flags 0x%x // refs %d sky, "
		    "%d ls, %d bsd %d pf",
		    inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
		    namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
		    PROTO_STR(namespace->ns_proto), port, err, flags,
		    NETNS_REF_COUNT(res, NETNS_SKYWALK),
		    NETNS_REF_COUNT(res, NETNS_LISTENER),
		    NETNS_REF_COUNT(res, NETNS_BSD),
		    NETNS_REF_COUNT(res, NETNS_PF));
	} else {
		/*
		 * Undo the insertion only if this call created the node; a
		 * node that was already present still carries other owners'
		 * references.
		 */
		if (exist == NULL) {
			RB_REMOVE(ns_reservation_tree,
			    &namespace->ns_reservations, res);
			namespace->ns_n_reservations--;
			netns_ns_reservation_free(res);
		}
	}
	return err;
}
813
814 /*
815 * Internal shared code to release ports within a specific namespace.
816 */
817 static void
_netns_release_common(struct ns * namespace,in_port_t port,uint32_t flags)818 _netns_release_common(struct ns *namespace, in_port_t port, uint32_t flags)
819 {
820 struct ns_reservation *res;
821 uint32_t refs;
822 int i;
823 #if SK_LOG
824 char tmp_ip_str[MAX_IPv6_STR_LEN];
825 #endif /* SK_LOG */
826
827 NETNS_LOCK_ASSERT_HELD();
828
829 res = ns_reservation_tree_find(&namespace->ns_reservations, port);
830 if (res == NULL) {
831 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
832 NS_VERB_PROTO(namespace->ns_proto),
833 "ERROR %s:%s:%d // flags 0x%x // not found",
834 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
835 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
836 PROTO_STR(namespace->ns_proto), port, flags);
837 VERIFY(res != NULL);
838 }
839
840 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
841 NS_VERB_PROTO(namespace->ns_proto),
842 "%s:%s:%d // flags 0x%x // refs %d sky, %d ls, %d bsd, %d pf",
843 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
844 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
845 PROTO_STR(namespace->ns_proto), port, flags,
846 NETNS_REF_COUNT(res, NETNS_SKYWALK),
847 NETNS_REF_COUNT(res, NETNS_LISTENER),
848 NETNS_REF_COUNT(res, NETNS_BSD),
849 NETNS_REF_COUNT(res, NETNS_PF));
850
851 /* Release reservation */
852 VERIFY(NETNS_REF_COUNT(res, flags) > 0);
853 NETNS_REF_COUNT(res, flags) -= 1;
854
855 /* Clean up memory, if appropriate */
856 for (i = 0, refs = 0; i <= NETNS_OWNER_MAX && refs == 0; i++) {
857 refs |= res->nsr_refs[i];
858 }
859 if (refs == 0) {
860 RB_REMOVE(ns_reservation_tree, &namespace->ns_reservations,
861 res);
862 namespace->ns_n_reservations--;
863 NETNS_LOCK_CONVERT();
864 netns_ns_reservation_free(res);
865 netns_ns_cleanup(namespace);
866 }
867 }
868
869 __attribute__((always_inline))
870 static inline void
netns_init_global_ns(struct ns ** global_ptr,uint8_t proto,uint8_t addrlen)871 netns_init_global_ns(struct ns **global_ptr, uint8_t proto, uint8_t addrlen)
872 {
873 struct ns *namespace;
874
875 namespace = *global_ptr = netns_ns_alloc(Z_WAITOK);
876 memset(namespace->ns_addr, 0xFF, addrlen);
877 namespace->ns_addr_len = addrlen;
878 namespace->ns_proto = proto;
879 namespace->ns_is_freeable = 0;
880 }
881
882 __attribute__((always_inline))
883 static inline void
netns_clear_ifnet(struct ns_token * nstoken)884 netns_clear_ifnet(struct ns_token *nstoken)
885 {
886 #if SK_LOG
887 char tmp_ip_str[MAX_IPv6_STR_LEN];
888 #endif /* SK_LOG */
889
890 NETNS_LOCK_ASSERT_HELD();
891
892 if (nstoken->nt_ifp != NULL) {
893 SLIST_REMOVE(&nstoken->nt_ifp->if_netns_tokens, nstoken,
894 ns_token, nt_ifp_link);
895
896 SK_DF(NS_VERB_IP(nstoken->nt_addr_len) |
897 NS_VERB_PROTO(nstoken->nt_proto),
898 "%s:%s:%d // removed from ifnet %d",
899 inet_ntop(LEN_TO_AF(nstoken->nt_addr_len),
900 nstoken->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
901 PROTO_STR(nstoken->nt_proto), nstoken->nt_port,
902 nstoken->nt_ifp->if_index);
903
904 NETNS_LOCK_CONVERT();
905 ifnet_decr_iorefcnt(nstoken->nt_ifp);
906 nstoken->nt_ifp = NULL;
907 } else {
908 SLIST_REMOVE(&netns_unbound_tokens, nstoken, ns_token,
909 nt_ifp_link);
910 }
911 }
912
913 /*
914 * Internal shared code to perform a port[-range] reservation, along with all
915 * the boilerplate and sanity checks expected for a call coming in from the
916 * surrounding kernel code.
917 */
918 static int
_netns_reserve_kpi_common(struct ns * ns,netns_token * token,uint32_t * addr,uint8_t addr_len,uint8_t proto,in_port_t * port,uint32_t flags,struct ns_flow_info * nfi)919 _netns_reserve_kpi_common(struct ns *ns, netns_token *token, uint32_t *addr,
920 uint8_t addr_len, uint8_t proto, in_port_t *port, uint32_t flags,
921 struct ns_flow_info *nfi)
922 {
923 boolean_t ns_want_cleanup = (ns == NULL);
924 struct ns_token *nt;
925 int err = 0;
926 in_port_t hport;
927 #if SK_LOG
928 char tmp_ip_str[MAX_IPv6_STR_LEN];
929 #endif /* SK_LOG */
930 struct ifnet *ifp = (nfi != NULL) ? nfi->nfi_ifp : NULL;
931
932 NETNS_LOCK_ASSERT_HELD();
933
934 hport = ntohs(*port);
935
936 VERIFY((flags & NETNS_OWNER_MASK) <= NETNS_OWNER_MAX);
937 VERIFY(addr_len == sizeof(struct in_addr) ||
938 addr_len == sizeof(struct in6_addr));
939 VERIFY(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
940 VERIFY(hport != 0);
941
942 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
943 "reserving %s:%s:%d // flags 0x%x // token %svalid",
944 inet_ntop(LEN_TO_AF(addr_len), addr, tmp_ip_str,
945 sizeof(tmp_ip_str)), PROTO_STR(proto), hport, flags,
946 NETNS_TOKEN_VALID(token) ? "" : "in");
947
948 /*
949 * See the documentation for NETNS_PRERESERVED in netns.h for an
950 * explanation of this block.
951 */
952 if (NETNS_TOKEN_VALID(token)) {
953 if (flags & NETNS_PRERESERVED) {
954 nt = *token;
955 VERIFY(nt->nt_addr_len == addr_len);
956 VERIFY(memcmp(nt->nt_addr, addr, addr_len) == 0);
957 VERIFY(nt->nt_proto == proto);
958 VERIFY(nt->nt_port == hport);
959 VERIFY((nt->nt_flags &
960 NETNS_RESERVATION_FLAGS | NETNS_PRERESERVED) ==
961 (flags & NETNS_RESERVATION_FLAGS));
962
963 if ((nt->nt_flags & NETNS_CONFIGURATION_FLAGS) ==
964 (flags & NETNS_CONFIGURATION_FLAGS)) {
965 SK_DF(NS_VERB_IP(nt->nt_addr_len) |
966 NS_VERB_PROTO(nt->nt_proto),
967 "%s:%s:%d // flags 0x%x -> 0x%x",
968 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
969 nt->nt_addr, tmp_ip_str,
970 sizeof(tmp_ip_str)),
971 PROTO_STR(nt->nt_proto),
972 nt->nt_port, nt->nt_flags, flags);
973 nt->nt_flags &= ~NETNS_CONFIGURATION_FLAGS;
974 nt->nt_flags |=
975 flags & NETNS_CONFIGURATION_FLAGS;
976 }
977 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
978 "token was prereserved");
979 goto done;
980 } else {
981 panic("Request to overwrite valid netns token");
982 /* NOTREACHED */
983 __builtin_unreachable();
984 }
985 }
986
987 /*
988 * TODO: Check range against bitmap
989 */
990 if (hport == 0) {
991 /*
992 * Caller request an arbitrary range of ports
993 * TODO: Need to figure out how to allocate
994 * emphemeral ports only.
995 */
996 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
997 "ERROR - wildcard port not yet supported");
998 err = ENOMEM;
999 goto done;
1000 }
1001
1002 /*
1003 * Fetch namespace for the specified address/protocol, creating
1004 * a new namespace if necessary.
1005 */
1006 if (ns == NULL) {
1007 ASSERT(ns_want_cleanup);
1008 ns = _netns_get_ns(addr, addr_len, proto, true);
1009 }
1010 if (__improbable(ns == NULL)) {
1011 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1012 "ERROR - couldn't create namespace");
1013 err = ENOMEM;
1014 goto done;
1015 }
1016
1017 /*
1018 * Make a reservation in the namespace
1019 * This will return an error if an incompatible reservation
1020 * already exists.
1021 */
1022 err = _netns_reserve_common(ns, hport, flags);
1023 if (__improbable(err != 0)) {
1024 NETNS_LOCK_CONVERT();
1025 if (ns_want_cleanup) {
1026 netns_ns_cleanup(ns);
1027 }
1028 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1029 "ERROR - reservation collision");
1030 goto done;
1031 }
1032
1033 if (!_netns_is_wildcard_addr(ns->ns_addr, addr_len)) {
1034 /* Record the reservation in the non-wild namespace */
1035 struct ns *nwns;
1036
1037 nwns = netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto,
1038 addr_len)];
1039 err = _netns_reserve_common(nwns, hport, flags);
1040 if (__improbable(err != 0)) {
1041 /* Need to free the specific namespace entry */
1042 NETNS_LOCK_CONVERT();
1043 _netns_release_common(ns, hport, flags);
1044 if (ns_want_cleanup) {
1045 netns_ns_cleanup(ns);
1046 }
1047 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1048 "ERROR - reservation collision");
1049 goto done;
1050 }
1051 }
1052
1053 nt = netns_ns_token_alloc(nfi != NULL ? true : false);
1054 ASSERT(nt->nt_ifp == NULL);
1055 _netns_set_ifnet_internal(nt, ifp);
1056
1057 memcpy(nt->nt_addr, addr, addr_len);
1058 nt->nt_addr_len = addr_len;
1059 nt->nt_proto = proto;
1060 nt->nt_port = hport;
1061 nt->nt_flags = flags;
1062
1063 if (nfi != NULL) {
1064 VERIFY(nt->nt_flow_info != NULL);
1065
1066 memcpy(nt->nt_flow_info, nfi, sizeof(struct ns_flow_info));
1067 /*
1068 * The local port is passed as a separate argument
1069 */
1070 if (nfi->nfi_laddr.sa.sa_family == AF_INET) {
1071 nt->nt_flow_info->nfi_laddr.sin.sin_port = *port;
1072 } else if (nfi->nfi_laddr.sa.sa_family == AF_INET6) {
1073 nt->nt_flow_info->nfi_laddr.sin6.sin6_port = *port;
1074 }
1075 }
1076 *token = nt;
1077
1078 done:
1079 return err;
1080 }
1081
1082 /*
1083 * Kernel-facing functions
1084 */
1085
1086 int
netns_init(void)1087 netns_init(void)
1088 {
1089 VERIFY(__netns_inited == 0);
1090
1091 netns_ns_reservation_size = sizeof(struct ns_reservation);
1092 netns_ns_reservation_cache = skmem_cache_create(NETNS_NS_RESERVATION_ZONE_NAME,
1093 netns_ns_reservation_size, sizeof(uint64_t), NULL, NULL, NULL,
1094 NULL, NULL, 0);
1095 if (netns_ns_reservation_cache == NULL) {
1096 panic("%s: skmem_cache create failed (%s)", __func__,
1097 NETNS_NS_RESERVATION_ZONE_NAME);
1098 /* NOTREACHED */
1099 __builtin_unreachable();
1100 }
1101
1102 netns_ns_token_size = sizeof(struct ns_token);
1103 netns_ns_token_cache = skmem_cache_create(NETNS_NS_TOKEN_ZONE_NAME,
1104 netns_ns_token_size, sizeof(uint64_t), NULL, NULL, NULL, NULL,
1105 NULL, 0);
1106 if (netns_ns_token_cache == NULL) {
1107 panic("%s: skmem_cache create failed (%s)", __func__,
1108 NETNS_NS_TOKEN_ZONE_NAME);
1109 /* NOTREACHED */
1110 __builtin_unreachable();
1111 }
1112
1113 netns_ns_flow_info_size = sizeof(struct ns_flow_info);
1114 netns_ns_flow_info_cache = skmem_cache_create(NETNS_NS_FLOW_INFO_ZONE_NAME,
1115 netns_ns_flow_info_size, sizeof(uint64_t), NULL, NULL, NULL,
1116 NULL, NULL, 0);
1117 if (netns_ns_flow_info_cache == NULL) {
1118 panic("%s: skmem_cache create failed (%s)", __func__,
1119 NETNS_NS_FLOW_INFO_ZONE_NAME);
1120 /* NOTREACHED */
1121 __builtin_unreachable();
1122 }
1123
1124 SLIST_INIT(&netns_unbound_tokens);
1125 SLIST_INIT(&netns_all_tokens);
1126
1127 netns_n_namespaces = 0;
1128 RB_INIT(&netns_namespaces);
1129
1130 SK_D("initializing global namespaces");
1131
1132 netns_init_global_ns(
1133 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_TCP,
1134 NETNS_ADDRLEN_V4)], IPPROTO_TCP, sizeof(struct in_addr));
1135
1136 netns_init_global_ns(
1137 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_UDP,
1138 NETNS_ADDRLEN_V4)], IPPROTO_UDP, sizeof(struct in_addr));
1139
1140 netns_init_global_ns(
1141 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_TCP,
1142 NETNS_ADDRLEN_V6)], IPPROTO_TCP, sizeof(struct in6_addr));
1143
1144 netns_init_global_ns(
1145 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_UDP,
1146 NETNS_ADDRLEN_V6)], IPPROTO_UDP, sizeof(struct in6_addr));
1147
1148 /* Done */
1149
1150 __netns_inited = 1;
1151 sk_features |= SK_FEATURE_NETNS;
1152
1153 SK_D("initialized netns");
1154
1155 return 0;
1156 }
1157
1158 void
netns_uninit(void)1159 netns_uninit(void)
1160 {
1161 if (__netns_inited == 1) {
1162 struct ns *namespace;
1163 struct ns *temp_namespace;
1164 int i;
1165
1166 RB_FOREACH_SAFE(namespace, netns_namespaces_tree,
1167 &netns_namespaces, temp_namespace) {
1168 RB_REMOVE(netns_namespaces_tree, &netns_namespaces,
1169 namespace);
1170 netns_n_namespaces--;
1171 netns_ns_free(namespace);
1172 }
1173
1174 for (i = 0; i < NETNS_N_GLOBAL; i++) {
1175 netns_ns_free(netns_global_non_wild[i]);
1176 }
1177
1178 if (netns_ns_flow_info_cache != NULL) {
1179 skmem_cache_destroy(netns_ns_flow_info_cache);
1180 netns_ns_flow_info_cache = NULL;
1181 }
1182 if (netns_ns_token_cache != NULL) {
1183 skmem_cache_destroy(netns_ns_token_cache);
1184 netns_ns_token_cache = NULL;
1185 }
1186 if (netns_ns_reservation_cache != NULL) {
1187 skmem_cache_destroy(netns_ns_reservation_cache);
1188 netns_ns_reservation_cache = NULL;
1189 }
1190
1191 __netns_inited = 0;
1192 sk_features &= ~SK_FEATURE_NETNS;
1193
1194 SK_D("uninitialized netns");
1195 }
1196 }
1197
1198 void
netns_reap_caches(boolean_t purge)1199 netns_reap_caches(boolean_t purge)
1200 {
1201 /* these aren't created unless netns is enabled */
1202 if (netns_ns_token_cache != NULL) {
1203 skmem_cache_reap_now(netns_ns_token_cache, purge);
1204 }
1205 if (netns_ns_reservation_cache != NULL) {
1206 skmem_cache_reap_now(netns_ns_reservation_cache, purge);
1207 }
1208 if (netns_ns_flow_info_cache != NULL) {
1209 skmem_cache_reap_now(netns_ns_flow_info_cache, purge);
1210 }
1211 }
1212
1213 boolean_t
netns_is_enabled(void)1214 netns_is_enabled(void)
1215 {
1216 return __netns_inited == 1;
1217 }
1218
1219 int
netns_reserve(netns_token * token,uint32_t * addr,uint8_t addr_len,uint8_t proto,in_port_t port,uint32_t flags,struct ns_flow_info * nfi)1220 netns_reserve(netns_token *token, uint32_t *addr, uint8_t addr_len,
1221 uint8_t proto, in_port_t port, uint32_t flags, struct ns_flow_info *nfi)
1222 {
1223 int err = 0;
1224 #if SK_LOG
1225 char tmp_ip_str[MAX_IPv6_STR_LEN];
1226 #endif /* SK_LOG */
1227
1228 if (__netns_inited == 0) {
1229 *token = NULL;
1230 return err;
1231 }
1232
1233 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1234 SK_ERR("netns doesn't support non TCP/UDP protocol");
1235 return ENOTSUP;
1236 }
1237
1238 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1239 "%s:%s:%d // flags 0x%x", inet_ntop(LEN_TO_AF(addr_len), addr,
1240 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto), ntohs(port),
1241 flags);
1242
1243 /*
1244 * Check wether the process is allowed to bind to a restricted port
1245 */
1246 if (!current_task_can_use_restricted_in_port(port,
1247 proto, flags)) {
1248 *token = NULL;
1249 return EADDRINUSE;
1250 }
1251
1252 NETNS_LOCK_SPIN();
1253 err = _netns_reserve_kpi_common(NULL, token, addr, addr_len,
1254 proto, &port, flags, nfi);
1255 NETNS_UNLOCK();
1256
1257 return err;
1258 }
1259
/*
 * Backing variables of the net.inet.{tcp,udp}.randomize_ports sysctls.
 * They are defined outside this file and consulted by
 * netns_reserve_ephemeral() to choose between a random and a sequential
 * starting port.
 */
extern int tcp_use_randomport;
extern int udp_use_randomport;
1263
1264 int
netns_reserve_ephemeral(netns_token * token,uint32_t * addr,uint8_t addr_len,uint8_t proto,in_port_t * port,uint32_t flags,struct ns_flow_info * nfi)1265 netns_reserve_ephemeral(netns_token *token, uint32_t *addr, uint8_t addr_len,
1266 uint8_t proto, in_port_t *port, uint32_t flags, struct ns_flow_info *nfi)
1267 {
1268 int err = 0;
1269 in_port_t first = (in_port_t)ipport_firstauto;
1270 in_port_t last = (in_port_t)ipport_lastauto;
1271 in_port_t rand_port;
1272 in_port_t last_port;
1273 in_port_t n_last_port;
1274 struct ns *namespace;
1275 boolean_t count_up = true;
1276 boolean_t use_randomport = (proto == IPPROTO_TCP) ?
1277 tcp_use_randomport : udp_use_randomport;
1278 #if SK_LOG
1279 char tmp_ip_str[MAX_IPv6_STR_LEN];
1280 #endif /* SK_LOG */
1281
1282 if (__netns_inited == 0) {
1283 *token = NULL;
1284 return err;
1285 }
1286
1287 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1288 SK_ERR("netns doesn't support non TCP/UDP protocol");
1289 return ENOTSUP;
1290 }
1291
1292 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1293 "%s:%s:%d // flags 0x%x", inet_ntop(LEN_TO_AF(addr_len), addr,
1294 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto), ntohs(*port),
1295 flags);
1296
1297 NETNS_LOCK_SPIN();
1298
1299 namespace = _netns_get_ns(addr, addr_len, proto, true);
1300 if (namespace == NULL) {
1301 err = ENOMEM;
1302 NETNS_UNLOCK();
1303 return err;
1304 }
1305
1306 if (proto == IPPROTO_UDP) {
1307 if (UINT16_MAX - namespace->ns_n_reservations <
1308 NETNS_NS_UDP_EPHEMERAL_RESERVE) {
1309 SK_ERR("UDP ephemeral port not available"
1310 "(less than 4096 UDP ports left)");
1311 err = EADDRNOTAVAIL;
1312 NETNS_UNLOCK();
1313 return err;
1314 }
1315 }
1316
1317 if (first == last) {
1318 rand_port = first;
1319 } else {
1320 if (use_randomport) {
1321 NETNS_LOCK_CONVERT();
1322 read_frandom(&rand_port, sizeof(rand_port));
1323
1324 if (first > last) {
1325 rand_port = last + (rand_port %
1326 (first - last));
1327 count_up = false;
1328 } else {
1329 rand_port = first + (rand_port %
1330 (last - first));
1331 }
1332 } else {
1333 if (first > last) {
1334 rand_port =
1335 namespace->ns_last_ephemeral_port_down - 1;
1336 if (rand_port < last || rand_port > first) {
1337 rand_port = last;
1338 }
1339 count_up = false;
1340 } else {
1341 rand_port =
1342 namespace->ns_last_ephemeral_port_up + 1;
1343 if (rand_port < first || rand_port > last) {
1344 rand_port = first;
1345 }
1346 }
1347 }
1348 }
1349 last_port = rand_port;
1350 n_last_port = htons(last_port);
1351
1352 while (true) {
1353 if (n_last_port == 0) {
1354 SK_ERR("ephemeral port search range includes 0");
1355 err = EINVAL;
1356 break;
1357 }
1358
1359 /*
1360 * Skip if this is a restricted port as we do not want to
1361 * restricted ports as ephemeral
1362 */
1363 if (!IS_RESTRICTED_IN_PORT(n_last_port)) {
1364 err = _netns_reserve_kpi_common(namespace, token, addr,
1365 addr_len, proto, &n_last_port, flags, nfi);
1366 if (err == 0 || err != EADDRINUSE) {
1367 break;
1368 }
1369 }
1370 if (count_up) {
1371 last_port++;
1372 if (last_port < first || last_port > last) {
1373 last_port = first;
1374 }
1375 } else {
1376 last_port--;
1377 if (last_port < last || last_port > first) {
1378 last_port = last;
1379 }
1380 }
1381 n_last_port = htons(last_port);
1382
1383 if (last_port == rand_port || first == last) {
1384 SK_ERR("couldn't find free ephemeral port");
1385 err = EADDRNOTAVAIL;
1386 break;
1387 }
1388 }
1389
1390 if (err == 0) {
1391 *port = n_last_port;
1392 if (count_up) {
1393 namespace->ns_last_ephemeral_port_up = last_port;
1394 } else {
1395 namespace->ns_last_ephemeral_port_down = last_port;
1396 }
1397 } else {
1398 netns_ns_cleanup(namespace);
1399 }
1400
1401 NETNS_UNLOCK();
1402
1403 return err;
1404 }
1405
1406 void
netns_release(netns_token * token)1407 netns_release(netns_token *token)
1408 {
1409 struct ns *ns;
1410 struct ns_token *nt;
1411 uint8_t proto, addr_len;
1412 #if SK_LOG
1413 char tmp_ip_str[MAX_IPv6_STR_LEN];
1414 #endif /* SK_LOG */
1415
1416 if (!NETNS_TOKEN_VALID(token)) {
1417 return;
1418 }
1419
1420 if (__netns_inited == 0) {
1421 *token = NULL;
1422 return;
1423 }
1424
1425 NETNS_LOCK_SPIN();
1426
1427 nt = *token;
1428 *token = NULL;
1429
1430 VERIFY((nt->nt_flags & NETNS_OWNER_MASK) <= NETNS_OWNER_MAX);
1431 VERIFY(nt->nt_addr_len == sizeof(struct in_addr) ||
1432 nt->nt_addr_len == sizeof(struct in6_addr));
1433 VERIFY(nt->nt_proto == IPPROTO_TCP || nt->nt_proto == IPPROTO_UDP);
1434
1435 addr_len = nt->nt_addr_len;
1436 proto = nt->nt_proto;
1437
1438 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1439 "releasing %s:%s:%d",
1440 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1441 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto),
1442 nt->nt_port);
1443
1444 if (!_netns_is_wildcard_addr(nt->nt_addr, addr_len)) {
1445 /* Remove from global non-wild namespace */
1446
1447 ns = netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto,
1448 addr_len)];
1449 VERIFY(ns != NULL);
1450
1451 _netns_release_common(ns, nt->nt_port, nt->nt_flags);
1452 }
1453
1454 ns = _netns_get_ns(nt->nt_addr, addr_len, proto, false);
1455 VERIFY(ns != NULL);
1456 _netns_release_common(ns, nt->nt_port, nt->nt_flags);
1457
1458 netns_clear_ifnet(nt);
1459 netns_ns_token_free(nt);
1460
1461 NETNS_UNLOCK();
1462 }
1463
1464 int
netns_change_addr(netns_token * token,uint32_t * addr,uint8_t addr_len)1465 netns_change_addr(netns_token *token, uint32_t *addr, uint8_t addr_len)
1466 {
1467 int err = 0;
1468 struct ns *old_namespace;
1469 struct ns *new_namespace;
1470 struct ns *global_namespace;
1471 struct ns_token *nt;
1472 uint8_t proto;
1473 #if SK_LOG
1474 char tmp_ip_str_1[MAX_IPv6_STR_LEN];
1475 char tmp_ip_str_2[MAX_IPv6_STR_LEN];
1476 #endif /* SK_LOG */
1477
1478 if (__netns_inited == 0) {
1479 return 0;
1480 }
1481
1482 NETNS_LOCK();
1483
1484 VERIFY(NETNS_TOKEN_VALID(token));
1485
1486 nt = *token;
1487
1488 VERIFY((nt->nt_flags & NETNS_OWNER_MASK) == NETNS_BSD);
1489 VERIFY(nt->nt_addr_len == sizeof(struct in_addr) ||
1490 nt->nt_addr_len == sizeof(struct in6_addr));
1491 VERIFY(nt->nt_proto == IPPROTO_TCP || nt->nt_proto == IPPROTO_UDP);
1492
1493 proto = nt->nt_proto;
1494
1495 #if SK_LOG
1496 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1497 tmp_ip_str_1, sizeof(tmp_ip_str_1));
1498 inet_ntop(LEN_TO_AF(addr_len), addr, tmp_ip_str_2,
1499 sizeof(tmp_ip_str_2));
1500 #endif /* SK_LOG */
1501 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1502 "changing address for %s:%d from %s to %s",
1503 PROTO_STR(proto), nt->nt_port, tmp_ip_str_1,
1504 tmp_ip_str_2);
1505
1506 if (nt->nt_addr_len == addr_len &&
1507 memcmp(nt->nt_addr, addr, nt->nt_addr_len) == 0) {
1508 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1509 "address didn't change, exiting early");
1510 goto done;
1511 }
1512
1513 old_namespace = _netns_get_ns(nt->nt_addr, nt->nt_addr_len, proto,
1514 false);
1515 VERIFY(old_namespace != NULL);
1516
1517 new_namespace = _netns_get_ns(addr, addr_len, proto, true);
1518 if (new_namespace == NULL) {
1519 err = ENOMEM;
1520 goto done;
1521 }
1522
1523 /* Acquire reservation in new namespace */
1524 if ((err = _netns_reserve_common(new_namespace, nt->nt_port,
1525 nt->nt_flags))) {
1526 NETNS_LOCK_CONVERT();
1527 netns_ns_cleanup(new_namespace);
1528 SK_ERR("ERROR - reservation collision under new namespace");
1529 goto done;
1530 }
1531
1532 /* Release from old namespace */
1533 _netns_release_common(old_namespace, nt->nt_port, nt->nt_flags);
1534
1535 if (!_netns_is_wildcard_addr(nt->nt_addr, nt->nt_addr_len)) {
1536 /*
1537 * Old address is non-wildcard.
1538 * Remove old reservation from global non-wild namespace
1539 */
1540 global_namespace = netns_global_non_wild[
1541 NETNS_NS_GLOBAL_IDX(proto, nt->nt_addr_len)];
1542 VERIFY(global_namespace != NULL);
1543
1544 _netns_release_common(global_namespace, nt->nt_port,
1545 nt->nt_flags);
1546 }
1547
1548 if (!_netns_is_wildcard_addr(addr, addr_len)) {
1549 /*
1550 * New address is non-wildcard.
1551 * Record new reservation in global non-wild namespace
1552 */
1553 global_namespace = netns_global_non_wild[
1554 NETNS_NS_GLOBAL_IDX(proto, addr_len)];
1555 VERIFY(global_namespace != NULL);
1556
1557 if ((err = _netns_reserve_common(global_namespace,
1558 nt->nt_port, nt->nt_flags)) != 0) {
1559 SK_ERR("ERROR - reservation collision under new global namespace");
1560 /* XXX: Should not fail. Maybe assert instead */
1561 goto done;
1562 }
1563 }
1564
1565 memcpy(nt->nt_addr, addr, addr_len);
1566 nt->nt_addr_len = addr_len;
1567
1568 done:
1569 NETNS_UNLOCK();
1570 return err;
1571 }
1572
1573 static void
_netns_set_ifnet_internal(struct ns_token * nt,struct ifnet * ifp)1574 _netns_set_ifnet_internal(struct ns_token *nt, struct ifnet *ifp)
1575 {
1576 #if SK_LOG
1577 char tmp_ip_str[MAX_IPv6_STR_LEN];
1578 #endif /* SK_LOG */
1579
1580 NETNS_LOCK_ASSERT_HELD();
1581
1582 if (ifp != NULL && ifnet_is_attached(ifp, 1)) {
1583 nt->nt_ifp = ifp;
1584 SLIST_INSERT_HEAD(&ifp->if_netns_tokens, nt, nt_ifp_link);
1585
1586 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1587 "%s:%s:%d // added to ifnet %d",
1588 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
1589 nt->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
1590 PROTO_STR(nt->nt_proto), nt->nt_port,
1591 ifp->if_index);
1592 } else {
1593 SLIST_INSERT_HEAD(&netns_unbound_tokens, nt, nt_ifp_link);
1594 }
1595 }
1596
1597 void
netns_set_ifnet(netns_token * token,ifnet_t ifp)1598 netns_set_ifnet(netns_token *token, ifnet_t ifp)
1599 {
1600 struct ns_token *nt;
1601 #if SK_LOG
1602 char tmp_ip_str[MAX_IPv6_STR_LEN];
1603 #endif /* SK_LOG */
1604
1605 if (__netns_inited == 0) {
1606 return;
1607 }
1608
1609 NETNS_LOCK();
1610
1611 VERIFY(NETNS_TOKEN_VALID(token));
1612
1613 nt = *token;
1614
1615 if (nt->nt_ifp == ifp) {
1616 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1617 "%s:%s:%d // ifnet already %d, exiting early",
1618 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
1619 nt->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
1620 PROTO_STR(nt->nt_proto), nt->nt_port,
1621 ifp ? ifp->if_index : -1);
1622 NETNS_UNLOCK();
1623 return;
1624 }
1625
1626 netns_clear_ifnet(nt);
1627
1628 _netns_set_ifnet_internal(nt, ifp);
1629
1630 NETNS_UNLOCK();
1631 }
1632
1633 void
netns_ifnet_detach(ifnet_t ifp)1634 netns_ifnet_detach(ifnet_t ifp)
1635 {
1636 struct ns_token *token, *tmp_token;
1637
1638 if (__netns_inited == 0) {
1639 return;
1640 }
1641
1642 NETNS_LOCK();
1643
1644 SLIST_FOREACH_SAFE(token, &ifp->if_netns_tokens, nt_ifp_link,
1645 tmp_token) {
1646 netns_clear_ifnet(token);
1647 SLIST_INSERT_HEAD(&netns_unbound_tokens, token, nt_ifp_link);
1648 }
1649
1650 NETNS_UNLOCK();
1651 }
1652
1653 static void
_netns_set_state(netns_token * token,uint32_t state)1654 _netns_set_state(netns_token *token, uint32_t state)
1655 {
1656 struct ns_token *nt;
1657 #if SK_LOG
1658 char tmp_ip_str[MAX_IPv6_STR_LEN];
1659 #endif /* SK_LOG */
1660
1661 if (__netns_inited == 0) {
1662 return;
1663 }
1664
1665 NETNS_LOCK();
1666 VERIFY(NETNS_TOKEN_VALID(token));
1667
1668 nt = *token;
1669 nt->nt_state |= state;
1670
1671 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1672 "%s:%s:%d // state 0x%b",
1673 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1674 tmp_ip_str, sizeof(tmp_ip_str)),
1675 PROTO_STR(nt->nt_proto), nt->nt_port, state, NETNS_STATE_BITS);
1676
1677 NETNS_UNLOCK();
1678 }
1679
1680 void
netns_half_close(netns_token * token)1681 netns_half_close(netns_token *token)
1682 {
1683 _netns_set_state(token, NETNS_STATE_HALFCLOSED);
1684 }
1685
1686 void
netns_withdraw(netns_token * token)1687 netns_withdraw(netns_token *token)
1688 {
1689 _netns_set_state(token, NETNS_STATE_WITHDRAWN);
1690 }
1691
1692 int
netns_get_flow_info(netns_token * token,struct ns_flow_info * nfi)1693 netns_get_flow_info(netns_token *token,
1694 struct ns_flow_info *nfi)
1695 {
1696 if (__netns_inited == 0) {
1697 return ENOTSUP;
1698 }
1699
1700 NETNS_LOCK();
1701 if (!NETNS_TOKEN_VALID(token) ||
1702 nfi == NULL) {
1703 NETNS_UNLOCK();
1704 return EINVAL;
1705 }
1706
1707 struct ns_token *nt = *token;
1708 if (nt->nt_flow_info == NULL) {
1709 NETNS_UNLOCK();
1710 return ENOENT;
1711 }
1712
1713 memcpy(nfi, nt->nt_flow_info, sizeof(struct ns_flow_info));
1714 NETNS_UNLOCK();
1715
1716 return 0;
1717 }
1718
1719 void
netns_change_flags(netns_token * token,uint32_t set_flags,uint32_t clear_flags)1720 netns_change_flags(netns_token *token, uint32_t set_flags,
1721 uint32_t clear_flags)
1722 {
1723 struct ns_token *nt;
1724 #if SK_LOG
1725 char tmp_ip_str[MAX_IPv6_STR_LEN];
1726 #endif /* SK_LOG */
1727
1728 if (__netns_inited == 0) {
1729 return;
1730 }
1731
1732 NETNS_LOCK();
1733
1734 VERIFY(NETNS_TOKEN_VALID(token));
1735
1736 nt = *token;
1737
1738 VERIFY(!((set_flags | clear_flags) & NETNS_RESERVATION_FLAGS));
1739 /* TODO: verify set and clear flags don't overlap? */
1740
1741 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1742 "%s:%s:%d // flags 0x%x -> 0x%x",
1743 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1744 tmp_ip_str, sizeof(tmp_ip_str)),
1745 PROTO_STR(nt->nt_proto), nt->nt_port, nt->nt_flags,
1746 nt->nt_flags | set_flags & ~clear_flags);
1747
1748 nt->nt_flags |= set_flags;
1749 nt->nt_flags &= ~clear_flags;
1750
1751 NETNS_UNLOCK();
1752 }
1753
1754 /*
1755 * Port offloading KPI
1756 */
1757 static inline void
netns_local_port_scan_flow_entry(struct flow_entry * fe,protocol_family_t protocol,u_int32_t flags,u_int8_t * bitfield)1758 netns_local_port_scan_flow_entry(struct flow_entry *fe, protocol_family_t protocol,
1759 u_int32_t flags, u_int8_t *bitfield)
1760 {
1761 struct ns_token *token;
1762 boolean_t iswildcard = false;
1763
1764 if (fe == NULL) {
1765 return;
1766 }
1767
1768 if (fe->fe_flags & FLOWENTF_EXTRL_PORT) {
1769 return;
1770 }
1771
1772 token = fe->fe_port_reservation;
1773 if (token == NULL) {
1774 return;
1775 }
1776
1777 /*
1778 * We are only interested in active flows over skywalk channels
1779 */
1780 if ((token->nt_flags & NETNS_OWNER_MASK) != NETNS_SKYWALK) {
1781 return;
1782 }
1783
1784 if (token->nt_state & NETNS_STATE_WITHDRAWN) {
1785 return;
1786 }
1787
1788 if (!(flags & IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK) &&
1789 (flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) &&
1790 (token->nt_state & NETNS_STATE_HALFCLOSED)) {
1791 return;
1792 }
1793
1794 VERIFY(token->nt_addr_len == sizeof(struct in_addr) ||
1795 token->nt_addr_len == sizeof(struct in6_addr));
1796
1797 if (token->nt_addr_len == sizeof(struct in_addr)) {
1798 if (protocol == PF_INET6) {
1799 return;
1800 }
1801
1802 iswildcard = token->nt_inaddr.s_addr == INADDR_ANY;
1803 } else if (token->nt_addr_len == sizeof(struct in6_addr)) {
1804 if (protocol == PF_INET) {
1805 return;
1806 }
1807
1808 iswildcard = IN6_IS_ADDR_UNSPECIFIED(
1809 &token->nt_in6addr);
1810 }
1811 if (!(flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) && iswildcard) {
1812 return;
1813 }
1814
1815 if ((flags & IFNET_GET_LOCAL_PORTS_TCPONLY) &&
1816 token->nt_proto == IPPROTO_UDP) {
1817 return;
1818 }
1819 if ((flags & IFNET_GET_LOCAL_PORTS_UDPONLY) &&
1820 token->nt_proto == IPPROTO_TCP) {
1821 return;
1822 }
1823
1824 if (!(flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) &&
1825 (token->nt_flags & NETNS_NOWAKEFROMSLEEP)) {
1826 return;
1827 }
1828
1829 if ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) &&
1830 !(token->nt_flags & NETNS_RECVANYIF)) {
1831 return;
1832 }
1833
1834 if ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) &&
1835 !(token->nt_flags & NETNS_EXTBGIDLE)) {
1836 return;
1837 }
1838
1839 if (token->nt_ifp != NULL && token->nt_flow_info != NULL) {
1840 bitstr_set(bitfield, token->nt_port);
1841 (void) if_ports_used_add_flow_entry(fe, token->nt_ifp->if_index,
1842 token->nt_flow_info, token->nt_flags);
1843 } else {
1844 SK_ERR("%s: unknown owner port %u"
1845 " nt_flags 0x%x ifindex %u nt_flow_info %p\n",
1846 __func__, token->nt_port,
1847 token->nt_flags,
1848 token->nt_ifp != NULL ? token->nt_ifp->if_index : 0,
1849 token->nt_flow_info);
1850 }
1851 }
1852
1853 static void
netns_get_if_local_ports(ifnet_t ifp,protocol_family_t protocol,u_int32_t flags,u_int8_t * bitfield)1854 netns_get_if_local_ports(ifnet_t ifp, protocol_family_t protocol,
1855 u_int32_t flags, u_int8_t *bitfield)
1856 {
1857 struct nx_flowswitch *fsw = NULL;
1858
1859 if (ifp == NULL || ifp->if_na == NULL) {
1860 return;
1861 }
1862 /* Ensure that the interface is attached and won't detach */
1863 if (!ifnet_is_attached(ifp, 1)) {
1864 return;
1865 }
1866 fsw = fsw_ifp_to_fsw(ifp);
1867 if (fsw == NULL) {
1868 goto done;
1869 }
1870 FSW_RLOCK(fsw);
1871 NETNS_LOCK();
1872 flow_mgr_foreach_flow(fsw->fsw_flow_mgr, ^(struct flow_entry *_fe) {
1873 netns_local_port_scan_flow_entry(_fe, protocol, flags,
1874 bitfield);
1875 });
1876 NETNS_UNLOCK();
1877 FSW_UNLOCK(fsw);
1878 done:
1879 ifnet_decr_iorefcnt(ifp);
1880 }
1881
1882 errno_t
netns_get_local_ports(ifnet_t ifp,protocol_family_t protocol,u_int32_t flags,u_int8_t * bitfield)1883 netns_get_local_ports(ifnet_t ifp, protocol_family_t protocol,
1884 u_int32_t flags, u_int8_t *bitfield)
1885 {
1886 if (__netns_inited == 0) {
1887 return 0;
1888 }
1889 if (ifp != NULL) {
1890 netns_get_if_local_ports(ifp, protocol, flags, bitfield);
1891 } else {
1892 errno_t error;
1893 ifnet_t *ifp_list;
1894 uint32_t count, i;
1895
1896 error = ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp_list, &count);
1897 if (error != 0) {
1898 os_log_error(OS_LOG_DEFAULT,
1899 "%s: ifnet_list_get_all() failed %d",
1900 __func__, error);
1901 return error;
1902 }
1903 for (i = 0; i < count; i++) {
1904 if (TAILQ_EMPTY(&ifp_list[i]->if_addrhead)) {
1905 continue;
1906 }
1907 netns_get_if_local_ports(ifp_list[i], protocol, flags,
1908 bitfield);
1909 }
1910 ifnet_list_free(ifp_list);
1911 }
1912
1913 return 0;
1914 }
1915
1916 uint32_t
netns_find_anyres_byaddr(struct ifaddr * ifa,uint8_t proto)1917 netns_find_anyres_byaddr(struct ifaddr *ifa, uint8_t proto)
1918 {
1919 int result = 0;
1920 int ifa_addr_len;
1921 struct ns_token *token;
1922 struct ifnet *ifp = ifa->ifa_ifp;
1923 struct sockaddr *ifa_addr = ifa->ifa_addr;
1924
1925 if (__netns_inited == 0) {
1926 return ENOTSUP;
1927 }
1928
1929 if ((ifa_addr->sa_family != AF_INET) &&
1930 (ifa_addr->sa_family != AF_INET6)) {
1931 return 0;
1932 }
1933
1934 ifa_addr_len = (ifa_addr->sa_family == AF_INET) ?
1935 sizeof(struct in_addr) : sizeof(struct in6_addr);
1936
1937 NETNS_LOCK();
1938
1939 SLIST_FOREACH(token, &ifp->if_netns_tokens, nt_ifp_link) {
1940 if ((token->nt_flags & NETNS_OWNER_MASK) == NETNS_PF) {
1941 continue;
1942 }
1943 if (token->nt_addr_len != ifa_addr_len) {
1944 continue;
1945 }
1946 if (token->nt_proto != proto) {
1947 continue;
1948 }
1949 if (ifa_addr->sa_family == AF_INET) {
1950 if (token->nt_inaddr.s_addr ==
1951 (satosin(ifa->ifa_addr))->sin_addr.s_addr) {
1952 result = 1;
1953 break;
1954 }
1955 } else if (ifa_addr->sa_family == AF_INET6) {
1956 if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa),
1957 &token->nt_in6addr)) {
1958 result = 1;
1959 break;
1960 }
1961 }
1962 }
1963
1964 NETNS_UNLOCK();
1965 return result;
1966 }
1967
1968 static uint32_t
_netns_lookup_ns_n_reservations(uint32_t * addr,uint8_t addr_len,uint8_t proto)1969 _netns_lookup_ns_n_reservations(uint32_t *addr, uint8_t addr_len, uint8_t proto)
1970 {
1971 uint32_t ns_n_reservations = 0;
1972 NETNS_LOCK_SPIN();
1973 struct ns *namespace = _netns_get_ns(addr, addr_len, proto, true);
1974 if (namespace != NULL) {
1975 ns_n_reservations = namespace->ns_n_reservations;
1976 }
1977 NETNS_UNLOCK();
1978 return ns_n_reservations;
1979 }
1980
1981 uint32_t
netns_lookup_reservations_count_in(struct in_addr addr,uint8_t proto)1982 netns_lookup_reservations_count_in(struct in_addr addr, uint8_t proto)
1983 {
1984 return _netns_lookup_ns_n_reservations(&addr.s_addr, sizeof(struct in_addr), proto);
1985 }
1986
1987 uint32_t
netns_lookup_reservations_count_in6(struct in6_addr addr,uint8_t proto)1988 netns_lookup_reservations_count_in6(struct in6_addr addr, uint8_t proto)
1989 {
1990 if (IN6_IS_SCOPE_EMBED(&addr)) {
1991 addr.s6_addr16[1] = 0;
1992 }
1993 return _netns_lookup_ns_n_reservations(&addr.s6_addr32[0], sizeof(struct in6_addr), proto);
1994 }
1995
1996 /*
1997 * Sysctl interface
1998 */
1999
2000 static int netns_ctl_dump_all SYSCTL_HANDLER_ARGS;
2001
2002 SYSCTL_NODE(_kern_skywalk, OID_AUTO, netns, CTLFLAG_RW | CTLFLAG_LOCKED,
2003 0, "Netns interface");
2004
2005 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, netns,
2006 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
2007 0, 0, netns_ctl_dump_all, "-",
2008 "Namespace contents (struct netns_ctl_dump_header, "
2009 "skywalk/os_stats_private.h)");
2010
2011 static int
netns_ctl_write_ns(struct sysctl_req * req,struct ns * namespace,boolean_t is_global)2012 netns_ctl_write_ns(struct sysctl_req *req, struct ns *namespace,
2013 boolean_t is_global)
2014 {
2015 struct ns_reservation *res;
2016 struct netns_ctl_dump_header response_header;
2017 struct netns_ctl_dump_record response_record;
2018 int err;
2019
2020 /* Fill out header */
2021 memset(&response_header, 0, sizeof(response_header));
2022 response_header.ncdh_n_records = namespace->ns_n_reservations;
2023 response_header.ncdh_proto = namespace->ns_proto;
2024
2025 if (is_global) {
2026 response_header.ncdh_addr_len = 0;
2027 } else {
2028 response_header.ncdh_addr_len = namespace->ns_addr_len;
2029 }
2030 memcpy(response_header.ncdh_addr, namespace->ns_addr,
2031 namespace->ns_addr_len);
2032
2033 err = SYSCTL_OUT(req, &response_header, sizeof(response_header));
2034 if (err) {
2035 return err;
2036 }
2037
2038 /* Fill out records */
2039 RB_FOREACH(res, ns_reservation_tree, &namespace->ns_reservations) {
2040 memset(&response_record, 0, sizeof(response_record));
2041 response_record.ncdr_port = res->nsr_port;
2042 response_record.ncdr_port_end = 0;
2043 response_record.ncdr_listener_refs =
2044 NETNS_REF_COUNT(res, NETNS_LISTENER);
2045 response_record.ncdr_skywalk_refs =
2046 NETNS_REF_COUNT(res, NETNS_SKYWALK);
2047 response_record.ncdr_bsd_refs =
2048 NETNS_REF_COUNT(res, NETNS_BSD);
2049 response_record.ncdr_pf_refs =
2050 NETNS_REF_COUNT(res, NETNS_PF);
2051 err = SYSCTL_OUT(req, &response_record,
2052 sizeof(response_record));
2053 if (err) {
2054 return err;
2055 }
2056 }
2057
2058 return 0;
2059 }
2060
2061 static int
2062 netns_ctl_dump_all SYSCTL_HANDLER_ARGS
2063 {
2064 #pragma unused(oidp, arg1, arg2)
2065 struct ns *namespace;
2066 int i, err = 0;
2067
2068 if (!kauth_cred_issuser(kauth_cred_get())) {
2069 return EPERM;
2070 }
2071
2072 if (__netns_inited == 0) {
2073 return ENOTSUP;
2074 }
2075
2076 NETNS_LOCK();
2077
2078 for (i = 0; i < NETNS_N_GLOBAL; i++) {
2079 err = netns_ctl_write_ns(req, netns_global_non_wild[i], true);
2080 if (err) {
2081 goto done;
2082 }
2083 }
2084
2085 RB_FOREACH(namespace, netns_namespaces_tree, &netns_namespaces) {
2086 err = netns_ctl_write_ns(req, namespace, false);
2087 if (err) {
2088 goto done;
2089 }
2090 }
2091
2092 /*
2093 * If this is just a request for length, add slop because
2094 * this is dynamically changing data
2095 */
2096 if (req->oldptr == USER_ADDR_NULL) {
2097 req->oldidx += 20 * sizeof(struct netns_ctl_dump_record);
2098 }
2099
2100 done:
2101 NETNS_UNLOCK();
2102 return err;
2103 }
2104 /* CSTYLED */
2105