1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <kern/assert.h>
30 #include <kern/locks.h>
31 #include <kern/zalloc.h>
32 #include <libkern/tree.h>
33 #include <sys/kernel.h>
34 #include <sys/sysctl.h>
35 #include <sys/bitstring.h>
36 #include <net/if.h>
37 #include <net/kpi_interface.h>
38 #include <net/restricted_in_port.h>
39
40 #include <netinet/in.h>
41 #include <netinet/in_pcb.h>
42 #include <netinet/tcp_fsm.h>
43 #include <netinet/tcp_var.h>
44
45 #include <netinet6/in6_var.h>
46 #include <string.h>
47
48 #include <skywalk/os_skywalk.h>
49 #include <skywalk/os_skywalk_private.h>
50 #include <skywalk/os_stats_private.h>
51 #include <skywalk/nexus/flowswitch/flow/flow_var.h>
52 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
53
54 #include <net/if_ports_used.h>
55
/*
 * Module initialization flag; starts out as 0.
 * NOTE(review): presumably set once netns setup has completed; the code that
 * writes it is not visible in this part of the file -- confirm.
 */
static int __netns_inited = 0;
57
58 /*
59 * Logging
60 */
61
/*
 * Helpers that map a protocol number or an address length to the matching
 * SK_VERB_NS_* verbosity bit, a printable protocol name, or an address
 * family.  Any protocol other than IPPROTO_TCP is treated as UDP, and any
 * length other than sizeof (struct in_addr) is treated as IPv6.
 *
 * Each macro parameter is parenthesized so that an expression argument
 * (for instance a conditional expression) is compared as a whole instead of
 * being regrouped around the == operator.
 */
#define NS_VERB_PROTO(proto)    (((proto) == IPPROTO_TCP) ? SK_VERB_NS_TCP : \
	                        SK_VERB_NS_UDP)
#define NS_VERB_IP(addr_len)    (((addr_len) == sizeof (struct in_addr)) ? \
	                        SK_VERB_NS_IPV4 : SK_VERB_NS_IPV6)
#define PROTO_STR(proto)        (((proto) == IPPROTO_TCP) ? "tcp" : "udp")
#define LEN_TO_AF(len)          ((((len) == sizeof (struct in_addr)) ? \
	                        AF_INET : AF_INET6))
/*
 * Locking
 * Netns is currently protected by a global mutex, NETNS_LOCK. This lock is
 * acquired at the entry of every kernel-facing function, and released at the
 * end. Data within netns_token structures is also protected under this lock.
 */
75
/* Acquire the global netns mutex (lck_mtx_lock on netns_lock). */
#define NETNS_LOCK()                    \
	lck_mtx_lock(&netns_lock)
/* Acquire netns_lock through lck_mtx_lock_spin. */
#define NETNS_LOCK_SPIN()               \
	lck_mtx_lock_spin(&netns_lock)
/*
 * Assert that netns_lock is owned, then call lck_mtx_convert_spin on it.
 * NOTE(review): presumably this turns a spin-mode hold taken with
 * NETNS_LOCK_SPIN into a full mutex hold before code that may block --
 * confirm against the lck_mtx documentation.
 */
#define NETNS_LOCK_CONVERT() do {       \
	NETNS_LOCK_ASSERT_HELD();       \
	lck_mtx_convert_spin(&netns_lock); \
} while (0)
/* Release netns_lock. */
#define NETNS_UNLOCK()                  \
	lck_mtx_unlock(&netns_lock)
/* Assert that the current thread owns netns_lock. */
#define NETNS_LOCK_ASSERT_HELD()        \
	LCK_MTX_ASSERT(&netns_lock, LCK_MTX_ASSERT_OWNED)
/* Assert that the current thread does not own netns_lock. */
#define NETNS_LOCK_ASSERT_NOTHELD()     \
	LCK_MTX_ASSERT(&netns_lock, LCK_MTX_ASSERT_NOTOWNED)

/* Lock group and the single global mutex used by all of the macros above. */
static LCK_GRP_DECLARE(netns_lock_group, "netns_lock");
static LCK_MTX_DECLARE(netns_lock, &netns_lock_group);
93
94 /*
95 * Internal data structures and parameters
96 */
97
98 /*
99 * Local ports are kept track of by reference counts kept in a tree specific to
100 * an <IP, protocol> tuple (see struct ns).
101 *
102 * Note: port numbers are stored in host byte order.
103 */
struct ns_reservation {
	/* linkage in the owning namespace's ns_reservation_tree */
	RB_ENTRY(ns_reservation) nsr_link;
	/* reserved port; this is the key compared by nsr_cmp() */
	in_port_t nsr_port;
	/* one reference count per owner, indexed by (flags & NETNS_OWNER_MASK) */
	uint32_t nsr_refs[NETNS_OWNER_MAX + 1];
};
109
/*
 * Reference count slot of reservation `nsr' for the owner encoded in `flags'
 * (flags & NETNS_OWNER_MASK).  Expands to an lvalue, so callers read,
 * increment and decrement it in place.
 */
#define NETNS_REF_COUNT(nsr, flags) \
	(nsr)->nsr_refs[((flags) & NETNS_OWNER_MASK)]

/* Comparator for ns_reservation_tree; orders reservations by nsr_port. */
static inline int nsr_cmp(const struct ns_reservation *,
    const struct ns_reservation *);

/* Red-black tree of reservations, linked through nsr_link. */
RB_HEAD(ns_reservation_tree, ns_reservation);
RB_PROTOTYPE(ns_reservation_tree, ns_reservation, nsr_link, nsr_cmp);
RB_GENERATE(ns_reservation_tree, ns_reservation, nsr_link, nsr_cmp);

/* Looks up the reservation for a given port in a reservation tree. */
static inline struct ns_reservation *ns_reservation_tree_find(
	struct ns_reservation_tree *, const in_port_t);
122
/*
 * A namespace keeps track of the local port numbers in use for a given
 * <IP, protocol> tuple. There are also global namespaces for each
 * protocol to accommodate INADDR_ANY behavior and diagnostics.
 */
struct ns {
	/* linkage in the netns_namespaces tree */
	RB_ENTRY(ns) ns_link;

	/*
	 * Address pointer handed to flow_ip_cmp() by ns_cmp().  For a
	 * namespace created by _netns_get_ns() it points at ns_addr below;
	 * for the on-stack lookup key it points at the caller's address.
	 */
	void *ns_addr_key;

	/* Local address of this namespace, viewable as raw words, IPv4 or IPv6 */
	union {
		uint32_t ns_addr[4];
		struct in_addr ns_inaddr;
		struct in6_addr ns_in6addr;
	};
	/* sizeof (struct in_addr) or sizeof (struct in6_addr) */
	uint8_t ns_addr_len;
	/* IP protocol number of this namespace */
	uint8_t ns_proto;

	/*
	 * Ephemeral port cursors; netns_ns_alloc() seeds both with the same
	 * randomly chosen starting port.
	 * NOTE(review): the code that advances them is not visible in this
	 * part of the file.
	 */
	in_port_t ns_last_ephemeral_port_down;
	in_port_t ns_last_ephemeral_port_up;

	/*
	 * Non-zero when netns_ns_cleanup() may free the namespace once it has
	 * no reservations; cleared by netns_init_global_ns().
	 */
	uint8_t ns_is_freeable;

	/* number of entries in ns_reservations */
	uint32_t ns_n_reservations;
	/* per-port reservation records, keyed by port */
	struct ns_reservation_tree ns_reservations;
};
149
/* Number of namespaces currently linked into netns_namespaces. */
static uint32_t netns_n_namespaces;

/*
 * Comparator for netns_namespaces_tree; orders by address length, then
 * protocol, then address.
 */
static inline int ns_cmp(const struct ns *, const struct ns *);

/* Tree of all per-<address, protocol> namespaces, linked through ns_link. */
RB_HEAD(netns_namespaces_tree, ns) netns_namespaces =
    RB_INITIALIZER(netns_namespaces);
RB_PROTOTYPE_PREV(netns_namespaces_tree, ns, ns_link, ns_cmp);
RB_GENERATE_PREV(netns_namespaces_tree, ns, ns_link, ns_cmp);
158
159 /*
160 * Declare pointers to global namespaces for each protocol.
161 * All non-wildcard reservations will have an entry here.
162 */
/* One slot per <protocol, address family> combination. */
#define NETNS_N_GLOBAL          4
/* Global namespaces that track every non-wildcard reservation. */
static struct ns *netns_global_non_wild[NETNS_N_GLOBAL];
/* Wildcard-address namespace of each <protocol, family>, or NULL. */
static struct ns *netns_global_wild[NETNS_N_GLOBAL];
/* Bit 0 of the index selects the protocol, bit 1 the address family. */
#define NETNS_NS_TCP            0
#define NETNS_NS_UDP            1
#define NETNS_NS_V4             0
#define NETNS_NS_V6             2
/*
 * Index into the two arrays above: TCP/IPv4 = 0, UDP/IPv4 = 1,
 * TCP/IPv6 = 2, UDP/IPv6 = 3.  Any protocol other than IPPROTO_TCP is
 * treated as UDP and any length other than sizeof (struct in_addr) as IPv6.
 */
#define NETNS_NS_GLOBAL_IDX(proto, addrlen)     \
	((((proto) == IPPROTO_TCP) ? NETNS_NS_TCP : NETNS_NS_UDP) | \
	(((addrlen) == sizeof (struct in_addr)) ? NETNS_NS_V4 : NETNS_NS_V6))

/* NOTE(review): not referenced in this part of the file; meaning unconfirmed. */
#define NETNS_NS_UDP_EPHEMERAL_RESERVE  4096
175
176 /*
177 * Internal token structure
178 *
179 * Note: port numbers are stored in host byte order.
180 */
struct ns_token {
	/* Reservation state */
	/* interface the token is bound to, or NULL when unbound */
	ifnet_t                 nt_ifp;
	/* linkage on nt_ifp's if_netns_tokens list or on netns_unbound_tokens */
	SLIST_ENTRY(ns_token)   nt_ifp_link;
	/* linkage on netns_all_tokens */
	SLIST_ENTRY(ns_token)   nt_all_link;
	uint32_t                nt_state;       /* NETNS_STATE_* */

	/* Reservation context */
	/* reserved local address, viewable as raw words, IPv4 or IPv6 */
	union {
		uint32_t        nt_addr[4];
		struct in_addr  nt_inaddr;
		struct in6_addr nt_in6addr;
	};
	/* number of valid bytes in the address above */
	uint8_t                 nt_addr_len;
	/* IP protocol number */
	uint8_t                 nt_proto;
	/* reserved port, host byte order */
	in_port_t               nt_port;
	/* NETNS_* flags the reservation was made with */
	uint32_t                nt_flags;

	/* Optional information about the flow */
	/* allocated by netns_ns_token_alloc() only when requested, else NULL */
	struct ns_flow_info     *nt_flow_info;
};
202
/* Valid values for nt_state */
#define NETNS_STATE_HALFCLOSED  0x1     /* half closed */
#define NETNS_STATE_WITHDRAWN   0x2     /* withdrawn; not offloadable */

/*
 * Names of the nt_state bits above.
 * NOTE(review): looks like a kernel printf "%b" bit-description string
 * (leading \020 as the numeric base, then bit number / name pairs) --
 * confirm against the place where it is printed.
 */
#define NETNS_STATE_BITS        "\020\01HALFCLOSED\02WITHDRAWN"

/* List of tokens not bound to an ifnet */
SLIST_HEAD(, ns_token) netns_unbound_tokens = SLIST_HEAD_INITIALIZER(
	netns_unbound_tokens);

/* List of all tokens currently allocated in the system */
SLIST_HEAD(, ns_token) netns_all_tokens = SLIST_HEAD_INITIALIZER(
	netns_all_tokens);
216
217 /*
218 * Memory management
219 */
/* Zone backing struct ns; namespaces are allocated with zalloc_flags(). */
static ZONE_DEFINE(netns_ns_zone, SKMEM_ZONE_PREFIX ".netns.ns",
    sizeof(struct ns), ZC_ZFREE_CLEARMEM);

/*
 * Tokens, flow info records and reservations come from skmem caches.  The
 * *_size variables are the byte counts the allocators bzero() after an
 * allocation.
 * NOTE(review): the code that creates the caches and sets the sizes is not
 * visible in this part of the file.
 */
#define NETNS_NS_TOKEN_ZONE_NAME        "netns.ns_token"
static unsigned int netns_ns_token_size; /* size of zone element */
static struct skmem_cache *netns_ns_token_cache; /* for ns_token */

#define NETNS_NS_FLOW_INFO_ZONE_NAME    "netns.ns_flow_info"
static unsigned int netns_ns_flow_info_size; /* size of zone element */
static struct skmem_cache *netns_ns_flow_info_cache; /* for ns_flow_info */

#define NETNS_NS_RESERVATION_ZONE_NAME  "netns.ns_reservation"
static unsigned int netns_ns_reservation_size; /* size of zone element */
static struct skmem_cache *netns_ns_reservation_cache; /* for ns_reservation */

/* Allocators and destructors for the object types above. */
static struct ns_reservation *netns_ns_reservation_alloc(boolean_t, in_port_t);
static void netns_ns_reservation_free(struct ns_reservation *);
static struct ns *netns_ns_alloc(zalloc_flags_t);
static void netns_ns_free(struct ns *);
/* Frees a namespace only if it is freeable and has no reservations left. */
static void netns_ns_cleanup(struct ns *);
static struct ns_token *netns_ns_token_alloc(boolean_t, boolean_t);
static void netns_ns_token_free(struct ns_token *);
242
243 /*
244 * Utility/internal code
245 */
/* Look up (and optionally create) the namespace of an <address, protocol>. */
static struct ns *_netns_get_ns(uint32_t *, uint8_t, uint8_t, bool);
/* True when the address of the given length is all zeroes. */
static inline boolean_t _netns_is_wildcard_addr(const uint32_t *, uint8_t);
/* Take / drop one owner reference on a port within a single namespace. */
static int _netns_reserve_common(struct ns *, in_port_t, uint32_t);
static void _netns_release_common(struct ns *, in_port_t, uint32_t);
/* Unlink a token from its ifnet list or from the unbound list. */
static inline void netns_clear_ifnet(struct ns_token *);
/* Reservation entry point shared by the kernel-facing functions. */
static int _netns_reserve_kpi_common(struct ns *, netns_token *, uint32_t *,
    uint8_t, uint8_t, in_port_t *, uint32_t, struct ns_flow_info *);
/* NOTE(review): definition is not visible in this part of the file. */
static void _netns_set_ifnet_internal(struct ns_token *, struct ifnet *);
254
255 static struct ns_reservation *
netns_ns_reservation_alloc(boolean_t can_block,in_port_t port)256 netns_ns_reservation_alloc(boolean_t can_block, in_port_t port)
257 {
258 struct ns_reservation *res;
259
260 VERIFY(port != 0);
261
262 res = skmem_cache_alloc(netns_ns_reservation_cache,
263 can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
264 if (res == NULL) {
265 return NULL;
266 }
267
268 bzero(res, netns_ns_reservation_size);
269 res->nsr_port = port;
270 return res;
271 }
272
/*
 * Return a reservation record to the reservation cache.  The record is not
 * unlinked from any tree here; callers must have done that already.
 */
static void
netns_ns_reservation_free(struct ns_reservation *res)
{
	skmem_cache_free(netns_ns_reservation_cache, res);
}
278
279 static struct ns *
netns_ns_alloc(zalloc_flags_t how)280 netns_ns_alloc(zalloc_flags_t how)
281 {
282 struct ns *namespace;
283 in_port_t first = (in_port_t)ipport_firstauto;
284 in_port_t last = (in_port_t)ipport_lastauto;
285 in_port_t rand_port;
286
287 namespace = zalloc_flags(netns_ns_zone, how | Z_ZERO);
288 if (namespace == NULL) {
289 return NULL;
290 }
291
292 namespace->ns_is_freeable = 1;
293
294 RB_INIT(&namespace->ns_reservations);
295
296 /*
297 * Randomize the initial ephemeral port starting point, just in case
298 * this namespace is for an ipv6 address which gets brought up and
299 * down often.
300 */
301 if (first == last) {
302 rand_port = first;
303 } else {
304 read_frandom(&rand_port, sizeof(rand_port));
305
306 if (first > last) {
307 rand_port = last + (rand_port % (first - last));
308 } else {
309 rand_port = first + (rand_port % (last - first));
310 }
311 }
312 namespace->ns_last_ephemeral_port_down = rand_port;
313 namespace->ns_last_ephemeral_port_up = rand_port;
314
315 return namespace;
316 }
317
318 static void
netns_ns_free(struct ns * namespace)319 netns_ns_free(struct ns *namespace)
320 {
321 struct ns_reservation *res;
322 struct ns_reservation *tmp_res;
323 #if SK_LOG
324 char tmp_ip_str[MAX_IPv6_STR_LEN];
325 #endif /* SK_LOG */
326
327 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
328 NS_VERB_PROTO(namespace->ns_proto),
329 "freeing %s ns for IP %s",
330 PROTO_STR(namespace->ns_proto),
331 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
332 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)));
333
334 RB_FOREACH_SAFE(res, ns_reservation_tree, &namespace->ns_reservations,
335 tmp_res) {
336 netns_ns_reservation_free(res);
337 namespace->ns_n_reservations--;
338 RB_REMOVE(ns_reservation_tree, &namespace->ns_reservations,
339 res);
340 }
341
342 VERIFY(RB_EMPTY(&namespace->ns_reservations));
343
344 if (netns_global_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
345 namespace->ns_addr_len)] == namespace) {
346 netns_global_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
347 namespace->ns_addr_len)] = NULL;
348 }
349 if (netns_global_non_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
350 namespace->ns_addr_len)] == namespace) {
351 netns_global_non_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
352 namespace->ns_addr_len)] = NULL;
353 }
354
355 zfree(netns_ns_zone, namespace);
356 }
357
358 static void
netns_ns_cleanup(struct ns * namespace)359 netns_ns_cleanup(struct ns *namespace)
360 {
361 if (namespace->ns_is_freeable &&
362 RB_EMPTY(&namespace->ns_reservations)) {
363 RB_REMOVE(netns_namespaces_tree, &netns_namespaces, namespace);
364 netns_n_namespaces--;
365 netns_ns_free(namespace);
366 }
367 }
368
369 static struct ns_token *
netns_ns_token_alloc(boolean_t can_block,boolean_t with_nfi)370 netns_ns_token_alloc(boolean_t can_block, boolean_t with_nfi)
371 {
372 struct ns_token *token;
373
374 NETNS_LOCK_ASSERT_HELD();
375 NETNS_LOCK_CONVERT();
376
377 token = skmem_cache_alloc(netns_ns_token_cache,
378 can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
379 if (token == NULL) {
380 return NULL;
381 }
382
383 bzero(token, netns_ns_token_size);
384
385 if (with_nfi) {
386 token->nt_flow_info = skmem_cache_alloc(netns_ns_flow_info_cache,
387 can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
388 if (token->nt_flow_info == NULL) {
389 skmem_cache_free(netns_ns_token_cache, token);
390 return NULL;
391 }
392 }
393 SLIST_INSERT_HEAD(&netns_all_tokens, token, nt_all_link);
394
395 return token;
396 }
397
398 static void
netns_ns_token_free(struct ns_token * token)399 netns_ns_token_free(struct ns_token *token)
400 {
401 NETNS_LOCK_ASSERT_HELD();
402 NETNS_LOCK_CONVERT();
403 SLIST_REMOVE(&netns_all_tokens, token, ns_token, nt_all_link);
404
405 if (token->nt_flow_info != NULL) {
406 skmem_cache_free(netns_ns_flow_info_cache, token->nt_flow_info);
407 }
408 skmem_cache_free(netns_ns_token_cache, token);
409 }
410
411 __attribute__((always_inline))
412 static inline int
nsr_cmp(const struct ns_reservation * nsr1,const struct ns_reservation * nsr2)413 nsr_cmp(const struct ns_reservation *nsr1, const struct ns_reservation *nsr2)
414 {
415 #define NSR_COMPARE(r1, r2) ((int)(r1)->nsr_port - (int)(r2)->nsr_port)
416 return NSR_COMPARE(nsr1, nsr2);
417 }
418
419 __attribute__((always_inline))
420 static inline int
ns_cmp(const struct ns * a,const struct ns * b)421 ns_cmp(const struct ns *a, const struct ns *b)
422 {
423 int d;
424
425 if ((d = (a->ns_addr_len - b->ns_addr_len)) != 0) {
426 return d;
427 }
428 if ((d = (a->ns_proto - b->ns_proto)) != 0) {
429 return d;
430 }
431 if ((d = flow_ip_cmp(a->ns_addr_key, b->ns_addr_key,
432 b->ns_addr_len)) != 0) {
433 return d;
434 }
435
436 return 0;
437 }
438
439 /*
440 * Common routine to look up a reservation.
441 *
442 * NOTE: Assumes the caller holds the NETNS global lock
443 */
444 __attribute__((always_inline))
445 static inline struct ns_reservation *
ns_reservation_tree_find(struct ns_reservation_tree * tree,const in_port_t port)446 ns_reservation_tree_find(struct ns_reservation_tree *tree, const in_port_t port)
447 {
448 struct ns_reservation res;
449 res.nsr_port = port;
450 return RB_FIND(ns_reservation_tree, tree, &res);
451 }
452
453 /*
454 * Retrieve the namespace for the supplied <address, protocol> tuple.
455 * If create is set and such a namespace doesn't already exist, one will be
456 * created.
457 */
458 static struct ns *
_netns_get_ns(uint32_t * addr,uint8_t addr_len,uint8_t proto,bool create)459 _netns_get_ns(uint32_t *addr, uint8_t addr_len, uint8_t proto, bool create)
460 {
461 struct ns *namespace = NULL;
462 struct ns find = {
463 .ns_addr_key = addr,
464 .ns_addr_len = addr_len,
465 .ns_proto = proto,
466 };
467 #if SK_LOG
468 char tmp_ip_str[MAX_IPv6_STR_LEN];
469 #endif /* SK_LOG */
470
471 VERIFY(addr_len == sizeof(struct in_addr) ||
472 addr_len == sizeof(struct in6_addr));
473
474 NETNS_LOCK_ASSERT_HELD();
475
476 namespace = RB_FIND(netns_namespaces_tree, &netns_namespaces, &find);
477
478 if (create && namespace == NULL) {
479 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
480 "allocating %s ns for IP %s",
481 PROTO_STR(proto), inet_ntop(LEN_TO_AF(addr_len), addr,
482 tmp_ip_str, sizeof(tmp_ip_str)));
483 NETNS_LOCK_CONVERT();
484 namespace = netns_ns_alloc(Z_WAITOK | Z_NOFAIL);
485 __builtin_assume(namespace != NULL);
486 memcpy(namespace->ns_addr, addr, addr_len);
487 namespace->ns_addr_key = &namespace->ns_addr;
488 namespace->ns_addr_len = addr_len;
489 namespace->ns_proto = proto;
490 RB_INSERT(netns_namespaces_tree, &netns_namespaces, namespace);
491 netns_n_namespaces++;
492
493 if (_netns_is_wildcard_addr(addr, addr_len) &&
494 netns_global_wild[NETNS_NS_GLOBAL_IDX(proto,
495 addr_len)] == NULL) {
496 netns_global_wild[NETNS_NS_GLOBAL_IDX(proto,
497 addr_len)] = namespace;
498 }
499 }
500
501 return namespace;
502 }
503
504 /*
505 * Return true if the supplied address is a wildcard (INADDR_ANY)
506 */
507 __attribute__((always_inline))
508 static boolean_t
_netns_is_wildcard_addr(const uint32_t * addr,uint8_t addr_len)509 _netns_is_wildcard_addr(const uint32_t *addr, uint8_t addr_len)
510 {
511 boolean_t wildcard;
512
513 switch (addr_len) {
514 case sizeof(struct in_addr):
515 wildcard = (addr[0] == 0);
516 break;
517
518 case sizeof(struct in6_addr):
519 wildcard = (addr[0] == 0 && addr[1] == 0 &&
520 addr[2] == 0 && addr[3] == 0);
521 break;
522
523 default:
524 wildcard = FALSE;
525 break;
526 }
527
528 return wildcard;
529 }
530
531 __attribute__((always_inline))
532 static boolean_t
_netns_is_port_used(struct ns * gns,struct ns_reservation * curr_res,in_port_t port)533 _netns_is_port_used(struct ns * gns, struct ns_reservation *curr_res, in_port_t port)
534 {
535 struct ns_reservation *res = NULL;
536
537 if (gns == NULL) {
538 return FALSE;
539 }
540
541 res = ns_reservation_tree_find(&gns->ns_reservations, port);
542 if (res != NULL && res != curr_res) {
543 if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
544 NETNS_REF_COUNT(res, NETNS_PF) > 0 ||
545 NETNS_REF_COUNT(res, NETNS_LISTENER) > 0 ||
546 NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0) {
547 return TRUE;
548 }
549 }
550
551 return FALSE;
552 }
553
/*
 * Internal shared code to reserve ports within a specific namespace.
 *
 * A reservation record for `port' is allocated up front and inserted into
 * the namespace; if the port already has a record, the new one is freed and
 * the existing record is used instead.  Unless `namespace' is the global
 * non-wild namespace of its <protocol, family>, the request is then checked
 * for collisions with other owners, both in this namespace and in the
 * global wild / non-wild namespaces.
 *
 * On success the reference count of the owner encoded in `flags'
 * (flags & NETNS_OWNER_MASK) is incremented and 0 is returned.  On a
 * collision EADDRINUSE is returned, and ENOMEM if the record could not be
 * allocated; in both cases a record created by this call is removed again.
 * An unknown owner value panics.
 *
 * Note: port numbers are in host byte-order here.
 */
static int
_netns_reserve_common(struct ns *namespace, in_port_t port, uint32_t flags)
{
	/* res: record operated on; exist: record that was already in the tree */
	struct ns_reservation *res = NULL, *exist = NULL;
	uint8_t proto, addr_len;
	int err = 0;
#if SK_LOG
	char tmp_ip_str[MAX_IPv6_STR_LEN];
#endif /* SK_LOG */

	VERIFY(port != 0);
	proto = namespace->ns_proto;
	addr_len = namespace->ns_addr_len;
	NETNS_LOCK_CONVERT();
	res = netns_ns_reservation_alloc(TRUE, port);
	if (res == NULL) {
		SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
		    "ERROR %s:%s:%d // flags 0x%x // OUT OF MEMORY",
		    inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
		    namespace->ns_addr, tmp_ip_str,
		    sizeof(tmp_ip_str)), PROTO_STR(proto), port, flags);
		return ENOMEM;
	}
	/* RB_INSERT returns the colliding element when the key already exists */
	exist = RB_INSERT(ns_reservation_tree, &namespace->ns_reservations,
	    res);
	if (__probable(exist == NULL)) {
		namespace->ns_n_reservations++;
	} else {
		/* port already tracked: drop the new record, use the old one */
		netns_ns_reservation_free(res);
		res = exist;
	}

	SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
	    "pre: %s:%s:%d // flags 0x%x // refs %d sky, %d ls, "
	    "%d bsd %d pf", inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
	    namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
	    PROTO_STR(proto), port, flags,
	    NETNS_REF_COUNT(res, NETNS_SKYWALK),
	    NETNS_REF_COUNT(res, NETNS_LISTENER),
	    NETNS_REF_COUNT(res, NETNS_BSD),
	    NETNS_REF_COUNT(res, NETNS_PF));

	/* Make reservation */
	/*
	 * Bypass collision detection for reservations in the global non-wild
	 * namespace. We use that namespace for reference counts only.
	 */
	if (namespace !=
	    netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto, addr_len)]) {
		struct ns_reservation *skres;
		boolean_t is_wild = _netns_is_wildcard_addr(namespace->ns_addr,
		    addr_len);
		/* wildcard namespace of the same <protocol, family>, if any */
		struct ns *gns =
		    netns_global_wild[NETNS_NS_GLOBAL_IDX(proto, addr_len)];

		if (NETNS_IS_SKYWALK(flags)) {
			if ((!is_wild || exist != NULL) && gns != NULL &&
			    (skres = ns_reservation_tree_find(
				    &gns->ns_reservations, port)) != NULL &&
			    NETNS_REF_COUNT(skres, NETNS_LISTENER) == 0) {
				/*
				 * The mere existence of any non-skywalk
				 * listener wildcard entry for this
				 * protocol/port number means this must fail.
				 */
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ADDRINUSE: Duplicate wildcard");
				err = EADDRINUSE;
				goto done;
			}

			if (is_wild) {
				gns = netns_global_non_wild[
					NETNS_NS_GLOBAL_IDX(proto, addr_len)];
				VERIFY(gns != NULL);

				if (ns_reservation_tree_find(
					    &gns->ns_reservations, port) != NULL) {
					/*
					 * If Skywalk is trying to reserve a
					 * wildcard, then the mere existence of
					 * any entry in the non-wild namespace
					 * for this port means this must fail.
					 */
					SK_DF(NS_VERB_IP(addr_len) |
					    NS_VERB_PROTO(proto), "ADDRINUSE: "
					    "Wildcard with non-wild.");
					err = EADDRINUSE;
					goto done;
				}
			}
		} else {
			/*
			 * Check if Skywalk has reserved a wildcard entry.
			 * Note that the arithmetic OR here is intentional.
			 */
			if ((!is_wild || exist != NULL) && gns != NULL &&
			    (skres = ns_reservation_tree_find(
				    &gns->ns_reservations, port)) != NULL &&
			    (NETNS_REF_COUNT(skres, NETNS_SKYWALK) |
			    NETNS_REF_COUNT(skres, NETNS_LISTENER)) != 0) {
				/*
				 * BSD is trying to reserve a proto/port for
				 * which Skywalk already has a wildcard
				 * reservation.
				 */
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ADDRINUSE: BSD requesting Skywalk port");
				err = EADDRINUSE;
				goto done;
			}

			/*
			 * If BSD is trying to reserve a wildcard,
			 * ensure Skywalk has not already reserved
			 * a non-wildcard.
			 */
			if (is_wild) {
				gns = netns_global_non_wild[
					NETNS_NS_GLOBAL_IDX(proto, addr_len)];
				VERIFY(gns != NULL);

				/*
				 * Note that the arithmetic OR here is
				 * intentional.
				 */
				if ((skres = ns_reservation_tree_find(
					    &gns->ns_reservations, port)) != NULL &&
				    (NETNS_REF_COUNT(skres, NETNS_SKYWALK) |
				    NETNS_REF_COUNT(skres,
				    NETNS_LISTENER)) != 0) {
					SK_DF(NS_VERB_IP(addr_len) |
					    NS_VERB_PROTO(proto), "ADDRINUSE: "
					    "BSD wildcard with non-wild.");
					err = EADDRINUSE;
					goto done;
				}
			}
		}

		/* Per-owner checks against the counts already held in `res'. */
		switch (flags & NETNS_OWNER_MASK) {
		case NETNS_SKYWALK:
			/* check collision w/ BSD */
			if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_PF) > 0) {
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ERROR - Skywalk got ADDRINUSE (w/ BSD)");
				err = EADDRINUSE;
				goto done;
			}

			/* BEGIN CSTYLED */
			/*
			 * Scenarios with new Skywalk connected flow:
			 * 1. With existing Skywalk connected flow,
			 *      NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
			 *      NETNS_REF_COUNT(res, NETNS_SKYWALK) == 1
			 *    reject by failing the wild gns lookup below.
			 * 2. With existing Skywalk 3-tuple listener,
			 *      NETNS_REF_COUNT(res, NETNS_LISTENER) == 1
			 *    bypass the check below.
			 * 3. With existing Skywalk 2-tuple listener,
			 *      NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
			 *      NETNS_REF_COUNT(res, NETNS_SKYWALK) == 0
			 *    pass with successful wild gns lookup.
			 */
			/* END CSTYLED */
			if (NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
			    NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0) {
				/* check if covered by wild Skywalk listener */
				gns = netns_global_wild[
					NETNS_NS_GLOBAL_IDX(proto, addr_len)];
				if (gns != NULL &&
				    (skres = ns_reservation_tree_find(
					    &gns->ns_reservations, port)) != NULL &&
				    NETNS_REF_COUNT(skres, NETNS_LISTENER)
				    != 0) {
					err = 0;
					goto done;
				}
				if (addr_len == sizeof(struct in_addr)) {
					/* If address is IPv4, also check for wild IPv6 registration */
					gns = netns_global_wild[
						NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in6_addr))];
					if (gns != NULL &&
					    (skres = ns_reservation_tree_find(
						    &gns->ns_reservations, port)) != NULL &&
					    NETNS_REF_COUNT(skres, NETNS_LISTENER)
					    != 0) {
						err = 0;
						goto done;
					}
				}
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ERROR - Skywalk got ADDRINUSE "
				    "(w/ SK connected flow)");
				err = EADDRINUSE;
			}
			/*
			 * XXX: Duplicate 5-tuple flows under a Skywalk
			 * listener are currently detected by flow manager,
			 * till we implement 5-tuple-aware netns.
			 */
			break;

		case NETNS_LISTENER:
			/*
			 * A listener conflicts with a BSD, PF or listener
			 * reference on this record, and with any referenced
			 * record for the port (other than this one) in the
			 * IPv4 and IPv6 global wild and non-wild namespaces.
			 */
			if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_PF) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_LISTENER) > 0 ||
			    _netns_is_port_used(netns_global_wild[
				    NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in_addr))], res, port) ||
			    _netns_is_port_used(netns_global_wild[
				    NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in6_addr))], res, port) ||
			    _netns_is_port_used(netns_global_non_wild[
				    NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in_addr))], res, port) ||
			    _netns_is_port_used(netns_global_non_wild[
				    NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in6_addr))], res, port)) {
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ERROR - Listener got ADDRINUSE");
				err = EADDRINUSE;
			}
			break;

		case NETNS_BSD:
		case NETNS_PF:
			/* BSD and PF conflict with Skywalk and listener references */
			if (NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_LISTENER) > 0) {
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ERROR - %s got ADDRINUSE",
				    ((flags & NETNS_OWNER_MASK) == NETNS_PF) ?
				    "PF" : "BSD");
				err = EADDRINUSE;
			}
			break;

		default:
			panic("_netns_reserve_common: invalid owner 0x%x",
			    flags & NETNS_OWNER_MASK);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

done:
	ASSERT(res != NULL);
	if (__probable(err == 0)) {
		NETNS_REF_COUNT(res, flags)++;
		/* Check for wrap around */
		VERIFY(NETNS_REF_COUNT(res, flags) != 0);
		SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
		    NS_VERB_PROTO(namespace->ns_proto),
		    "post: %s:%s:%d err %d // flags 0x%x // refs %d sky, "
		    "%d ls, %d bsd %d pf",
		    inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
		    namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
		    PROTO_STR(namespace->ns_proto), port, err, flags,
		    NETNS_REF_COUNT(res, NETNS_SKYWALK),
		    NETNS_REF_COUNT(res, NETNS_LISTENER),
		    NETNS_REF_COUNT(res, NETNS_BSD),
		    NETNS_REF_COUNT(res, NETNS_PF));
	} else {
		/* undo the insertion only if this call created the record */
		if (exist == NULL) {
			RB_REMOVE(ns_reservation_tree,
			    &namespace->ns_reservations, res);
			namespace->ns_n_reservations--;
			netns_ns_reservation_free(res);
		}
	}
	return err;
}
835
836 /*
837 * Internal shared code to release ports within a specific namespace.
838 */
839 static void
_netns_release_common(struct ns * namespace,in_port_t port,uint32_t flags)840 _netns_release_common(struct ns *namespace, in_port_t port, uint32_t flags)
841 {
842 struct ns_reservation *res;
843 uint32_t refs;
844 int i;
845 #if SK_LOG
846 char tmp_ip_str[MAX_IPv6_STR_LEN];
847 #endif /* SK_LOG */
848
849 NETNS_LOCK_ASSERT_HELD();
850
851 res = ns_reservation_tree_find(&namespace->ns_reservations, port);
852 if (res == NULL) {
853 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
854 NS_VERB_PROTO(namespace->ns_proto),
855 "ERROR %s:%s:%d // flags 0x%x // not found",
856 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
857 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
858 PROTO_STR(namespace->ns_proto), port, flags);
859 VERIFY(res != NULL);
860 }
861
862 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
863 NS_VERB_PROTO(namespace->ns_proto),
864 "%s:%s:%d // flags 0x%x // refs %d sky, %d ls, %d bsd, %d pf",
865 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
866 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
867 PROTO_STR(namespace->ns_proto), port, flags,
868 NETNS_REF_COUNT(res, NETNS_SKYWALK),
869 NETNS_REF_COUNT(res, NETNS_LISTENER),
870 NETNS_REF_COUNT(res, NETNS_BSD),
871 NETNS_REF_COUNT(res, NETNS_PF));
872
873 /* Release reservation */
874 VERIFY(NETNS_REF_COUNT(res, flags) > 0);
875 NETNS_REF_COUNT(res, flags) -= 1;
876
877 /* Clean up memory, if appropriate */
878 for (i = 0, refs = 0; i <= NETNS_OWNER_MAX && refs == 0; i++) {
879 refs |= res->nsr_refs[i];
880 }
881 if (refs == 0) {
882 RB_REMOVE(ns_reservation_tree, &namespace->ns_reservations,
883 res);
884 namespace->ns_n_reservations--;
885 NETNS_LOCK_CONVERT();
886 netns_ns_reservation_free(res);
887 netns_ns_cleanup(namespace);
888 }
889 }
890
891 __attribute__((always_inline))
892 static inline void
netns_init_global_ns(struct ns ** global_ptr,uint8_t proto,uint8_t addrlen)893 netns_init_global_ns(struct ns **global_ptr, uint8_t proto, uint8_t addrlen)
894 {
895 struct ns *namespace;
896
897 namespace = *global_ptr = netns_ns_alloc(Z_WAITOK);
898 memset(namespace->ns_addr, 0xFF, addrlen);
899 namespace->ns_addr_len = addrlen;
900 namespace->ns_proto = proto;
901 namespace->ns_is_freeable = 0;
902 }
903
904 __attribute__((always_inline))
905 static inline void
netns_clear_ifnet(struct ns_token * nstoken)906 netns_clear_ifnet(struct ns_token *nstoken)
907 {
908 #if SK_LOG
909 char tmp_ip_str[MAX_IPv6_STR_LEN];
910 #endif /* SK_LOG */
911
912 NETNS_LOCK_ASSERT_HELD();
913
914 if (nstoken->nt_ifp != NULL) {
915 SLIST_REMOVE(&nstoken->nt_ifp->if_netns_tokens, nstoken,
916 ns_token, nt_ifp_link);
917
918 SK_DF(NS_VERB_IP(nstoken->nt_addr_len) |
919 NS_VERB_PROTO(nstoken->nt_proto),
920 "%s:%s:%d // removed from ifnet %d",
921 inet_ntop(LEN_TO_AF(nstoken->nt_addr_len),
922 nstoken->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
923 PROTO_STR(nstoken->nt_proto), nstoken->nt_port,
924 nstoken->nt_ifp->if_index);
925
926 NETNS_LOCK_CONVERT();
927 ifnet_decr_iorefcnt(nstoken->nt_ifp);
928 nstoken->nt_ifp = NULL;
929 } else {
930 SLIST_REMOVE(&netns_unbound_tokens, nstoken, ns_token,
931 nt_ifp_link);
932 }
933 }
934
935 /*
936 * Internal shared code to perform a port[-range] reservation, along with all
937 * the boilerplate and sanity checks expected for a call coming in from the
938 * surrounding kernel code.
939 */
940 static int
_netns_reserve_kpi_common(struct ns * ns,netns_token * token,uint32_t * addr,uint8_t addr_len,uint8_t proto,in_port_t * port,uint32_t flags,struct ns_flow_info * nfi)941 _netns_reserve_kpi_common(struct ns *ns, netns_token *token, uint32_t *addr,
942 uint8_t addr_len, uint8_t proto, in_port_t *port, uint32_t flags,
943 struct ns_flow_info *nfi)
944 {
945 boolean_t ns_want_cleanup = (ns == NULL);
946 struct ns_token *nt;
947 int err = 0;
948 in_port_t hport;
949 #if SK_LOG
950 char tmp_ip_str[MAX_IPv6_STR_LEN];
951 #endif /* SK_LOG */
952 struct ifnet *ifp = (nfi != NULL) ? nfi->nfi_ifp : NULL;
953
954 NETNS_LOCK_ASSERT_HELD();
955
956 hport = ntohs(*port);
957
958 VERIFY((flags & NETNS_OWNER_MASK) <= NETNS_OWNER_MAX);
959 VERIFY(addr_len == sizeof(struct in_addr) ||
960 addr_len == sizeof(struct in6_addr));
961 VERIFY(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
962 VERIFY(hport != 0);
963
964 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
965 "reserving %s:%s:%d // flags 0x%x // token %svalid",
966 inet_ntop(LEN_TO_AF(addr_len), addr, tmp_ip_str,
967 sizeof(tmp_ip_str)), PROTO_STR(proto), hport, flags,
968 NETNS_TOKEN_VALID(token) ? "" : "in");
969
970 /*
971 * See the documentation for NETNS_PRERESERVED in netns.h for an
972 * explanation of this block.
973 */
974 if (NETNS_TOKEN_VALID(token)) {
975 if (flags & NETNS_PRERESERVED) {
976 nt = *token;
977 VERIFY(nt->nt_addr_len == addr_len);
978 VERIFY(memcmp(nt->nt_addr, addr, addr_len) == 0);
979 VERIFY(nt->nt_proto == proto);
980 VERIFY(nt->nt_port == hport);
981 VERIFY((nt->nt_flags &
982 NETNS_RESERVATION_FLAGS | NETNS_PRERESERVED) ==
983 (flags & NETNS_RESERVATION_FLAGS));
984
985 if ((nt->nt_flags & NETNS_CONFIGURATION_FLAGS) ==
986 (flags & NETNS_CONFIGURATION_FLAGS)) {
987 SK_DF(NS_VERB_IP(nt->nt_addr_len) |
988 NS_VERB_PROTO(nt->nt_proto),
989 "%s:%s:%d // flags 0x%x -> 0x%x",
990 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
991 nt->nt_addr, tmp_ip_str,
992 sizeof(tmp_ip_str)),
993 PROTO_STR(nt->nt_proto),
994 nt->nt_port, nt->nt_flags, flags);
995 nt->nt_flags &= ~NETNS_CONFIGURATION_FLAGS;
996 nt->nt_flags |=
997 flags & NETNS_CONFIGURATION_FLAGS;
998 }
999 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1000 "token was prereserved");
1001 goto done;
1002 } else {
1003 panic("Request to overwrite valid netns token");
1004 /* NOTREACHED */
1005 __builtin_unreachable();
1006 }
1007 }
1008
1009 /*
1010 * TODO: Check range against bitmap
1011 */
1012 if (hport == 0) {
1013 /*
1014 * Caller request an arbitrary range of ports
1015 * TODO: Need to figure out how to allocate
1016 * emphemeral ports only.
1017 */
1018 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1019 "ERROR - wildcard port not yet supported");
1020 err = ENOMEM;
1021 goto done;
1022 }
1023
1024 /*
1025 * Fetch namespace for the specified address/protocol, creating
1026 * a new namespace if necessary.
1027 */
1028 if (ns == NULL) {
1029 ASSERT(ns_want_cleanup);
1030 ns = _netns_get_ns(addr, addr_len, proto, true);
1031 }
1032 if (__improbable(ns == NULL)) {
1033 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1034 "ERROR - couldn't create namespace");
1035 err = ENOMEM;
1036 goto done;
1037 }
1038
1039 /*
1040 * Make a reservation in the namespace
1041 * This will return an error if an incompatible reservation
1042 * already exists.
1043 */
1044 err = _netns_reserve_common(ns, hport, flags);
1045 if (__improbable(err != 0)) {
1046 NETNS_LOCK_CONVERT();
1047 if (ns_want_cleanup) {
1048 netns_ns_cleanup(ns);
1049 }
1050 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1051 "ERROR - reservation collision");
1052 goto done;
1053 }
1054
1055 if (!_netns_is_wildcard_addr(ns->ns_addr, addr_len)) {
1056 /* Record the reservation in the non-wild namespace */
1057 struct ns *nwns;
1058
1059 nwns = netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto,
1060 addr_len)];
1061 err = _netns_reserve_common(nwns, hport, flags);
1062 if (__improbable(err != 0)) {
1063 /* Need to free the specific namespace entry */
1064 NETNS_LOCK_CONVERT();
1065 _netns_release_common(ns, hport, flags);
1066 if (ns_want_cleanup) {
1067 netns_ns_cleanup(ns);
1068 }
1069 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1070 "ERROR - reservation collision");
1071 goto done;
1072 }
1073 }
1074
1075 nt = netns_ns_token_alloc(true, nfi != NULL ? true : false);
1076 if (nt == NULL) {
1077 SK_ERR("netns_ns_token_alloc() failed");
1078 err = ENOMEM;
1079 goto done;
1080 }
1081
1082 ASSERT(nt->nt_ifp == NULL);
1083 _netns_set_ifnet_internal(nt, ifp);
1084
1085 memcpy(nt->nt_addr, addr, addr_len);
1086 nt->nt_addr_len = addr_len;
1087 nt->nt_proto = proto;
1088 nt->nt_port = hport;
1089 nt->nt_flags = flags;
1090
1091 if (nfi != NULL) {
1092 VERIFY(nt->nt_flow_info != NULL);
1093
1094 memcpy(nt->nt_flow_info, nfi, sizeof(struct ns_flow_info));
1095 /*
1096 * The local port is passed as a separate argument
1097 */
1098 if (nfi->nfi_laddr.sa.sa_family == AF_INET) {
1099 nt->nt_flow_info->nfi_laddr.sin.sin_port = *port;
1100 } else if (nfi->nfi_laddr.sa.sa_family == AF_INET6) {
1101 nt->nt_flow_info->nfi_laddr.sin6.sin6_port = *port;
1102 }
1103 }
1104 *token = nt;
1105
1106 done:
1107 return err;
1108 }
1109
1110 /*
1111 * Kernel-facing functions
1112 */
1113
1114 int
netns_init(void)1115 netns_init(void)
1116 {
1117 VERIFY(__netns_inited == 0);
1118
1119 netns_ns_reservation_size = sizeof(struct ns_reservation);
1120 netns_ns_reservation_cache = skmem_cache_create(NETNS_NS_RESERVATION_ZONE_NAME,
1121 netns_ns_reservation_size, sizeof(uint64_t), NULL, NULL, NULL,
1122 NULL, NULL, 0);
1123 if (netns_ns_reservation_cache == NULL) {
1124 panic("%s: skmem_cache create failed (%s)", __func__,
1125 NETNS_NS_RESERVATION_ZONE_NAME);
1126 /* NOTREACHED */
1127 __builtin_unreachable();
1128 }
1129
1130 netns_ns_token_size = sizeof(struct ns_token);
1131 netns_ns_token_cache = skmem_cache_create(NETNS_NS_TOKEN_ZONE_NAME,
1132 netns_ns_token_size, sizeof(uint64_t), NULL, NULL, NULL, NULL,
1133 NULL, 0);
1134 if (netns_ns_token_cache == NULL) {
1135 panic("%s: skmem_cache create failed (%s)", __func__,
1136 NETNS_NS_TOKEN_ZONE_NAME);
1137 /* NOTREACHED */
1138 __builtin_unreachable();
1139 }
1140
1141 netns_ns_flow_info_size = sizeof(struct ns_flow_info);
1142 netns_ns_flow_info_cache = skmem_cache_create(NETNS_NS_FLOW_INFO_ZONE_NAME,
1143 netns_ns_flow_info_size, sizeof(uint64_t), NULL, NULL, NULL,
1144 NULL, NULL, 0);
1145 if (netns_ns_flow_info_cache == NULL) {
1146 panic("%s: skmem_cache create failed (%s)", __func__,
1147 NETNS_NS_FLOW_INFO_ZONE_NAME);
1148 /* NOTREACHED */
1149 __builtin_unreachable();
1150 }
1151
1152 SLIST_INIT(&netns_unbound_tokens);
1153 SLIST_INIT(&netns_all_tokens);
1154
1155 netns_n_namespaces = 0;
1156 RB_INIT(&netns_namespaces);
1157
1158 SK_D("initializing global namespaces");
1159
1160 netns_init_global_ns(
1161 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_TCP,
1162 sizeof(struct in_addr))], IPPROTO_TCP, sizeof(struct in_addr));
1163
1164 netns_init_global_ns(
1165 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_UDP,
1166 sizeof(struct in_addr))], IPPROTO_UDP, sizeof(struct in_addr));
1167
1168 netns_init_global_ns(
1169 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_TCP,
1170 sizeof(struct in6_addr))], IPPROTO_TCP, sizeof(struct in6_addr));
1171
1172 netns_init_global_ns(
1173 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_UDP,
1174 sizeof(struct in6_addr))], IPPROTO_UDP, sizeof(struct in6_addr));
1175
1176 /* Done */
1177
1178 __netns_inited = 1;
1179 sk_features |= SK_FEATURE_NETNS;
1180
1181 SK_D("initialized netns");
1182
1183 return 0;
1184 }
1185
1186 void
netns_uninit(void)1187 netns_uninit(void)
1188 {
1189 if (__netns_inited == 1) {
1190 struct ns *namespace;
1191 struct ns *temp_namespace;
1192 int i;
1193
1194 RB_FOREACH_SAFE(namespace, netns_namespaces_tree,
1195 &netns_namespaces, temp_namespace) {
1196 RB_REMOVE(netns_namespaces_tree, &netns_namespaces,
1197 namespace);
1198 netns_n_namespaces--;
1199 netns_ns_free(namespace);
1200 }
1201
1202 for (i = 0; i < NETNS_N_GLOBAL; i++) {
1203 netns_ns_free(netns_global_non_wild[i]);
1204 }
1205
1206 if (netns_ns_flow_info_cache != NULL) {
1207 skmem_cache_destroy(netns_ns_flow_info_cache);
1208 netns_ns_flow_info_cache = NULL;
1209 }
1210 if (netns_ns_token_cache != NULL) {
1211 skmem_cache_destroy(netns_ns_token_cache);
1212 netns_ns_token_cache = NULL;
1213 }
1214 if (netns_ns_reservation_cache != NULL) {
1215 skmem_cache_destroy(netns_ns_reservation_cache);
1216 netns_ns_reservation_cache = NULL;
1217 }
1218
1219 __netns_inited = 0;
1220 sk_features &= ~SK_FEATURE_NETNS;
1221
1222 SK_D("uninitialized netns");
1223 }
1224 }
1225
1226 void
netns_reap_caches(boolean_t purge)1227 netns_reap_caches(boolean_t purge)
1228 {
1229 /* these aren't created unless netns is enabled */
1230 if (netns_ns_token_cache != NULL) {
1231 skmem_cache_reap_now(netns_ns_token_cache, purge);
1232 }
1233 if (netns_ns_reservation_cache != NULL) {
1234 skmem_cache_reap_now(netns_ns_reservation_cache, purge);
1235 }
1236 if (netns_ns_flow_info_cache != NULL) {
1237 skmem_cache_reap_now(netns_ns_flow_info_cache, purge);
1238 }
1239 }
1240
1241 boolean_t
netns_is_enabled(void)1242 netns_is_enabled(void)
1243 {
1244 return __netns_inited == 1;
1245 }
1246
1247 int
netns_reserve(netns_token * token,uint32_t * addr,uint8_t addr_len,uint8_t proto,in_port_t port,uint32_t flags,struct ns_flow_info * nfi)1248 netns_reserve(netns_token *token, uint32_t *addr, uint8_t addr_len,
1249 uint8_t proto, in_port_t port, uint32_t flags, struct ns_flow_info *nfi)
1250 {
1251 int err = 0;
1252 #if SK_LOG
1253 char tmp_ip_str[MAX_IPv6_STR_LEN];
1254 #endif /* SK_LOG */
1255
1256 if (__netns_inited == 0) {
1257 *token = NULL;
1258 return err;
1259 }
1260
1261 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1262 SK_ERR("netns doesn't support non TCP/UDP protocol");
1263 return ENOTSUP;
1264 }
1265
1266 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1267 "%s:%s:%d // flags 0x%x", inet_ntop(LEN_TO_AF(addr_len), addr,
1268 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto), ntohs(port),
1269 flags);
1270
1271 /*
1272 * Check wether the process is allowed to bind to a restricted port
1273 */
1274 if (!current_task_can_use_restricted_in_port(port,
1275 proto, flags)) {
1276 *token = NULL;
1277 return EADDRINUSE;
1278 }
1279
1280 NETNS_LOCK_SPIN();
1281 err = _netns_reserve_kpi_common(NULL, token, addr, addr_len,
1282 proto, &port, flags, nfi);
1283 NETNS_UNLOCK();
1284
1285 return err;
1286 }
1287
/*
 * Import the net.inet.{tcp,udp}.randomize_ports sysctl values.
 * netns_reserve_ephemeral() reads them to decide whether the ephemeral
 * port search starts at a random port or continues from the last one used.
 */
extern int udp_use_randomport;
extern int tcp_use_randomport;
1291
1292 int
netns_reserve_ephemeral(netns_token * token,uint32_t * addr,uint8_t addr_len,uint8_t proto,in_port_t * port,uint32_t flags,struct ns_flow_info * nfi)1293 netns_reserve_ephemeral(netns_token *token, uint32_t *addr, uint8_t addr_len,
1294 uint8_t proto, in_port_t *port, uint32_t flags, struct ns_flow_info *nfi)
1295 {
1296 int err = 0;
1297 in_port_t first = (in_port_t)ipport_firstauto;
1298 in_port_t last = (in_port_t)ipport_lastauto;
1299 in_port_t rand_port;
1300 in_port_t last_port;
1301 in_port_t n_last_port;
1302 struct ns *namespace;
1303 boolean_t count_up = true;
1304 boolean_t use_randomport = (proto == IPPROTO_TCP) ?
1305 tcp_use_randomport : udp_use_randomport;
1306 #if SK_LOG
1307 char tmp_ip_str[MAX_IPv6_STR_LEN];
1308 #endif /* SK_LOG */
1309
1310 if (__netns_inited == 0) {
1311 *token = NULL;
1312 return err;
1313 }
1314
1315 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1316 SK_ERR("netns doesn't support non TCP/UDP protocol");
1317 return ENOTSUP;
1318 }
1319
1320 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1321 "%s:%s:%d // flags 0x%x", inet_ntop(LEN_TO_AF(addr_len), addr,
1322 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto), ntohs(*port),
1323 flags);
1324
1325 NETNS_LOCK_SPIN();
1326
1327 namespace = _netns_get_ns(addr, addr_len, proto, true);
1328 if (namespace == NULL) {
1329 err = ENOMEM;
1330 NETNS_UNLOCK();
1331 return err;
1332 }
1333
1334 if (proto == IPPROTO_UDP) {
1335 if (UINT16_MAX - namespace->ns_n_reservations <
1336 NETNS_NS_UDP_EPHEMERAL_RESERVE) {
1337 SK_ERR("UDP ephemeral port not available"
1338 "(less than 4096 UDP ports left)");
1339 err = EADDRNOTAVAIL;
1340 NETNS_UNLOCK();
1341 return err;
1342 }
1343 }
1344
1345 if (first == last) {
1346 rand_port = first;
1347 } else {
1348 if (use_randomport) {
1349 NETNS_LOCK_CONVERT();
1350 read_frandom(&rand_port, sizeof(rand_port));
1351
1352 if (first > last) {
1353 rand_port = last + (rand_port %
1354 (first - last));
1355 count_up = false;
1356 } else {
1357 rand_port = first + (rand_port %
1358 (last - first));
1359 }
1360 } else {
1361 if (first > last) {
1362 rand_port =
1363 namespace->ns_last_ephemeral_port_down - 1;
1364 if (rand_port < last || rand_port > first) {
1365 rand_port = last;
1366 }
1367 count_up = false;
1368 } else {
1369 rand_port =
1370 namespace->ns_last_ephemeral_port_up + 1;
1371 if (rand_port < first || rand_port > last) {
1372 rand_port = first;
1373 }
1374 }
1375 }
1376 }
1377 last_port = rand_port;
1378 n_last_port = htons(last_port);
1379
1380 while (true) {
1381 if (n_last_port == 0) {
1382 SK_ERR("ephemeral port search range includes 0");
1383 err = EINVAL;
1384 break;
1385 }
1386
1387 /*
1388 * Skip if this is a restricted port as we do not want to
1389 * restricted ports as ephemeral
1390 */
1391 if (!IS_RESTRICTED_IN_PORT(n_last_port)) {
1392 err = _netns_reserve_kpi_common(namespace, token, addr,
1393 addr_len, proto, &n_last_port, flags, nfi);
1394 if (err == 0 || err != EADDRINUSE) {
1395 break;
1396 }
1397 }
1398 if (count_up) {
1399 last_port++;
1400 if (last_port < first || last_port > last) {
1401 last_port = first;
1402 }
1403 } else {
1404 last_port--;
1405 if (last_port < last || last_port > first) {
1406 last_port = last;
1407 }
1408 }
1409 n_last_port = htons(last_port);
1410
1411 if (last_port == rand_port || first == last) {
1412 SK_ERR("couldn't find free ephemeral port");
1413 err = EADDRNOTAVAIL;
1414 break;
1415 }
1416 }
1417
1418 if (err == 0) {
1419 *port = n_last_port;
1420 if (count_up) {
1421 namespace->ns_last_ephemeral_port_up = last_port;
1422 } else {
1423 namespace->ns_last_ephemeral_port_down = last_port;
1424 }
1425 } else {
1426 netns_ns_cleanup(namespace);
1427 }
1428
1429 NETNS_UNLOCK();
1430
1431 return err;
1432 }
1433
1434 void
netns_release(netns_token * token)1435 netns_release(netns_token *token)
1436 {
1437 struct ns *ns;
1438 struct ns_token *nt;
1439 uint8_t proto, addr_len;
1440 #if SK_LOG
1441 char tmp_ip_str[MAX_IPv6_STR_LEN];
1442 #endif /* SK_LOG */
1443
1444 if (!NETNS_TOKEN_VALID(token)) {
1445 return;
1446 }
1447
1448 if (__netns_inited == 0) {
1449 *token = NULL;
1450 return;
1451 }
1452
1453 NETNS_LOCK_SPIN();
1454
1455 nt = *token;
1456 *token = NULL;
1457
1458 VERIFY((nt->nt_flags & NETNS_OWNER_MASK) <= NETNS_OWNER_MAX);
1459 VERIFY(nt->nt_addr_len == sizeof(struct in_addr) ||
1460 nt->nt_addr_len == sizeof(struct in6_addr));
1461 VERIFY(nt->nt_proto == IPPROTO_TCP || nt->nt_proto == IPPROTO_UDP);
1462
1463 addr_len = nt->nt_addr_len;
1464 proto = nt->nt_proto;
1465
1466 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1467 "releasing %s:%s:%d",
1468 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1469 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto),
1470 nt->nt_port);
1471
1472 if (!_netns_is_wildcard_addr(nt->nt_addr, addr_len)) {
1473 /* Remove from global non-wild namespace */
1474
1475 ns = netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto,
1476 addr_len)];
1477 VERIFY(ns != NULL);
1478
1479 _netns_release_common(ns, nt->nt_port, nt->nt_flags);
1480 }
1481
1482 ns = _netns_get_ns(nt->nt_addr, addr_len, proto, false);
1483 VERIFY(ns != NULL);
1484 _netns_release_common(ns, nt->nt_port, nt->nt_flags);
1485
1486 netns_clear_ifnet(nt);
1487 netns_ns_token_free(nt);
1488
1489 NETNS_UNLOCK();
1490 }
1491
1492 int
netns_change_addr(netns_token * token,uint32_t * addr,uint8_t addr_len)1493 netns_change_addr(netns_token *token, uint32_t *addr, uint8_t addr_len)
1494 {
1495 int err = 0;
1496 struct ns *old_namespace;
1497 struct ns *new_namespace;
1498 struct ns *global_namespace;
1499 struct ns_token *nt;
1500 uint8_t proto;
1501 #if SK_LOG
1502 char tmp_ip_str_1[MAX_IPv6_STR_LEN];
1503 char tmp_ip_str_2[MAX_IPv6_STR_LEN];
1504 #endif /* SK_LOG */
1505
1506 if (__netns_inited == 0) {
1507 return 0;
1508 }
1509
1510 NETNS_LOCK();
1511
1512 VERIFY(NETNS_TOKEN_VALID(token));
1513
1514 nt = *token;
1515
1516 VERIFY((nt->nt_flags & NETNS_OWNER_MASK) == NETNS_BSD);
1517 VERIFY(nt->nt_addr_len == sizeof(struct in_addr) ||
1518 nt->nt_addr_len == sizeof(struct in6_addr));
1519 VERIFY(nt->nt_proto == IPPROTO_TCP || nt->nt_proto == IPPROTO_UDP);
1520
1521 proto = nt->nt_proto;
1522
1523 #if SK_LOG
1524 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1525 tmp_ip_str_1, sizeof(tmp_ip_str_1));
1526 inet_ntop(LEN_TO_AF(addr_len), addr, tmp_ip_str_2,
1527 sizeof(tmp_ip_str_2));
1528 #endif /* SK_LOG */
1529 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1530 "changing address for %s:%d from %s to %s",
1531 PROTO_STR(proto), nt->nt_port, tmp_ip_str_1,
1532 tmp_ip_str_2);
1533
1534 if (nt->nt_addr_len == addr_len &&
1535 memcmp(nt->nt_addr, addr, nt->nt_addr_len) == 0) {
1536 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1537 "address didn't change, exiting early");
1538 goto done;
1539 }
1540
1541 old_namespace = _netns_get_ns(nt->nt_addr, nt->nt_addr_len, proto,
1542 false);
1543 VERIFY(old_namespace != NULL);
1544
1545 new_namespace = _netns_get_ns(addr, addr_len, proto, true);
1546 if (new_namespace == NULL) {
1547 err = ENOMEM;
1548 goto done;
1549 }
1550
1551 /* Acquire reservation in new namespace */
1552 if ((err = _netns_reserve_common(new_namespace, nt->nt_port,
1553 nt->nt_flags))) {
1554 NETNS_LOCK_CONVERT();
1555 netns_ns_cleanup(new_namespace);
1556 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1557 "ERROR - reservation collision under new namespace");
1558 goto done;
1559 }
1560
1561 /* Release from old namespace */
1562 _netns_release_common(old_namespace, nt->nt_port, nt->nt_flags);
1563
1564 if (!_netns_is_wildcard_addr(nt->nt_addr, nt->nt_addr_len)) {
1565 /*
1566 * Old address is non-wildcard.
1567 * Remove old reservation from global non-wild namespace
1568 */
1569 global_namespace = netns_global_non_wild[
1570 NETNS_NS_GLOBAL_IDX(proto, nt->nt_addr_len)];
1571 VERIFY(global_namespace != NULL);
1572
1573 _netns_release_common(global_namespace, nt->nt_port,
1574 nt->nt_flags);
1575 }
1576
1577 if (!_netns_is_wildcard_addr(addr, addr_len)) {
1578 /*
1579 * New address is non-wildcard.
1580 * Record new reservation in global non-wild namespace
1581 */
1582 global_namespace = netns_global_non_wild[
1583 NETNS_NS_GLOBAL_IDX(proto, addr_len)];
1584 VERIFY(global_namespace != NULL);
1585
1586 if ((err = _netns_reserve_common(global_namespace,
1587 nt->nt_port, nt->nt_flags)) != 0) {
1588 SK_DF(NS_VERB_IP(addr_len) |
1589 NS_VERB_PROTO(proto),
1590 "ERROR - reservation collision under new "
1591 "global namespace");
1592 /* XXX: Should not fail. Maybe assert instead */
1593 goto done;
1594 }
1595 }
1596
1597 memcpy(nt->nt_addr, addr, addr_len);
1598 nt->nt_addr_len = addr_len;
1599
1600 done:
1601 NETNS_UNLOCK();
1602 return err;
1603 }
1604
1605 static void
_netns_set_ifnet_internal(struct ns_token * nt,struct ifnet * ifp)1606 _netns_set_ifnet_internal(struct ns_token *nt, struct ifnet *ifp)
1607 {
1608 #if SK_LOG
1609 char tmp_ip_str[MAX_IPv6_STR_LEN];
1610 #endif /* SK_LOG */
1611
1612 NETNS_LOCK_ASSERT_HELD();
1613
1614 if (ifp != NULL && ifnet_is_attached(ifp, 1)) {
1615 nt->nt_ifp = ifp;
1616 SLIST_INSERT_HEAD(&ifp->if_netns_tokens, nt, nt_ifp_link);
1617
1618 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1619 "%s:%s:%d // added to ifnet %d",
1620 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
1621 nt->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
1622 PROTO_STR(nt->nt_proto), nt->nt_port,
1623 ifp->if_index);
1624 } else {
1625 SLIST_INSERT_HEAD(&netns_unbound_tokens, nt, nt_ifp_link);
1626 }
1627 }
1628
1629 void
netns_set_ifnet(netns_token * token,ifnet_t ifp)1630 netns_set_ifnet(netns_token *token, ifnet_t ifp)
1631 {
1632 struct ns_token *nt;
1633 #if SK_LOG
1634 char tmp_ip_str[MAX_IPv6_STR_LEN];
1635 #endif /* SK_LOG */
1636
1637 if (__netns_inited == 0) {
1638 return;
1639 }
1640
1641 NETNS_LOCK();
1642
1643 VERIFY(NETNS_TOKEN_VALID(token));
1644
1645 nt = *token;
1646
1647 if (nt->nt_ifp == ifp) {
1648 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1649 "%s:%s:%d // ifnet already %d, exiting early",
1650 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
1651 nt->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
1652 PROTO_STR(nt->nt_proto), nt->nt_port,
1653 ifp ? ifp->if_index : -1);
1654 NETNS_UNLOCK();
1655 return;
1656 }
1657
1658 netns_clear_ifnet(nt);
1659
1660 _netns_set_ifnet_internal(nt, ifp);
1661
1662 NETNS_UNLOCK();
1663 }
1664
1665 void
netns_ifnet_detach(ifnet_t ifp)1666 netns_ifnet_detach(ifnet_t ifp)
1667 {
1668 struct ns_token *token, *tmp_token;
1669
1670 if (__netns_inited == 0) {
1671 return;
1672 }
1673
1674 NETNS_LOCK();
1675
1676 SLIST_FOREACH_SAFE(token, &ifp->if_netns_tokens, nt_ifp_link,
1677 tmp_token) {
1678 netns_clear_ifnet(token);
1679 SLIST_INSERT_HEAD(&netns_unbound_tokens, token, nt_ifp_link);
1680 }
1681
1682 NETNS_UNLOCK();
1683 }
1684
1685 static void
_netns_set_state(netns_token * token,uint32_t state)1686 _netns_set_state(netns_token *token, uint32_t state)
1687 {
1688 struct ns_token *nt;
1689 #if SK_LOG
1690 char tmp_ip_str[MAX_IPv6_STR_LEN];
1691 #endif /* SK_LOG */
1692
1693 if (__netns_inited == 0) {
1694 return;
1695 }
1696
1697 NETNS_LOCK();
1698 VERIFY(NETNS_TOKEN_VALID(token));
1699
1700 nt = *token;
1701 nt->nt_state |= state;
1702
1703 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1704 "%s:%s:%d // state 0x%b",
1705 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1706 tmp_ip_str, sizeof(tmp_ip_str)),
1707 PROTO_STR(nt->nt_proto), nt->nt_port, state, NETNS_STATE_BITS);
1708
1709 NETNS_UNLOCK();
1710 }
1711
1712 void
netns_half_close(netns_token * token)1713 netns_half_close(netns_token *token)
1714 {
1715 _netns_set_state(token, NETNS_STATE_HALFCLOSED);
1716 }
1717
1718 void
netns_withdraw(netns_token * token)1719 netns_withdraw(netns_token *token)
1720 {
1721 _netns_set_state(token, NETNS_STATE_WITHDRAWN);
1722 }
1723
1724 int
netns_get_flow_info(netns_token * token,struct ns_flow_info * nfi)1725 netns_get_flow_info(netns_token *token,
1726 struct ns_flow_info *nfi)
1727 {
1728 if (__netns_inited == 0) {
1729 return ENOTSUP;
1730 }
1731
1732 NETNS_LOCK();
1733 if (!NETNS_TOKEN_VALID(token) ||
1734 nfi == NULL) {
1735 NETNS_UNLOCK();
1736 return EINVAL;
1737 }
1738
1739 struct ns_token *nt = *token;
1740 if (nt->nt_flow_info == NULL) {
1741 NETNS_UNLOCK();
1742 return ENOENT;
1743 }
1744
1745 memcpy(nfi, nt->nt_flow_info, sizeof(struct ns_flow_info));
1746 NETNS_UNLOCK();
1747
1748 return 0;
1749 }
1750
1751 void
netns_change_flags(netns_token * token,uint32_t set_flags,uint32_t clear_flags)1752 netns_change_flags(netns_token *token, uint32_t set_flags,
1753 uint32_t clear_flags)
1754 {
1755 struct ns_token *nt;
1756 #if SK_LOG
1757 char tmp_ip_str[MAX_IPv6_STR_LEN];
1758 #endif /* SK_LOG */
1759
1760 if (__netns_inited == 0) {
1761 return;
1762 }
1763
1764 NETNS_LOCK();
1765
1766 VERIFY(NETNS_TOKEN_VALID(token));
1767
1768 nt = *token;
1769
1770 VERIFY(!((set_flags | clear_flags) & NETNS_RESERVATION_FLAGS));
1771 /* TODO: verify set and clear flags don't overlap? */
1772
1773 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1774 "%s:%s:%d // flags 0x%x -> 0x%x",
1775 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1776 tmp_ip_str, sizeof(tmp_ip_str)),
1777 PROTO_STR(nt->nt_proto), nt->nt_port, nt->nt_flags,
1778 nt->nt_flags | set_flags & ~clear_flags);
1779
1780 nt->nt_flags |= set_flags;
1781 nt->nt_flags &= ~clear_flags;
1782
1783 NETNS_UNLOCK();
1784 }
1785
1786 /*
1787 * Port offloading KPI
1788 */
1789 static inline void
netns_local_port_scan_flow_entry(struct flow_entry * fe,protocol_family_t protocol,u_int32_t flags,u_int8_t * bitfield)1790 netns_local_port_scan_flow_entry(struct flow_entry *fe, protocol_family_t protocol,
1791 u_int32_t flags, u_int8_t *bitfield)
1792 {
1793 struct ns_token *token = fe->fe_port_reservation;
1794 boolean_t iswildcard = false;
1795
1796 if (fe == NULL || token == NULL) {
1797 return;
1798 }
1799
1800 /*
1801 * We are only interested in active flows over skywalk channels
1802 */
1803 if ((token->nt_flags & NETNS_OWNER_MASK) != NETNS_SKYWALK) {
1804 return;
1805 }
1806
1807 if (token->nt_state & NETNS_STATE_WITHDRAWN) {
1808 return;
1809 }
1810
1811 if (!(flags & IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK) &&
1812 (flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) &&
1813 (token->nt_state & NETNS_STATE_HALFCLOSED)) {
1814 return;
1815 }
1816
1817 VERIFY(token->nt_addr_len == sizeof(struct in_addr) ||
1818 token->nt_addr_len == sizeof(struct in6_addr));
1819
1820 if (token->nt_addr_len == sizeof(struct in_addr)) {
1821 if (protocol == PF_INET6) {
1822 return;
1823 }
1824
1825 iswildcard = token->nt_inaddr.s_addr == INADDR_ANY;
1826 } else if (token->nt_addr_len == sizeof(struct in6_addr)) {
1827 if (protocol == PF_INET) {
1828 return;
1829 }
1830
1831 iswildcard = IN6_IS_ADDR_UNSPECIFIED(
1832 &token->nt_in6addr);
1833 }
1834 if (!(flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) && iswildcard) {
1835 return;
1836 }
1837
1838 if ((flags & IFNET_GET_LOCAL_PORTS_TCPONLY) &&
1839 token->nt_proto == IPPROTO_UDP) {
1840 return;
1841 }
1842 if ((flags & IFNET_GET_LOCAL_PORTS_UDPONLY) &&
1843 token->nt_proto == IPPROTO_TCP) {
1844 return;
1845 }
1846
1847 if (!(flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) &&
1848 (token->nt_flags & NETNS_NOWAKEFROMSLEEP)) {
1849 return;
1850 }
1851
1852 if ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) &&
1853 !(token->nt_flags & NETNS_RECVANYIF)) {
1854 return;
1855 }
1856
1857 if ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) &&
1858 !(token->nt_flags & NETNS_EXTBGIDLE)) {
1859 return;
1860 }
1861
1862 if (token->nt_ifp != NULL && token->nt_flow_info != NULL) {
1863 bitstr_set(bitfield, token->nt_port);
1864 (void) if_ports_used_add_flow_entry(fe, token->nt_ifp->if_index,
1865 token->nt_flow_info, token->nt_flags);
1866 } else {
1867 SK_ERR("%s: unknown owner port %u"
1868 " nt_flags 0x%x ifindex %u nt_flow_info %p\n",
1869 __func__, token->nt_port,
1870 token->nt_flags,
1871 token->nt_ifp != NULL ? token->nt_ifp->if_index : 0,
1872 token->nt_flow_info);
1873 }
1874 }
1875
1876 static void
netns_get_if_local_ports(ifnet_t ifp,protocol_family_t protocol,u_int32_t flags,u_int8_t * bitfield)1877 netns_get_if_local_ports(ifnet_t ifp, protocol_family_t protocol,
1878 u_int32_t flags, u_int8_t *bitfield)
1879 {
1880 struct nx_flowswitch *fsw = NULL;
1881
1882 if (ifp == NULL || ifp->if_na == NULL) {
1883 return;
1884 }
1885 /* Ensure that the interface is attached and won't detach */
1886 if (!ifnet_is_attached(ifp, 1)) {
1887 return;
1888 }
1889 fsw = fsw_ifp_to_fsw(ifp);
1890 if (fsw == NULL) {
1891 goto done;
1892 }
1893 FSW_RLOCK(fsw);
1894 flow_mgr_foreach_flow(fsw->fsw_flow_mgr, ^(struct flow_entry *_fe) {
1895 netns_local_port_scan_flow_entry(_fe, protocol, flags,
1896 bitfield);
1897 });
1898 FSW_UNLOCK(fsw);
1899 done:
1900 ifnet_decr_iorefcnt(ifp);
1901 }
1902
1903 errno_t
netns_get_local_ports(ifnet_t ifp,protocol_family_t protocol,u_int32_t flags,u_int8_t * bitfield)1904 netns_get_local_ports(ifnet_t ifp, protocol_family_t protocol,
1905 u_int32_t flags, u_int8_t *bitfield)
1906 {
1907 if (__netns_inited == 0) {
1908 return 0;
1909 }
1910 if (ifp != NULL) {
1911 netns_get_if_local_ports(ifp, protocol, flags, bitfield);
1912 } else {
1913 errno_t error;
1914 ifnet_t *ifp_list;
1915 uint32_t count, i;
1916
1917 error = ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp_list, &count);
1918 if (error != 0) {
1919 os_log_error(OS_LOG_DEFAULT,
1920 "%s: ifnet_list_get_all() failed %d",
1921 __func__, error);
1922 return error;
1923 }
1924 for (i = 0; i < count; i++) {
1925 if (TAILQ_EMPTY(&ifp_list[i]->if_addrhead)) {
1926 continue;
1927 }
1928 netns_get_if_local_ports(ifp_list[i], protocol, flags,
1929 bitfield);
1930 }
1931 ifnet_list_free(ifp_list);
1932 }
1933
1934 return 0;
1935 }
1936
1937 uint32_t
netns_find_anyres_byaddr(struct ifaddr * ifa,uint8_t proto)1938 netns_find_anyres_byaddr(struct ifaddr *ifa, uint8_t proto)
1939 {
1940 int result = 0;
1941 int ifa_addr_len;
1942 struct ns_token *token;
1943 struct ifnet *ifp = ifa->ifa_ifp;
1944 struct sockaddr *ifa_addr = ifa->ifa_addr;
1945
1946 if (__netns_inited == 0) {
1947 return ENOTSUP;
1948 }
1949
1950 if ((ifa_addr->sa_family != AF_INET) &&
1951 (ifa_addr->sa_family != AF_INET6)) {
1952 return 0;
1953 }
1954
1955 ifa_addr_len = (ifa_addr->sa_family == AF_INET) ?
1956 sizeof(struct in_addr) : sizeof(struct in6_addr);
1957
1958 NETNS_LOCK();
1959
1960 SLIST_FOREACH(token, &ifp->if_netns_tokens, nt_ifp_link) {
1961 if ((token->nt_flags & NETNS_OWNER_MASK) == NETNS_PF) {
1962 continue;
1963 }
1964 if (token->nt_addr_len != ifa_addr_len) {
1965 continue;
1966 }
1967 if (token->nt_proto != proto) {
1968 continue;
1969 }
1970 if (ifa_addr->sa_family == AF_INET) {
1971 if (token->nt_inaddr.s_addr ==
1972 (satosin(ifa->ifa_addr))->sin_addr.s_addr) {
1973 result = 1;
1974 break;
1975 }
1976 } else if (ifa_addr->sa_family == AF_INET6) {
1977 if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa),
1978 &token->nt_in6addr)) {
1979 result = 1;
1980 break;
1981 }
1982 }
1983 }
1984
1985 NETNS_UNLOCK();
1986 return result;
1987 }
1988
1989 static uint32_t
_netns_lookup_ns_n_reservations(uint32_t * addr,uint8_t addr_len,uint8_t proto)1990 _netns_lookup_ns_n_reservations(uint32_t *addr, uint8_t addr_len, uint8_t proto)
1991 {
1992 uint32_t ns_n_reservations = 0;
1993 NETNS_LOCK_SPIN();
1994 struct ns *namespace = _netns_get_ns(addr, addr_len, proto, true);
1995 if (namespace != NULL) {
1996 ns_n_reservations = namespace->ns_n_reservations;
1997 }
1998 NETNS_UNLOCK();
1999 return ns_n_reservations;
2000 }
2001
2002 uint32_t
netns_lookup_reservations_count_in(struct in_addr addr,uint8_t proto)2003 netns_lookup_reservations_count_in(struct in_addr addr, uint8_t proto)
2004 {
2005 return _netns_lookup_ns_n_reservations(&addr.s_addr, sizeof(struct in_addr), proto);
2006 }
2007
2008 uint32_t
netns_lookup_reservations_count_in6(struct in6_addr addr,uint8_t proto)2009 netns_lookup_reservations_count_in6(struct in6_addr addr, uint8_t proto)
2010 {
2011 if (IN6_IS_SCOPE_EMBED(&addr)) {
2012 addr.s6_addr16[1] = 0;
2013 }
2014 return _netns_lookup_ns_n_reservations(&addr.s6_addr32[0], sizeof(struct in6_addr), proto);
2015 }
2016
2017 /*
2018 * Sysctl interface
2019 */
2020
/* Forward declaration of the handler registered with SYSCTL_PROC below. */
static int netns_ctl_dump_all SYSCTL_HANDLER_ARGS;

/* Parent node "netns" under _kern_skywalk. */
SYSCTL_NODE(_kern_skywalk, OID_AUTO, netns, CTLFLAG_RW | CTLFLAG_LOCKED,
    0, "Netns interface");

/*
 * "netns" entry under _kern_skywalk_stats; served by netns_ctl_dump_all(),
 * which writes out the contents of every namespace.
 */
SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, netns,
    CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, netns_ctl_dump_all, "-",
    "Namespace contents (struct netns_ctl_dump_header, "
    "skywalk/os_stats_private.h)");
2031
2032 static int
netns_ctl_write_ns(struct sysctl_req * req,struct ns * namespace,boolean_t is_global)2033 netns_ctl_write_ns(struct sysctl_req *req, struct ns *namespace,
2034 boolean_t is_global)
2035 {
2036 struct ns_reservation *res;
2037 struct netns_ctl_dump_header response_header;
2038 struct netns_ctl_dump_record response_record;
2039 int err;
2040
2041 /* Fill out header */
2042 memset(&response_header, 0, sizeof(response_header));
2043 response_header.ncdh_n_records = namespace->ns_n_reservations;
2044 response_header.ncdh_proto = namespace->ns_proto;
2045
2046 if (is_global) {
2047 response_header.ncdh_addr_len = 0;
2048 } else {
2049 response_header.ncdh_addr_len = namespace->ns_addr_len;
2050 }
2051 memcpy(response_header.ncdh_addr, namespace->ns_addr,
2052 namespace->ns_addr_len);
2053
2054 err = SYSCTL_OUT(req, &response_header, sizeof(response_header));
2055 if (err) {
2056 return err;
2057 }
2058
2059 /* Fill out records */
2060 RB_FOREACH(res, ns_reservation_tree, &namespace->ns_reservations) {
2061 memset(&response_record, 0, sizeof(response_record));
2062 response_record.ncdr_port = res->nsr_port;
2063 response_record.ncdr_port_end = 0;
2064 response_record.ncdr_listener_refs =
2065 NETNS_REF_COUNT(res, NETNS_LISTENER);
2066 response_record.ncdr_skywalk_refs =
2067 NETNS_REF_COUNT(res, NETNS_SKYWALK);
2068 response_record.ncdr_bsd_refs =
2069 NETNS_REF_COUNT(res, NETNS_BSD);
2070 response_record.ncdr_pf_refs =
2071 NETNS_REF_COUNT(res, NETNS_PF);
2072 err = SYSCTL_OUT(req, &response_record,
2073 sizeof(response_record));
2074 if (err) {
2075 return err;
2076 }
2077 }
2078
2079 return 0;
2080 }
2081
2082 static int
2083 netns_ctl_dump_all SYSCTL_HANDLER_ARGS
2084 {
2085 #pragma unused(oidp, arg1, arg2)
2086 struct ns *namespace;
2087 int i, err = 0;
2088
2089 if (!kauth_cred_issuser(kauth_cred_get())) {
2090 return EPERM;
2091 }
2092
2093 if (__netns_inited == 0) {
2094 return ENOTSUP;
2095 }
2096
2097 NETNS_LOCK();
2098
2099 for (i = 0; i < NETNS_N_GLOBAL; i++) {
2100 err = netns_ctl_write_ns(req, netns_global_non_wild[i], true);
2101 if (err) {
2102 goto done;
2103 }
2104 }
2105
2106 RB_FOREACH(namespace, netns_namespaces_tree, &netns_namespaces) {
2107 err = netns_ctl_write_ns(req, namespace, false);
2108 if (err) {
2109 goto done;
2110 }
2111 }
2112
2113 /*
2114 * If this is just a request for length, add slop because
2115 * this is dynamically changing data
2116 */
2117 if (req->oldptr == USER_ADDR_NULL) {
2118 req->oldidx += 20 * sizeof(struct netns_ctl_dump_record);
2119 }
2120
2121 done:
2122 NETNS_UNLOCK();
2123 return err;
2124 }
2125 /* CSTYLED */
2126