1 /*
2 * Copyright (c) 2016-2021 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <kern/assert.h>
30 #include <kern/locks.h>
31 #include <kern/zalloc.h>
32 #include <libkern/tree.h>
33 #include <sys/kernel.h>
34 #include <sys/sysctl.h>
35 #include <sys/bitstring.h>
36 #include <net/if.h>
37 #include <net/kpi_interface.h>
38 #include <net/restricted_in_port.h>
39
40 #include <netinet/in.h>
41 #include <netinet/in_pcb.h>
42 #include <netinet/tcp_fsm.h>
43 #include <netinet/tcp_var.h>
44
45 #include <netinet6/in6_var.h>
46 #include <string.h>
47
48 #include <skywalk/os_skywalk.h>
49 #include <skywalk/os_skywalk_private.h>
50 #include <skywalk/os_stats_private.h>
51 #include <skywalk/nexus/flowswitch/flow/flow_var.h>
52 #include <skywalk/nexus/flowswitch/nx_flowswitch.h>
53
54 #include <net/if_ports_used.h>
55
56 static int __netns_inited = 0;
57
58 /*
59 * Logging
60 */
61
/*
 * Logging helper macros.
 *
 * Every macro argument is wrapped in parentheses so that callers may pass
 * an arbitrary expression (e.g. "flags & mask") without the comparison
 * binding tighter than the caller's own operators.
 */

/* Verbosity bit for a protocol: TCP selects SK_VERB_NS_TCP, anything else SK_VERB_NS_UDP. */
#define NS_VERB_PROTO(proto)    (((proto) == IPPROTO_TCP) ? SK_VERB_NS_TCP : \
	                        SK_VERB_NS_UDP)
/* Verbosity bit for an address length: IPv4-sized selects SK_VERB_NS_IPV4, anything else SK_VERB_NS_IPV6. */
#define NS_VERB_IP(addr_len)    (((addr_len) == sizeof(struct in_addr)) ? \
	                        SK_VERB_NS_IPV4 : SK_VERB_NS_IPV6)
/* Printable protocol name: "tcp" for IPPROTO_TCP, "udp" for anything else. */
#define PROTO_STR(proto)        (((proto) == IPPROTO_TCP) ? "tcp" : "udp")
/* Address family for an address length: AF_INET for IPv4-sized, AF_INET6 otherwise. */
#define LEN_TO_AF(len)          (((len) == sizeof(struct in_addr)) ? \
	                        AF_INET : AF_INET6)
69 /*
70 * Locking
71 * Netns is currently protected by a global mutex, NETNS_LOCK. This lock is
 * acquired at the entry of every kernel-facing function, and released at the
73 * end. Data within netns_token structures is also protected under this lock.
74 */
75
/* Take the global netns mutex via lck_mtx_lock(). */
#define NETNS_LOCK()                    \
	lck_mtx_lock(&netns_lock)
/* Take the global netns mutex via lck_mtx_lock_spin(). */
#define NETNS_LOCK_SPIN()               \
	lck_mtx_lock_spin(&netns_lock)
/*
 * Assert the lock is owned, then call lck_mtx_convert_spin() on it.
 * Used in this file ahead of allocations and frees.
 */
#define NETNS_LOCK_CONVERT() do {       \
	NETNS_LOCK_ASSERT_HELD();       \
	lck_mtx_convert_spin(&netns_lock); \
} while (0)
/* Release the global netns mutex. */
#define NETNS_UNLOCK()                  \
	lck_mtx_unlock(&netns_lock)
/* Assert that the current thread owns the global netns mutex. */
#define NETNS_LOCK_ASSERT_HELD()        \
	LCK_MTX_ASSERT(&netns_lock, LCK_MTX_ASSERT_OWNED)
/* Assert that the current thread does not own the global netns mutex. */
#define NETNS_LOCK_ASSERT_NOTHELD()     \
	LCK_MTX_ASSERT(&netns_lock, LCK_MTX_ASSERT_NOTOWNED)

/* Lock group and the single global mutex that the macros above operate on. */
static LCK_GRP_DECLARE(netns_lock_group, "netns_lock");
static LCK_MTX_DECLARE(netns_lock, &netns_lock_group);
93
94 /*
95 * Internal data structures and parameters
96 */
97
98 /*
99 * Local ports are kept track of by reference counts kept in a tree specific to
100 * an <IP, protocol> tuple (see struct ns).
101 *
102 * Note: port numbers are stored in host byte order.
103 */
/* One node per reserved port inside a namespace's reservation tree. */
struct ns_reservation {
	RB_ENTRY(ns_reservation) nsr_link;      /* red-black tree linkage */
	in_port_t nsr_port;                     /* reserved port; the tree's sort key */
	uint32_t nsr_refs[NETNS_OWNER_MAX + 1]; /* per-owner reference counts */
};
109
/*
 * Lvalue for the reference count that belongs to the owner encoded in
 * `flags` (only the NETNS_OWNER_MASK bits are used to pick the slot).
 */
#define NETNS_REF_COUNT(nsr, flags) \
	(nsr)->nsr_refs[((flags) & NETNS_OWNER_MASK)]

/* Ordering function for the reservation tree. */
static inline int nsr_cmp(const struct ns_reservation *,
    const struct ns_reservation *);

/* Tree type holding ns_reservation nodes, ordered by nsr_cmp. */
RB_HEAD(ns_reservation_tree, ns_reservation);
RB_PROTOTYPE(ns_reservation_tree, ns_reservation, nsr_link, nsr_cmp);
RB_GENERATE(ns_reservation_tree, ns_reservation, nsr_link, nsr_cmp);

/* Look up the reservation for a port in a reservation tree. */
static inline struct ns_reservation *ns_reservation_tree_find(
	struct ns_reservation_tree *, const in_port_t);
122
123 /*
124 * A namespace keeps track of the local port numbers in use for a given
125 * <IP, protocol> tuple. There are also global namespaces for each
 * protocol to accommodate INADDR_ANY behavior and diagnostics.
127 */
/* Per-<IP, protocol> namespace; a node in the netns_namespaces tree. */
struct ns {
	RB_ENTRY(ns) ns_link;                   /* linkage in netns_namespaces */

	/*
	 * Address pointer handed to flow_ip_cmp() by ns_cmp().  For a
	 * namespace created by _netns_get_ns() it points at ns_addr below;
	 * for a stack-allocated lookup key it points at the caller's buffer.
	 */
	void *ns_addr_key;

	/* The namespace's address, viewable as raw words or as v4/v6. */
	union {
		uint32_t ns_addr[4];
		struct in_addr ns_inaddr;
		struct in6_addr ns_in6addr;
	};
	uint8_t ns_addr_len;                    /* sizeof in_addr or in6_addr */
	uint8_t ns_proto;                       /* IP protocol of this namespace */

	/* Both are seeded with the same random port by netns_ns_alloc(). */
	in_port_t ns_last_ephemeral_port_down;
	in_port_t ns_last_ephemeral_port_up;

	/* Non-zero lets netns_ns_cleanup() free the namespace once empty. */
	uint8_t ns_is_freeable;

	uint32_t ns_n_reservations;             /* number of nodes in ns_reservations */
	struct ns_reservation_tree ns_reservations; /* reserved ports */
};
149
/* Number of namespaces currently linked in netns_namespaces. */
static uint32_t netns_n_namespaces;

/* Ordering function for the namespace tree. */
static inline int ns_cmp(const struct ns *, const struct ns *);

/* Tree of all address-specific namespaces, ordered by ns_cmp. */
RB_HEAD(netns_namespaces_tree, ns) netns_namespaces =
    RB_INITIALIZER(netns_namespaces);
RB_PROTOTYPE_PREV(netns_namespaces_tree, ns, ns_link, ns_cmp);
RB_GENERATE_PREV(netns_namespaces_tree, ns, ns_link, ns_cmp);
158
159 /*
160 * Declare pointers to global namespaces for each protocol.
161 * All non-wildcard reservations will have an entry here.
162 */
/* Slots per global array: {TCP, UDP} x {IPv4, IPv6}. */
#define NETNS_N_GLOBAL          4
static struct ns *netns_global_non_wild[NETNS_N_GLOBAL];
static struct ns *netns_global_wild[NETNS_N_GLOBAL];
/* Bit 0 of the index selects the protocol, bit 1 the address family. */
#define NETNS_NS_TCP            0
#define NETNS_NS_UDP            1
#define NETNS_NS_V4             0
#define NETNS_NS_V6             2
/*
 * Index into the global arrays for a <protocol, address length> pair.
 * Any protocol other than TCP maps to the UDP slot and any length other
 * than sizeof(struct in_addr) maps to the IPv6 slot.
 */
#define NETNS_NS_GLOBAL_IDX(proto, addrlen)     \
	((((proto) == IPPROTO_TCP) ? NETNS_NS_TCP : NETNS_NS_UDP) | \
	(((addrlen) == sizeof (struct in_addr)) ? NETNS_NS_V4 : NETNS_NS_V6))

/* NOTE(review): not referenced in the visible part of the file; presumably a count of ephemeral UDP ports held back -- confirm at its use site. */
#define NETNS_NS_UDP_EPHEMERAL_RESERVE  4096
175
176 /*
177 * Internal token structure
178 *
179 * Note: port numbers are stored in host byte order.
180 */
struct ns_token {
	/* Reservation state */
	ifnet_t                 nt_ifp;         /* bound interface, or NULL */
	SLIST_ENTRY(ns_token)   nt_ifp_link;    /* per-ifnet list or netns_unbound_tokens */
	SLIST_ENTRY(ns_token)   nt_all_link;    /* linkage in netns_all_tokens */
	uint32_t                nt_state;       /* NETNS_STATE_* */

	/* Reservation context */
	union {
		uint32_t        nt_addr[4];
		struct in_addr  nt_inaddr;
		struct in6_addr nt_in6addr;
	};
	uint8_t                 nt_addr_len;    /* sizeof in_addr or in6_addr */
	uint8_t                 nt_proto;       /* IPPROTO_TCP or IPPROTO_UDP */
	in_port_t               nt_port;        /* host byte order */
	uint32_t                nt_flags;       /* NETNS_* reservation/configuration flags */

	/* Optional information about the flow */
	struct ns_flow_info     *nt_flow_info;  /* NULL when none was allocated */
};
202
203 /* Valid values for nt_state */
#define NETNS_STATE_HALFCLOSED  0x1     /* half closed */
#define NETNS_STATE_WITHDRAWN   0x2     /* withdrawn; not offloadable */

/* NOTE(review): looks like a "%b"-style bit-name format string for nt_state -- confirm at its use site. */
#define NETNS_STATE_BITS        "\020\01HALFCLOSED\02WITHDRAWN"

/* List of tokens not bound to an ifnet (linked through nt_ifp_link) */
SLIST_HEAD(, ns_token) netns_unbound_tokens = SLIST_HEAD_INITIALIZER(
	netns_unbound_tokens);

/* List of all tokens currently allocated in the system (linked through nt_all_link) */
SLIST_HEAD(, ns_token) netns_all_tokens = SLIST_HEAD_INITIALIZER(
	netns_all_tokens);
216
217 /*
218 * Memory management
219 */
/* Zone backing struct ns; used by netns_ns_alloc() and netns_ns_free(). */
static ZONE_DECLARE(netns_ns_zone, SKMEM_ZONE_PREFIX ".netns.ns",
    sizeof(struct ns), ZC_ZFREE_CLEARMEM);

/* skmem cache for struct ns_token; the size is used to zero new objects. */
#define NETNS_NS_TOKEN_ZONE_NAME        "netns.ns_token"
static unsigned int netns_ns_token_size; /* size of zone element */
static struct skmem_cache *netns_ns_token_cache; /* for ns_token */

/* skmem cache for the optional struct ns_flow_info attached to a token. */
#define NETNS_NS_FLOW_INFO_ZONE_NAME    "netns.ns_flow_info"
static unsigned int netns_ns_flow_info_size; /* size of zone element */
static struct skmem_cache *netns_ns_flow_info_cache; /* for ns_flow_info */

/* skmem cache for struct ns_reservation; the size is used to zero new objects. */
#define NETNS_NS_RESERVATION_ZONE_NAME  "netns.ns_reservation"
static unsigned int netns_ns_reservation_size; /* size of zone element */
static struct skmem_cache *netns_ns_reservation_cache; /* for ns_reservation */
234
/* Allocation and teardown helpers for reservations, namespaces and tokens. */
static struct ns_reservation *netns_ns_reservation_alloc(boolean_t, in_port_t);
static void netns_ns_reservation_free(struct ns_reservation *);
static struct ns *netns_ns_alloc(zalloc_flags_t);
static void netns_ns_free(struct ns *);
static void netns_ns_cleanup(struct ns *);
static struct ns_token *netns_ns_token_alloc(boolean_t, boolean_t);
static void netns_ns_token_free(struct ns_token *);

/*
 * Utility/internal code
 */
/* Namespace lookup (optionally creating), wildcard test, reserve/release. */
static struct ns *_netns_get_ns(uint32_t *, uint8_t, uint8_t, bool);
static inline boolean_t _netns_is_wildcard_addr(const uint32_t *, uint8_t);
static int _netns_reserve_common(struct ns *, in_port_t, uint32_t);
static void _netns_release_common(struct ns *, in_port_t, uint32_t);
/* Token <-> ifnet association and the shared KPI reservation path. */
static inline void netns_clear_ifnet(struct ns_token *);
static int _netns_reserve_kpi_common(struct ns *, netns_token *, uint32_t *,
    uint8_t, uint8_t, in_port_t *, uint32_t, struct ns_flow_info *);
static void _netns_set_ifnet_internal(struct ns_token *, struct ifnet *);
254
255 static struct ns_reservation *
netns_ns_reservation_alloc(boolean_t can_block,in_port_t port)256 netns_ns_reservation_alloc(boolean_t can_block, in_port_t port)
257 {
258 struct ns_reservation *res;
259
260 VERIFY(port != 0);
261
262 res = skmem_cache_alloc(netns_ns_reservation_cache,
263 can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
264 if (res == NULL) {
265 return NULL;
266 }
267
268 bzero(res, netns_ns_reservation_size);
269 res->nsr_port = port;
270 return res;
271 }
272
/*
 * Return a reservation record to the reservation cache.  The record is not
 * unlinked from any tree here; callers do that themselves.
 */
static void
netns_ns_reservation_free(struct ns_reservation *res)
{
	skmem_cache_free(netns_ns_reservation_cache, res);
}
278
279 static struct ns *
netns_ns_alloc(zalloc_flags_t how)280 netns_ns_alloc(zalloc_flags_t how)
281 {
282 struct ns *namespace;
283 in_port_t first = (in_port_t)ipport_firstauto;
284 in_port_t last = (in_port_t)ipport_lastauto;
285 in_port_t rand_port;
286
287 namespace = zalloc_flags(netns_ns_zone, how | Z_ZERO);
288 if (namespace == NULL) {
289 return NULL;
290 }
291
292 namespace->ns_is_freeable = 1;
293
294 RB_INIT(&namespace->ns_reservations);
295
296 /*
297 * Randomize the initial ephemeral port starting point, just in case
298 * this namespace is for an ipv6 address which gets brought up and
299 * down often.
300 */
301 if (first == last) {
302 rand_port = first;
303 } else {
304 read_frandom(&rand_port, sizeof(rand_port));
305
306 if (first > last) {
307 rand_port = last + (rand_port % (first - last));
308 } else {
309 rand_port = first + (rand_port % (last - first));
310 }
311 }
312 namespace->ns_last_ephemeral_port_down = rand_port;
313 namespace->ns_last_ephemeral_port_up = rand_port;
314
315 return namespace;
316 }
317
318 static void
netns_ns_free(struct ns * namespace)319 netns_ns_free(struct ns *namespace)
320 {
321 struct ns_reservation *res;
322 struct ns_reservation *tmp_res;
323 #if SK_LOG
324 char tmp_ip_str[MAX_IPv6_STR_LEN];
325 #endif /* SK_LOG */
326
327 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
328 NS_VERB_PROTO(namespace->ns_proto),
329 "freeing %s ns for IP %s",
330 PROTO_STR(namespace->ns_proto),
331 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
332 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)));
333
334 RB_FOREACH_SAFE(res, ns_reservation_tree, &namespace->ns_reservations,
335 tmp_res) {
336 netns_ns_reservation_free(res);
337 namespace->ns_n_reservations--;
338 RB_REMOVE(ns_reservation_tree, &namespace->ns_reservations,
339 res);
340 }
341
342 VERIFY(RB_EMPTY(&namespace->ns_reservations));
343
344 if (netns_global_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
345 namespace->ns_addr_len)] == namespace) {
346 netns_global_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
347 namespace->ns_addr_len)] = NULL;
348 }
349 if (netns_global_non_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
350 namespace->ns_addr_len)] == namespace) {
351 netns_global_non_wild[NETNS_NS_GLOBAL_IDX(namespace->ns_proto,
352 namespace->ns_addr_len)] = NULL;
353 }
354
355 zfree(netns_ns_zone, namespace);
356 }
357
358 static void
netns_ns_cleanup(struct ns * namespace)359 netns_ns_cleanup(struct ns *namespace)
360 {
361 if (namespace->ns_is_freeable &&
362 RB_EMPTY(&namespace->ns_reservations)) {
363 RB_REMOVE(netns_namespaces_tree, &netns_namespaces, namespace);
364 netns_n_namespaces--;
365 netns_ns_free(namespace);
366 }
367 }
368
369 static struct ns_token *
netns_ns_token_alloc(boolean_t can_block,boolean_t with_nfi)370 netns_ns_token_alloc(boolean_t can_block, boolean_t with_nfi)
371 {
372 struct ns_token *token;
373
374 NETNS_LOCK_ASSERT_HELD();
375 NETNS_LOCK_CONVERT();
376
377 token = skmem_cache_alloc(netns_ns_token_cache,
378 can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
379 if (token == NULL) {
380 return NULL;
381 }
382
383 bzero(token, netns_ns_token_size);
384
385 if (with_nfi) {
386 token->nt_flow_info = skmem_cache_alloc(netns_ns_flow_info_cache,
387 can_block ? SKMEM_SLEEP : SKMEM_NOSLEEP);
388 if (token->nt_flow_info == NULL) {
389 skmem_cache_free(netns_ns_token_cache, token);
390 return NULL;
391 }
392 }
393 SLIST_INSERT_HEAD(&netns_all_tokens, token, nt_all_link);
394
395 return token;
396 }
397
398 static void
netns_ns_token_free(struct ns_token * token)399 netns_ns_token_free(struct ns_token *token)
400 {
401 NETNS_LOCK_ASSERT_HELD();
402 NETNS_LOCK_CONVERT();
403 SLIST_REMOVE(&netns_all_tokens, token, ns_token, nt_all_link);
404
405 if (token->nt_flow_info != NULL) {
406 skmem_cache_free(netns_ns_flow_info_cache, token->nt_flow_info);
407 }
408 skmem_cache_free(netns_ns_token_cache, token);
409 }
410
411 __attribute__((always_inline))
412 static inline int
nsr_cmp(const struct ns_reservation * nsr1,const struct ns_reservation * nsr2)413 nsr_cmp(const struct ns_reservation *nsr1, const struct ns_reservation *nsr2)
414 {
415 #define NSR_COMPARE(r1, r2) ((int)(r1)->nsr_port - (int)(r2)->nsr_port)
416 return NSR_COMPARE(nsr1, nsr2);
417 }
418
419 __attribute__((always_inline))
420 static inline int
ns_cmp(const struct ns * a,const struct ns * b)421 ns_cmp(const struct ns *a, const struct ns *b)
422 {
423 int d;
424
425 if ((d = (a->ns_addr_len - b->ns_addr_len)) != 0) {
426 return d;
427 }
428 if ((d = (a->ns_proto - b->ns_proto)) != 0) {
429 return d;
430 }
431 if ((d = flow_ip_cmp(a->ns_addr_key, b->ns_addr_key,
432 b->ns_addr_len)) != 0) {
433 return d;
434 }
435
436 return 0;
437 }
438
439 /*
440 * Common routine to look up a reservation.
441 *
442 * NOTE: Assumes the caller holds the NETNS global lock
443 */
444 __attribute__((always_inline))
445 static inline struct ns_reservation *
ns_reservation_tree_find(struct ns_reservation_tree * tree,const in_port_t port)446 ns_reservation_tree_find(struct ns_reservation_tree *tree, const in_port_t port)
447 {
448 struct ns_reservation res;
449 res.nsr_port = port;
450 return RB_FIND(ns_reservation_tree, tree, &res);
451 }
452
453 /*
454 * Retrieve the namespace for the supplied <address, protocol> tuple.
455 * If create is set and such a namespace doesn't already exist, one will be
456 * created.
457 */
458 static struct ns *
_netns_get_ns(uint32_t * addr,uint8_t addr_len,uint8_t proto,bool create)459 _netns_get_ns(uint32_t *addr, uint8_t addr_len, uint8_t proto, bool create)
460 {
461 struct ns *namespace = NULL;
462 struct ns find = {
463 .ns_addr_key = addr,
464 .ns_addr_len = addr_len,
465 .ns_proto = proto,
466 };
467 #if SK_LOG
468 char tmp_ip_str[MAX_IPv6_STR_LEN];
469 #endif /* SK_LOG */
470
471 VERIFY(addr_len == sizeof(struct in_addr) ||
472 addr_len == sizeof(struct in6_addr));
473
474 NETNS_LOCK_ASSERT_HELD();
475
476 namespace = RB_FIND(netns_namespaces_tree, &netns_namespaces, &find);
477
478 if (create && namespace == NULL) {
479 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
480 "allocating %s ns for IP %s",
481 PROTO_STR(proto), inet_ntop(LEN_TO_AF(addr_len), addr,
482 tmp_ip_str, sizeof(tmp_ip_str)));
483 NETNS_LOCK_CONVERT();
484 namespace = netns_ns_alloc(Z_WAITOK);
485 memcpy(namespace->ns_addr, addr, addr_len);
486 namespace->ns_addr_key = &namespace->ns_addr;
487 namespace->ns_addr_len = addr_len;
488 namespace->ns_proto = proto;
489 RB_INSERT(netns_namespaces_tree, &netns_namespaces, namespace);
490 netns_n_namespaces++;
491
492 if (_netns_is_wildcard_addr(addr, addr_len) &&
493 netns_global_wild[NETNS_NS_GLOBAL_IDX(proto,
494 addr_len)] == NULL) {
495 netns_global_wild[NETNS_NS_GLOBAL_IDX(proto,
496 addr_len)] = namespace;
497 }
498 }
499
500 return namespace;
501 }
502
503 /*
504 * Return true if the supplied address is a wildcard (INADDR_ANY)
505 */
506 __attribute__((always_inline))
507 static boolean_t
_netns_is_wildcard_addr(const uint32_t * addr,uint8_t addr_len)508 _netns_is_wildcard_addr(const uint32_t *addr, uint8_t addr_len)
509 {
510 boolean_t wildcard;
511
512 switch (addr_len) {
513 case sizeof(struct in_addr):
514 wildcard = (addr[0] == 0);
515 break;
516
517 case sizeof(struct in6_addr):
518 wildcard = (addr[0] == 0 && addr[1] == 0 &&
519 addr[2] == 0 && addr[3] == 0);
520 break;
521
522 default:
523 wildcard = FALSE;
524 break;
525 }
526
527 return wildcard;
528 }
529
530 __attribute__((always_inline))
531 static boolean_t
_netns_is_port_used(struct ns * gns,struct ns_reservation * curr_res,in_port_t port)532 _netns_is_port_used(struct ns * gns, struct ns_reservation *curr_res, in_port_t port)
533 {
534 struct ns_reservation *res = NULL;
535
536 if (gns == NULL) {
537 return FALSE;
538 }
539
540 res = ns_reservation_tree_find(&gns->ns_reservations, port);
541 if (res != NULL && res != curr_res) {
542 if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
543 NETNS_REF_COUNT(res, NETNS_PF) > 0 ||
544 NETNS_REF_COUNT(res, NETNS_LISTENER) > 0 ||
545 NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0) {
546 return TRUE;
547 }
548 }
549
550 return FALSE;
551 }
552
/*
 * Internal shared code to reserve ports within a specific namespace.
 *
 * A reservation record for `port` is allocated up front and inserted into
 * the namespace's tree; if a record for the port already exists the new one
 * is discarded and the existing one is used.  Unless `namespace` is the
 * global non-wild namespace, the request is then checked for collisions
 * against the global wild / non-wild namespaces and against the other
 * owners of the same record.
 *
 * On success the reference count for the owner encoded in `flags` is
 * incremented.  On failure a record that this call inserted is removed and
 * freed again; a pre-existing record is left untouched.
 *
 * Returns 0 on success, ENOMEM if the record could not be allocated, or
 * EADDRINUSE on a conflicting reservation.  Panics on an unknown owner.
 *
 * Note: port numbers are in host byte-order here.
 */
static int
_netns_reserve_common(struct ns *namespace, in_port_t port, uint32_t flags)
{
	struct ns_reservation *res = NULL, *exist = NULL;
	uint8_t proto, addr_len;
	int err = 0;
#if SK_LOG
	char tmp_ip_str[MAX_IPv6_STR_LEN];
#endif /* SK_LOG */

	VERIFY(port != 0);
	proto = namespace->ns_proto;
	addr_len = namespace->ns_addr_len;
	NETNS_LOCK_CONVERT();
	res = netns_ns_reservation_alloc(TRUE, port);
	if (res == NULL) {
		SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
		    "ERROR %s:%s:%d // flags 0x%x // OUT OF MEMORY",
		    inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
		    namespace->ns_addr, tmp_ip_str,
		    sizeof(tmp_ip_str)), PROTO_STR(proto), port, flags);
		return ENOMEM;
	}
	/* RB_INSERT hands back the existing node on a duplicate key. */
	exist = RB_INSERT(ns_reservation_tree, &namespace->ns_reservations,
	    res);
	if (__probable(exist == NULL)) {
		namespace->ns_n_reservations++;
	} else {
		/* Port already tracked: drop the fresh record, use the old. */
		netns_ns_reservation_free(res);
		res = exist;
	}

	SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
	    "pre: %s:%s:%d // flags 0x%x // refs %d sky, %d ls, "
	    "%d bsd %d pf", inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
	    namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
	    PROTO_STR(proto), port, flags,
	    NETNS_REF_COUNT(res, NETNS_SKYWALK),
	    NETNS_REF_COUNT(res, NETNS_LISTENER),
	    NETNS_REF_COUNT(res, NETNS_BSD),
	    NETNS_REF_COUNT(res, NETNS_PF));

	/* Make reservation */
	/*
	 * Bypass collision detection for reservations in the global non-wild
	 * namespace. We use that namespace for reference counts only.
	 */
	if (namespace !=
	    netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto, addr_len)]) {
		struct ns_reservation *skres;
		boolean_t is_wild = _netns_is_wildcard_addr(namespace->ns_addr,
		    addr_len);
		struct ns *gns =
		    netns_global_wild[NETNS_NS_GLOBAL_IDX(proto, addr_len)];

		if (NETNS_IS_SKYWALK(flags)) {
			if ((!is_wild || exist != NULL) && gns != NULL &&
			    (skres = ns_reservation_tree_find(
				    &gns->ns_reservations, port)) != NULL &&
			    NETNS_REF_COUNT(skres, NETNS_LISTENER) == 0) {
				/*
				 * The mere existence of any non-skywalk
				 * listener wildcard entry for this
				 * protocol/port number means this must fail.
				 */
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ADDRINUSE: Duplicate wildcard");
				err = EADDRINUSE;
				goto done;
			}

			if (is_wild) {
				gns = netns_global_non_wild[
					NETNS_NS_GLOBAL_IDX(proto, addr_len)];
				VERIFY(gns != NULL);

				if (ns_reservation_tree_find(
					    &gns->ns_reservations, port) != NULL) {
					/*
					 * If Skywalk is trying to reserve a
					 * wildcard, then the mere existence of
					 * any entry in the non-wild namespace
					 * for this port means this must fail.
					 */
					SK_DF(NS_VERB_IP(addr_len) |
					    NS_VERB_PROTO(proto), "ADDRINUSE: "
					    "Wildcard with non-wild.");
					err = EADDRINUSE;
					goto done;
				}
			}
		} else {
			/*
			 * Check if Skywalk has reserved a wildcard entry.
			 * Note that the arithmetic OR here is intentional.
			 */
			if ((!is_wild || exist != NULL) && gns != NULL &&
			    (skres = ns_reservation_tree_find(
				    &gns->ns_reservations, port)) != NULL &&
			    (NETNS_REF_COUNT(skres, NETNS_SKYWALK) |
			    NETNS_REF_COUNT(skres, NETNS_LISTENER)) != 0) {
				/*
				 * BSD is trying to reserve a proto/port for
				 * which Skywalk already has a wildcard
				 * reservation.
				 */
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ADDRINUSE: BSD requesting Skywalk port");
				err = EADDRINUSE;
				goto done;
			}

			/*
			 * If BSD is trying to reserve a wildcard,
			 * ensure Skywalk has not already reserved
			 * a non-wildcard.
			 */
			if (is_wild) {
				gns = netns_global_non_wild[
					NETNS_NS_GLOBAL_IDX(proto, addr_len)];
				VERIFY(gns != NULL);

				/*
				 * Note that the arithmetic OR here is
				 * intentional.
				 */
				if ((skres = ns_reservation_tree_find(
					    &gns->ns_reservations, port)) != NULL &&
				    (NETNS_REF_COUNT(skres, NETNS_SKYWALK) |
				    NETNS_REF_COUNT(skres,
				    NETNS_LISTENER)) != 0) {
					SK_DF(NS_VERB_IP(addr_len) |
					    NS_VERB_PROTO(proto), "ADDRINUSE: "
					    "BSD wildcard with non-wild.");
					err = EADDRINUSE;
					goto done;
				}
			}
		}

		/* Per-owner checks against the other owners of this record. */
		switch (flags & NETNS_OWNER_MASK) {
		case NETNS_SKYWALK:
			/* check collision w/ BSD */
			if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_PF) > 0) {
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ERROR - Skywalk got ADDRINUSE (w/ BSD)");
				err = EADDRINUSE;
				goto done;
			}

			/* BEGIN CSTYLED */
			/*
			 * Scenarios with new Skywalk connected flow:
			 * 1. With existing Skywalk connected flow,
			 *      NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
			 *      NETNS_REF_COUNT(res, NETNS_SKYWALK) == 1
			 *    reject by failing the wild gns lookup below.
			 * 2. With existing Skywalk 3-tuple listener,
			 *      NETNS_REF_COUNT(res, NETNS_LISTENER) == 1
			 *    bypass the check below.
			 * 3. With existing Skywalk 2-tuple listener,
			 *      NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
			 *      NETNS_REF_COUNT(res, NETNS_SKYWALK) == 0
			 *    pass with successful wild gns lookup.
			 */
			/* END CSTYLED */
			if (NETNS_REF_COUNT(res, NETNS_LISTENER) == 0 &&
			    NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0) {
				/* check if covered by wild Skywalk listener */
				gns = netns_global_wild[
					NETNS_NS_GLOBAL_IDX(proto, addr_len)];
				if (gns != NULL &&
				    (skres = ns_reservation_tree_find(
					    &gns->ns_reservations, port)) != NULL &&
				    NETNS_REF_COUNT(skres, NETNS_LISTENER)
				    != 0) {
					err = 0;
					goto done;
				}
				if (addr_len == sizeof(struct in_addr)) {
					/* If address is IPv4, also check for wild IPv6 registration */
					gns = netns_global_wild[
						NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in6_addr))];
					if (gns != NULL &&
					    (skres = ns_reservation_tree_find(
						    &gns->ns_reservations, port)) != NULL &&
					    NETNS_REF_COUNT(skres, NETNS_LISTENER)
					    != 0) {
						err = 0;
						goto done;
					}
				}
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ERROR - Skywalk got ADDRINUSE "
				    "(w/ SK connected flow)");
				err = EADDRINUSE;
			}
			/*
			 * XXX: Duplicate 5-tuple flows under a Skywalk
			 * listener are currently detected by flow manager,
			 * till we implement 5-tuple-aware netns.
			 */
			break;

		case NETNS_LISTENER:
			/*
			 * A listener conflicts with any BSD, PF or listener
			 * reference on this record, and with any use of the
			 * port in the four global namespaces of this
			 * protocol (wild and non-wild, IPv4 and IPv6).
			 */
			if (NETNS_REF_COUNT(res, NETNS_BSD) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_PF) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_LISTENER) > 0 ||
			    _netns_is_port_used(netns_global_wild[
				    NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in_addr))], res, port) ||
			    _netns_is_port_used(netns_global_wild[
				    NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in6_addr))], res, port) ||
			    _netns_is_port_used(netns_global_non_wild[
				    NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in_addr))], res, port) ||
			    _netns_is_port_used(netns_global_non_wild[
				    NETNS_NS_GLOBAL_IDX(proto, sizeof(struct in6_addr))], res, port)) {
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ERROR - Listener got ADDRINUSE");
				err = EADDRINUSE;
			}
			break;

		case NETNS_BSD:
		case NETNS_PF:
			/* BSD and PF conflict with Skywalk and listener owners. */
			if (NETNS_REF_COUNT(res, NETNS_SKYWALK) > 0 ||
			    NETNS_REF_COUNT(res, NETNS_LISTENER) > 0) {
				SK_DF(NS_VERB_IP(addr_len) |
				    NS_VERB_PROTO(proto),
				    "ERROR - %s got ADDRINUSE",
				    ((flags & NETNS_OWNER_MASK) == NETNS_PF) ?
				    "PF" : "BSD");
				err = EADDRINUSE;
			}
			break;

		default:
			panic("_netns_reserve_common: invalid owner 0x%x",
			    flags & NETNS_OWNER_MASK);
			/* NOTREACHED */
			__builtin_unreachable();
		}
	}

done:
	ASSERT(res != NULL);
	if (__probable(err == 0)) {
		NETNS_REF_COUNT(res, flags)++;
		/* Check for wrap around */
		VERIFY(NETNS_REF_COUNT(res, flags) != 0);
		SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
		    NS_VERB_PROTO(namespace->ns_proto),
		    "post: %s:%s:%d err %d // flags 0x%x // refs %d sky, "
		    "%d ls, %d bsd %d pf",
		    inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
		    namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
		    PROTO_STR(namespace->ns_proto), port, err, flags,
		    NETNS_REF_COUNT(res, NETNS_SKYWALK),
		    NETNS_REF_COUNT(res, NETNS_LISTENER),
		    NETNS_REF_COUNT(res, NETNS_BSD),
		    NETNS_REF_COUNT(res, NETNS_PF));
	} else {
		/* Only undo the insertion if this call performed it. */
		if (exist == NULL) {
			RB_REMOVE(ns_reservation_tree,
			    &namespace->ns_reservations, res);
			namespace->ns_n_reservations--;
			netns_ns_reservation_free(res);
		}
	}
	return err;
}
834
835 /*
836 * Internal shared code to release ports within a specific namespace.
837 */
838 static void
_netns_release_common(struct ns * namespace,in_port_t port,uint32_t flags)839 _netns_release_common(struct ns *namespace, in_port_t port, uint32_t flags)
840 {
841 struct ns_reservation *res;
842 uint16_t refs;
843 int i;
844 #if SK_LOG
845 char tmp_ip_str[MAX_IPv6_STR_LEN];
846 #endif /* SK_LOG */
847
848 NETNS_LOCK_ASSERT_HELD();
849
850 res = ns_reservation_tree_find(&namespace->ns_reservations, port);
851 if (res == NULL) {
852 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
853 NS_VERB_PROTO(namespace->ns_proto),
854 "ERROR %s:%s:%d // flags 0x%x // not found",
855 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
856 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
857 PROTO_STR(namespace->ns_proto), port, flags);
858 VERIFY(res != NULL);
859 }
860
861 SK_DF(NS_VERB_IP(namespace->ns_addr_len) |
862 NS_VERB_PROTO(namespace->ns_proto),
863 "%s:%s:%d // flags 0x%x // refs %d sky, %d ls, %d bsd, %d pf",
864 inet_ntop(LEN_TO_AF(namespace->ns_addr_len),
865 namespace->ns_addr, tmp_ip_str, sizeof(tmp_ip_str)),
866 PROTO_STR(namespace->ns_proto), port, flags,
867 NETNS_REF_COUNT(res, NETNS_SKYWALK),
868 NETNS_REF_COUNT(res, NETNS_LISTENER),
869 NETNS_REF_COUNT(res, NETNS_BSD),
870 NETNS_REF_COUNT(res, NETNS_PF));
871
872 /* Release reservation */
873 VERIFY(NETNS_REF_COUNT(res, flags) > 0);
874 NETNS_REF_COUNT(res, flags) -= 1;
875
876 /* Clean up memory, if appropriate */
877 for (i = 0, refs = 0; i <= NETNS_OWNER_MAX && refs == 0; i++) {
878 refs |= res->nsr_refs[i];
879 }
880 if (refs == 0) {
881 RB_REMOVE(ns_reservation_tree, &namespace->ns_reservations,
882 res);
883 namespace->ns_n_reservations--;
884 NETNS_LOCK_CONVERT();
885 netns_ns_reservation_free(res);
886 netns_ns_cleanup(namespace);
887 }
888 }
889
890 __attribute__((always_inline))
891 static inline void
netns_init_global_ns(struct ns ** global_ptr,uint8_t proto,uint8_t addrlen)892 netns_init_global_ns(struct ns **global_ptr, uint8_t proto, uint8_t addrlen)
893 {
894 struct ns *namespace;
895
896 namespace = *global_ptr = netns_ns_alloc(Z_WAITOK);
897 memset(namespace->ns_addr, 0xFF, addrlen);
898 namespace->ns_addr_len = addrlen;
899 namespace->ns_proto = proto;
900 namespace->ns_is_freeable = 0;
901 }
902
903 __attribute__((always_inline))
904 static inline void
netns_clear_ifnet(struct ns_token * nstoken)905 netns_clear_ifnet(struct ns_token *nstoken)
906 {
907 #if SK_LOG
908 char tmp_ip_str[MAX_IPv6_STR_LEN];
909 #endif /* SK_LOG */
910
911 NETNS_LOCK_ASSERT_HELD();
912
913 if (nstoken->nt_ifp != NULL) {
914 SLIST_REMOVE(&nstoken->nt_ifp->if_netns_tokens, nstoken,
915 ns_token, nt_ifp_link);
916
917 SK_DF(NS_VERB_IP(nstoken->nt_addr_len) |
918 NS_VERB_PROTO(nstoken->nt_proto),
919 "%s:%s:%d // removed from ifnet %d",
920 inet_ntop(LEN_TO_AF(nstoken->nt_addr_len),
921 nstoken->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
922 PROTO_STR(nstoken->nt_proto), nstoken->nt_port,
923 nstoken->nt_ifp->if_index);
924
925 NETNS_LOCK_CONVERT();
926 ifnet_decr_iorefcnt(nstoken->nt_ifp);
927 nstoken->nt_ifp = NULL;
928 } else {
929 SLIST_REMOVE(&netns_unbound_tokens, nstoken, ns_token,
930 nt_ifp_link);
931 }
932 }
933
934 /*
935 * Internal shared code to perform a port[-range] reservation, along with all
936 * the boilerplate and sanity checks expected for a call coming in from the
937 * surrounding kernel code.
938 */
939 static int
_netns_reserve_kpi_common(struct ns * ns,netns_token * token,uint32_t * addr,uint8_t addr_len,uint8_t proto,in_port_t * port,uint32_t flags,struct ns_flow_info * nfi)940 _netns_reserve_kpi_common(struct ns *ns, netns_token *token, uint32_t *addr,
941 uint8_t addr_len, uint8_t proto, in_port_t *port, uint32_t flags,
942 struct ns_flow_info *nfi)
943 {
944 boolean_t ns_want_cleanup = (ns == NULL);
945 struct ns_token *nt;
946 int err = 0;
947 in_port_t hport;
948 #if SK_LOG
949 char tmp_ip_str[MAX_IPv6_STR_LEN];
950 #endif /* SK_LOG */
951 struct ifnet *ifp = (nfi != NULL) ? nfi->nfi_ifp : NULL;
952
953 NETNS_LOCK_ASSERT_HELD();
954
955 hport = ntohs(*port);
956
957 VERIFY((flags & NETNS_OWNER_MASK) <= NETNS_OWNER_MAX);
958 VERIFY(addr_len == sizeof(struct in_addr) ||
959 addr_len == sizeof(struct in6_addr));
960 VERIFY(proto == IPPROTO_TCP || proto == IPPROTO_UDP);
961 VERIFY(hport != 0);
962
963 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
964 "reserving %s:%s:%d // flags 0x%x // token %svalid",
965 inet_ntop(LEN_TO_AF(addr_len), addr, tmp_ip_str,
966 sizeof(tmp_ip_str)), PROTO_STR(proto), hport, flags,
967 NETNS_TOKEN_VALID(token) ? "" : "in");
968
969 /*
970 * See the documentation for NETNS_PRERESERVED in netns.h for an
971 * explanation of this block.
972 */
973 if (NETNS_TOKEN_VALID(token)) {
974 if (flags & NETNS_PRERESERVED) {
975 nt = *token;
976 VERIFY(nt->nt_addr_len == addr_len);
977 VERIFY(memcmp(nt->nt_addr, addr, addr_len) == 0);
978 VERIFY(nt->nt_proto == proto);
979 VERIFY(nt->nt_port == hport);
980 VERIFY((nt->nt_flags &
981 NETNS_RESERVATION_FLAGS | NETNS_PRERESERVED) ==
982 (flags & NETNS_RESERVATION_FLAGS));
983
984 if ((nt->nt_flags & NETNS_CONFIGURATION_FLAGS) ==
985 (flags & NETNS_CONFIGURATION_FLAGS)) {
986 SK_DF(NS_VERB_IP(nt->nt_addr_len) |
987 NS_VERB_PROTO(nt->nt_proto),
988 "%s:%s:%d // flags 0x%x -> 0x%x",
989 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
990 nt->nt_addr, tmp_ip_str,
991 sizeof(tmp_ip_str)),
992 PROTO_STR(nt->nt_proto),
993 nt->nt_port, nt->nt_flags, flags);
994 nt->nt_flags &= ~NETNS_CONFIGURATION_FLAGS;
995 nt->nt_flags |=
996 flags & NETNS_CONFIGURATION_FLAGS;
997 }
998 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
999 "token was prereserved");
1000 goto done;
1001 } else {
1002 panic("Request to overwrite valid netns token");
1003 /* NOTREACHED */
1004 __builtin_unreachable();
1005 }
1006 }
1007
1008 /*
1009 * TODO: Check range against bitmap
1010 */
1011 if (hport == 0) {
1012 /*
1013 * Caller request an arbitrary range of ports
1014 * TODO: Need to figure out how to allocate
1015 * emphemeral ports only.
1016 */
1017 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1018 "ERROR - wildcard port not yet supported");
1019 err = ENOMEM;
1020 goto done;
1021 }
1022
1023 /*
1024 * Fetch namespace for the specified address/protocol, creating
1025 * a new namespace if necessary.
1026 */
1027 if (ns == NULL) {
1028 ASSERT(ns_want_cleanup);
1029 ns = _netns_get_ns(addr, addr_len, proto, true);
1030 }
1031 if (__improbable(ns == NULL)) {
1032 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1033 "ERROR - couldn't create namespace");
1034 err = ENOMEM;
1035 goto done;
1036 }
1037
1038 /*
1039 * Make a reservation in the namespace
1040 * This will return an error if an incompatible reservation
1041 * already exists.
1042 */
1043 err = _netns_reserve_common(ns, hport, flags);
1044 if (__improbable(err != 0)) {
1045 NETNS_LOCK_CONVERT();
1046 if (ns_want_cleanup) {
1047 netns_ns_cleanup(ns);
1048 }
1049 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1050 "ERROR - reservation collision");
1051 goto done;
1052 }
1053
1054 if (!_netns_is_wildcard_addr(ns->ns_addr, addr_len)) {
1055 /* Record the reservation in the non-wild namespace */
1056 struct ns *nwns;
1057
1058 nwns = netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto,
1059 addr_len)];
1060 err = _netns_reserve_common(nwns, hport, flags);
1061 if (__improbable(err != 0)) {
1062 /* Need to free the specific namespace entry */
1063 NETNS_LOCK_CONVERT();
1064 _netns_release_common(ns, hport, flags);
1065 if (ns_want_cleanup) {
1066 netns_ns_cleanup(ns);
1067 }
1068 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1069 "ERROR - reservation collision");
1070 goto done;
1071 }
1072 }
1073
1074 nt = netns_ns_token_alloc(true, nfi != NULL ? true : false);
1075 if (nt == NULL) {
1076 SK_ERR("netns_ns_token_alloc() failed");
1077 err = ENOMEM;
1078 goto done;
1079 }
1080
1081 ASSERT(nt->nt_ifp == NULL);
1082 _netns_set_ifnet_internal(nt, ifp);
1083
1084 memcpy(nt->nt_addr, addr, addr_len);
1085 nt->nt_addr_len = addr_len;
1086 nt->nt_proto = proto;
1087 nt->nt_port = hport;
1088 nt->nt_flags = flags;
1089
1090 if (nfi != NULL) {
1091 VERIFY(nt->nt_flow_info != NULL);
1092
1093 memcpy(nt->nt_flow_info, nfi, sizeof(struct ns_flow_info));
1094 /*
1095 * The local port is passed as a separate argument
1096 */
1097 if (nfi->nfi_laddr.sa.sa_family == AF_INET) {
1098 nt->nt_flow_info->nfi_laddr.sin.sin_port = *port;
1099 } else if (nfi->nfi_laddr.sa.sa_family == AF_INET6) {
1100 nt->nt_flow_info->nfi_laddr.sin6.sin6_port = *port;
1101 }
1102 }
1103 *token = nt;
1104
1105 done:
1106 return err;
1107 }
1108
1109 /*
1110 * Kernel-facing functions
1111 */
1112
1113 int
netns_init(void)1114 netns_init(void)
1115 {
1116 VERIFY(__netns_inited == 0);
1117
1118 netns_ns_reservation_size = sizeof(struct ns_reservation);
1119 netns_ns_reservation_cache = skmem_cache_create(NETNS_NS_RESERVATION_ZONE_NAME,
1120 netns_ns_reservation_size, sizeof(uint64_t), NULL, NULL, NULL,
1121 NULL, NULL, 0);
1122 if (netns_ns_reservation_cache == NULL) {
1123 panic("%s: skmem_cache create failed (%s)", __func__,
1124 NETNS_NS_RESERVATION_ZONE_NAME);
1125 /* NOTREACHED */
1126 __builtin_unreachable();
1127 }
1128
1129 netns_ns_token_size = sizeof(struct ns_token);
1130 netns_ns_token_cache = skmem_cache_create(NETNS_NS_TOKEN_ZONE_NAME,
1131 netns_ns_token_size, sizeof(uint64_t), NULL, NULL, NULL, NULL,
1132 NULL, 0);
1133 if (netns_ns_token_cache == NULL) {
1134 panic("%s: skmem_cache create failed (%s)", __func__,
1135 NETNS_NS_TOKEN_ZONE_NAME);
1136 /* NOTREACHED */
1137 __builtin_unreachable();
1138 }
1139
1140 netns_ns_flow_info_size = sizeof(struct ns_flow_info);
1141 netns_ns_flow_info_cache = skmem_cache_create(NETNS_NS_FLOW_INFO_ZONE_NAME,
1142 netns_ns_flow_info_size, sizeof(uint64_t), NULL, NULL, NULL,
1143 NULL, NULL, 0);
1144 if (netns_ns_flow_info_cache == NULL) {
1145 panic("%s: skmem_cache create failed (%s)", __func__,
1146 NETNS_NS_FLOW_INFO_ZONE_NAME);
1147 /* NOTREACHED */
1148 __builtin_unreachable();
1149 }
1150
1151 SLIST_INIT(&netns_unbound_tokens);
1152 SLIST_INIT(&netns_all_tokens);
1153
1154 netns_n_namespaces = 0;
1155 RB_INIT(&netns_namespaces);
1156
1157 SK_D("initializing global namespaces");
1158
1159 netns_init_global_ns(
1160 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_TCP,
1161 sizeof(struct in_addr))], IPPROTO_TCP, sizeof(struct in_addr));
1162
1163 netns_init_global_ns(
1164 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_UDP,
1165 sizeof(struct in_addr))], IPPROTO_UDP, sizeof(struct in_addr));
1166
1167 netns_init_global_ns(
1168 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_TCP,
1169 sizeof(struct in6_addr))], IPPROTO_TCP, sizeof(struct in6_addr));
1170
1171 netns_init_global_ns(
1172 &netns_global_non_wild[NETNS_NS_GLOBAL_IDX(IPPROTO_UDP,
1173 sizeof(struct in6_addr))], IPPROTO_UDP, sizeof(struct in6_addr));
1174
1175 /* Done */
1176
1177 __netns_inited = 1;
1178 sk_features |= SK_FEATURE_NETNS;
1179
1180 SK_D("initialized netns");
1181
1182 return 0;
1183 }
1184
1185 void
netns_uninit(void)1186 netns_uninit(void)
1187 {
1188 if (__netns_inited == 1) {
1189 struct ns *namespace;
1190 struct ns *temp_namespace;
1191 int i;
1192
1193 RB_FOREACH_SAFE(namespace, netns_namespaces_tree,
1194 &netns_namespaces, temp_namespace) {
1195 RB_REMOVE(netns_namespaces_tree, &netns_namespaces,
1196 namespace);
1197 netns_n_namespaces--;
1198 netns_ns_free(namespace);
1199 }
1200
1201 for (i = 0; i < NETNS_N_GLOBAL; i++) {
1202 netns_ns_free(netns_global_non_wild[i]);
1203 }
1204
1205 if (netns_ns_flow_info_cache != NULL) {
1206 skmem_cache_destroy(netns_ns_flow_info_cache);
1207 netns_ns_flow_info_cache = NULL;
1208 }
1209 if (netns_ns_token_cache != NULL) {
1210 skmem_cache_destroy(netns_ns_token_cache);
1211 netns_ns_token_cache = NULL;
1212 }
1213 if (netns_ns_reservation_cache != NULL) {
1214 skmem_cache_destroy(netns_ns_reservation_cache);
1215 netns_ns_reservation_cache = NULL;
1216 }
1217
1218 __netns_inited = 0;
1219 sk_features &= ~SK_FEATURE_NETNS;
1220
1221 SK_D("uninitialized netns");
1222 }
1223 }
1224
1225 void
netns_reap_caches(boolean_t purge)1226 netns_reap_caches(boolean_t purge)
1227 {
1228 /* these aren't created unless netns is enabled */
1229 if (netns_ns_token_cache != NULL) {
1230 skmem_cache_reap_now(netns_ns_token_cache, purge);
1231 }
1232 if (netns_ns_reservation_cache != NULL) {
1233 skmem_cache_reap_now(netns_ns_reservation_cache, purge);
1234 }
1235 if (netns_ns_flow_info_cache != NULL) {
1236 skmem_cache_reap_now(netns_ns_flow_info_cache, purge);
1237 }
1238 }
1239
1240 boolean_t
netns_is_enabled(void)1241 netns_is_enabled(void)
1242 {
1243 return __netns_inited == 1;
1244 }
1245
1246 int
netns_reserve(netns_token * token,uint32_t * addr,uint8_t addr_len,uint8_t proto,in_port_t port,uint32_t flags,struct ns_flow_info * nfi)1247 netns_reserve(netns_token *token, uint32_t *addr, uint8_t addr_len,
1248 uint8_t proto, in_port_t port, uint32_t flags, struct ns_flow_info *nfi)
1249 {
1250 int err = 0;
1251 #if SK_LOG
1252 char tmp_ip_str[MAX_IPv6_STR_LEN];
1253 #endif /* SK_LOG */
1254
1255 if (__netns_inited == 0) {
1256 *token = NULL;
1257 return err;
1258 }
1259
1260 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1261 SK_ERR("netns doesn't support non TCP/UDP protocol");
1262 return ENOTSUP;
1263 }
1264
1265 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1266 "%s:%s:%d // flags 0x%x", inet_ntop(LEN_TO_AF(addr_len), addr,
1267 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto), ntohs(port),
1268 flags);
1269
1270 /*
1271 * Check wether the process is allowed to bind to a restricted port
1272 */
1273 if (!current_task_can_use_restricted_in_port(port,
1274 proto, flags)) {
1275 *token = NULL;
1276 return EADDRINUSE;
1277 }
1278
1279 NETNS_LOCK_SPIN();
1280 err = _netns_reserve_kpi_common(NULL, token, addr, addr_len,
1281 proto, &port, flags, nfi);
1282 NETNS_UNLOCK();
1283
1284 return err;
1285 }
1286
/*
 * Backing variables of the net.inet.{tcp,udp}.randomize_ports sysctls;
 * defined outside this file.
 */
extern int tcp_use_randomport;
extern int udp_use_randomport;
1290
1291 int
netns_reserve_ephemeral(netns_token * token,uint32_t * addr,uint8_t addr_len,uint8_t proto,in_port_t * port,uint32_t flags,struct ns_flow_info * nfi)1292 netns_reserve_ephemeral(netns_token *token, uint32_t *addr, uint8_t addr_len,
1293 uint8_t proto, in_port_t *port, uint32_t flags, struct ns_flow_info *nfi)
1294 {
1295 int err = 0;
1296 in_port_t first = (in_port_t)ipport_firstauto;
1297 in_port_t last = (in_port_t)ipport_lastauto;
1298 in_port_t rand_port;
1299 in_port_t last_port;
1300 in_port_t n_last_port;
1301 struct ns *namespace;
1302 boolean_t count_up = true;
1303 boolean_t use_randomport = (proto == IPPROTO_TCP) ?
1304 tcp_use_randomport : udp_use_randomport;
1305 #if SK_LOG
1306 char tmp_ip_str[MAX_IPv6_STR_LEN];
1307 #endif /* SK_LOG */
1308
1309 if (__netns_inited == 0) {
1310 *token = NULL;
1311 return err;
1312 }
1313
1314 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) {
1315 SK_ERR("netns doesn't support non TCP/UDP protocol");
1316 return ENOTSUP;
1317 }
1318
1319 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1320 "%s:%s:%d // flags 0x%x", inet_ntop(LEN_TO_AF(addr_len), addr,
1321 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto), ntohs(*port),
1322 flags);
1323
1324 NETNS_LOCK_SPIN();
1325
1326 namespace = _netns_get_ns(addr, addr_len, proto, true);
1327 if (namespace == NULL) {
1328 err = ENOMEM;
1329 NETNS_UNLOCK();
1330 return err;
1331 }
1332
1333 if (proto == IPPROTO_UDP) {
1334 if (UINT16_MAX - namespace->ns_n_reservations <
1335 NETNS_NS_UDP_EPHEMERAL_RESERVE) {
1336 SK_ERR("UDP ephemeral port not available"
1337 "(less than 4096 UDP ports left)");
1338 err = EADDRNOTAVAIL;
1339 NETNS_UNLOCK();
1340 return err;
1341 }
1342 }
1343
1344 if (first == last) {
1345 rand_port = first;
1346 } else {
1347 if (use_randomport) {
1348 NETNS_LOCK_CONVERT();
1349 read_frandom(&rand_port, sizeof(rand_port));
1350
1351 if (first > last) {
1352 rand_port = last + (rand_port %
1353 (first - last));
1354 count_up = false;
1355 } else {
1356 rand_port = first + (rand_port %
1357 (last - first));
1358 }
1359 } else {
1360 if (first > last) {
1361 rand_port =
1362 namespace->ns_last_ephemeral_port_down - 1;
1363 if (rand_port < last || rand_port > first) {
1364 rand_port = last;
1365 }
1366 count_up = false;
1367 } else {
1368 rand_port =
1369 namespace->ns_last_ephemeral_port_up + 1;
1370 if (rand_port < first || rand_port > last) {
1371 rand_port = first;
1372 }
1373 }
1374 }
1375 }
1376 last_port = rand_port;
1377 n_last_port = htons(last_port);
1378
1379 while (true) {
1380 if (n_last_port == 0) {
1381 SK_ERR("ephemeral port search range includes 0");
1382 err = EINVAL;
1383 break;
1384 }
1385
1386 /*
1387 * Skip if this is a restricted port as we do not want to
1388 * restricted ports as ephemeral
1389 */
1390 if (!IS_RESTRICTED_IN_PORT(n_last_port)) {
1391 err = _netns_reserve_kpi_common(namespace, token, addr,
1392 addr_len, proto, &n_last_port, flags, nfi);
1393 if (err == 0 || err != EADDRINUSE) {
1394 break;
1395 }
1396 }
1397 if (count_up) {
1398 last_port++;
1399 if (last_port < first || last_port > last) {
1400 last_port = first;
1401 }
1402 } else {
1403 last_port--;
1404 if (last_port < last || last_port > first) {
1405 last_port = last;
1406 }
1407 }
1408 n_last_port = htons(last_port);
1409
1410 if (last_port == rand_port || first == last) {
1411 SK_ERR("couldn't find free ephemeral port");
1412 err = EADDRNOTAVAIL;
1413 break;
1414 }
1415 }
1416
1417 if (err == 0) {
1418 *port = n_last_port;
1419 if (count_up) {
1420 namespace->ns_last_ephemeral_port_up = last_port;
1421 } else {
1422 namespace->ns_last_ephemeral_port_down = last_port;
1423 }
1424 } else {
1425 netns_ns_cleanup(namespace);
1426 }
1427
1428 NETNS_UNLOCK();
1429
1430 return err;
1431 }
1432
1433 void
netns_release(netns_token * token)1434 netns_release(netns_token *token)
1435 {
1436 struct ns *ns;
1437 struct ns_token *nt;
1438 uint8_t proto, addr_len;
1439 #if SK_LOG
1440 char tmp_ip_str[MAX_IPv6_STR_LEN];
1441 #endif /* SK_LOG */
1442
1443 if (!NETNS_TOKEN_VALID(token)) {
1444 return;
1445 }
1446
1447 if (__netns_inited == 0) {
1448 *token = NULL;
1449 return;
1450 }
1451
1452 NETNS_LOCK_SPIN();
1453
1454 nt = *token;
1455 *token = NULL;
1456
1457 VERIFY((nt->nt_flags & NETNS_OWNER_MASK) <= NETNS_OWNER_MAX);
1458 VERIFY(nt->nt_addr_len == sizeof(struct in_addr) ||
1459 nt->nt_addr_len == sizeof(struct in6_addr));
1460 VERIFY(nt->nt_proto == IPPROTO_TCP || nt->nt_proto == IPPROTO_UDP);
1461
1462 addr_len = nt->nt_addr_len;
1463 proto = nt->nt_proto;
1464
1465 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1466 "releasing %s:%s:%d",
1467 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1468 tmp_ip_str, sizeof(tmp_ip_str)), PROTO_STR(proto),
1469 nt->nt_port);
1470
1471 if (!_netns_is_wildcard_addr(nt->nt_addr, addr_len)) {
1472 /* Remove from global non-wild namespace */
1473
1474 ns = netns_global_non_wild[NETNS_NS_GLOBAL_IDX(proto,
1475 addr_len)];
1476 VERIFY(ns != NULL);
1477
1478 _netns_release_common(ns, nt->nt_port, nt->nt_flags);
1479 }
1480
1481 ns = _netns_get_ns(nt->nt_addr, addr_len, proto, false);
1482 VERIFY(ns != NULL);
1483 _netns_release_common(ns, nt->nt_port, nt->nt_flags);
1484
1485 netns_clear_ifnet(nt);
1486 netns_ns_token_free(nt);
1487
1488 NETNS_UNLOCK();
1489 }
1490
1491 int
netns_change_addr(netns_token * token,uint32_t * addr,uint8_t addr_len)1492 netns_change_addr(netns_token *token, uint32_t *addr, uint8_t addr_len)
1493 {
1494 int err = 0;
1495 struct ns *old_namespace;
1496 struct ns *new_namespace;
1497 struct ns *global_namespace;
1498 struct ns_token *nt;
1499 uint8_t proto;
1500 #if SK_LOG
1501 char tmp_ip_str_1[MAX_IPv6_STR_LEN];
1502 char tmp_ip_str_2[MAX_IPv6_STR_LEN];
1503 #endif /* SK_LOG */
1504
1505 if (__netns_inited == 0) {
1506 return 0;
1507 }
1508
1509 NETNS_LOCK();
1510
1511 VERIFY(NETNS_TOKEN_VALID(token));
1512
1513 nt = *token;
1514
1515 VERIFY((nt->nt_flags & NETNS_OWNER_MASK) == NETNS_BSD);
1516 VERIFY(nt->nt_addr_len == sizeof(struct in_addr) ||
1517 nt->nt_addr_len == sizeof(struct in6_addr));
1518 VERIFY(nt->nt_proto == IPPROTO_TCP || nt->nt_proto == IPPROTO_UDP);
1519
1520 proto = nt->nt_proto;
1521
1522 #if SK_LOG
1523 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1524 tmp_ip_str_1, sizeof(tmp_ip_str_1));
1525 inet_ntop(LEN_TO_AF(addr_len), addr, tmp_ip_str_2,
1526 sizeof(tmp_ip_str_2));
1527 #endif /* SK_LOG */
1528 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1529 "changing address for %s:%d from %s to %s",
1530 PROTO_STR(proto), nt->nt_port, tmp_ip_str_1,
1531 tmp_ip_str_2);
1532
1533 if (nt->nt_addr_len == addr_len &&
1534 memcmp(nt->nt_addr, addr, nt->nt_addr_len) == 0) {
1535 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1536 "address didn't change, exiting early");
1537 goto done;
1538 }
1539
1540 old_namespace = _netns_get_ns(nt->nt_addr, nt->nt_addr_len, proto,
1541 false);
1542 VERIFY(old_namespace != NULL);
1543
1544 new_namespace = _netns_get_ns(addr, addr_len, proto, true);
1545 if (new_namespace == NULL) {
1546 err = ENOMEM;
1547 goto done;
1548 }
1549
1550 /* Acquire reservation in new namespace */
1551 if ((err = _netns_reserve_common(new_namespace, nt->nt_port,
1552 nt->nt_flags))) {
1553 NETNS_LOCK_CONVERT();
1554 netns_ns_cleanup(new_namespace);
1555 SK_DF(NS_VERB_IP(addr_len) | NS_VERB_PROTO(proto),
1556 "ERROR - reservation collision under new namespace");
1557 goto done;
1558 }
1559
1560 /* Release from old namespace */
1561 _netns_release_common(old_namespace, nt->nt_port, nt->nt_flags);
1562
1563 if (!_netns_is_wildcard_addr(nt->nt_addr, nt->nt_addr_len)) {
1564 /*
1565 * Old address is non-wildcard.
1566 * Remove old reservation from global non-wild namespace
1567 */
1568 global_namespace = netns_global_non_wild[
1569 NETNS_NS_GLOBAL_IDX(proto, nt->nt_addr_len)];
1570 VERIFY(global_namespace != NULL);
1571
1572 _netns_release_common(global_namespace, nt->nt_port,
1573 nt->nt_flags);
1574 }
1575
1576 if (!_netns_is_wildcard_addr(addr, addr_len)) {
1577 /*
1578 * New address is non-wildcard.
1579 * Record new reservation in global non-wild namespace
1580 */
1581 global_namespace = netns_global_non_wild[
1582 NETNS_NS_GLOBAL_IDX(proto, addr_len)];
1583 VERIFY(global_namespace != NULL);
1584
1585 if ((err = _netns_reserve_common(global_namespace,
1586 nt->nt_port, nt->nt_flags)) != 0) {
1587 SK_DF(NS_VERB_IP(addr_len) |
1588 NS_VERB_PROTO(proto),
1589 "ERROR - reservation collision under new "
1590 "global namespace");
1591 /* XXX: Should not fail. Maybe assert instead */
1592 goto done;
1593 }
1594 }
1595
1596 memcpy(nt->nt_addr, addr, addr_len);
1597 nt->nt_addr_len = addr_len;
1598
1599 done:
1600 NETNS_UNLOCK();
1601 return err;
1602 }
1603
1604 static void
_netns_set_ifnet_internal(struct ns_token * nt,struct ifnet * ifp)1605 _netns_set_ifnet_internal(struct ns_token *nt, struct ifnet *ifp)
1606 {
1607 #if SK_LOG
1608 char tmp_ip_str[MAX_IPv6_STR_LEN];
1609 #endif /* SK_LOG */
1610
1611 NETNS_LOCK_ASSERT_HELD();
1612
1613 if (ifp != NULL && ifnet_is_attached(ifp, 1)) {
1614 nt->nt_ifp = ifp;
1615 SLIST_INSERT_HEAD(&ifp->if_netns_tokens, nt, nt_ifp_link);
1616
1617 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1618 "%s:%s:%d // added to ifnet %d",
1619 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
1620 nt->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
1621 PROTO_STR(nt->nt_proto), nt->nt_port,
1622 ifp->if_index);
1623 } else {
1624 SLIST_INSERT_HEAD(&netns_unbound_tokens, nt, nt_ifp_link);
1625 }
1626 }
1627
1628 void
netns_set_ifnet(netns_token * token,ifnet_t ifp)1629 netns_set_ifnet(netns_token *token, ifnet_t ifp)
1630 {
1631 struct ns_token *nt;
1632 #if SK_LOG
1633 char tmp_ip_str[MAX_IPv6_STR_LEN];
1634 #endif /* SK_LOG */
1635
1636 if (__netns_inited == 0) {
1637 return;
1638 }
1639
1640 NETNS_LOCK();
1641
1642 VERIFY(NETNS_TOKEN_VALID(token));
1643
1644 nt = *token;
1645
1646 if (nt->nt_ifp == ifp) {
1647 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1648 "%s:%s:%d // ifnet already %d, exiting early",
1649 inet_ntop(LEN_TO_AF(nt->nt_addr_len),
1650 nt->nt_addr, tmp_ip_str, sizeof(tmp_ip_str)),
1651 PROTO_STR(nt->nt_proto), nt->nt_port,
1652 ifp ? ifp->if_index : -1);
1653 NETNS_UNLOCK();
1654 return;
1655 }
1656
1657 netns_clear_ifnet(nt);
1658
1659 _netns_set_ifnet_internal(nt, ifp);
1660
1661 NETNS_UNLOCK();
1662 }
1663
1664 void
netns_ifnet_detach(ifnet_t ifp)1665 netns_ifnet_detach(ifnet_t ifp)
1666 {
1667 struct ns_token *token, *tmp_token;
1668
1669 if (__netns_inited == 0) {
1670 return;
1671 }
1672
1673 NETNS_LOCK();
1674
1675 SLIST_FOREACH_SAFE(token, &ifp->if_netns_tokens, nt_ifp_link,
1676 tmp_token) {
1677 netns_clear_ifnet(token);
1678 SLIST_INSERT_HEAD(&netns_unbound_tokens, token, nt_ifp_link);
1679 }
1680
1681 NETNS_UNLOCK();
1682 }
1683
1684 static void
_netns_set_state(netns_token * token,uint32_t state)1685 _netns_set_state(netns_token *token, uint32_t state)
1686 {
1687 struct ns_token *nt;
1688 #if SK_LOG
1689 char tmp_ip_str[MAX_IPv6_STR_LEN];
1690 #endif /* SK_LOG */
1691
1692 if (__netns_inited == 0) {
1693 return;
1694 }
1695
1696 NETNS_LOCK();
1697 VERIFY(NETNS_TOKEN_VALID(token));
1698
1699 nt = *token;
1700 nt->nt_state |= state;
1701
1702 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1703 "%s:%s:%d // state 0x%b",
1704 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1705 tmp_ip_str, sizeof(tmp_ip_str)),
1706 PROTO_STR(nt->nt_proto), nt->nt_port, state, NETNS_STATE_BITS);
1707
1708 NETNS_UNLOCK();
1709 }
1710
1711 void
netns_half_close(netns_token * token)1712 netns_half_close(netns_token *token)
1713 {
1714 _netns_set_state(token, NETNS_STATE_HALFCLOSED);
1715 }
1716
1717 void
netns_withdraw(netns_token * token)1718 netns_withdraw(netns_token *token)
1719 {
1720 _netns_set_state(token, NETNS_STATE_WITHDRAWN);
1721 }
1722
1723 int
netns_get_flow_info(netns_token * token,struct ns_flow_info * nfi)1724 netns_get_flow_info(netns_token *token,
1725 struct ns_flow_info *nfi)
1726 {
1727 if (__netns_inited == 0) {
1728 return ENOTSUP;
1729 }
1730
1731 NETNS_LOCK();
1732 if (!NETNS_TOKEN_VALID(token) ||
1733 nfi == NULL) {
1734 NETNS_UNLOCK();
1735 return EINVAL;
1736 }
1737
1738 struct ns_token *nt = *token;
1739 if (nt->nt_flow_info == NULL) {
1740 NETNS_UNLOCK();
1741 return ENOENT;
1742 }
1743
1744 memcpy(nfi, nt->nt_flow_info, sizeof(struct ns_flow_info));
1745 NETNS_UNLOCK();
1746
1747 return 0;
1748 }
1749
1750 void
netns_change_flags(netns_token * token,uint32_t set_flags,uint32_t clear_flags)1751 netns_change_flags(netns_token *token, uint32_t set_flags,
1752 uint32_t clear_flags)
1753 {
1754 struct ns_token *nt;
1755 #if SK_LOG
1756 char tmp_ip_str[MAX_IPv6_STR_LEN];
1757 #endif /* SK_LOG */
1758
1759 if (__netns_inited == 0) {
1760 return;
1761 }
1762
1763 NETNS_LOCK();
1764
1765 VERIFY(NETNS_TOKEN_VALID(token));
1766
1767 nt = *token;
1768
1769 VERIFY(!((set_flags | clear_flags) & NETNS_RESERVATION_FLAGS));
1770 /* TODO: verify set and clear flags don't overlap? */
1771
1772 SK_DF(NS_VERB_IP(nt->nt_addr_len) | NS_VERB_PROTO(nt->nt_proto),
1773 "%s:%s:%d // flags 0x%x -> 0x%x",
1774 inet_ntop(LEN_TO_AF(nt->nt_addr_len), nt->nt_addr,
1775 tmp_ip_str, sizeof(tmp_ip_str)),
1776 PROTO_STR(nt->nt_proto), nt->nt_port, nt->nt_flags,
1777 nt->nt_flags | set_flags & ~clear_flags);
1778
1779 nt->nt_flags |= set_flags;
1780 nt->nt_flags &= ~clear_flags;
1781
1782 NETNS_UNLOCK();
1783 }
1784
1785 /*
1786 * Port offloading KPI
1787 */
1788 static inline void
netns_local_port_scan_flow_entry(struct flow_entry * fe,protocol_family_t protocol,u_int32_t flags,u_int8_t * bitfield)1789 netns_local_port_scan_flow_entry(struct flow_entry *fe, protocol_family_t protocol,
1790 u_int32_t flags, u_int8_t *bitfield)
1791 {
1792 struct ns_token *token = fe->fe_port_reservation;
1793 boolean_t iswildcard = false;
1794
1795 if (fe == NULL || token == NULL) {
1796 return;
1797 }
1798
1799 /*
1800 * We are only interested in active flows over skywalk channels
1801 */
1802 if ((token->nt_flags & NETNS_OWNER_MASK) != NETNS_SKYWALK) {
1803 return;
1804 }
1805
1806 if (token->nt_state & NETNS_STATE_WITHDRAWN) {
1807 return;
1808 }
1809
1810 if (!(flags & IFNET_GET_LOCAL_PORTS_ANYTCPSTATEOK) &&
1811 (flags & IFNET_GET_LOCAL_PORTS_ACTIVEONLY) &&
1812 (token->nt_state & NETNS_STATE_HALFCLOSED)) {
1813 return;
1814 }
1815
1816 VERIFY(token->nt_addr_len == sizeof(struct in_addr) ||
1817 token->nt_addr_len == sizeof(struct in6_addr));
1818
1819 if (token->nt_addr_len == sizeof(struct in_addr)) {
1820 if (protocol == PF_INET6) {
1821 return;
1822 }
1823
1824 iswildcard = token->nt_inaddr.s_addr == INADDR_ANY;
1825 } else if (token->nt_addr_len == sizeof(struct in6_addr)) {
1826 if (protocol == PF_INET) {
1827 return;
1828 }
1829
1830 iswildcard = IN6_IS_ADDR_UNSPECIFIED(
1831 &token->nt_in6addr);
1832 }
1833 if (!(flags & IFNET_GET_LOCAL_PORTS_WILDCARDOK) && iswildcard) {
1834 return;
1835 }
1836
1837 if ((flags & IFNET_GET_LOCAL_PORTS_TCPONLY) &&
1838 token->nt_proto == IPPROTO_UDP) {
1839 return;
1840 }
1841 if ((flags & IFNET_GET_LOCAL_PORTS_UDPONLY) &&
1842 token->nt_proto == IPPROTO_TCP) {
1843 return;
1844 }
1845
1846 if (!(flags & IFNET_GET_LOCAL_PORTS_NOWAKEUPOK) &&
1847 (token->nt_flags & NETNS_NOWAKEFROMSLEEP)) {
1848 return;
1849 }
1850
1851 if ((flags & IFNET_GET_LOCAL_PORTS_RECVANYIFONLY) &&
1852 !(token->nt_flags & NETNS_RECVANYIF)) {
1853 return;
1854 }
1855
1856 if ((flags & IFNET_GET_LOCAL_PORTS_EXTBGIDLEONLY) &&
1857 !(token->nt_flags & NETNS_EXTBGIDLE)) {
1858 return;
1859 }
1860
1861 if (token->nt_ifp != NULL && token->nt_flow_info != NULL) {
1862 bitstr_set(bitfield, token->nt_port);
1863 (void) if_ports_used_add_flow_entry(fe, token->nt_ifp->if_index,
1864 token->nt_flow_info, token->nt_flags);
1865 } else {
1866 SK_ERR("%s: unknown owner port %u"
1867 " nt_flags 0x%x ifindex %u nt_flow_info %p\n",
1868 __func__, token->nt_port,
1869 token->nt_flags,
1870 token->nt_ifp != NULL ? token->nt_ifp->if_index : 0,
1871 token->nt_flow_info);
1872 }
1873 }
1874
1875 static void
netns_get_if_local_ports(ifnet_t ifp,protocol_family_t protocol,u_int32_t flags,u_int8_t * bitfield)1876 netns_get_if_local_ports(ifnet_t ifp, protocol_family_t protocol,
1877 u_int32_t flags, u_int8_t *bitfield)
1878 {
1879 struct nx_flowswitch *fsw = NULL;
1880
1881 if (ifp == NULL || ifp->if_na == NULL) {
1882 return;
1883 }
1884 /* Ensure that the interface is attached and won't detach */
1885 if (!ifnet_is_attached(ifp, 1)) {
1886 return;
1887 }
1888 fsw = fsw_ifp_to_fsw(ifp);
1889 if (fsw == NULL) {
1890 goto done;
1891 }
1892 FSW_RLOCK(fsw);
1893 flow_mgr_foreach_flow(fsw->fsw_flow_mgr, ^(struct flow_entry *_fe) {
1894 netns_local_port_scan_flow_entry(_fe, protocol, flags,
1895 bitfield);
1896 });
1897 FSW_UNLOCK(fsw);
1898 done:
1899 ifnet_decr_iorefcnt(ifp);
1900 }
1901
1902 errno_t
netns_get_local_ports(ifnet_t ifp,protocol_family_t protocol,u_int32_t flags,u_int8_t * bitfield)1903 netns_get_local_ports(ifnet_t ifp, protocol_family_t protocol,
1904 u_int32_t flags, u_int8_t *bitfield)
1905 {
1906 if (__netns_inited == 0) {
1907 return 0;
1908 }
1909 if (ifp != NULL) {
1910 netns_get_if_local_ports(ifp, protocol, flags, bitfield);
1911 } else {
1912 errno_t error;
1913 ifnet_t *ifp_list;
1914 uint32_t count, i;
1915
1916 error = ifnet_list_get_all(IFNET_FAMILY_ANY, &ifp_list, &count);
1917 if (error != 0) {
1918 os_log_error(OS_LOG_DEFAULT,
1919 "%s: ifnet_list_get_all() failed %d",
1920 __func__, error);
1921 return error;
1922 }
1923 for (i = 0; i < count; i++) {
1924 if (TAILQ_EMPTY(&ifp_list[i]->if_addrhead)) {
1925 continue;
1926 }
1927 netns_get_if_local_ports(ifp_list[i], protocol, flags,
1928 bitfield);
1929 }
1930 ifnet_list_free(ifp_list);
1931 }
1932
1933 return 0;
1934 }
1935
1936 uint32_t
netns_find_anyres_byaddr(struct ifaddr * ifa,uint8_t proto)1937 netns_find_anyres_byaddr(struct ifaddr *ifa, uint8_t proto)
1938 {
1939 int result = 0;
1940 int ifa_addr_len;
1941 struct ns_token *token;
1942 struct ifnet *ifp = ifa->ifa_ifp;
1943 struct sockaddr *ifa_addr = ifa->ifa_addr;
1944
1945 if (__netns_inited == 0) {
1946 return ENOTSUP;
1947 }
1948
1949 if ((ifa_addr->sa_family != AF_INET) &&
1950 (ifa_addr->sa_family != AF_INET6)) {
1951 return 0;
1952 }
1953
1954 ifa_addr_len = (ifa_addr->sa_family == AF_INET) ?
1955 sizeof(struct in_addr) : sizeof(struct in6_addr);
1956
1957 NETNS_LOCK();
1958
1959 SLIST_FOREACH(token, &ifp->if_netns_tokens, nt_ifp_link) {
1960 if ((token->nt_flags & NETNS_OWNER_MASK) == NETNS_PF) {
1961 continue;
1962 }
1963 if (token->nt_addr_len != ifa_addr_len) {
1964 continue;
1965 }
1966 if (token->nt_proto != proto) {
1967 continue;
1968 }
1969 if (ifa_addr->sa_family == AF_INET) {
1970 if (token->nt_inaddr.s_addr ==
1971 (satosin(ifa->ifa_addr))->sin_addr.s_addr) {
1972 result = 1;
1973 break;
1974 }
1975 } else if (ifa_addr->sa_family == AF_INET6) {
1976 if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa),
1977 &token->nt_in6addr)) {
1978 result = 1;
1979 break;
1980 }
1981 }
1982 }
1983
1984 NETNS_UNLOCK();
1985 return result;
1986 }
1987
1988 static uint32_t
_netns_lookup_ns_n_reservations(uint32_t * addr,uint8_t addr_len,uint8_t proto)1989 _netns_lookup_ns_n_reservations(uint32_t *addr, uint8_t addr_len, uint8_t proto)
1990 {
1991 uint32_t ns_n_reservations = 0;
1992 NETNS_LOCK_SPIN();
1993 struct ns *namespace = _netns_get_ns(addr, addr_len, proto, true);
1994 if (namespace != NULL) {
1995 ns_n_reservations = namespace->ns_n_reservations;
1996 }
1997 NETNS_UNLOCK();
1998 return ns_n_reservations;
1999 }
2000
/* Number of `proto' reservations in the namespace of an IPv4 address. */
uint32_t
netns_lookup_reservations_count_in(struct in_addr addr, uint8_t proto)
{
	const uint8_t addr_len = sizeof(struct in_addr);

	return _netns_lookup_ns_n_reservations(&addr.s_addr, addr_len, proto);
}
2006
2007 uint32_t
netns_lookup_reservations_count_in6(struct in6_addr addr,uint8_t proto)2008 netns_lookup_reservations_count_in6(struct in6_addr addr, uint8_t proto)
2009 {
2010 if (IN6_IS_SCOPE_EMBED(&addr)) {
2011 addr.s6_addr16[1] = 0;
2012 }
2013 return _netns_lookup_ns_n_reservations(&addr.s6_addr32[0], sizeof(struct in6_addr), proto);
2014 }
2015
2016 /*
2017 * Sysctl interface
2018 */
2019
2020 static int netns_ctl_dump_all SYSCTL_HANDLER_ARGS;
2021
2022 SYSCTL_NODE(_kern_skywalk, OID_AUTO, netns, CTLFLAG_RW | CTLFLAG_LOCKED,
2023 0, "Netns interface");
2024
2025 SYSCTL_PROC(_kern_skywalk_stats, OID_AUTO, netns,
2026 CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED,
2027 0, 0, netns_ctl_dump_all, "-",
2028 "Namespace contents (struct netns_ctl_dump_header, "
2029 "skywalk/os_stats_private.h)");
2030
2031 static int
netns_ctl_write_ns(struct sysctl_req * req,struct ns * namespace,boolean_t is_global)2032 netns_ctl_write_ns(struct sysctl_req *req, struct ns *namespace,
2033 boolean_t is_global)
2034 {
2035 struct ns_reservation *res;
2036 struct netns_ctl_dump_header response_header;
2037 struct netns_ctl_dump_record response_record;
2038 int err;
2039
2040 /* Fill out header */
2041 memset(&response_header, 0, sizeof(response_header));
2042 response_header.ncdh_n_records = namespace->ns_n_reservations;
2043 response_header.ncdh_proto = namespace->ns_proto;
2044
2045 if (is_global) {
2046 response_header.ncdh_addr_len = 0;
2047 } else {
2048 response_header.ncdh_addr_len = namespace->ns_addr_len;
2049 }
2050 memcpy(response_header.ncdh_addr, namespace->ns_addr,
2051 namespace->ns_addr_len);
2052
2053 err = SYSCTL_OUT(req, &response_header, sizeof(response_header));
2054 if (err) {
2055 return err;
2056 }
2057
2058 /* Fill out records */
2059 RB_FOREACH(res, ns_reservation_tree, &namespace->ns_reservations) {
2060 memset(&response_record, 0, sizeof(response_record));
2061 response_record.ncdr_port = res->nsr_port;
2062 response_record.ncdr_port_end = 0;
2063 response_record.ncdr_listener_refs =
2064 NETNS_REF_COUNT(res, NETNS_LISTENER);
2065 response_record.ncdr_skywalk_refs =
2066 NETNS_REF_COUNT(res, NETNS_SKYWALK);
2067 response_record.ncdr_bsd_refs =
2068 NETNS_REF_COUNT(res, NETNS_BSD);
2069 response_record.ncdr_pf_refs =
2070 NETNS_REF_COUNT(res, NETNS_PF);
2071 err = SYSCTL_OUT(req, &response_record,
2072 sizeof(response_record));
2073 if (err) {
2074 return err;
2075 }
2076 }
2077
2078 return 0;
2079 }
2080
2081 static int
2082 netns_ctl_dump_all SYSCTL_HANDLER_ARGS
2083 {
2084 #pragma unused(oidp, arg1, arg2)
2085 struct ns *namespace;
2086 int i, err = 0;
2087
2088 if (!kauth_cred_issuser(kauth_cred_get())) {
2089 return EPERM;
2090 }
2091
2092 if (__netns_inited == 0) {
2093 return ENOTSUP;
2094 }
2095
2096 NETNS_LOCK();
2097
2098 for (i = 0; i < NETNS_N_GLOBAL; i++) {
2099 err = netns_ctl_write_ns(req, netns_global_non_wild[i], true);
2100 if (err) {
2101 goto done;
2102 }
2103 }
2104
2105 RB_FOREACH(namespace, netns_namespaces_tree, &netns_namespaces) {
2106 err = netns_ctl_write_ns(req, namespace, false);
2107 if (err) {
2108 goto done;
2109 }
2110 }
2111
2112 /*
2113 * If this is just a request for length, add slop because
2114 * this is dynamically changing data
2115 */
2116 if (req->oldptr == USER_ADDR_NULL) {
2117 req->oldidx += 20 * sizeof(struct netns_ctl_dump_record);
2118 }
2119
2120 done:
2121 NETNS_UNLOCK();
2122 return err;
2123 }
2124 /* CSTYLED */
2125