1 /*
2 * Copyright (c) 2004-2023 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $ */
30 /*
31 * Copyright 2001 Wasabi Systems, Inc.
32 * All rights reserved.
33 *
34 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed for the NetBSD Project by
47 * Wasabi Systems, Inc.
48 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49 * or promote products derived from this software without specific prior
50 * written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
56 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62 * POSSIBILITY OF SUCH DAMAGE.
63 */
64
65 /*
66 * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67 * All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88 * POSSIBILITY OF SUCH DAMAGE.
89 *
90 * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91 */
92
93 /*
94 * Network interface bridge support.
95 *
96 * TODO:
97 *
98 * - Currently only supports Ethernet-like interfaces (Ethernet,
99 * 802.11, VLANs on Ethernet, etc.) Figure out a nice way
100 * to bridge other types of interfaces (FDDI-FDDI, and maybe
 * consider heterogeneous bridges).
102 *
103 * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104 */
105
106 #include <sys/cdefs.h>
107
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123
124 #include <sys/kauth.h>
125
126 #include <kern/thread_call.h>
127
128 #include <libkern/libkern.h>
129
130 #include <kern/zalloc.h>
131
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176
177 #include <os/log.h>
178
179 /*
180 * if_bridge_debug, BR_DBGF_*
181 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182 * to enable additional logs for the corresponding bridge function
183 * - "sysctl net.link.bridge.debug" controls the value of
184 * 'if_bridge_debug'
185 */
/* Bitmask of BR_DBGF_* flags; tested with BRIDGE_DBGF_ENABLED() */
static uint32_t if_bridge_debug = 0;
/* Debug log categories: each flag occupies its own bit of if_bridge_debug */
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400
199 /*
200 * if_bridge_log_level
201 * - 'if_bridge_log_level' ensures that by default important logs are
202 * logged regardless of if_bridge_debug by comparing the log level
203 * in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
205 * 'if_bridge_log_level'
206 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
207 * logs must use LOG_NOTICE to ensure they appear by default
208 */
/* Messages whose level is <= this threshold are logged unconditionally */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * BRIDGE_DBGF_ENABLED
 * - evaluates to true if any bit of '__flag' is set in 'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_MBUF) is masked as a whole instead of being
 *   split by operator precedence
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)

/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted when '__level' <= if_bridge_log_level or when
 *   any of the '__dbgf' debug flags is enabled
 * - BRIDGE_LOG prefixes the message with the calling function name;
 *   BRIDGE_LOG_SIMPLE does not include the function name in the log
 * - '__string' must be a string literal (it is pasted onto "%s: ")
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...) \
	do { \
		if ((__level) <= if_bridge_log_level || \
		    BRIDGE_DBGF_ENABLED(__dbgf)) { \
			os_log(OS_LOG_DEFAULT, "%s: " __string, \
			    __func__, ## __VA_ARGS__); \
		} \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...) \
	do { \
		if ((__level) <= if_bridge_log_level || \
		    BRIDGE_DBGF_ENABLED(__dbgf)) { \
			os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
		} \
	} while (0)
234
/* Direct acquire/release of the per-bridge mutex sc_mtx */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
/* Assertions on sc_mtx ownership (owned / not owned) */
#define BRIDGE_LOCK_ASSERT_HELD(_sc) \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc) \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
241
/*
 * Bridge lock operations
 * - with BRIDGE_LOCK_DEBUG set, each operation is delegated to the
 *   bridge_*() function of the same name
 * - otherwise the operations are open-coded:
 *   BRIDGE_LOCK2REF: entered with the mutex held; sets '_err' to EBUSY
 *     if an exclusive request is pending (sc_iflist_xcnt > 0), else
 *     takes a reference (sc_iflist_ref) and sets '_err' to 0; the
 *     mutex is dropped in both cases
 *   BRIDGE_UNREF: drops a reference and wakes sleepers on sc_cv when
 *     the last reference goes away while an exclusive request is pending
 *   BRIDGE_XLOCK: entered with the mutex held; registers an exclusive
 *     request and sleeps until all references are gone
 *   BRIDGE_XDROP: entered with the mutex held; withdraws the exclusive
 *     request
 * - every macro is a single statement and must be followed by ';'
 */
#define BRIDGE_LOCK_DEBUG 1
#if BRIDGE_LOCK_DEBUG

/* number of slots in the lock_lr[]/unlock_lr[] history arrays */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err) do { \
	(_err) = bridge_lock2ref(_sc); \
} while (0)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
#define BRIDGE_LOCK2REF(_sc, _err) do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	if ((_sc)->sc_iflist_xcnt > 0) { \
		(_err) = EBUSY; \
	} else { \
		(_sc)->sc_iflist_ref++; \
		(_err) = 0; \
	} \
	_BRIDGE_UNLOCK(_sc); \
} while (0)
#define BRIDGE_UNREF(_sc) do { \
	_BRIDGE_LOCK(_sc); \
	(_sc)->sc_iflist_ref--; \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
		_BRIDGE_UNLOCK(_sc); \
		wakeup(&(_sc)->sc_cv); \
	} else { \
		_BRIDGE_UNLOCK(_sc); \
	} \
} while (0)
#define BRIDGE_XLOCK(_sc) do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	(_sc)->sc_iflist_xcnt++; \
	while ((_sc)->sc_iflist_ref > 0) { \
		msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO, \
		    "BRIDGE_XLOCK", NULL); \
	} \
} while (0)
#define BRIDGE_XDROP(_sc) do { \
	BRIDGE_LOCK_ASSERT_HELD(_sc); \
	(_sc)->sc_iflist_xcnt--; \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
290
/*
 * BRIDGE_BPF_MTAP_INPUT
 * - when the bridge 'sc' has an input tap callback (sc_bpf_input != NULL),
 *   pass packet 'm' to bridge_bpf_input() along with the calling
 *   function name and line number; the result is discarded
 * - expands to an empty statement when NBPFILTER is not > 0
 * - wrapped in do { } while (0) so that it behaves as one statement and
 *   cannot capture an 'else' that follows it at the call site
 */
#if NBPFILTER > 0
#define BRIDGE_BPF_MTAP_INPUT(sc, m) \
	do { \
		if ((sc)->sc_bpf_input != NULL) { \
			bridge_bpf_input((sc)->sc_ifp, (m), __func__, __LINE__); \
		} \
	} while (0)
#else /* NBPFILTER */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)    do { } while (0)
#endif /* NBPFILTER */
298
/*
 * Initial size of the route hash table. Must be a power of two because
 * BRIDGE_RTHASH_MASK() derives the mask as (size - 1).
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

/*
 * Maximum size of the routing hash table
 * (initial value of bridge_rtable_hash_size_max)
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/* Mask computed from the bridge's current hash table size */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)

/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif


/*
 * Timeout (in seconds) for entries learned dynamically.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list
 * (initial value of bridge_rtable_prune_period).
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses (16 * 4 = 64)
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/*
 * List of capabilities to possibly mask on the member interface.
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
/*
 * List of capabilities to disable on the member interface.
 */
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
352
/*
 * Bridge interface list entry.
 * - linked through bif_next; the bridge keeps such entries on its
 *   sc_iflist (members) and sc_spanlist (span ports) queues
 */
struct bridge_iflist {
	TAILQ_ENTRY(bridge_iflist) bif_next;    /* queue linkage */
	struct ifnet            *bif_ifp;       /* member if */
	struct bstp_port        bif_stp;        /* STP state */
	uint32_t                bif_ifflags;    /* member if flags (IFBIF_*) */
	int                     bif_savedcaps;  /* saved capabilities */
	uint32_t                bif_addrmax;    /* max # of addresses */
	uint32_t                bif_addrcnt;    /* cur. # of addresses */
	uint32_t                bif_addrexceeded; /* # of address violations */

	interface_filter_t      bif_iff_ref;    /* interface filter reference */
	struct bridge_softc     *bif_sc;        /* presumably the owning bridge - confirm */
	uint32_t                bif_flags;      /* presumably BIFF_* flags - confirm */

	/* host filter */
	struct in_addr          bif_hf_ipsrc;   /* source IP (see BIFF_HF_IPSRC) */
	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN]; /* source MAC (see BIFF_HF_HWSRC) */

	struct ifbrmstats       bif_stats;      /* statistics (struct ifbrmstats) */
};
376
377 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 return (bif->bif_ifflags & flags) == flags;
381 }
382
383 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388
/*
 * fake errors to make the code clearer
 * - every name below is an alias for EJUSTRETURN, so the individual
 *   causes cannot be told apart from the returned value
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN
397
/*
 * BIFF_* per-member state flags
 * - NOTE(review): presumably stored in bridge_iflist.bif_flags - confirm
 * - the SKYWALK-only flags use bits 0x1000 and above
 */
#define BIFF_PROMISC            0x01            /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x02            /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x04            /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x08            /* interface media active */
#define BIFF_HOST_FILTER        0x10            /* host filter enabled */
#define BIFF_HF_HWSRC           0x20            /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x40            /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x80            /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x100           /* added to the member list */
#define BIFF_WIFI_INFRA         0x200           /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x400           /* allmulti set */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000         /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED   0x2000          /* we removed the netagent */
#endif /* SKYWALK */
413
/*
 * mac_nat_entry
 * - translates between an IP address and MAC address on a specific
 *   bridge interface member
 * - holds either an IPv4 or an IPv6 address (union mne_u); the mne_ip and
 *   mne_ip6 macros give direct access to the two union members
 */
struct mac_nat_entry {
	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
	struct bridge_iflist    *mne_bif;       /* originating interface */
	unsigned long           mne_expire;     /* expiration time */
	union {
		struct in_addr  mneu_ip;        /* originating IPv4 address */
		struct in6_addr mneu_ip6;       /* originating IPv6 address */
	} mne_u;
	uint8_t                 mne_mac[ETHER_ADDR_LEN]; /* MAC address paired with the IP address */
	uint8_t                 mne_flags;      /* MNE_FLAGS_* */
	uint8_t                 mne_reserved;   /* reserved */
};
#define mne_ip  mne_u.mneu_ip
#define mne_ip6 mne_u.mneu_ip6

#define MNE_FLAGS_IPV6          0x01    /* IPv6 address */

/* list head type; the bridge keeps one list for IPv4 and one for IPv6 */
LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
437
/*
 * mac_nat_record
 * - used by bridge_mac_nat_output() to convey the translation that needs
 *   to take place in bridge_mac_nat_translate
 * - holds enough information so that the translation can be done later without
 *   holding the bridge lock
 * - mnr_u is a union: only one of the ARP, IP, or IPv6 members is
 *   meaningful for a given record (presumably selected by
 *   mnr_ether_type - confirm)
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		/* ARP */
		uint16_t        mnru_arp_offset;
		/* IPv4 */
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t mnruip_header_len;
		} mnru_ip;
		/* IPv6 */
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessors for the members of mnr_u */
#define mnr_arp_offset  mnr_u.mnru_arp_offset

#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
473
/*
 * Bridge route node.
 * - describes one (MAC address, vlan) entry; each node is linked both
 *   into a hash bucket (brt_hash) and into a flat list (brt_list)
 */
struct bridge_rtnode {
	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
	struct bridge_iflist    *brt_dst;       /* destination if */
	unsigned long           brt_expire;     /* expiration time */
	uint8_t                 brt_flags;      /* address flags */
	uint8_t                 brt_addr[ETHER_ADDR_LEN];
	uint16_t                brt_vlan;       /* vlan id */

};
/* the ifnet of the destination member; dereferences brt_dst */
#define brt_ifp                 brt_dst->bif_ifp
488
/*
 * Bridge delayed function call context
 * - the delayed function receives a pointer to the bridge softc
 */
typedef void (*bridge_delayed_func_t)(struct bridge_softc *);

struct bridge_delayed_call {
	struct bridge_softc     *bdc_sc;        /* presumably the argument passed to bdc_func - confirm */
	bridge_delayed_func_t   bdc_func;       /* Function to call */
	struct timespec         bdc_ts;         /* Time to call */
	u_int32_t               bdc_flags;      /* BDCF_* */
	thread_call_t           bdc_thread_call;
};

#define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
#define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
504
/*
 * Software state for each bridge.
 */
LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);

struct bridge_softc {
	struct ifnet            *sc_ifp;        /* make this an interface */
	u_int32_t               sc_flags;       /* presumably SCF_* flags - confirm */
	LIST_ENTRY(bridge_softc) sc_list;
	decl_lck_mtx_data(, sc_mtx);            /* mutex taken by _BRIDGE_LOCK() */
	struct _bridge_rtnode_list *sc_rthash;  /* our forwarding table */
	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
	uint32_t                sc_rthash_key;  /* key for hash */
	uint32_t                sc_rthash_size; /* size of the hash table */
	struct bridge_delayed_call sc_aging_timer;
	struct bridge_delayed_call sc_resize_call;
	TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */
	struct bstp_state       sc_stp;         /* STP state */
	bpf_packet_func         sc_bpf_input;   /* input tap; NULL when not set */
	bpf_packet_func         sc_bpf_output;
	void                    *sc_cv;         /* its address is the msleep()/wakeup() channel in BRIDGE_XLOCK/BRIDGE_UNREF */
	uint32_t                sc_brtmax;      /* max # of addresses */
	uint32_t                sc_brtcnt;      /* cur. # of addresses */
	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
	uint32_t                sc_iflist_xcnt; /* # of pending exclusive (BRIDGE_XLOCK) requests for sc_iflist */
	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
	uint32_t                sc_brtexceeded; /* # of cache drops */
	uint32_t                sc_filter_flags; /* ipf and flags */
	struct ifnet            *sc_ifaddr;     /* member mac copied from */
	u_char                  sc_defaddr[6];  /* Default MAC address */
	char                    sc_if_xname[IFNAMSIZ];

	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
	uint32_t                sc_mne_max;     /* max # of entries */
	uint32_t                sc_mne_count;   /* cur. # of entries */
	uint32_t                sc_mne_allocation_failures;
#if BRIDGE_LOCK_DEBUG
	/*
	 * Locking and unlocking calling history
	 * - BR_LCKDBG_MAX slots each; next_*_lr is presumably the next
	 *   slot to fill - confirm
	 */
	void                    *lock_lr[BR_LCKDBG_MAX];
	int                     next_lock_lr;
	void                    *unlock_lr[BR_LCKDBG_MAX];
	int                     next_unlock_lr;
#endif /* BRIDGE_LOCK_DEBUG */
};

/* SCF_* bridge state flags */
#define SCF_DETACHING           0x01
#define SCF_RESIZING            0x02
#define SCF_MEDIA_ACTIVE        0x04
558
/*
 * ChecksumOperation
 * - checksum handling selector; the last parameter of bridge_enqueue()
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
565
/*
 * union iphdr
 * - one pointer viewed as an IPv4 header, an IPv6 header, or untyped
 */
union iphdr {
	struct ip       *ip;
	struct ip6_hdr  *ip6;
	void *          ptr;
};
571
/*
 * ip_packet_info
 * - description of an IP packet: header/payload lengths, protocol,
 *   and pointers to the IP and protocol headers
 */
typedef struct {
	u_int           ip_hlen;        /* IP header length */
	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
	u_int           ip_opt_len;     /* IPv6 options headers length */
	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
	bool            ip_is_ipv4;
	bool            ip_is_fragmented;
	union iphdr     ip_hdr;         /* pointer to IP header */
	void *          ip_proto_hdr;   /* ptr to protocol header (TCP) */
} ip_packet_info, *ip_packet_info_t;
582
/* Exported read-only through the net.link.bridge.hostfilterstats sysctl */
struct bridge_hostfilter_stats bridge_hostfilter_stats;

/* Lock group and attributes used to declare bridge_list_mtx */
static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
/*
 * NOTE(review): LCK_ATTR_DEBUG appears only in the !BRIDGE_LOCK_DEBUG
 * branch - confirm that this is intended and not inverted
 */
#if BRIDGE_LOCK_DEBUG
static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
#else
static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
#endif
/* presumably protects the global list of bridges - confirm */
static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);

/* Tunable through the net.link.bridge.rtable_prune_period sysctl */
static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* Typed allocators for struct bridge_rtnode and struct mac_nat_entry */
static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
597
598 static int bridge_clone_create(struct if_clone *, uint32_t, void *);
599 static int bridge_clone_destroy(struct ifnet *);
600
601 static errno_t bridge_ioctl(struct ifnet *, u_long, void *);
602 #if HAS_IF_CAP
603 static void bridge_mutecaps(struct bridge_softc *);
604 static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
605 int);
606 #endif
607 static errno_t bridge_set_tso(struct bridge_softc *);
608 static void bridge_proto_attach_changed(struct ifnet *);
609 static int bridge_init(struct ifnet *);
610 #if HAS_BRIDGE_DUMMYNET
611 static void bridge_dummynet(struct mbuf *, struct ifnet *);
612 #endif
613 static void bridge_ifstop(struct ifnet *, int);
614 static int bridge_output(struct ifnet *, struct mbuf *);
615 static void bridge_finalize_cksum(struct ifnet *, struct mbuf *);
616 static void bridge_start(struct ifnet *);
617 static errno_t bridge_input(struct ifnet *, mbuf_t *);
618 static errno_t bridge_iff_input(void *, ifnet_t, protocol_family_t,
619 mbuf_t *, char **);
620 static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t,
621 mbuf_t *);
622 static errno_t bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
623 mbuf_t *m);
624
625 static int bridge_enqueue(ifnet_t, struct ifnet *,
626 struct ifnet *, struct mbuf *, ChecksumOperation);
627 static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
628
629 static void bridge_forward(struct bridge_softc *, struct bridge_iflist *,
630 struct mbuf *);
631
632 static void bridge_aging_timer(struct bridge_softc *sc);
633
634 static void bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
635 struct mbuf *, int);
636 static void bridge_span(struct bridge_softc *, struct mbuf *);
637
638 static int bridge_rtupdate(struct bridge_softc *, const uint8_t *,
639 uint16_t, struct bridge_iflist *, int, uint8_t);
640 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
641 uint16_t);
642 static void bridge_rttrim(struct bridge_softc *);
643 static void bridge_rtage(struct bridge_softc *);
644 static void bridge_rtflush(struct bridge_softc *, int);
645 static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
646 uint16_t);
647
648 static int bridge_rtable_init(struct bridge_softc *);
649 static void bridge_rtable_fini(struct bridge_softc *);
650
651 static void bridge_rthash_resize(struct bridge_softc *);
652
653 static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
654 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
655 const uint8_t *, uint16_t);
656 static int bridge_rtnode_hash(struct bridge_softc *,
657 struct bridge_rtnode *);
658 static int bridge_rtnode_insert(struct bridge_softc *,
659 struct bridge_rtnode *);
660 static void bridge_rtnode_destroy(struct bridge_softc *,
661 struct bridge_rtnode *);
662 #if BRIDGESTP
663 static void bridge_rtable_expire(struct ifnet *, int);
664 static void bridge_state_change(struct ifnet *, int);
665 #endif /* BRIDGESTP */
666
667 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
668 const char *name);
669 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
670 struct ifnet *ifp);
671 static void bridge_delete_member(struct bridge_softc *,
672 struct bridge_iflist *);
673 static void bridge_delete_span(struct bridge_softc *,
674 struct bridge_iflist *);
675
676 static int bridge_ioctl_add(struct bridge_softc *, void *);
677 static int bridge_ioctl_del(struct bridge_softc *, void *);
678 static int bridge_ioctl_gifflags(struct bridge_softc *, void *);
679 static int bridge_ioctl_sifflags(struct bridge_softc *, void *);
680 static int bridge_ioctl_scache(struct bridge_softc *, void *);
681 static int bridge_ioctl_gcache(struct bridge_softc *, void *);
682 static int bridge_ioctl_gifs32(struct bridge_softc *, void *);
683 static int bridge_ioctl_gifs64(struct bridge_softc *, void *);
684 static int bridge_ioctl_rts32(struct bridge_softc *, void *);
685 static int bridge_ioctl_rts64(struct bridge_softc *, void *);
686 static int bridge_ioctl_saddr32(struct bridge_softc *, void *);
687 static int bridge_ioctl_saddr64(struct bridge_softc *, void *);
688 static int bridge_ioctl_sto(struct bridge_softc *, void *);
689 static int bridge_ioctl_gto(struct bridge_softc *, void *);
690 static int bridge_ioctl_daddr32(struct bridge_softc *, void *);
691 static int bridge_ioctl_daddr64(struct bridge_softc *, void *);
692 static int bridge_ioctl_flush(struct bridge_softc *, void *);
693 static int bridge_ioctl_gpri(struct bridge_softc *, void *);
694 static int bridge_ioctl_spri(struct bridge_softc *, void *);
695 static int bridge_ioctl_ght(struct bridge_softc *, void *);
696 static int bridge_ioctl_sht(struct bridge_softc *, void *);
697 static int bridge_ioctl_gfd(struct bridge_softc *, void *);
698 static int bridge_ioctl_sfd(struct bridge_softc *, void *);
699 static int bridge_ioctl_gma(struct bridge_softc *, void *);
700 static int bridge_ioctl_sma(struct bridge_softc *, void *);
701 static int bridge_ioctl_sifprio(struct bridge_softc *, void *);
702 static int bridge_ioctl_sifcost(struct bridge_softc *, void *);
703 static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
704 static int bridge_ioctl_addspan(struct bridge_softc *, void *);
705 static int bridge_ioctl_delspan(struct bridge_softc *, void *);
706 static int bridge_ioctl_gbparam32(struct bridge_softc *, void *);
707 static int bridge_ioctl_gbparam64(struct bridge_softc *, void *);
708 static int bridge_ioctl_grte(struct bridge_softc *, void *);
709 static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
710 static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
711 static int bridge_ioctl_sproto(struct bridge_softc *, void *);
712 static int bridge_ioctl_stxhc(struct bridge_softc *, void *);
713 static int bridge_ioctl_purge(struct bridge_softc *sc, void *);
714 static int bridge_ioctl_gfilt(struct bridge_softc *, void *);
715 static int bridge_ioctl_sfilt(struct bridge_softc *, void *);
716 static int bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
717 static int bridge_ioctl_shostfilter(struct bridge_softc *, void *);
718 static int bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
719 static int bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
720 static int bridge_ioctl_gifstats32(struct bridge_softc *, void *);
721 static int bridge_ioctl_gifstats64(struct bridge_softc *, void *);
722
723 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
724 static int bridge_ip_checkbasic(struct mbuf **);
725 static int bridge_ip6_checkbasic(struct mbuf **);
726
727 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
728 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
729 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
730
731 static void bridge_detach(ifnet_t);
732 static void bridge_link_event(struct ifnet *, u_int32_t);
733 static void bridge_iflinkevent(struct ifnet *);
734 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
735 static int interface_media_active(struct ifnet *);
736 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
737 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
738 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
739 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
740
741 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
742 struct bridge_iflist *);
743 static void bridge_mac_nat_disable(struct bridge_softc *sc);
744 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
745 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
746 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
747 struct bridge_iflist *);
748 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
749 boolean_t *);
750 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
751 struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
752 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
753 const caddr_t);
754 static bool is_broadcast_ip_packet(mbuf_t *);
755 static bool in_addr_is_ours(const struct in_addr);
756 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
757
/* Copy the packet via m_copym() from offset 0 for M_COPYALL bytes */
#define m_copypacket(m, how)    m_copym(m, 0, M_COPYALL, how)
759
760 static int
761 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
762 boolean_t is_tx);
763
/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2)
 * NOTE(review): VLANTAGOF() ignores its argument and always evaluates
 * to 0, not 1 - confirm that this is intended
 */
#define VLANTAGOF(_m)   0

/* 01:80:c2:00:00:00 - presumably the STP destination address; confirm */
u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };

/* all-zero Ethernet address */
static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
772
#if BRIDGESTP
/*
 * STP callback operations (struct bstp_cb_ops)
 * - bcb_state is bridge_state_change(), bcb_rtage is bridge_rtable_expire()
 */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state = bridge_state_change,
	.bcb_rtage = bridge_rtable_expire
};
#endif /* BRIDGESTP */
779
/*
 * net.link.bridge sysctl subtree
 * - each SYSCTL_INT/SYSCTL_UINT below exposes the variable declared
 *   immediately above it (or earlier in this file) as a read-write node
 */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Bridge");

static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_inherit_mac, 0,
    "Inherit MAC address from the first bridge member");

SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_prune_period, 0,
    "Interval between pruning of routing table");

/* defaults to BRIDGE_RTHASH_SIZE_MAX */
static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_hash_size_max, 0,
    "Maximum size of the routing hash table");

#if BRIDGE_DELAYED_CALLBACK_DEBUG
static int bridge_delayed_callback_delay = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_delayed_callback_delay, 0,
    "Delay before calling delayed function");
#endif

/* read-only export of the bridge_hostfilter_stats structure */
SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
    hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &bridge_hostfilter_stats, bridge_hostfilter_stats, "");

#if BRIDGESTP
/*
 * NOTE(review): unlike the other net.link.bridge nodes, log_stp is
 * declared without CTLFLAG_LOCKED - confirm that this is intended
 */
static int log_stp = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
818
/*
 * bridge_control
 * - one entry of a bridge command table (e.g. bridge_control_table32):
 *   the handler, the size of its argument, and BC_F_* flags
 */
struct bridge_control {
	int             (*bc_func)(struct bridge_softc *, void *);
	unsigned int    bc_argsize;
	unsigned int    bc_flags;
};
824
/* PF tag names; the specific tags share the "com.apple.vmnet" prefix */
#define VMNET_TAG               "com.apple.vmnet"
#define VMNET_LOCAL_TAG         VMNET_TAG ".local"
#define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
#define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"

/*
 * PF tag values for the names above
 * - 0 means "not allocated yet"; filled in by allocate_vmnet_pf_tags()
 */
static u_int16_t vmnet_tag;
static u_int16_t vmnet_local_tag;
static u_int16_t vmnet_broadcast_tag;
static u_int16_t vmnet_multicast_tag;
834
835 static u_int16_t
allocate_pf_tag(char * name)836 allocate_pf_tag(char * name)
837 {
838 u_int16_t tag;
839
840 tag = pf_tagname2tag_ext(name);
841 BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
842 return tag;
843 }
844
845 static void
allocate_vmnet_pf_tags(void)846 allocate_vmnet_pf_tags(void)
847 {
848 /* allocate tags to use with PF */
849 if (vmnet_tag == 0) {
850 vmnet_tag = allocate_pf_tag(VMNET_TAG);
851 }
852 if (vmnet_local_tag == 0) {
853 vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
854 }
855 if (vmnet_broadcast_tag == 0) {
856 vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
857 }
858 if (vmnet_multicast_tag == 0) {
859 vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
860 }
861 }
862
/* Flag bits for bridge_control.bc_flags, acted upon by the DRVSPEC macro. */
#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
866
867 static const struct bridge_control bridge_control_table32[] = {
868 { .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
869 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
870 { .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
871 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
872
873 { .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
874 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
875 { .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
876 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
877
878 { .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
879 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
880 { .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
881 .bc_flags = BC_F_COPYOUT },
882
883 { .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
884 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
885 { .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
886 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
887
888 { .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
889 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
890
891 { .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
892 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
893 { .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
894 .bc_flags = BC_F_COPYOUT },
895
896 { .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
897 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
898
899 { .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
900 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
901
902 { .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
903 .bc_flags = BC_F_COPYOUT },
904 { .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
905 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
906
907 { .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
908 .bc_flags = BC_F_COPYOUT },
909 { .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
910 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
911
912 { .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
913 .bc_flags = BC_F_COPYOUT },
914 { .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
915 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
916
917 { .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
918 .bc_flags = BC_F_COPYOUT },
919 { .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
920 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
921
922 { .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
923 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
924
925 { .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
926 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
927
928 { .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
929 .bc_flags = BC_F_COPYOUT },
930 { .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
931 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
932
933 { .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
934 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
935
936 { .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
937 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
938 { .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
939 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
940
941 { .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
942 .bc_flags = BC_F_COPYOUT },
943
944 { .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
945 .bc_flags = BC_F_COPYOUT },
946
947 { .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32), /* 30 */
948 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
949
950 { .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
951 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
952
953 { .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
954 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
955
956 { .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
957 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
958
959 { .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
960 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
961 { .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
962 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
963
964 { .bc_func = bridge_ioctl_gmnelist32,
965 .bc_argsize = sizeof(struct ifbrmnelist32),
966 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
967 { .bc_func = bridge_ioctl_gifstats32,
968 .bc_argsize = sizeof(struct ifbrmreq32),
969 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
970 };
971
972 static const struct bridge_control bridge_control_table64[] = {
973 { .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq), /* 0 */
974 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
975 { .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
976 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
977
978 { .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
979 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
980 { .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
981 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
982
983 { .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
984 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
985 { .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
986 .bc_flags = BC_F_COPYOUT },
987
988 { .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
989 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
990 { .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
991 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
992
993 { .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
994 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
995
996 { .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
997 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
998 { .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam), /* 10 */
999 .bc_flags = BC_F_COPYOUT },
1000
1001 { .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1002 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1003
1004 { .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1005 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1006
1007 { .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1008 .bc_flags = BC_F_COPYOUT },
1009 { .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1010 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1011
1012 { .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1013 .bc_flags = BC_F_COPYOUT },
1014 { .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1015 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1016
1017 { .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1018 .bc_flags = BC_F_COPYOUT },
1019 { .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1020 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1021
1022 { .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1023 .bc_flags = BC_F_COPYOUT },
1024 { .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam), /* 20 */
1025 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1026
1027 { .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1028 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1029
1030 { .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1031 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1032
1033 { .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1034 .bc_flags = BC_F_COPYOUT },
1035 { .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1036 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1037
1038 { .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1039 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1040
1041 { .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1042 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1043 { .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1044 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1045
1046 { .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1047 .bc_flags = BC_F_COPYOUT },
1048
1049 { .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1050 .bc_flags = BC_F_COPYOUT },
1051
1052 { .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64), /* 30 */
1053 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1054
1055 { .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1056 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1057
1058 { .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1059 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1060
1061 { .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1062 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1063
1064 { .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1065 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1066 { .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1067 .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1068
1069 { .bc_func = bridge_ioctl_gmnelist64,
1070 .bc_argsize = sizeof(struct ifbrmnelist64),
1071 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1072 { .bc_func = bridge_ioctl_gifstats64,
1073 .bc_argsize = sizeof(struct ifbrmreq64),
1074 .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1075 };
1076
1077 static const unsigned int bridge_control_table_size =
1078 sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1079
/*
 * List of bridge instances.  bridge_clone_create() walks and inserts into
 * it while holding bridge_list_mtx.
 */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME      "bridge"                        /* interface name prefix */
#define BRIDGES_MAX     IF_MAXUNIT                      /* highest unit passed to the cloner */
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/* Cloner for "bridge" interfaces, units 0 through BRIDGES_MAX. */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX);
1090
/*
 * When non-zero, bridge_clone_create() registers bridge_start as the start
 * callback; otherwise it uses the legacy bridge_output path.
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* if_bridge_debug is defined outside this section. */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

/* if_bridge_log_level is defined outside this section. */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* Segmentation knob; enabled by default. */
static int if_bridge_segmentation = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1106
1107 static int if_bridge_vmnet_pf_tagging = 1;
1108 SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1109 CTLFLAG_RW | CTLFLAG_LOCKED,
1110 &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1111
/*
 * Limits and defaults for the two "TSO reduce MSS" knobs.  The *_MAX values
 * are the largest settings the sysctl handlers accept.
 */
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
#define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
#define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
#define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0

/* Current settings; changed only through the sysctl handlers below. */
static u_int if_bridge_tso_reduce_mss_forwarding
        = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
static u_int if_bridge_tso_reduce_mss_tx
        = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1121
1122 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1123 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1124 {
1125 int changed;
1126 int error;
1127 u_int new_value;
1128
1129 error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1130 &changed);
1131 if (error == 0 && changed != 0) {
1132 if (new_value > val_max) {
1133 return EINVAL;
1134 }
1135 *val = new_value;
1136 }
1137 return error;
1138 }
1139
/*
 * Handler for net.link.bridge.tso_reduce_mss_forwarding: reads or updates
 * if_bridge_tso_reduce_mss_forwarding, rejecting values above
 * BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX.
 */
static int
bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
{
	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
	           BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
}

SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
    "Bridge tso reduce mss when forwarding");
1151
/*
 * Handler for net.link.bridge.tso_reduce_mss_tx: reads or updates
 * if_bridge_tso_reduce_mss_tx, rejecting values above
 * BRIDGE_TSO_REDUCE_MSS_TX_MAX.
 */
static int
bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
{
	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
	           BRIDGE_TSO_REDUCE_MSS_TX_MAX);
}

SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
    "Bridge tso reduce mss on transmit");
1163
1164
#if DEBUG || DEVELOPMENT
/*
 * Error injection, built only for DEBUG/DEVELOPMENT kernels.
 * net.link.bridge.force_errors holds a mask of BRIDGE_FORCE_* bits.
 */
#define BRIDGE_FORCE_ONE        0x00000001
#define BRIDGE_FORCE_TWO        0x00000002
static u_int32_t if_bridge_force_errors = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_force_errors, 0, "Bridge interface force errors");
/* True when at least one bit of `flags' is set in if_bridge_force_errors. */
static inline bool
bridge_error_is_forced(u_int32_t flags)
{
	return (if_bridge_force_errors & flags) != 0;
}

/*
 * Store bridge_error_is_forced(__flags) into __is_forced and, when the
 * error is forced, log the flags at LOG_NOTICE.
 */
#define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
	do {                                                            \
	        __is_forced = bridge_error_is_forced(__flags);          \
	        if (__is_forced) {                                      \
	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
	        }                                                       \
	} while (0)
#endif /* DEBUG || DEVELOPMENT */
1186
1187
/* Logging helpers, defined further down in this file. */
static void brlog_ether_header(struct ether_header *);
static void brlog_mbuf_data(mbuf_t, size_t, size_t);
static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
static void brlog_mbuf(mbuf_t, const char *, const char *);
static void brlog_link(struct bridge_softc * sc);

#if BRIDGE_LOCK_DEBUG
/* Function versions of the bridge lock primitives, built only with BRIDGE_LOCK_DEBUG. */
static void bridge_lock(struct bridge_softc *);
static void bridge_unlock(struct bridge_softc *);
static int bridge_lock2ref(struct bridge_softc *);
static void bridge_unref(struct bridge_softc *);
static void bridge_xlock(struct bridge_softc *);
static void bridge_xdrop(struct bridge_softc *);
1201
1202 static void
bridge_lock(struct bridge_softc * sc)1203 bridge_lock(struct bridge_softc *sc)
1204 {
1205 void *lr_saved = __builtin_return_address(0);
1206
1207 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1208
1209 _BRIDGE_LOCK(sc);
1210
1211 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1212 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1213 }
1214
1215 static void
bridge_unlock(struct bridge_softc * sc)1216 bridge_unlock(struct bridge_softc *sc)
1217 {
1218 void *lr_saved = __builtin_return_address(0);
1219
1220 BRIDGE_LOCK_ASSERT_HELD(sc);
1221
1222 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1223 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1224
1225 _BRIDGE_UNLOCK(sc);
1226 }
1227
1228 static int
bridge_lock2ref(struct bridge_softc * sc)1229 bridge_lock2ref(struct bridge_softc *sc)
1230 {
1231 int error = 0;
1232 void *lr_saved = __builtin_return_address(0);
1233
1234 BRIDGE_LOCK_ASSERT_HELD(sc);
1235
1236 if (sc->sc_iflist_xcnt > 0) {
1237 error = EBUSY;
1238 } else {
1239 sc->sc_iflist_ref++;
1240 }
1241
1242 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1243 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1244
1245 _BRIDGE_UNLOCK(sc);
1246
1247 return error;
1248 }
1249
1250 static void
bridge_unref(struct bridge_softc * sc)1251 bridge_unref(struct bridge_softc *sc)
1252 {
1253 void *lr_saved = __builtin_return_address(0);
1254
1255 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1256
1257 _BRIDGE_LOCK(sc);
1258 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1259 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1260
1261 sc->sc_iflist_ref--;
1262
1263 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1264 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1265 if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1266 _BRIDGE_UNLOCK(sc);
1267 wakeup(&sc->sc_cv);
1268 } else {
1269 _BRIDGE_UNLOCK(sc);
1270 }
1271 }
1272
1273 static void
bridge_xlock(struct bridge_softc * sc)1274 bridge_xlock(struct bridge_softc *sc)
1275 {
1276 void *lr_saved = __builtin_return_address(0);
1277
1278 BRIDGE_LOCK_ASSERT_HELD(sc);
1279
1280 sc->sc_iflist_xcnt++;
1281 while (sc->sc_iflist_ref > 0) {
1282 sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1283 sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1284
1285 msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1286
1287 sc->lock_lr[sc->next_lock_lr] = lr_saved;
1288 sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1289 }
1290 }
1291
/*
 * Undo the sc_iflist_xcnt increment made by bridge_xlock().  The bridge
 * lock must be held and stays held.
 */
static void
bridge_xdrop(struct bridge_softc *sc)
{
	BRIDGE_LOCK_ASSERT_HELD(sc);

	sc->sc_iflist_xcnt--;
}
1299
1300 #endif /* BRIDGE_LOCK_DEBUG */
1301
1302 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1303 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1304 {
1305 if (m) {
1306 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1307 "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1308 prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1309 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1310 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1311 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1312 suffix ? suffix : "");
1313 } else {
1314 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1315 }
1316 }
1317
1318 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1319 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1320 {
1321 if (m) {
1322 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1323 "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1324 "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1325 prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1326 mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1327 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1328 (unsigned int)mbuf_maxlen(m),
1329 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1330 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1331 !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1332 if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1333 brlog_mbuf_pkthdr(m, "", suffix);
1334 }
1335 } else {
1336 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1337 }
1338 }
1339
1340 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1341 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1342 {
1343 mbuf_t n;
1344 size_t i, j;
1345 size_t pktlen, mlen, maxlen;
1346 unsigned char *ptr;
1347
1348 pktlen = mbuf_pkthdr_len(m);
1349
1350 if (offset > pktlen) {
1351 return;
1352 }
1353
1354 maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1355 n = m;
1356 mlen = mbuf_len(n);
1357 ptr = mbuf_data(n);
1358 for (i = 0, j = 0; i < maxlen; i++, j++) {
1359 if (j >= mlen) {
1360 n = mbuf_next(n);
1361 if (n == 0) {
1362 break;
1363 }
1364 ptr = mbuf_data(n);
1365 mlen = mbuf_len(n);
1366 j = 0;
1367 }
1368 if (i >= offset) {
1369 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1370 "%02x%s", ptr[j], i % 2 ? " " : "");
1371 }
1372 }
1373 }
1374
1375 static void
brlog_ether_header(struct ether_header * eh)1376 brlog_ether_header(struct ether_header *eh)
1377 {
1378 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1379 "%02x:%02x:%02x:%02x:%02x:%02x > "
1380 "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1381 eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1382 eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1383 eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1384 eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1385 ntohs(eh->ether_type));
1386 }
1387
/*
 * Format the six bytes at `ap' as a lower-case, colon-separated MAC
 * address ("xx:xx:xx:xx:xx:xx") into buf, writing at most len bytes
 * including the terminator (snprintf() truncation applies).
 *
 * Returns buf so the call can be used directly as a format argument.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	const u_char *octet = ap;

	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
	    octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
	return buf;
}
1396
1397 static void
brlog_link(struct bridge_softc * sc)1398 brlog_link(struct bridge_softc * sc)
1399 {
1400 int i;
1401 uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
1402 IFNAMSIZ + ETHER_ADDR_LEN];
1403 struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
1404 const u_char * lladdr;
1405 char lladdr_str[48];
1406
1407 memset(sdl, 0, sizeof(sdl_buffer));
1408 sdl->sdl_family = AF_LINK;
1409 sdl->sdl_nlen = strlen(sc->sc_if_xname);
1410 sdl->sdl_alen = ETHER_ADDR_LEN;
1411 sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1412 memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1413 memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1414 lladdr_str[0] = '\0';
1415 for (i = 0, lladdr = CONST_LLADDR(sdl);
1416 i < sdl->sdl_alen;
1417 i++, lladdr++) {
1418 char byte_str[4];
1419
1420 snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1421 *lladdr);
1422 strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
1423 }
1424 BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1425 "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1426 " slen %d addr %s", sc->sc_if_xname,
1427 sdl->sdl_len, sdl->sdl_index,
1428 sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1429 sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1430 }
1431
1432
1433 /*
1434 * bridgeattach:
1435 *
1436 * Pseudo-device attach routine.
1437 */
1438 __private_extern__ int
bridgeattach(int n)1439 bridgeattach(int n)
1440 {
1441 #pragma unused(n)
1442 int error;
1443
1444 LIST_INIT(&bridge_list);
1445
1446 #if BRIDGESTP
1447 bstp_sys_init();
1448 #endif /* BRIDGESTP */
1449
1450 error = if_clone_attach(&bridge_cloner);
1451 if (error != 0) {
1452 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1453 }
1454 return error;
1455 }
1456
1457
1458 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1459 bridge_ifnet_set_attrs(struct ifnet * ifp)
1460 {
1461 errno_t error;
1462
1463 error = ifnet_set_mtu(ifp, ETHERMTU);
1464 if (error != 0) {
1465 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1466 goto done;
1467 }
1468 error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1469 if (error != 0) {
1470 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1471 goto done;
1472 }
1473 error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1474 if (error != 0) {
1475 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1476 goto done;
1477 }
1478 error = ifnet_set_flags(ifp,
1479 IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1480 0xffff);
1481
1482 if (error != 0) {
1483 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1484 goto done;
1485 }
1486 done:
1487 return error;
1488 }
1489
1490 /*
1491 * bridge_clone_create:
1492 *
1493 * Create a new bridge instance.
1494 */
1495 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1496 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1497 {
1498 #pragma unused(params)
1499 struct ifnet *ifp = NULL;
1500 struct bridge_softc *sc = NULL;
1501 struct bridge_softc *sc2 = NULL;
1502 struct ifnet_init_eparams init_params;
1503 errno_t error = 0;
1504 uint8_t eth_hostid[ETHER_ADDR_LEN];
1505 int fb, retry, has_hostid;
1506
1507 sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1508 lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1509 sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1510 sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1511 sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1512 sc->sc_filter_flags = 0;
1513
1514 TAILQ_INIT(&sc->sc_iflist);
1515
1516 /* use the interface name as the unique id for ifp recycle */
1517 snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1518 ifc->ifc_name, unit);
1519 bzero(&init_params, sizeof(init_params));
1520 init_params.ver = IFNET_INIT_CURRENT_VERSION;
1521 init_params.len = sizeof(init_params);
1522 /* Initialize our routing table. */
1523 error = bridge_rtable_init(sc);
1524 if (error != 0) {
1525 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1526 goto done;
1527 }
1528 TAILQ_INIT(&sc->sc_spanlist);
1529 if (if_bridge_txstart) {
1530 init_params.start = bridge_start;
1531 } else {
1532 init_params.flags = IFNET_INIT_LEGACY;
1533 init_params.output = bridge_output;
1534 }
1535 init_params.set_bpf_tap = bridge_set_bpf_tap;
1536 init_params.uniqueid = sc->sc_if_xname;
1537 init_params.uniqueid_len = strlen(sc->sc_if_xname);
1538 init_params.sndq_maxlen = IFQ_MAXLEN;
1539 init_params.name = ifc->ifc_name;
1540 init_params.unit = unit;
1541 init_params.family = IFNET_FAMILY_ETHERNET;
1542 init_params.type = IFT_BRIDGE;
1543 init_params.demux = ether_demux;
1544 init_params.add_proto = ether_add_proto;
1545 init_params.del_proto = ether_del_proto;
1546 init_params.check_multi = ether_check_multi;
1547 init_params.framer_extended = ether_frameout_extended;
1548 init_params.softc = sc;
1549 init_params.ioctl = bridge_ioctl;
1550 init_params.detach = bridge_detach;
1551 init_params.broadcast_addr = etherbroadcastaddr;
1552 init_params.broadcast_len = ETHER_ADDR_LEN;
1553
1554 error = ifnet_allocate_extended(&init_params, &ifp);
1555 if (error != 0) {
1556 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1557 goto done;
1558 }
1559 LIST_INIT(&sc->sc_mne_list);
1560 LIST_INIT(&sc->sc_mne_list_v6);
1561 sc->sc_ifp = ifp;
1562 error = bridge_ifnet_set_attrs(ifp);
1563 if (error != 0) {
1564 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1565 error);
1566 goto done;
1567 }
1568 /*
1569 * Generate an ethernet address with a locally administered address.
1570 *
1571 * Since we are using random ethernet addresses for the bridge, it is
1572 * possible that we might have address collisions, so make sure that
1573 * this hardware address isn't already in use on another bridge.
1574 * The first try uses the "hostid" and falls back to read_frandom();
1575 * for "hostid", we use the MAC address of the first-encountered
1576 * Ethernet-type interface that is currently configured.
1577 */
1578 fb = 0;
1579 has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0);
1580 for (retry = 1; retry != 0;) {
1581 if (fb || has_hostid == 0) {
1582 read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1583 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1584 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1585 } else {
1586 bcopy(ð_hostid[0], &sc->sc_defaddr,
1587 ETHER_ADDR_LEN);
1588 sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1589 sc->sc_defaddr[0] |= 2; /* set the LAA bit */
1590 sc->sc_defaddr[3] = /* stir it up a bit */
1591 ((sc->sc_defaddr[3] & 0x0f) << 4) |
1592 ((sc->sc_defaddr[3] & 0xf0) >> 4);
1593 /*
1594 * Mix in the LSB as it's actually pretty significant,
1595 * see rdar://14076061
1596 */
1597 sc->sc_defaddr[4] =
1598 (((sc->sc_defaddr[4] & 0x0f) << 4) |
1599 ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1600 sc->sc_defaddr[5];
1601 sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1602 }
1603
1604 fb = 1;
1605 retry = 0;
1606 lck_mtx_lock(&bridge_list_mtx);
1607 LIST_FOREACH(sc2, &bridge_list, sc_list) {
1608 if (_ether_cmp(sc->sc_defaddr,
1609 IF_LLADDR(sc2->sc_ifp)) == 0) {
1610 retry = 1;
1611 }
1612 }
1613 lck_mtx_unlock(&bridge_list_mtx);
1614 }
1615
1616 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1617
1618 if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1619 brlog_link(sc);
1620 }
1621 error = ifnet_attach(ifp, NULL);
1622 if (error != 0) {
1623 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1624 goto done;
1625 }
1626
1627 error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1628 IFT_ETHER);
1629 if (error != 0) {
1630 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1631 error);
1632 goto done;
1633 }
1634
1635 ifnet_set_offload(ifp,
1636 IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1637 IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1638 error = bridge_set_tso(sc);
1639 if (error != 0) {
1640 BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1641 goto done;
1642 }
1643 #if BRIDGESTP
1644 bstp_attach(&sc->sc_stp, &bridge_ops);
1645 #endif /* BRIDGESTP */
1646
1647 lck_mtx_lock(&bridge_list_mtx);
1648 LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1649 lck_mtx_unlock(&bridge_list_mtx);
1650
1651 /* attach as ethernet */
1652 error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1653 NULL, NULL);
1654
1655 done:
1656 if (error != 0) {
1657 BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1658 /* TBD: Clean up: sc, sc_rthash etc */
1659 }
1660
1661 return error;
1662 }
1663
1664 /*
1665 * bridge_clone_destroy:
1666 *
1667 * Destroy a bridge instance.
1668 */
1669 static int
bridge_clone_destroy(struct ifnet * ifp)1670 bridge_clone_destroy(struct ifnet *ifp)
1671 {
1672 struct bridge_softc *sc = ifp->if_softc;
1673 struct bridge_iflist *bif;
1674 errno_t error;
1675
1676 BRIDGE_LOCK(sc);
1677 if ((sc->sc_flags & SCF_DETACHING)) {
1678 BRIDGE_UNLOCK(sc);
1679 return 0;
1680 }
1681 sc->sc_flags |= SCF_DETACHING;
1682
1683 bridge_ifstop(ifp, 1);
1684
1685 bridge_cancel_delayed_call(&sc->sc_resize_call);
1686
1687 bridge_cleanup_delayed_call(&sc->sc_resize_call);
1688 bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1689
1690 error = ifnet_set_flags(ifp, 0, IFF_UP);
1691 if (error != 0) {
1692 BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1693 }
1694
1695 while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1696 bridge_delete_member(sc, bif);
1697 }
1698
1699 while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1700 bridge_delete_span(sc, bif);
1701 }
1702 BRIDGE_UNLOCK(sc);
1703
1704 error = ifnet_detach(ifp);
1705 if (error != 0) {
1706 panic("%s (%d): ifnet_detach(%p) failed %d",
1707 __func__, __LINE__, ifp, error);
1708 }
1709 return 0;
1710 }
1711
/*
 * DRVSPEC: shared body for driver-specific ioctl handling.
 *
 * Expands in a scope that must provide `ifd' (the request), `cmd', `sc',
 * `args' (scratch argument buffer), `bc', `error' and
 * `bridge_control_table' (the dispatch table to index).  In order:
 *   1. reject ifd_cmd values at or beyond bridge_control_table_size;
 *   2. SIOCGDRVSPEC is only valid for entries with BC_F_COPYOUT,
 *      SIOCSDRVSPEC only for entries without it;
 *   3. entries with BC_F_SUSER require superuser credentials;
 *   4. ifd_len must equal the entry's bc_argsize and fit in `args';
 *   5. `args' is zeroed and, for BC_F_COPYIN, filled from user space;
 *   6. the handler runs with the bridge lock held;
 *   7. for BC_F_COPYOUT, `args' is copied back to user space.
 * Any failure sets `error' and leaves the block via `break'.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) { \
	        error = EINVAL; \
	        break; \
	} \
	bc = &bridge_control_table[ifd->ifd_cmd]; \
 \
	if (cmd == SIOCGDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) { \
	        error = EINVAL; \
	        break; \
	} else if (cmd == SIOCSDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) { \
	        error = EINVAL; \
	        break; \
	} \
 \
	if (bc->bc_flags & BC_F_SUSER) { \
	        error = kauth_authorize_generic(kauth_cred_get(), \
	            KAUTH_GENERIC_ISSUSER); \
	        if (error) \
	                break; \
	} \
 \
	if (ifd->ifd_len != bc->bc_argsize || \
	    ifd->ifd_len > sizeof (args)) { \
	        error = EINVAL; \
	        break; \
	} \
 \
	bzero(&args, sizeof (args)); \
	if (bc->bc_flags & BC_F_COPYIN) { \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
	        if (error) \
	                break; \
	} \
 \
	BRIDGE_LOCK(sc); \
	error = (*bc->bc_func)(sc, &args); \
	BRIDGE_UNLOCK(sc); \
	if (error) \
	        break; \
 \
	if (bc->bc_flags & BC_F_COPYOUT) \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
} while (0)
1758
1759 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1760 interface_needs_input_broadcast(struct ifnet * ifp)
1761 {
1762 /*
1763 * Selectively enable input broadcast only when necessary.
1764 * The bridge interface itself attaches a fake protocol
1765 * so checking for at least two protocols means that the
1766 * interface is being used for something besides bridging
1767 * and needs to see broadcast packets from other members.
1768 */
1769 return if_get_protolist(ifp, NULL, 0) >= 2;
1770 }
1771
1772 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1773 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1774 {
1775 boolean_t old_input_broadcast;
1776
1777 old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1778 if (input_broadcast) {
1779 bif->bif_flags |= BIFF_INPUT_BROADCAST;
1780 } else {
1781 bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1782 }
1783 return old_input_broadcast != input_broadcast;
1784 }
1785
/*
 * bridge_ioctl:
 *
 *	Handle a control request from the operator.
 *
 *	`ifp' is the bridge interface, `cmd' the ioctl code and `data' the
 *	command-specific argument.  Returns 0 or an errno value.
 *
 *	The bridge lock must not be held by the caller; this is asserted on
 *	entry and again before returning.  Cases that touch bridge state
 *	take and release the lock themselves.
 */
static errno_t
bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct bridge_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct bridge_iflist *bif;
	int error = 0;

	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	/* trace the request: in/out bits, parameter length, group, number */
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
	    (char)IOCGROUP(cmd), cmd & 0xff);

	switch (cmd) {
	case SIOCSIFADDR:
	case SIOCAIFADDR:
		/* assigning an address also marks the interface up */
		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
		break;

	case SIOCGIFMEDIA32:
	case SIOCGIFMEDIA64: {
		struct ifmediareq *ifmr = (struct ifmediareq *)data;
		user_addr_t user_addr;

		/* the user list pointer lives in a width-specific layout */
		user_addr = (cmd == SIOCGIFMEDIA64) ?
		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);

		ifmr->ifm_status = IFM_AVALID;
		ifmr->ifm_mask = 0;
		ifmr->ifm_count = 1;

		/*
		 * Report active Ethernet/auto media only while the bridge is
		 * not detaching and its media is flagged active.
		 */
		BRIDGE_LOCK(sc);
		if (!(sc->sc_flags & SCF_DETACHING) &&
		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
			ifmr->ifm_status |= IFM_ACTIVE;
			ifmr->ifm_active = ifmr->ifm_current =
			    IFM_ETHER | IFM_AUTO;
		} else {
			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
		}
		BRIDGE_UNLOCK(sc);

		/* the media list has a single entry: the current media word */
		if (user_addr != USER_ADDR_NULL) {
			error = copyout(&ifmr->ifm_current, user_addr,
			    sizeof(int));
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* accepted; nothing to do */
		break;

	case SIOCSDRVSPEC32:
	case SIOCGDRVSPEC32: {
		/*
		 * The names args, ifd, bridge_control_table and bc are
		 * consumed by the DRVSPEC macro and must not be renamed.
		 */
		union {
			struct ifbreq ifbreq;
			struct ifbifconf32 ifbifconf;
			struct ifbareq32 ifbareq;
			struct ifbaconf32 ifbaconf;
			struct ifbrparam ifbrparam;
			struct ifbropreq32 ifbropreq;
		} args;
		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
		const struct bridge_control *bridge_control_table =
		    bridge_control_table32, *bc;

		DRVSPEC;

		break;
	}
	case SIOCSDRVSPEC64:
	case SIOCGDRVSPEC64: {
		/* same as the 32-bit case, with the 64-bit layouts and table */
		union {
			struct ifbreq ifbreq;
			struct ifbifconf64 ifbifconf;
			struct ifbareq64 ifbareq;
			struct ifbaconf64 ifbaconf;
			struct ifbrparam ifbrparam;
			struct ifbropreq64 ifbropreq;
		} args;
		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
		const struct bridge_control *bridge_control_table =
		    bridge_control_table64, *bc;

		DRVSPEC;

		break;
	}

	case SIOCSIFFLAGS:
		if (!(ifp->if_flags & IFF_UP) &&
		    (ifp->if_flags & IFF_RUNNING)) {
			/*
			 * If interface is marked down and it is running,
			 * then stop and disable it.
			 */
			BRIDGE_LOCK(sc);
			bridge_ifstop(ifp, 1);
			BRIDGE_UNLOCK(sc);
		} else if ((ifp->if_flags & IFF_UP) &&
		    !(ifp->if_flags & IFF_RUNNING)) {
			/*
			 * If interface is marked up and it is stopped, then
			 * start it.
			 */
			BRIDGE_LOCK(sc);
			error = bridge_init(ifp);
			BRIDGE_UNLOCK(sc);
		}
		break;

	case SIOCSIFLLADDR:
		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
		    ifr->ifr_addr.sa_len);
		if (error != 0) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
			    error);
		}
		break;

	case SIOCSIFMTU:
		/* values below 576 are refused outright */
		if (ifr->ifr_mtu < 576) {
			error = EINVAL;
			break;
		}
		BRIDGE_LOCK(sc);
		/* without members there is nothing to stay consistent with */
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
			BRIDGE_UNLOCK(sc);
			break;
		}
		/* otherwise every member must already use the requested MTU */
		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
				BRIDGE_LOG(LOG_NOTICE, 0,
				    "%s invalid MTU: %u(%s) != %d",
				    sc->sc_ifp->if_xname,
				    bif->bif_ifp->if_mtu,
				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
				error = EINVAL;
				break;
			}
		}
		if (!error) {
			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
		}
		BRIDGE_UNLOCK(sc);
		break;

	default:
		/* everything else is handed to the generic Ethernet handler */
		error = ether_ioctl(ifp, cmd, data);
		/* EOPNOTSUPP is returned to the caller but not logged */
		if (error != 0 && error != EOPNOTSUPP) {
			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
			    "ifp %s cmd 0x%08lx "
			    "(%c%c [%lu] %c %lu) failed error: %d",
			    ifp->if_xname, cmd,
			    (cmd & IOC_IN) ? 'I' : ' ',
			    (cmd & IOC_OUT) ? 'O' : ' ',
			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
			    cmd & 0xff, error);
		}
		break;
	}
	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	return error;
}
1963
1964 #if HAS_IF_CAP
1965 /*
1966 * bridge_mutecaps:
1967 *
1968 * Clear or restore unwanted capabilities on the member interface
1969 */
1970 static void
bridge_mutecaps(struct bridge_softc * sc)1971 bridge_mutecaps(struct bridge_softc *sc)
1972 {
1973 struct bridge_iflist *bif;
1974 int enabled, mask;
1975
1976 /* Initial bitmask of capabilities to test */
1977 mask = BRIDGE_IFCAPS_MASK;
1978
1979 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1980 /* Every member must support it or its disabled */
1981 mask &= bif->bif_savedcaps;
1982 }
1983
1984 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1985 enabled = bif->bif_ifp->if_capenable;
1986 enabled &= ~BRIDGE_IFCAPS_STRIP;
1987 /* strip off mask bits and enable them again if allowed */
1988 enabled &= ~BRIDGE_IFCAPS_MASK;
1989 enabled |= mask;
1990
1991 bridge_set_ifcap(sc, bif, enabled);
1992 }
1993 }
1994
1995 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)1996 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1997 {
1998 struct ifnet *ifp = bif->bif_ifp;
1999 struct ifreq ifr;
2000 int error;
2001
2002 bzero(&ifr, sizeof(ifr));
2003 ifr.ifr_reqcap = set;
2004
2005 if (ifp->if_capenable != set) {
2006 IFF_LOCKGIANT(ifp);
2007 error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2008 IFF_UNLOCKGIANT(ifp);
2009 if (error) {
2010 BRIDGE_LOG(LOG_NOTICE, 0,
2011 "%s error setting interface capabilities on %s",
2012 sc->sc_ifp->if_xname, ifp->if_xname);
2013 }
2014 }
2015 }
2016 #endif /* HAS_IF_CAP */
2017
2018 static errno_t
bridge_set_tso(struct bridge_softc * sc)2019 bridge_set_tso(struct bridge_softc *sc)
2020 {
2021 struct bridge_iflist *bif;
2022 u_int32_t tso_v4_mtu;
2023 u_int32_t tso_v6_mtu;
2024 ifnet_offload_t offload;
2025 errno_t error = 0;
2026
2027 /* By default, support TSO */
2028 offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2029 tso_v4_mtu = IP_MAXPACKET;
2030 tso_v6_mtu = IP_MAXPACKET;
2031
2032 /* Use the lowest common denominator of the members */
2033 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2034 ifnet_t ifp = bif->bif_ifp;
2035
2036 if (ifp == NULL) {
2037 continue;
2038 }
2039
2040 if (offload & IFNET_TSO_IPV4) {
2041 if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2042 if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2043 tso_v4_mtu = ifp->if_tso_v4_mtu;
2044 }
2045 } else {
2046 offload &= ~IFNET_TSO_IPV4;
2047 tso_v4_mtu = 0;
2048 }
2049 }
2050 if (offload & IFNET_TSO_IPV6) {
2051 if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2052 if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2053 tso_v6_mtu = ifp->if_tso_v6_mtu;
2054 }
2055 } else {
2056 offload &= ~IFNET_TSO_IPV6;
2057 tso_v6_mtu = 0;
2058 }
2059 }
2060 }
2061
2062 if (offload != sc->sc_ifp->if_hwassist) {
2063 error = ifnet_set_offload(sc->sc_ifp, offload);
2064 if (error != 0) {
2065 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2066 "ifnet_set_offload(%s, 0x%x) failed %d",
2067 sc->sc_ifp->if_xname, offload, error);
2068 goto done;
2069 }
2070 /*
2071 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2072 * as large as the interface MTU
2073 */
2074 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2075 if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2076 tso_v4_mtu = sc->sc_ifp->if_mtu;
2077 }
2078 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2079 tso_v4_mtu);
2080 if (error != 0) {
2081 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2082 "ifnet_set_tso_mtu(%s, "
2083 "AF_INET, %u) failed %d",
2084 sc->sc_ifp->if_xname,
2085 tso_v4_mtu, error);
2086 goto done;
2087 }
2088 }
2089 if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2090 if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2091 tso_v6_mtu = sc->sc_ifp->if_mtu;
2092 }
2093 error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2094 tso_v6_mtu);
2095 if (error != 0) {
2096 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2097 "ifnet_set_tso_mtu(%s, "
2098 "AF_INET6, %u) failed %d",
2099 sc->sc_ifp->if_xname,
2100 tso_v6_mtu, error);
2101 goto done;
2102 }
2103 }
2104 }
2105 done:
2106 return error;
2107 }
2108
2109 /*
2110 * bridge_lookup_member:
2111 *
2112 * Lookup a bridge member interface.
2113 */
2114 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)2115 bridge_lookup_member(struct bridge_softc *sc, const char *name)
2116 {
2117 struct bridge_iflist *bif;
2118 struct ifnet *ifp;
2119
2120 BRIDGE_LOCK_ASSERT_HELD(sc);
2121
2122 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2123 ifp = bif->bif_ifp;
2124 if (strcmp(ifp->if_xname, name) == 0) {
2125 return bif;
2126 }
2127 }
2128
2129 return NULL;
2130 }
2131
2132 /*
2133 * bridge_lookup_member_if:
2134 *
2135 * Lookup a bridge member interface by ifnet*.
2136 */
2137 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2138 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2139 {
2140 struct bridge_iflist *bif;
2141
2142 BRIDGE_LOCK_ASSERT_HELD(sc);
2143
2144 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2145 if (bif->bif_ifp == member_ifp) {
2146 return bif;
2147 }
2148 }
2149
2150 return NULL;
2151 }
2152
/*
 * bridge_iff_input:
 *
 *	Input callback of the interface filter the bridge installs on a
 *	member (registered as iff_input in bridge_ioctl_add).  `cookie' is
 *	the member's bridge_iflist, `ifp' the member interface, `*data' the
 *	received mbuf and `*frame_ptr' the start of its link-layer header.
 *
 *	The data pointer is temporarily moved back so that the mbuf starts
 *	at the link-layer header, the packet is handed to bridge_input(),
 *	and, when bridge_input() returns 0, the data pointer is moved
 *	forward again.
 *
 *	Returns 0 when the packet is left untouched or bridge_input()
 *	returns 0, EJUSTRETURN when mbuf_pullup() fails, and otherwise the
 *	value returned by bridge_input().
 */
static errno_t
bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
    mbuf_t *data, char **frame_ptr)
{
#pragma unused(protocol)
	errno_t error = 0;
	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
	struct bridge_softc *sc = bif->bif_sc;
	int included = 0;
	size_t frmlen = 0;
	mbuf_t m = *data;

	/*
	 * Packets carrying M_PROTO1 bypass the bridge entirely.
	 * NOTE(review): presumably the flag marks packets the bridge has
	 * already processed -- confirm against where M_PROTO1 is set.
	 */
	if ((m->m_flags & M_PROTO1)) {
		goto out;
	}

	/*
	 * The header can only be re-exposed when frame_ptr lies inside this
	 * mbuf, between the start of its buffer and the current data
	 * pointer; frmlen is then the number of bytes to step back.
	 */
	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
	    *frame_ptr <= (char *)mbuf_data(m)) {
		included = 1;
		frmlen = (char *)mbuf_data(m) - *frame_ptr;
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
	    "frmlen %lu", sc->sc_ifp->if_xname,
	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
	    included ? "inside" : "outside", frmlen);
	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
		brlog_mbuf(m, "bridge_iff_input[", "");
		brlog_ether_header((struct ether_header *)
		    (void *)*frame_ptr);
		brlog_mbuf_data(m, 0, 20);
	}
	if (included == 0) {
		/* header is not reachable from this mbuf: leave the packet alone */
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
		goto out;
	}

	/* Move data pointer to start of frame to the link layer header */
	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
	    mbuf_len(m) + frmlen);
	(void) mbuf_pkthdr_adjustlen(m, frmlen);

	/*
	 * make sure we can access the ethernet header
	 *
	 * NOTE(review): this exit returns 0 with the data pointer still
	 * moved back by frmlen (the restore below is skipped) -- confirm
	 * this is intended.
	 */
	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "short frame %lu < %lu",
		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
		goto out;
	}
	/* the header must be contiguous in the first mbuf */
	if (mbuf_len(m) < sizeof(struct ether_header)) {
		error = mbuf_pullup(data, sizeof(struct ether_header));
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
			    "mbuf_pullup(%lu) failed %d",
			    sizeof(struct ether_header),
			    error);
			/* report EJUSTRETURN instead of the pullup error */
			error = EJUSTRETURN;
			goto out;
		}
		/* pullup may have replaced the head mbuf; follow it */
		if (m != *data) {
			m = *data;
			*frame_ptr = mbuf_data(m);
		}
	}

	error = bridge_input(ifp, data);

	/* Adjust packet back to original */
	if (error == 0) {
		/* bridge_input might have modified *data */
		if (*data != m) {
			m = *data;
			*frame_ptr = mbuf_data(m);
		}
		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
		    mbuf_len(m) - frmlen);
		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
	}

	/*
	 * NOTE(review): m is logged here even when bridge_input() returned
	 * non-zero -- confirm the mbuf is still valid on that path.
	 */
	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
		brlog_mbuf(m, "bridge_iff_input]", "");
	}

out:
	BRIDGE_LOCK_ASSERT_NOTHELD(sc);

	return error;
}
2244
2245 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2246 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2247 mbuf_t *data)
2248 {
2249 #pragma unused(protocol)
2250 errno_t error = 0;
2251 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2252 struct bridge_softc *sc = bif->bif_sc;
2253 mbuf_t m = *data;
2254
2255 if ((m->m_flags & M_PROTO1)) {
2256 goto out;
2257 }
2258 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2259 "%s from %s m 0x%llx data 0x%llx",
2260 sc->sc_ifp->if_xname, ifp->if_xname,
2261 (uint64_t)VM_KERNEL_ADDRPERM(m),
2262 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2263
2264 error = bridge_member_output(sc, ifp, data);
2265 if (error != 0 && error != EJUSTRETURN) {
2266 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2267 "bridge_member_output failed error %d",
2268 error);
2269 }
2270 out:
2271 BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2272
2273 return error;
2274 }
2275
2276 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2277 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2278 const struct kev_msg *event_msg)
2279 {
2280 #pragma unused(protocol)
2281 struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2282 struct bridge_softc *sc = bif->bif_sc;
2283
2284 if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2285 event_msg->kev_class == KEV_NETWORK_CLASS &&
2286 event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2287 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2288 "%s event_code %u - %s",
2289 ifp->if_xname, event_msg->event_code,
2290 dlil_kev_dl_code_str(event_msg->event_code));
2291
2292 switch (event_msg->event_code) {
2293 case KEV_DL_LINK_OFF:
2294 case KEV_DL_LINK_ON: {
2295 bridge_iflinkevent(ifp);
2296 #if BRIDGESTP
2297 bstp_linkstate(ifp, event_msg->event_code);
2298 #endif /* BRIDGESTP */
2299 break;
2300 }
2301 case KEV_DL_SIFFLAGS: {
2302 if ((ifp->if_flags & IFF_UP) == 0) {
2303 break;
2304 }
2305 if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2306 errno_t error;
2307
2308 error = ifnet_set_promiscuous(ifp, 1);
2309 if (error != 0) {
2310 BRIDGE_LOG(LOG_NOTICE, 0,
2311 "ifnet_set_promiscuous (%s)"
2312 " failed %d", ifp->if_xname,
2313 error);
2314 } else {
2315 bif->bif_flags |= BIFF_PROMISC;
2316 }
2317 }
2318 if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2319 (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2320 errno_t error;
2321
2322 error = if_allmulti(ifp, 1);
2323 if (error != 0) {
2324 BRIDGE_LOG(LOG_NOTICE, 0,
2325 "if_allmulti (%s)"
2326 " failed %d", ifp->if_xname,
2327 error);
2328 } else {
2329 bif->bif_flags |= BIFF_ALL_MULTI;
2330 #ifdef XNU_PLATFORM_AppleTVOS
2331 ip6_forwarding = 1;
2332 #endif /* XNU_PLATFORM_AppleTVOS */
2333 }
2334 }
2335 break;
2336 }
2337 case KEV_DL_IFCAP_CHANGED: {
2338 BRIDGE_LOCK(sc);
2339 bridge_set_tso(sc);
2340 BRIDGE_UNLOCK(sc);
2341 break;
2342 }
2343 case KEV_DL_PROTO_DETACHED:
2344 case KEV_DL_PROTO_ATTACHED: {
2345 bridge_proto_attach_changed(ifp);
2346 break;
2347 }
2348 default:
2349 break;
2350 }
2351 }
2352 }
2353
2354 /*
2355 * bridge_iff_detached:
2356 *
2357 * Called when our interface filter has been detached from a
2358 * member interface.
2359 */
2360 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2361 bridge_iff_detached(void *cookie, ifnet_t ifp)
2362 {
2363 #pragma unused(cookie)
2364 struct bridge_iflist *bif;
2365 struct bridge_softc *sc = ifp->if_bridge;
2366
2367 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2368
2369 /* Check if the interface is a bridge member */
2370 if (sc != NULL) {
2371 BRIDGE_LOCK(sc);
2372 bif = bridge_lookup_member_if(sc, ifp);
2373 if (bif != NULL) {
2374 bridge_delete_member(sc, bif);
2375 }
2376 BRIDGE_UNLOCK(sc);
2377 return;
2378 }
2379 /* Check if the interface is a span port */
2380 lck_mtx_lock(&bridge_list_mtx);
2381 LIST_FOREACH(sc, &bridge_list, sc_list) {
2382 BRIDGE_LOCK(sc);
2383 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2384 if (ifp == bif->bif_ifp) {
2385 bridge_delete_span(sc, bif);
2386 break;
2387 }
2388 BRIDGE_UNLOCK(sc);
2389 }
2390 lck_mtx_unlock(&bridge_list_mtx);
2391 }
2392
2393 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2394 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2395 char *header)
2396 {
2397 #pragma unused(protocol, packet, header)
2398 BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2399 ifp->if_xname);
2400 return 0;
2401 }
2402
2403 static int
bridge_attach_protocol(struct ifnet * ifp)2404 bridge_attach_protocol(struct ifnet *ifp)
2405 {
2406 int error;
2407 struct ifnet_attach_proto_param reg;
2408
2409 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2410 bzero(®, sizeof(reg));
2411 reg.input = bridge_proto_input;
2412
2413 error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®);
2414 if (error) {
2415 BRIDGE_LOG(LOG_NOTICE, 0,
2416 "ifnet_attach_protocol(%s) failed, %d",
2417 ifp->if_xname, error);
2418 }
2419
2420 return error;
2421 }
2422
2423 static int
bridge_detach_protocol(struct ifnet * ifp)2424 bridge_detach_protocol(struct ifnet *ifp)
2425 {
2426 int error;
2427
2428 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2429 error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2430 if (error) {
2431 BRIDGE_LOG(LOG_NOTICE, 0,
2432 "ifnet_detach_protocol(%s) failed, %d",
2433 ifp->if_xname, error);
2434 }
2435
2436 return error;
2437 }
2438
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	Undoes what bridge_ioctl_add() set up for the member, guided by the
 *	BIFF_* flags recorded in bif_flags, and frees `bif'.
 *
 *	Locking: the bridge lock must be held on entry (asserted).  It is
 *	dropped for the second half of the function, which calls into the
 *	interface layer, and is taken again before returning, so the caller
 *	still holds it afterwards.  `bif' must not be used by the caller
 *	after this returns.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
{
#if SKYWALK
	boolean_t add_netagent = FALSE;
#endif /* SKYWALK */
	uint32_t bif_flags;
	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
	int lladdr_changed = 0, error;
	uint8_t eaddr[ETHER_ADDR_LEN];
	u_int32_t event_code = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);
	VERIFY(ifs != NULL);

	/*
	 * Remove the member from the list first so it cannot be found anymore
	 * when we release the bridge lock below
	 */
	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
		BRIDGE_XLOCK(sc);
		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
		BRIDGE_XDROP(sc);
	}
	/*
	 * MAC NAT: turn it off when this member is the MAC NAT interface,
	 * otherwise just flush the entries belonging to this member.
	 */
	if (sc->sc_mac_nat_bif != NULL) {
		if (bif == sc->sc_mac_nat_bif) {
			bridge_mac_nat_disable(sc);
		} else {
			bridge_mac_nat_flush_entries(sc, bif);
		}
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
		bstp_disable(&bif->bif_stp);
	}
#endif /* BRIDGESTP */

	/*
	 * If removing the interface that gave the bridge its mac address, set
	 * the mac address of the bridge to the address of the next member, or
	 * to its default address if no members are left.
	 *
	 * The new address is only staged in eaddr here; it is applied with
	 * ifnet_set_lladdr() after the bridge lock has been dropped.
	 */
	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
		ifnet_release(sc->sc_ifaddr);
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = NULL;
		} else {
			struct ifnet *fif =
			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = fif;
			ifnet_reference(fif);   /* for sc_ifaddr */
		}
		lladdr_changed = 1;
	}

#if HAS_IF_CAP
	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
#endif /* HAS_IF_CAP */

	/* the remaining members may allow a different TSO configuration */
	error = bridge_set_tso(sc);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
	}

	/* drop every forwarding entry that points at this interface */
	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);

	KASSERT(bif->bif_addrcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));

	/*
	 * Update link status of the bridge based on its remaining members
	 */
	event_code = bridge_updatelinkstatus(sc);
	/* snapshot the flags while the lock is still held */
	bif_flags = bif->bif_flags;
	BRIDGE_UNLOCK(sc);

	/*
	 * only perform these steps if the interface is still attached
	 *
	 * ifnet_is_attached(ifs, 1) is paired with the
	 * ifnet_decr_iorefcnt() at the end of this block.
	 */
	if (ifnet_is_attached(ifs, 1)) {
#if SKYWALK
		/* remember whether the netagent has to be put back below */
		add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;

		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
			ifnet_detach_flowswitch_nexus(ifs);
		}
#endif /* SKYWALK */
		/* disable promiscuous mode */
		if ((bif_flags & BIFF_PROMISC) != 0) {
			(void) ifnet_set_promiscuous(ifs, 0);
		}
		/* disable all multi */
		if ((bif_flags & BIFF_ALL_MULTI) != 0) {
			(void)if_allmulti(ifs, 0);
		}
#if HAS_IF_CAP
		/* re-enable any interface capabilities */
		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
#endif
		/* detach bridge "protocol" */
		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
			(void)bridge_detach_protocol(ifs);
		}
		/* detach interface filter */
		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
			iflt_detach(bif->bif_iff_ref);
		}
		ifnet_decr_iorefcnt(ifs);
	}

	/* apply the link-layer address chosen above, if it changed */
	if (lladdr_changed &&
	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
	}

	/* announce a link status change of the bridge interface, if any */
	if (event_code != 0) {
		bridge_link_event(bifp, event_code);
	}

#if BRIDGESTP
	bstp_destroy(&bif->bif_stp);    /* prepare to free */
#endif /* BRIDGESTP */

	/* bif is gone after this point; only ifs may still be used */
	kfree_type(struct bridge_iflist, bif);
	ifs->if_bridge = NULL;
#if SKYWALK
	if (add_netagent && ifnet_is_attached(ifs, 1)) {
		(void)ifnet_add_netagent(ifs);
		ifnet_decr_iorefcnt(ifs);
	}
#endif /* SKYWALK */

	/* matches the ifnet_reference() taken when the member was created */
	ifnet_release(ifs);

	/* callers expect the bridge lock to be held on return */
	BRIDGE_LOCK(sc);
}
2580
2581 /*
2582 * bridge_delete_span:
2583 *
2584 * Delete the specified span interface.
2585 */
2586 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2587 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2588 {
2589 BRIDGE_LOCK_ASSERT_HELD(sc);
2590
2591 KASSERT(bif->bif_ifp->if_bridge == NULL,
2592 ("%s: not a span interface", __func__));
2593
2594 ifnet_release(bif->bif_ifp);
2595
2596 TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2597 kfree_type(struct bridge_iflist, bif);
2598 }
2599
/*
 * bridge_ioctl_add:
 *
 *	Add the interface named in the ifbreq `arg' as a member of the
 *	bridge `sc'.
 *
 *	Returns 0 on success, or an errno value: ENOENT (no such interface),
 *	EINVAL (unsupported interface or MTU mismatch), EBUSY (span port,
 *	member of another bridge, or MAC NAT already in use), EEXIST
 *	(already a member of this bridge), ENXIO (interface not attached),
 *	or the error of one of the attach steps.
 *
 *	Locking: the function releases the bridge lock with BRIDGE_UNLOCK()
 *	around the calls into the interface layer and re-acquires it, so it
 *	is entered and left with the lock held.
 */
static int
bridge_ioctl_add(struct bridge_softc *sc, void *arg)
{
	struct ifbreq *req = arg;
	struct bridge_iflist *bif = NULL;
	struct ifnet *ifs, *bifp = sc->sc_ifp;
	int error = 0, lladdr_changed = 0;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct iff_filter iff;
	u_int32_t event_code = 0;
	boolean_t input_broadcast;
	boolean_t wifi_infra = FALSE;
	int media_active;

	ifs = ifunit(req->ifbr_ifsname);
	if (ifs == NULL) {
		return ENOENT;
	}
	if (ifs->if_ioctl == NULL) {    /* must be supported */
		return EINVAL;
	}

	/* these interface classes are never bridged */
	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
		return EINVAL;
	}

	/* If it's in the span list, it can't be a member. */
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
		if (ifs == bif->bif_ifp) {
			return EBUSY;
		}
	}

	/* already a member of this bridge */
	if (ifs->if_bridge == sc) {
		return EEXIST;
	}

	/* already a member of some other bridge */
	if (ifs->if_bridge != NULL) {
		return EBUSY;
	}

	switch (ifs->if_type) {
	case IFT_ETHER:
		if (strcmp(ifs->if_name, "en") == 0 &&
		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
			/* XXX is there a better way to identify Wi-Fi STA? */
			wifi_infra = TRUE;
		}
		break;
	case IFT_L2VLAN:
	case IFT_IEEE8023ADLAG:
		break;
	case IFT_GIF:
	/* currently not supported */
	/* FALLTHRU */
	default:
		return EINVAL;
	}

	/* fail to add the interface if the MTU doesn't match */
	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
		    sc->sc_ifp->if_xname,
		    ifs->if_xname);
		return EINVAL;
	}

	/* there's already an interface that's doing MAC NAT */
	if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
		return EBUSY;
	}

	/* prevent the interface from detaching while we add the member */
	if (!ifnet_is_attached(ifs, 1)) {
		return ENXIO;
	}

	/* allocate a new member */
	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	bif->bif_ifp = ifs;
	ifnet_reference(ifs);   /* released in bridge_delete_member() */
	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
#if HAS_IF_CAP
	bif->bif_savedcaps = ifs->if_capenable;
#endif /* HAS_IF_CAP */
	bif->bif_sc = sc;
	/* a Wi-Fi infrastructure member is bridged through MAC NAT */
	if (wifi_infra) {
		(void)bridge_mac_nat_enable(sc, bif);
	}

	if (IFNET_IS_VMNET(ifs)) {
		allocate_vmnet_pf_tags();
	}
	/* Allow the first Ethernet member to define the MTU */
	if (TAILQ_EMPTY(&sc->sc_iflist)) {
		sc->sc_ifp->if_mtu = ifs->if_mtu;
	}

	/*
	 * Assign the interface's MAC address to the bridge if it's the first
	 * member and the MAC address of the bridge has not been changed from
	 * the default (randomly) generated one.
	 *
	 * The address is only staged in eaddr here and applied with
	 * ifnet_set_lladdr() once the bridge lock has been dropped.
	 */
	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
		sc->sc_ifaddr = ifs;
		ifnet_reference(ifs);   /* for sc_ifaddr */
		lladdr_changed = 1;
	}

	ifs->if_bridge = sc;
#if BRIDGESTP
	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
#endif /* BRIDGESTP */

#if HAS_IF_CAP
	/* Set interface capabilities to the intersection set of all members */
	bridge_mutecaps(sc);
#endif /* HAS_IF_CAP */


	/*
	 * Respect lock ordering with DLIL lock for the following operations
	 *
	 * Every error exit below re-takes the bridge lock before jumping to
	 * out, because the cleanup there expects it to be held.
	 */
	BRIDGE_UNLOCK(sc);

	/* enable promiscuous mode */
	error = ifnet_set_promiscuous(ifs, 1);
	switch (error) {
	case 0:
		bif->bif_flags |= BIFF_PROMISC;
		break;
	case ENETDOWN:
	case EPWROFF:
		BRIDGE_LOG(LOG_NOTICE, 0,
		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
		    ifs->if_xname, error);
		/* Ignore error when device is not up */
		error = 0;
		break;
	default:
		BRIDGE_LOG(LOG_NOTICE, 0,
		    "ifnet_set_promiscuous(%s) failed %d",
		    ifs->if_xname, error);
		BRIDGE_LOCK(sc);
		goto out;
	}
	if (wifi_infra) {
		int this_error;

		/* Wi-Fi doesn't really support promiscuous, set allmulti */
		bif->bif_flags |= BIFF_WIFI_INFRA;
		this_error = if_allmulti(ifs, 1);
		if (this_error == 0) {
			bif->bif_flags |= BIFF_ALL_MULTI;
#ifdef XNU_PLATFORM_AppleTVOS
			ip6_forwarding = 1;
#endif /* XNU_PLATFORM_AppleTVOS */
		} else {
			/* not fatal: kept out of `error' on purpose */
			BRIDGE_LOG(LOG_NOTICE, 0,
			    "if_allmulti(%s) failed %d, ignoring",
			    ifs->if_xname, this_error);
		}
	}
#if SKYWALK
	/* ensure that the flowswitch is present for native interface */
	if (SKYWALK_NATIVE(ifs)) {
		if (ifnet_attach_flowswitch_nexus(ifs)) {
			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
		}
	}
	/* remove the netagent on the flowswitch (rdar://75050182) */
	if (if_is_fsw_netagent_enabled()) {
		(void)ifnet_remove_netagent(ifs);
		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
	}
#endif /* SKYWALK */

	/*
	 * install an interface filter
	 */
	memset(&iff, 0, sizeof(struct iff_filter));
	iff.iff_cookie = bif;
	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
	iff.iff_input = bridge_iff_input;
	iff.iff_output = bridge_iff_output;
	iff.iff_event = bridge_iff_event;
	iff.iff_detached = bridge_iff_detached;
	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
		BRIDGE_LOCK(sc);
		goto out;
	}
	bif->bif_flags |= BIFF_FILTER_ATTACHED;

	/*
	 * install a dummy "bridge" protocol
	 *
	 * (the inner error check repeats the outer condition)
	 */
	if ((error = bridge_attach_protocol(ifs)) != 0) {
		if (error != 0) {
			BRIDGE_LOG(LOG_NOTICE, 0,
			    "bridge_attach_protocol failed %d", error);
			BRIDGE_LOCK(sc);
			goto out;
		}
	}
	bif->bif_flags |= BIFF_PROTO_ATTACHED;

	/*
	 * NOTE(review): a failure here is logged but leaves `error'
	 * non-zero, so the member is still inserted below and then removed
	 * again at out: -- confirm this is intended.
	 */
	if (lladdr_changed &&
	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
	}

	/* sample the link state before re-taking the bridge lock */
	media_active = interface_media_active(ifs);

	/*
	 * No failures past this point. Add the member to the list.
	 */
	BRIDGE_LOCK(sc);
	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
	BRIDGE_XLOCK(sc);
	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
	BRIDGE_XDROP(sc);

	/* cache the member link status */
	if (media_active != 0) {
		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
	} else {
		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
	}

	/* the new member may change the link status of the bridge interface */
	event_code = bridge_updatelinkstatus(sc);

	/* check whether we need input broadcast or not */
	input_broadcast = interface_needs_input_broadcast(ifs);
	bif_set_input_broadcast(bif, input_broadcast);
	BRIDGE_UNLOCK(sc);

	/* the link event is posted without the bridge lock held */
	if (event_code != 0) {
		bridge_link_event(bifp, event_code);
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
	    "%s input broadcast %s", ifs->if_xname,
	    input_broadcast ? "ENABLED" : "DISABLED");

	BRIDGE_LOCK(sc);
	/* result deliberately not propagated into `error' */
	bridge_set_tso(sc);

out:
	/* allow the interface to detach */
	ifnet_decr_iorefcnt(ifs);

	if (error != 0) {
		/* undo whatever was set up; bridge_delete_member frees bif */
		if (bif != NULL) {
			bridge_delete_member(sc, bif);
		}
	} else if (IFNET_IS_VMNET(ifs)) {
		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
	}

	return error;
}
2867
2868 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2869 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2870 {
2871 struct ifbreq *req = arg;
2872 struct bridge_iflist *bif;
2873
2874 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2875 if (bif == NULL) {
2876 return ENOENT;
2877 }
2878
2879 bridge_delete_member(sc, bif);
2880
2881 return 0;
2882 }
2883
/*
 * bridge_ioctl_purge:
 *
 *	Placeholder command handler: ignores both arguments and always
 *	reports success.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)
	/* nothing to do */
	return 0;
}
2890
2891 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2892 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2893 {
2894 struct ifbreq *req = arg;
2895 struct bridge_iflist *bif;
2896
2897 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2898 if (bif == NULL) {
2899 return ENOENT;
2900 }
2901
2902 struct bstp_port *bp;
2903
2904 bp = &bif->bif_stp;
2905 req->ifbr_state = bp->bp_state;
2906 req->ifbr_priority = bp->bp_priority;
2907 req->ifbr_path_cost = bp->bp_path_cost;
2908 req->ifbr_proto = bp->bp_protover;
2909 req->ifbr_role = bp->bp_role;
2910 req->ifbr_stpflags = bp->bp_flags;
2911 req->ifbr_ifsflags = bif->bif_ifflags;
2912
2913 /* Copy STP state options as flags */
2914 if (bp->bp_operedge) {
2915 req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2916 }
2917 if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2918 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2919 }
2920 if (bp->bp_ptp_link) {
2921 req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2922 }
2923 if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2924 req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2925 }
2926 if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2927 req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
2928 }
2929 if (bp->bp_flags & BSTP_PORT_ADMCOST) {
2930 req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
2931 }
2932
2933 req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
2934 req->ifbr_addrcnt = bif->bif_addrcnt;
2935 req->ifbr_addrmax = bif->bif_addrmax;
2936 req->ifbr_addrexceeded = bif->bif_addrexceeded;
2937
2938 return 0;
2939 }
2940
2941 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)2942 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
2943 {
2944 struct ifbreq *req = arg;
2945 struct bridge_iflist *bif;
2946 #if BRIDGESTP
2947 struct bstp_port *bp;
2948 int error;
2949 #endif /* BRIDGESTP */
2950
2951 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2952 if (bif == NULL) {
2953 return ENOENT;
2954 }
2955
2956 if (req->ifbr_ifsflags & IFBIF_SPAN) {
2957 /* SPAN is readonly */
2958 return EINVAL;
2959 }
2960 #define _EXCLUSIVE_FLAGS (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
2961 if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
2962 /* can't specify both MAC-NAT and checksum offload */
2963 return EINVAL;
2964 }
2965 if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
2966 errno_t error;
2967
2968 error = bridge_mac_nat_enable(sc, bif);
2969 if (error != 0) {
2970 return error;
2971 }
2972 } else if (sc->sc_mac_nat_bif == bif) {
2973 bridge_mac_nat_disable(sc);
2974 }
2975
2976
2977 #if BRIDGESTP
2978 if (req->ifbr_ifsflags & IFBIF_STP) {
2979 if ((bif->bif_ifflags & IFBIF_STP) == 0) {
2980 error = bstp_enable(&bif->bif_stp);
2981 if (error) {
2982 return error;
2983 }
2984 }
2985 } else {
2986 if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2987 bstp_disable(&bif->bif_stp);
2988 }
2989 }
2990
2991 /* Pass on STP flags */
2992 bp = &bif->bif_stp;
2993 bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
2994 bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
2995 bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
2996 bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
2997 #else /* !BRIDGESTP */
2998 if (req->ifbr_ifsflags & IFBIF_STP) {
2999 return EOPNOTSUPP;
3000 }
3001 #endif /* !BRIDGESTP */
3002
3003 /* Save the bits relating to the bridge */
3004 bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
3005
3006
3007 return 0;
3008 }
3009
3010 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)3011 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
3012 {
3013 struct ifbrparam *param = arg;
3014
3015 sc->sc_brtmax = param->ifbrp_csize;
3016 bridge_rttrim(sc);
3017 return 0;
3018 }
3019
3020 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)3021 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
3022 {
3023 struct ifbrparam *param = arg;
3024
3025 param->ifbrp_csize = sc->sc_brtmax;
3026
3027 return 0;
3028 }
3029
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32()/bridge_ioctl_gifs64().  Expects
 *	`sc', `bifc' and `error' to be in scope in the expanding function.
 *
 *	Counts the member and span interfaces.  If the caller's ifbic_len is
 *	zero, the required buffer size is stored there and the enclosing
 *	function returns 0.  Otherwise one struct ifbreq per interface is
 *	built in a temporary kernel buffer, as many whole entries as fit in
 *	the caller's length are copied out to ifbic_req, and ifbic_len is set
 *	to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and the copyout and
 *	re-taken afterwards; the fill loops are bounded by the size computed
 *	before the allocation.  If the allocation fails the enclosing
 *	function returns ENOMEM (with the lock re-taken).
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif; \
	struct ifbreq breq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) \
	        count++; \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) \
	        count++; \
	\
	buflen = sizeof (breq) * count; \
	if (bifc->ifbic_len == 0) { \
	        bifc->ifbic_len = buflen; \
	        return (0); \
	} \
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* a zero-length request legitimately has no buffer */ \
	if (outbuf == NULL && buflen != 0) \
	        return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bifc->ifbic_len, buflen); \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
	\
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        /* Fill in the ifbreq structure */ \
	        error = bridge_ioctl_gifflags(sc, &breq); \
	        if (error) \
	                break; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
	/* span entries must not inherit the last member's fields */ \
	bzero(&breq, sizeof (breq)); \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) { \
	        if (len < sizeof (breq)) \
	                break; \
	\
	        snprintf(breq.ifbr_ifsname, \
	            sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname); \
	        breq.ifbr_ifsflags = bif->bif_ifflags; \
	        breq.ifbr_portno \
	                = bif->bif_ifp->if_index & 0xfff; \
	        memcpy(buf, &breq, sizeof (breq)); \
	        count++; \
	        buf += sizeof (breq); \
	        len -= sizeof (breq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifc->ifbic_len = sizeof (breq) * count; \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
} while (0)
3092
/*
 * bridge_ioctl_gifs64:
 *
 *	Interface-list ioctl for a struct ifbifconf64 request.  The work is
 *	done by the BRIDGE_IOCTL_GIFS macro, which refers to the locals
 *	`sc', `bifc' and `error' by name and may return from this function
 *	directly.
 */
static int
bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
{
	struct ifbifconf64 *bifc = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFS;

	return error;
}
3103
/*
 * bridge_ioctl_gifs32:
 *
 *	Interface-list ioctl for a struct ifbifconf32 request.  The work is
 *	done by the BRIDGE_IOCTL_GIFS macro, which refers to the locals
 *	`sc', `bifc' and `error' by name and may return from this function
 *	directly.
 */
static int
bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
{
	struct ifbifconf32 *bifc = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFS;

	return error;
}
3114
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32()/bridge_ioctl_rts64().  Expects
 *	`sc', `bac', `bareq' and `error' to be in scope in the expanding
 *	function, and always returns from that function.
 *
 *	A zero ifbac_len returns 0 immediately.  Otherwise the entries on
 *	sc_rtlist are counted, a temporary kernel buffer is allocated with
 *	the bridge lock dropped, and one `bareq' record per entry is built
 *	for as many whole records as fit in the caller's length.  For
 *	dynamic entries ifba_expire is the time remaining relative to
 *	net_uptime(); for all other entries, and for dynamic entries that
 *	have already expired, it is 0.  ifbac_len is set to the number of
 *	bytes produced and the buffer is copied out to ifbac_req.
 *
 *	If the allocation fails the enclosing function returns ENOMEM
 *	(with the lock re-taken).
 */
#define BRIDGE_IOCTL_RTS do { \
	struct bridge_rtnode *brt; \
	char *buf; \
	char *outbuf = NULL; \
	unsigned int count, buflen, len; \
	unsigned long now; \
	\
	if (bac->ifbac_len == 0) \
	        return (0); \
	\
	bzero(&bareq, sizeof (bareq)); \
	count = 0; \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) \
	        count++; \
	buflen = sizeof (bareq) * count; \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* an empty table legitimately has no buffer */ \
	if (outbuf == NULL && buflen != 0) \
	        return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bac->ifbac_len, buflen); \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \
	        if (len < sizeof (bareq)) \
	                goto out; \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \
	            "%s", brt->brt_ifp->if_xname); \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan; \
	        /* reset first so an expired entry never reports stale time */ \
	        bareq.ifba_expire = 0; \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \
	                now = (unsigned long) net_uptime(); \
	                if (now < brt->brt_expire) \
	                        bareq.ifba_expire = \
	                            brt->brt_expire - now; \
	        } \
	        bareq.ifba_flags = brt->brt_flags; \
	        \
	        memcpy(buf, &bareq, sizeof (bareq)); \
	        count++; \
	        buf += sizeof (bareq); \
	        len -= sizeof (bareq); \
	} \
out: \
	bac->ifbac_len = sizeof (bareq) * count; \
	if (outbuf != NULL) { \
	        BRIDGE_UNLOCK(sc); \
	        error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); \
	        kfree_data(outbuf, buflen); \
	        BRIDGE_LOCK(sc); \
	} \
	return (error); \
} while (0)
3169
/*
 * bridge_ioctl_rts64:
 *
 *	Address-table dump ioctl for a struct ifbaconf64 request, producing
 *	struct ifbareq64 records.  The work is done by the BRIDGE_IOCTL_RTS
 *	macro, which refers to the locals `sc', `bac', `bareq' and `error'
 *	by name.
 */
static int
bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
{
	struct ifbaconf64 *bac = arg;
	struct ifbareq64 bareq;
	int error = 0;

	BRIDGE_IOCTL_RTS;
	/* the macro itself returns; this keeps the function well-formed */
	return error;
}
3180
/*
 * bridge_ioctl_rts32:
 *
 *	Address-table dump ioctl for a struct ifbaconf32 request, producing
 *	struct ifbareq32 records.  The work is done by the BRIDGE_IOCTL_RTS
 *	macro, which refers to the locals `sc', `bac', `bareq' and `error'
 *	by name.
 */
static int
bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
{
	struct ifbaconf32 *bac = arg;
	struct ifbareq32 bareq;
	int error = 0;

	BRIDGE_IOCTL_RTS;
	/* the macro itself returns; this keeps the function well-formed */
	return error;
}
3191
3192 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3193 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3194 {
3195 struct ifbareq32 *req = arg;
3196 struct bridge_iflist *bif;
3197 int error;
3198
3199 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3200 if (bif == NULL) {
3201 return ENOENT;
3202 }
3203
3204 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3205 req->ifba_flags);
3206
3207 return error;
3208 }
3209
3210 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3211 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3212 {
3213 struct ifbareq64 *req = arg;
3214 struct bridge_iflist *bif;
3215 int error;
3216
3217 bif = bridge_lookup_member(sc, req->ifba_ifsname);
3218 if (bif == NULL) {
3219 return ENOENT;
3220 }
3221
3222 error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3223 req->ifba_flags);
3224
3225 return error;
3226 }
3227
3228 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3229 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3230 {
3231 struct ifbrparam *param = arg;
3232
3233 sc->sc_brttimeout = param->ifbrp_ctime;
3234 return 0;
3235 }
3236
3237 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3238 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3239 {
3240 struct ifbrparam *param = arg;
3241
3242 param->ifbrp_ctime = sc->sc_brttimeout;
3243 return 0;
3244 }
3245
3246 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3247 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3248 {
3249 struct ifbareq32 *req = arg;
3250
3251 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3252 }
3253
3254 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3255 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3256 {
3257 struct ifbareq64 *req = arg;
3258
3259 return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3260 }
3261
3262 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3263 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3264 {
3265 struct ifbreq *req = arg;
3266
3267 bridge_rtflush(sc, req->ifbr_ifsflags);
3268 return 0;
3269 }
3270
3271 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3272 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3273 {
3274 struct ifbrparam *param = arg;
3275 struct bstp_state *bs = &sc->sc_stp;
3276
3277 param->ifbrp_prio = bs->bs_bridge_priority;
3278 return 0;
3279 }
3280
3281 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)3282 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3283 {
3284 #if BRIDGESTP
3285 struct ifbrparam *param = arg;
3286
3287 return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3288 #else /* !BRIDGESTP */
3289 #pragma unused(sc, arg)
3290 return EOPNOTSUPP;
3291 #endif /* !BRIDGESTP */
3292 }
3293
3294 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3295 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3296 {
3297 struct ifbrparam *param = arg;
3298 struct bstp_state *bs = &sc->sc_stp;
3299
3300 param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3301 return 0;
3302 }
3303
3304 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)3305 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3306 {
3307 #if BRIDGESTP
3308 struct ifbrparam *param = arg;
3309
3310 return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3311 #else /* !BRIDGESTP */
3312 #pragma unused(sc, arg)
3313 return EOPNOTSUPP;
3314 #endif /* !BRIDGESTP */
3315 }
3316
3317 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3318 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3319 {
3320 struct ifbrparam *param;
3321 struct bstp_state *bs;
3322
3323 param = arg;
3324 bs = &sc->sc_stp;
3325 param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3326 return 0;
3327 }
3328
3329 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)3330 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3331 {
3332 #if BRIDGESTP
3333 struct ifbrparam *param = arg;
3334
3335 return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3336 #else /* !BRIDGESTP */
3337 #pragma unused(sc, arg)
3338 return EOPNOTSUPP;
3339 #endif /* !BRIDGESTP */
3340 }
3341
3342 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3343 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3344 {
3345 struct ifbrparam *param;
3346 struct bstp_state *bs;
3347
3348 param = arg;
3349 bs = &sc->sc_stp;
3350 param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3351 return 0;
3352 }
3353
3354 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)3355 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3356 {
3357 #if BRIDGESTP
3358 struct ifbrparam *param = arg;
3359
3360 return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3361 #else /* !BRIDGESTP */
3362 #pragma unused(sc, arg)
3363 return EOPNOTSUPP;
3364 #endif /* !BRIDGESTP */
3365 }
3366
3367 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * arg)3368 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
3369 {
3370 #if BRIDGESTP
3371 struct ifbreq *req = arg;
3372 struct bridge_iflist *bif;
3373
3374 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3375 if (bif == NULL) {
3376 return ENOENT;
3377 }
3378
3379 return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3380 #else /* !BRIDGESTP */
3381 #pragma unused(sc, arg)
3382 return EOPNOTSUPP;
3383 #endif /* !BRIDGESTP */
3384 }
3385
3386 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * arg)3387 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
3388 {
3389 #if BRIDGESTP
3390 struct ifbreq *req = arg;
3391 struct bridge_iflist *bif;
3392
3393 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3394 if (bif == NULL) {
3395 return ENOENT;
3396 }
3397
3398 return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3399 #else /* !BRIDGESTP */
3400 #pragma unused(sc, arg)
3401 return EOPNOTSUPP;
3402 #endif /* !BRIDGESTP */
3403 }
3404
3405 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3406 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3407 {
3408 struct ifbrparam *param = arg;
3409
3410 param->ifbrp_filter = sc->sc_filter_flags;
3411
3412 return 0;
3413 }
3414
3415 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3416 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3417 {
3418 struct ifbrparam *param = arg;
3419
3420 if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3421 return EINVAL;
3422 }
3423
3424 if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3425 return EINVAL;
3426 }
3427
3428 sc->sc_filter_flags = param->ifbrp_filter;
3429
3430 return 0;
3431 }
3432
3433 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3434 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3435 {
3436 struct ifbreq *req = arg;
3437 struct bridge_iflist *bif;
3438
3439 bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3440 if (bif == NULL) {
3441 return ENOENT;
3442 }
3443
3444 bif->bif_addrmax = req->ifbr_addrmax;
3445 return 0;
3446 }
3447
3448 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3449 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3450 {
3451 struct ifbreq *req = arg;
3452 struct bridge_iflist *bif = NULL;
3453 struct ifnet *ifs;
3454
3455 ifs = ifunit(req->ifbr_ifsname);
3456 if (ifs == NULL) {
3457 return ENOENT;
3458 }
3459
3460 if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3461 return EINVAL;
3462 }
3463
3464 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3465 if (ifs == bif->bif_ifp) {
3466 return EBUSY;
3467 }
3468
3469 if (ifs->if_bridge != NULL) {
3470 return EBUSY;
3471 }
3472
3473 switch (ifs->if_type) {
3474 case IFT_ETHER:
3475 case IFT_L2VLAN:
3476 case IFT_IEEE8023ADLAG:
3477 break;
3478 case IFT_GIF:
3479 /* currently not supported */
3480 /* FALLTHRU */
3481 default:
3482 return EINVAL;
3483 }
3484
3485 bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3486
3487 bif->bif_ifp = ifs;
3488 bif->bif_ifflags = IFBIF_SPAN;
3489
3490 ifnet_reference(bif->bif_ifp);
3491
3492 TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3493
3494 return 0;
3495 }
3496
3497 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3498 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3499 {
3500 struct ifbreq *req = arg;
3501 struct bridge_iflist *bif;
3502 struct ifnet *ifs;
3503
3504 ifs = ifunit(req->ifbr_ifsname);
3505 if (ifs == NULL) {
3506 return ENOENT;
3507 }
3508
3509 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3510 if (ifs == bif->bif_ifp) {
3511 break;
3512 }
3513
3514 if (bif == NULL) {
3515 return ENOENT;
3516 }
3517
3518 bridge_delete_span(sc, bif);
3519
3520 return 0;
3521 }
3522
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32()/bridge_ioctl_gbparam64().
 *	Expects `sc' and `req' to be in scope in the expanding function.
 *
 *	Copies the bridge-wide spanning-tree parameters from sc->sc_stp
 *	into *req.  The three timer values are reported shifted right by
 *	eight bits; the root port is reported as its interface index, or 0
 *	when there is no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do { \
	struct bstp_state *stp = &sc->sc_stp; \
	struct bstp_port *rport = stp->bs_root_port; \
	\
	req->ifbop_maxage = stp->bs_bridge_max_age >> 8; \
	req->ifbop_hellotime = stp->bs_bridge_htime >> 8; \
	req->ifbop_fwddelay = stp->bs_bridge_fdelay >> 8; \
	\
	if (rport != NULL) \
	        req->ifbop_root_port = rport->bp_ifp->if_index; \
	else \
	        req->ifbop_root_port = 0; \
	\
	req->ifbop_holdcount = stp->bs_txholdcount; \
	req->ifbop_priority = stp->bs_bridge_priority; \
	req->ifbop_protocol = stp->bs_protover; \
	req->ifbop_root_path_cost = stp->bs_root_pv.pv_cost; \
	req->ifbop_bridgeid = stp->bs_bridge_pv.pv_dbridge_id; \
	req->ifbop_designated_root = stp->bs_root_pv.pv_root_id; \
	req->ifbop_designated_bridge = stp->bs_root_pv.pv_dbridge_id; \
	req->ifbop_last_tc_time.tv_sec = stp->bs_last_tc_time.tv_sec; \
	req->ifbop_last_tc_time.tv_usec = stp->bs_last_tc_time.tv_usec; \
} while (0)
3547
/*
 * bridge_ioctl_gbparam32:
 *
 *	Report the bridge-wide spanning-tree parameters in a struct
 *	ifbropreq32.  The BRIDGE_IOCTL_GBPARAM macro does the copying and
 *	refers to the locals `sc' and `req' by name.
 */
static int
bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
{
	struct ifbropreq32 *req = arg;

	BRIDGE_IOCTL_GBPARAM;
	return 0;
}
3556
/*
 * bridge_ioctl_gbparam64:
 *
 *	Report the bridge-wide spanning-tree parameters in a struct
 *	ifbropreq64.  The BRIDGE_IOCTL_GBPARAM macro does the copying and
 *	refers to the locals `sc' and `req' by name.
 */
static int
bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
{
	struct ifbropreq64 *req = arg;

	BRIDGE_IOCTL_GBPARAM;
	return 0;
}
3565
3566 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3567 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3568 {
3569 struct ifbrparam *param = arg;
3570
3571 param->ifbrp_cexceeded = sc->sc_brtexceeded;
3572 return 0;
3573 }
3574
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of bridge_ioctl_gifsstp32()/bridge_ioctl_gifsstp64().
 *	Expects `sc', `bifstp' and `error' to be in scope in the expanding
 *	function, and always returns from that function.
 *
 *	Counts the members that have IFBIF_STP set.  If the caller's
 *	ifbpstp_len is zero, the required buffer size is stored there and 0
 *	is returned.  Otherwise one struct ifbpstpreq per STP member is
 *	built in a temporary kernel buffer, as many whole entries as fit in
 *	the caller's length are copied out to ifbpstp_req, and ifbpstp_len
 *	is set to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and the copyout and
 *	re-taken afterwards.  If the allocation fails, ENOMEM is returned
 *	(with the lock re-taken).
 */
#define BRIDGE_IOCTL_GIFSSTP do { \
	struct bridge_iflist *bif; \
	struct bstp_port *bp; \
	struct ifbpstpreq bpreq; \
	char *buf, *outbuf; \
	unsigned int count, buflen, len; \
	\
	count = 0; \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0) \
	                count++; \
	} \
	\
	buflen = sizeof (bpreq) * count; \
	if (bifstp->ifbpstp_len == 0) { \
	        bifstp->ifbpstp_len = buflen; \
	        return (0); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO); \
	BRIDGE_LOCK(sc); \
	/* a zero-length request legitimately has no buffer */ \
	if (outbuf == NULL && buflen != 0) \
	        return (ENOMEM); \
	\
	count = 0; \
	buf = outbuf; \
	len = min(bifstp->ifbpstp_len, buflen); \
	bzero(&bpreq, sizeof (bpreq)); \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \
	        if (len < sizeof (bpreq)) \
	                break; \
	\
	        if ((bif->bif_ifflags & IFBIF_STP) == 0) \
	                continue; \
	\
	        bp = &bif->bif_stp; \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff; \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions; \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost; \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id; \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id; \
	\
	        memcpy(buf, &bpreq, sizeof (bpreq)); \
	        count++; \
	        buf += sizeof (bpreq); \
	        len -= sizeof (bpreq); \
	} \
	\
	BRIDGE_UNLOCK(sc); \
	bifstp->ifbpstp_len = sizeof (bpreq) * count; \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc); \
	kfree_data(outbuf, buflen); \
	return (error); \
} while (0)
3630
/*
 * bridge_ioctl_gifsstp32:
 *
 *	Per-port spanning-tree list ioctl for a struct ifbpstpconf32
 *	request.  The work is done by the BRIDGE_IOCTL_GIFSSTP macro, which
 *	refers to the locals `sc', `bifstp' and `error' by name.
 */
static int
bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
{
	struct ifbpstpconf32 *bifstp = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFSSTP;
	/* the macro itself returns; this keeps the function well-formed */
	return error;
}
3640
/*
 * bridge_ioctl_gifsstp64:
 *
 *	Per-port spanning-tree list ioctl for a struct ifbpstpconf64
 *	request.  The work is done by the BRIDGE_IOCTL_GIFSSTP macro, which
 *	refers to the locals `sc', `bifstp' and `error' by name.
 */
static int
bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
{
	struct ifbpstpconf64 *bifstp = arg;
	int error = 0;

	BRIDGE_IOCTL_GIFSSTP;
	/* the macro itself returns; this keeps the function well-formed */
	return error;
}
3650
3651 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * arg)3652 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3653 {
3654 #if BRIDGESTP
3655 struct ifbrparam *param = arg;
3656
3657 return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3658 #else /* !BRIDGESTP */
3659 #pragma unused(sc, arg)
3660 return EOPNOTSUPP;
3661 #endif /* !BRIDGESTP */
3662 }
3663
3664 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * arg)3665 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3666 {
3667 #if BRIDGESTP
3668 struct ifbrparam *param = arg;
3669
3670 return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3671 #else /* !BRIDGESTP */
3672 #pragma unused(sc, arg)
3673 return EOPNOTSUPP;
3674 #endif /* !BRIDGESTP */
3675 }
3676
3677
3678 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3679 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3680 {
3681 struct ifbrhostfilter *req = arg;
3682 struct bridge_iflist *bif;
3683
3684 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3685 if (bif == NULL) {
3686 return ENOENT;
3687 }
3688
3689 bzero(req, sizeof(struct ifbrhostfilter));
3690 if (bif->bif_flags & BIFF_HOST_FILTER) {
3691 req->ifbrhf_flags |= IFBRHF_ENABLED;
3692 bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3693 ETHER_ADDR_LEN);
3694 req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3695 }
3696 return 0;
3697 }
3698
3699 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3700 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3701 {
3702 struct ifbrhostfilter *req = arg;
3703 struct bridge_iflist *bif;
3704
3705 bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3706 if (bif == NULL) {
3707 return ENOENT;
3708 }
3709
3710 if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3711 bif->bif_flags |= BIFF_HOST_FILTER;
3712
3713 if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3714 bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3715 ETHER_ADDR_LEN);
3716 if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3717 ETHER_ADDR_LEN) != 0) {
3718 bif->bif_flags |= BIFF_HF_HWSRC;
3719 } else {
3720 bif->bif_flags &= ~BIFF_HF_HWSRC;
3721 }
3722 }
3723 if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3724 bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3725 if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3726 bif->bif_flags |= BIFF_HF_IPSRC;
3727 } else {
3728 bif->bif_flags &= ~BIFF_HF_IPSRC;
3729 }
3730 }
3731 } else {
3732 bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3733 BIFF_HF_IPSRC);
3734 bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3735 bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3736 }
3737
3738 return 0;
3739 }
3740
3741 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3742 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3743 unsigned int * count_p, char *buf, unsigned int *len_p)
3744 {
3745 unsigned int count = *count_p;
3746 struct ifbrmne ifbmne;
3747 unsigned int len = *len_p;
3748 struct mac_nat_entry *mne;
3749 unsigned long now;
3750
3751 bzero(&ifbmne, sizeof(ifbmne));
3752 LIST_FOREACH(mne, list, mne_list) {
3753 if (len < sizeof(ifbmne)) {
3754 break;
3755 }
3756 snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3757 "%s", mne->mne_bif->bif_ifp->if_xname);
3758 memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3759 sizeof(ifbmne.ifbmne_mac));
3760 now = (unsigned long) net_uptime();
3761 if (now < mne->mne_expire) {
3762 ifbmne.ifbmne_expire = mne->mne_expire - now;
3763 } else {
3764 ifbmne.ifbmne_expire = 0;
3765 }
3766 if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3767 ifbmne.ifbmne_af = AF_INET6;
3768 ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3769 } else {
3770 ifbmne.ifbmne_af = AF_INET;
3771 ifbmne.ifbmne_ip_addr = mne->mne_ip;
3772 }
3773 memcpy(buf, &ifbmne, sizeof(ifbmne));
3774 count++;
3775 buf += sizeof(ifbmne);
3776 len -= sizeof(ifbmne);
3777 }
3778 *count_p = count;
3779 *len_p = len;
3780 return buf;
3781 }
3782
3783 /*
3784 * bridge_ioctl_gmnelist()
3785 * Perform the get mac_nat_entry list ioctl.
3786 *
3787 * Note:
3788 * The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3789 * field size/layout except for the last field ifbml_buf, the user-supplied
3790 * buffer pointer. That is passed in separately via the 'user_addr'
3791 * parameter from the respective 32-bit or 64-bit ioctl routine.
3792 */
3793 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3794 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3795 user_addr_t user_addr)
3796 {
3797 unsigned int count;
3798 char *buf;
3799 int error = 0;
3800 char *outbuf = NULL;
3801 struct mac_nat_entry *mne;
3802 unsigned int buflen;
3803 unsigned int len;
3804
3805 mnl->ifbml_elsize = sizeof(struct ifbrmne);
3806 count = 0;
3807 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3808 count++;
3809 }
3810 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3811 count++;
3812 }
3813 buflen = sizeof(struct ifbrmne) * count;
3814 if (buflen == 0 || mnl->ifbml_len == 0) {
3815 mnl->ifbml_len = buflen;
3816 return error;
3817 }
3818 BRIDGE_UNLOCK(sc);
3819 outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3820 BRIDGE_LOCK(sc);
3821 count = 0;
3822 buf = outbuf;
3823 len = min(mnl->ifbml_len, buflen);
3824 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3825 buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3826 mnl->ifbml_len = count * sizeof(struct ifbrmne);
3827 BRIDGE_UNLOCK(sc);
3828 error = copyout(outbuf, user_addr, mnl->ifbml_len);
3829 kfree_data(outbuf, buflen);
3830 BRIDGE_LOCK(sc);
3831 return error;
3832 }
3833
3834 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3835 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3836 {
3837 struct ifbrmnelist64 *mnl = arg;
3838
3839 return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3840 }
3841
3842 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3843 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3844 {
3845 struct ifbrmnelist32 *mnl = arg;
3846
3847 return bridge_ioctl_gmnelist(sc, arg,
3848 CAST_USER_ADDR_T(mnl->ifbml_buf));
3849 }
3850
3851 /*
3852 * bridge_ioctl_gifstats()
3853 * Return per-member stats.
3854 *
3855 * Note:
3856 * The ifbrmreq32 and ifbrmreq64 structures have the same
3857 * field size/layout except for the last field brmr_buf, the user-supplied
3858 * buffer pointer. That is passed in separately via the 'user_addr'
3859 * parameter from the respective 32-bit or 64-bit ioctl routine.
3860 */
3861 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3862 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3863 user_addr_t user_addr)
3864 {
3865 struct bridge_iflist *bif;
3866 int error = 0;
3867 unsigned int buflen;
3868
3869 bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3870 if (bif == NULL) {
3871 error = ENOENT;
3872 goto done;
3873 }
3874
3875 buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3876 if (buflen == 0 || mreq->brmr_len == 0) {
3877 mreq->brmr_len = buflen;
3878 goto done;
3879 }
3880 if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3881 error = ENOBUFS;
3882 goto done;
3883 }
3884 mreq->brmr_len = buflen;
3885 error = copyout(&bif->bif_stats, user_addr, buflen);
3886 done:
3887 return error;
3888 }
3889
3890 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3891 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3892 {
3893 struct ifbrmreq32 *mreq = arg;
3894
3895 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3896 }
3897
3898 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3899 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3900 {
3901 struct ifbrmreq64 *mreq = arg;
3902
3903 return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3904 }
3905
3906 /*
3907 * bridge_proto_attach_changed
3908 *
3909 * Called when protocol attachment on the interface changes.
3910 */
3911 static void
bridge_proto_attach_changed(struct ifnet * ifp)3912 bridge_proto_attach_changed(struct ifnet *ifp)
3913 {
3914 boolean_t changed = FALSE;
3915 struct bridge_iflist *bif;
3916 boolean_t input_broadcast;
3917 struct bridge_softc *sc = ifp->if_bridge;
3918
3919 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3920 if (sc == NULL) {
3921 return;
3922 }
3923 input_broadcast = interface_needs_input_broadcast(ifp);
3924 BRIDGE_LOCK(sc);
3925 bif = bridge_lookup_member_if(sc, ifp);
3926 if (bif != NULL) {
3927 changed = bif_set_input_broadcast(bif, input_broadcast);
3928 }
3929 BRIDGE_UNLOCK(sc);
3930 if (changed) {
3931 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3932 "%s input broadcast %s", ifp->if_xname,
3933 input_broadcast ? "ENABLED" : "DISABLED");
3934 }
3935 return;
3936 }
3937
3938 /*
3939 * interface_media_active:
3940 *
3941 * Tells if an interface media is active.
3942 */
3943 static int
interface_media_active(struct ifnet * ifp)3944 interface_media_active(struct ifnet *ifp)
3945 {
3946 struct ifmediareq ifmr;
3947 int status = 0;
3948
3949 bzero(&ifmr, sizeof(ifmr));
3950 if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
3951 if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
3952 status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
3953 }
3954 }
3955
3956 return status;
3957 }
3958
3959 /*
3960 * bridge_updatelinkstatus:
3961 *
3962 * Update the media active status of the bridge based on the
3963 * media active status of its member.
3964 * If changed, return the corresponding onf/off link event.
3965 */
3966 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)3967 bridge_updatelinkstatus(struct bridge_softc *sc)
3968 {
3969 struct bridge_iflist *bif;
3970 int active_member = 0;
3971 u_int32_t event_code = 0;
3972
3973 BRIDGE_LOCK_ASSERT_HELD(sc);
3974
3975 /*
3976 * Find out if we have an active interface
3977 */
3978 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
3979 if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
3980 active_member = 1;
3981 break;
3982 }
3983 }
3984
3985 if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3986 sc->sc_flags |= SCF_MEDIA_ACTIVE;
3987 event_code = KEV_DL_LINK_ON;
3988 } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3989 sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
3990 event_code = KEV_DL_LINK_OFF;
3991 }
3992
3993 return event_code;
3994 }
3995
3996 /*
3997 * bridge_iflinkevent:
3998 */
3999 static void
bridge_iflinkevent(struct ifnet * ifp)4000 bridge_iflinkevent(struct ifnet *ifp)
4001 {
4002 struct bridge_softc *sc = ifp->if_bridge;
4003 struct bridge_iflist *bif;
4004 u_int32_t event_code = 0;
4005 int media_active;
4006
4007 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4008
4009 /* Check if the interface is a bridge member */
4010 if (sc == NULL) {
4011 return;
4012 }
4013
4014 media_active = interface_media_active(ifp);
4015 BRIDGE_LOCK(sc);
4016 bif = bridge_lookup_member_if(sc, ifp);
4017 if (bif != NULL) {
4018 if (media_active) {
4019 bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4020 } else {
4021 bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4022 }
4023 if (sc->sc_mac_nat_bif != NULL) {
4024 bridge_mac_nat_flush_entries(sc, bif);
4025 }
4026
4027 event_code = bridge_updatelinkstatus(sc);
4028 }
4029 BRIDGE_UNLOCK(sc);
4030
4031 if (event_code != 0) {
4032 bridge_link_event(sc->sc_ifp, event_code);
4033 }
4034 }
4035
4036 /*
4037 * bridge_delayed_callback:
4038 *
4039 * Makes a delayed call
4040 */
4041 static void
bridge_delayed_callback(void * param,__unused void * param2)4042 bridge_delayed_callback(void *param, __unused void *param2)
4043 {
4044 struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4045 struct bridge_softc *sc = call->bdc_sc;
4046
4047 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4048 if (bridge_delayed_callback_delay > 0) {
4049 struct timespec ts;
4050
4051 ts.tv_sec = bridge_delayed_callback_delay;
4052 ts.tv_nsec = 0;
4053
4054 BRIDGE_LOG(LOG_NOTICE, 0,
4055 "sleeping for %d seconds",
4056 bridge_delayed_callback_delay);
4057
4058 msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4059 __func__, &ts);
4060
4061 BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4062 }
4063 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4064
4065 BRIDGE_LOCK(sc);
4066
4067 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4068 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4069 "%s call 0x%llx flags 0x%x",
4070 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4071 call->bdc_flags);
4072 }
4073 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4074
4075 if (call->bdc_flags & BDCF_CANCELLING) {
4076 wakeup(call);
4077 } else {
4078 if ((sc->sc_flags & SCF_DETACHING) == 0) {
4079 (*call->bdc_func)(sc);
4080 }
4081 }
4082 call->bdc_flags &= ~BDCF_OUTSTANDING;
4083 BRIDGE_UNLOCK(sc);
4084 }
4085
4086 /*
4087 * bridge_schedule_delayed_call:
4088 *
4089 * Schedule a function to be called on a separate thread
4090 * The actual call may be scheduled to run at a given time or ASAP.
4091 */
4092 static void
4093 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4094 {
4095 uint64_t deadline = 0;
4096 struct bridge_softc *sc = call->bdc_sc;
4097
4098 BRIDGE_LOCK_ASSERT_HELD(sc);
4099
4100 if ((sc->sc_flags & SCF_DETACHING) ||
4101 (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4102 return;
4103 }
4104
4105 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4106 nanoseconds_to_absolutetime(
4107 (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4108 call->bdc_ts.tv_nsec, &deadline);
4109 clock_absolutetime_interval_to_deadline(deadline, &deadline);
4110 }
4111
4112 call->bdc_flags = BDCF_OUTSTANDING;
4113
4114 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4115 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4116 "%s call 0x%llx flags 0x%x",
4117 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4118 call->bdc_flags);
4119 }
4120 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4121
4122 if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4123 thread_call_func_delayed(
4124 (thread_call_func_t)bridge_delayed_callback,
4125 call, deadline);
4126 } else {
4127 if (call->bdc_thread_call == NULL) {
4128 call->bdc_thread_call = thread_call_allocate(
4129 (thread_call_func_t)bridge_delayed_callback,
4130 call);
4131 }
4132 thread_call_enter(call->bdc_thread_call);
4133 }
4134 }
4135
4136 /*
4137 * bridge_cancel_delayed_call:
4138 *
4139 * Cancel a queued or running delayed call.
4140 * If call is running, does not return until the call is done to
4141 * prevent race condition with the brigde interface getting destroyed
4142 */
4143 static void
4144 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4145 {
4146 boolean_t result;
4147 struct bridge_softc *sc = call->bdc_sc;
4148
4149 /*
4150 * The call was never scheduled
4151 */
4152 if (sc == NULL) {
4153 return;
4154 }
4155
4156 BRIDGE_LOCK_ASSERT_HELD(sc);
4157
4158 call->bdc_flags |= BDCF_CANCELLING;
4159
4160 while (call->bdc_flags & BDCF_OUTSTANDING) {
4161 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4162 "%s call 0x%llx flags 0x%x",
4163 sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4164 call->bdc_flags);
4165 result = thread_call_func_cancel(
4166 (thread_call_func_t)bridge_delayed_callback, call, FALSE);
4167
4168 if (result) {
4169 /*
4170 * We managed to dequeue the delayed call
4171 */
4172 call->bdc_flags &= ~BDCF_OUTSTANDING;
4173 } else {
4174 /*
4175 * Wait for delayed call do be done running
4176 */
4177 msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4178 }
4179 }
4180 call->bdc_flags &= ~BDCF_CANCELLING;
4181 }
4182
4183 /*
4184 * bridge_cleanup_delayed_call:
4185 *
4186 * Dispose resource allocated for a delayed call
4187 * Assume the delayed call is not queued or running .
4188 */
4189 static void
4190 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4191 {
4192 boolean_t result;
4193 struct bridge_softc *sc = call->bdc_sc;
4194
4195 /*
4196 * The call was never scheduled
4197 */
4198 if (sc == NULL) {
4199 return;
4200 }
4201
4202 BRIDGE_LOCK_ASSERT_HELD(sc);
4203
4204 VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4205 VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4206
4207 if (call->bdc_thread_call != NULL) {
4208 result = thread_call_free(call->bdc_thread_call);
4209 if (result == FALSE) {
4210 panic("%s thread_call_free() failed for call %p",
4211 __func__, call);
4212 }
4213 call->bdc_thread_call = NULL;
4214 }
4215 }
4216
4217 /*
4218 * bridge_init:
4219 *
4220 * Initialize a bridge interface.
4221 */
4222 static int
4223 bridge_init(struct ifnet *ifp)
4224 {
4225 struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4226 errno_t error;
4227
4228 BRIDGE_LOCK_ASSERT_HELD(sc);
4229
4230 if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4231 return 0;
4232 }
4233
4234 error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4235
4236 /*
4237 * Calling bridge_aging_timer() is OK as there are no entries to
4238 * age so we're just going to arm the timer
4239 */
4240 bridge_aging_timer(sc);
4241 #if BRIDGESTP
4242 if (error == 0) {
4243 bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4244 }
4245 #endif /* BRIDGESTP */
4246 return error;
4247 }
4248
4249 /*
4250 * bridge_ifstop:
4251 *
4252 * Stop the bridge interface.
4253 */
4254 static void
4255 bridge_ifstop(struct ifnet *ifp, int disable)
4256 {
4257 #pragma unused(disable)
4258 struct bridge_softc *sc = ifp->if_softc;
4259
4260 BRIDGE_LOCK_ASSERT_HELD(sc);
4261
4262 if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4263 return;
4264 }
4265
4266 bridge_cancel_delayed_call(&sc->sc_aging_timer);
4267
4268 #if BRIDGESTP
4269 bstp_stop(&sc->sc_stp);
4270 #endif /* BRIDGESTP */
4271
4272 bridge_rtflush(sc, IFBF_FLUSHDYN);
4273 (void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4274 }
4275
4276 /*
4277 * bridge_compute_cksum:
4278 *
4279 * If the packet has checksum flags, compare the hardware checksum
4280 * capabilities of the source and destination interfaces. If they
4281 * are the same, there's nothing to do. If they are different,
4282 * finalize the checksum so that it can be sent on the destination
4283 * interface.
4284 */
4285 static void
4286 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4287 {
4288 uint32_t csum_flags;
4289 uint16_t dst_hw_csum;
4290 uint32_t did_sw = 0;
4291 struct ether_header *eh;
4292 uint16_t src_hw_csum;
4293
4294 if (src_if == dst_if) {
4295 return;
4296 }
4297 csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4298 if (csum_flags == 0) {
4299 /* no checksum offload */
4300 return;
4301 }
4302
4303 /*
4304 * if destination/source differ in checksum offload
4305 * capabilities, finalize/compute the checksum
4306 */
4307 dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4308 src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4309 if (dst_hw_csum == src_hw_csum) {
4310 return;
4311 }
4312 eh = mtod(m, struct ether_header *);
4313 switch (ntohs(eh->ether_type)) {
4314 case ETHERTYPE_IP:
4315 did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4316 break;
4317 case ETHERTYPE_IPV6:
4318 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4319 break;
4320 }
4321 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4322 "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4323 src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4324 m->m_pkthdr.csum_flags);
4325 }
4326
4327 static errno_t
4328 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4329 {
4330 struct flowadv adv = { .code = FADV_SUCCESS };
4331 errno_t error;
4332
4333 error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4334 if (error == 0) {
4335 if (adv.code == FADV_FLOW_CONTROLLED) {
4336 error = EQFULL;
4337 } else if (adv.code == FADV_SUSPENDED) {
4338 error = EQSUSPENDED;
4339 }
4340 }
4341 return error;
4342 }
4343
4344 static int
4345 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4346 bool *is_fragmented)
4347 {
4348 int newoff;
4349
4350 *is_fragmented = false;
4351 while (1) {
4352 newoff = ip6_nexthdr(m, off, proto, nxtp);
4353 if (newoff < 0) {
4354 return off;
4355 } else if (newoff < off) {
4356 return -1; /* invalid */
4357 } else if (newoff == off) {
4358 return newoff;
4359 }
4360 off = newoff;
4361 proto = *nxtp;
4362 if (proto == IPPROTO_FRAGMENT) {
4363 *is_fragmented = true;
4364 }
4365 }
4366 }
4367
4368 static int
4369 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4370 ip_packet_info_t info_p, struct bripstats * stats_p)
4371 {
4372 int error = 0;
4373 u_int hlen;
4374 u_int ip_hlen;
4375 u_int ip_pay_len;
4376 struct mbuf * m0 = *mp;
4377 int off;
4378 int opt_len = 0;
4379 int proto = 0;
4380
4381 bzero(info_p, sizeof(*info_p));
4382 if (is_ipv4) {
4383 struct ip * ip;
4384 u_int ip_total_len;
4385
4386 /* IPv4 */
4387 hlen = mac_hlen + sizeof(struct ip);
4388 if (m0->m_pkthdr.len < hlen) {
4389 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4390 "Short IP packet %d < %d",
4391 m0->m_pkthdr.len, hlen);
4392 error = _EBADIP;
4393 stats_p->bips_bad_ip++;
4394 goto done;
4395 }
4396 if (m0->m_len < hlen) {
4397 *mp = m0 = m_pullup(m0, hlen);
4398 if (m0 == NULL) {
4399 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4400 "m_pullup failed hlen %d",
4401 hlen);
4402 error = ENOBUFS;
4403 stats_p->bips_bad_ip++;
4404 goto done;
4405 }
4406 }
4407 ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4408 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4409 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4410 "bad IP version");
4411 error = _EBADIP;
4412 stats_p->bips_bad_ip++;
4413 goto done;
4414 }
4415 ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4416 if (ip_hlen < sizeof(struct ip)) {
4417 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4418 "bad IP header length %d < %d",
4419 ip_hlen,
4420 (int)sizeof(struct ip));
4421 error = _EBADIP;
4422 stats_p->bips_bad_ip++;
4423 goto done;
4424 }
4425 hlen = mac_hlen + ip_hlen;
4426 if (m0->m_len < hlen) {
4427 *mp = m0 = m_pullup(m0, hlen);
4428 if (m0 == NULL) {
4429 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4430 "m_pullup failed hlen %d",
4431 hlen);
4432 error = ENOBUFS;
4433 stats_p->bips_bad_ip++;
4434 goto done;
4435 }
4436 }
4437
4438 ip_total_len = ntohs(ip->ip_len);
4439 if (ip_total_len < ip_hlen) {
4440 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4441 "IP total len %d < header len %d",
4442 ip_total_len, ip_hlen);
4443 error = _EBADIP;
4444 stats_p->bips_bad_ip++;
4445 goto done;
4446 }
4447 if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4448 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4449 "invalid IP payload length %d > %d",
4450 ip_total_len,
4451 (m0->m_pkthdr.len - mac_hlen));
4452 error = _EBADIP;
4453 stats_p->bips_bad_ip++;
4454 goto done;
4455 }
4456 ip_pay_len = ip_total_len - ip_hlen;
4457 info_p->ip_proto = ip->ip_p;
4458 info_p->ip_hdr.ip = ip;
4459 #define FRAG_BITS (IP_OFFMASK | IP_MF)
4460 if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4461 info_p->ip_is_fragmented = true;
4462 }
4463 stats_p->bips_ip++;
4464 } else {
4465 struct ip6_hdr *ip6;
4466
4467 /* IPv6 */
4468 hlen = mac_hlen + sizeof(struct ip6_hdr);
4469 if (m0->m_pkthdr.len < hlen) {
4470 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4471 "short IPv6 packet %d < %d",
4472 m0->m_pkthdr.len, hlen);
4473 error = _EBADIPV6;
4474 stats_p->bips_bad_ip6++;
4475 goto done;
4476 }
4477 if (m0->m_len < hlen) {
4478 *mp = m0 = m_pullup(m0, hlen);
4479 if (m0 == NULL) {
4480 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4481 "m_pullup failed hlen %d",
4482 hlen);
4483 error = ENOBUFS;
4484 stats_p->bips_bad_ip6++;
4485 goto done;
4486 }
4487 }
4488 ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4489 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4490 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4491 "bad IPv6 version");
4492 error = _EBADIPV6;
4493 stats_p->bips_bad_ip6++;
4494 goto done;
4495 }
4496 off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4497 &info_p->ip_is_fragmented);
4498 if (off < 0 || m0->m_pkthdr.len < off) {
4499 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4500 "ip6_lasthdr() returned %d",
4501 off);
4502 error = _EBADIPV6;
4503 stats_p->bips_bad_ip6++;
4504 goto done;
4505 }
4506 ip_hlen = sizeof(*ip6);
4507 opt_len = off - mac_hlen - ip_hlen;
4508 if (opt_len < 0) {
4509 error = _EBADIPV6;
4510 stats_p->bips_bad_ip6++;
4511 goto done;
4512 }
4513 info_p->ip_proto = proto;
4514 info_p->ip_hdr.ip6 = ip6;
4515 ip_pay_len = ntohs(ip6->ip6_plen);
4516 if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4517 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4518 "invalid IPv6 payload length %d > %d",
4519 ip_pay_len,
4520 (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4521 error = _EBADIPV6;
4522 stats_p->bips_bad_ip6++;
4523 goto done;
4524 }
4525 stats_p->bips_ip6++;
4526 }
4527 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4528 "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4529 is_ipv4 ? '4' : '6',
4530 proto, ip_hlen, ip_pay_len, opt_len,
4531 m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4532 info_p->ip_hlen = ip_hlen;
4533 info_p->ip_pay_len = ip_pay_len;
4534 info_p->ip_opt_len = opt_len;
4535 info_p->ip_is_ipv4 = is_ipv4;
4536 done:
4537 return error;
4538 }
4539
4540 static int
4541 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4542 ip_packet_info_t info_p, struct bripstats * stats_p)
4543 {
4544 int error;
4545 u_int hlen;
4546
4547 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4548 if (error != 0) {
4549 goto done;
4550 }
4551 if (info_p->ip_proto != IPPROTO_TCP) {
4552 /* not a TCP frame, not an error, just a bad guess */
4553 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4554 "non-TCP (%d) IPv%c frame %d bytes",
4555 info_p->ip_proto, is_ipv4 ? '4' : '6',
4556 (*mp)->m_pkthdr.len);
4557 goto done;
4558 }
4559 if (info_p->ip_is_fragmented) {
4560 /* both TSO and IP fragmentation don't make sense */
4561 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4562 "fragmented TSO packet?");
4563 stats_p->bips_bad_tcp++;
4564 error = _EBADTCP;
4565 goto done;
4566 }
4567 hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4568 info_p->ip_opt_len;
4569 if ((*mp)->m_len < hlen) {
4570 *mp = m_pullup(*mp, hlen);
4571 if (*mp == NULL) {
4572 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4573 "m_pullup %d failed",
4574 hlen);
4575 stats_p->bips_bad_tcp++;
4576 error = _EBADTCP;
4577 goto done;
4578 }
4579 }
4580 info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4581 info_p->ip_hlen + info_p->ip_opt_len;
4582 done:
4583 return error;
4584 }
4585
4586 static inline void
4587 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4588 {
4589 if (proto == IPPROTO_TCP) {
4590 stats_p->brcs_tcp_checksum++;
4591 } else {
4592 stats_p->brcs_udp_checksum++;
4593 }
4594 return;
4595 }
4596
4597 static bool
4598 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4599 {
4600 uint16_t ether_type;
4601 bool is_ip = TRUE;
4602
4603 ether_type = ntohs(eh->ether_type);
4604 switch (ether_type) {
4605 case ETHERTYPE_IP:
4606 *is_ipv4 = TRUE;
4607 break;
4608 case ETHERTYPE_IPV6:
4609 *is_ipv4 = FALSE;
4610 break;
4611 default:
4612 is_ip = FALSE;
4613 break;
4614 }
4615 return is_ip;
4616 }
4617
4618 static errno_t
4619 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4620 {
4621 struct brcsumstats *csum_stats_p;
4622 struct ether_header *eh;
4623 errno_t error = 0;
4624 ip_packet_info info;
4625 bool is_ipv4;
4626 struct mbuf * m;
4627 u_int mac_hlen = sizeof(struct ether_header);
4628 uint16_t sum;
4629 bool valid;
4630
4631 eh = mtod(*mp, struct ether_header *);
4632 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4633 goto done;
4634 }
4635 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4636 &stats_p->brms_out_ip);
4637 m = *mp;
4638 if (error != 0) {
4639 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4640 "bridge_get_ip_proto failed %d",
4641 error);
4642 goto done;
4643 }
4644 if (is_ipv4) {
4645 if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4646 /* hardware offloaded IP header checksum */
4647 valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4648 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4649 "IP checksum HW %svalid",
4650 valid ? "" : "in");
4651 if (!valid) {
4652 stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4653 error = _EBADIPCHECKSUM;
4654 goto done;
4655 }
4656 stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4657 } else {
4658 /* verify */
4659 sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4660 valid = (sum == 0);
4661 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4662 "IP checksum SW %svalid",
4663 valid ? "" : "in");
4664 if (!valid) {
4665 stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4666 error = _EBADIPCHECKSUM;
4667 goto done;
4668 }
4669 stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4670 }
4671 }
4672 if (info.ip_is_fragmented) {
4673 /* can't verify checksum on fragmented packets */
4674 goto done;
4675 }
4676 switch (info.ip_proto) {
4677 case IPPROTO_TCP:
4678 stats_p->brms_out_ip.bips_tcp++;
4679 break;
4680 case IPPROTO_UDP:
4681 stats_p->brms_out_ip.bips_udp++;
4682 break;
4683 default:
4684 goto done;
4685 }
4686 /* check for hardware offloaded UDP/TCP checksum */
4687 #define HW_CSUM (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4688 if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4689 /* checksum verified by hardware */
4690 valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4691 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4692 "IPv%c %s checksum HW 0x%x %svalid",
4693 is_ipv4 ? '4' : '6',
4694 (info.ip_proto == IPPROTO_TCP)
4695 ? "TCP" : "UDP",
4696 m->m_pkthdr.csum_data,
4697 valid ? "" : "in" );
4698 if (!valid) {
4699 /* bad checksum */
4700 csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4701 error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4702 : _EBADTCPCHECKSUM;
4703 } else {
4704 /* good checksum */
4705 csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4706 }
4707 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4708 goto done;
4709 }
4710 m->m_data += mac_hlen;
4711 m->m_len -= mac_hlen;
4712 m->m_pkthdr.len -= mac_hlen;
4713 if (is_ipv4) {
4714 sum = inet_cksum(m, info.ip_proto,
4715 info.ip_hlen,
4716 info.ip_pay_len);
4717 } else {
4718 sum = inet6_cksum(m, info.ip_proto,
4719 info.ip_hlen + info.ip_opt_len,
4720 info.ip_pay_len - info.ip_opt_len);
4721 }
4722 valid = (sum == 0);
4723 if (valid) {
4724 csum_stats_p = &stats_p->brms_out_cksum_good;
4725 } else {
4726 csum_stats_p = &stats_p->brms_out_cksum_bad;
4727 error = (info.ip_proto == IPPROTO_TCP)
4728 ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4729 }
4730 proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4731 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4732 "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4733 is_ipv4 ? '4' : '6',
4734 (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4735 valid ? "" : "in",
4736 sum, info.ip_hlen, info.ip_pay_len);
4737 m->m_data -= mac_hlen;
4738 m->m_len += mac_hlen;
4739 m->m_pkthdr.len += mac_hlen;
4740 done:
4741 return error;
4742 }
4743
4744 static errno_t
4745 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4746 struct ifbrmstats * stats_p)
4747 {
4748 uint16_t * csum_p;
4749 errno_t error = 0;
4750 u_int hlen;
4751 struct mbuf * m0 = *mp;
4752 u_int mac_hlen = sizeof(struct ether_header);
4753 u_int pkt_hdr_len;
4754 struct tcphdr * tcp;
4755 u_int tcp_hlen;
4756 struct udphdr * udp;
4757
4758 if (info_p->ip_is_ipv4) {
4759 /* compute IP header checksum */
4760 info_p->ip_hdr.ip->ip_sum = 0;
4761 info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4762 info_p->ip_hlen);
4763 stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4764 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4765 "IPv4 checksum 0x%x",
4766 ntohs(info_p->ip_hdr.ip->ip_sum));
4767 }
4768 if (info_p->ip_is_fragmented) {
4769 /* can't compute checksum on fragmented packets */
4770 goto done;
4771 }
4772 pkt_hdr_len = m0->m_pkthdr.len;
4773 switch (info_p->ip_proto) {
4774 case IPPROTO_TCP:
4775 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4776 + sizeof(struct tcphdr);
4777 if (m0->m_len < hlen) {
4778 *mp = m0 = m_pullup(m0, hlen);
4779 if (m0 == NULL) {
4780 stats_p->brms_in_ip.bips_bad_tcp++;
4781 error = _EBADTCP;
4782 goto done;
4783 }
4784 }
4785 tcp = (struct tcphdr *)(void *)
4786 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4787 + info_p->ip_opt_len);
4788 tcp_hlen = tcp->th_off << 2;
4789 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4790 if (hlen > pkt_hdr_len) {
4791 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4792 "bad tcp header length %u",
4793 tcp_hlen);
4794 stats_p->brms_in_ip.bips_bad_tcp++;
4795 error = _EBADTCP;
4796 goto done;
4797 }
4798 csum_p = &tcp->th_sum;
4799 stats_p->brms_in_ip.bips_tcp++;
4800 break;
4801 case IPPROTO_UDP:
4802 hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4803 if (m0->m_len < hlen) {
4804 *mp = m0 = m_pullup(m0, hlen);
4805 if (m0 == NULL) {
4806 stats_p->brms_in_ip.bips_bad_udp++;
4807 error = ENOBUFS;
4808 goto done;
4809 }
4810 }
4811 udp = (struct udphdr *)(void *)
4812 ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4813 + info_p->ip_opt_len);
4814 csum_p = &udp->uh_sum;
4815 stats_p->brms_in_ip.bips_udp++;
4816 break;
4817 default:
4818 /* not TCP or UDP */
4819 goto done;
4820 }
4821 *csum_p = 0;
4822 m0->m_data += mac_hlen;
4823 m0->m_len -= mac_hlen;
4824 m0->m_pkthdr.len -= mac_hlen;
4825 if (info_p->ip_is_ipv4) {
4826 *csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4827 info_p->ip_pay_len);
4828 } else {
4829 *csum_p = inet6_cksum(m0, info_p->ip_proto,
4830 info_p->ip_hlen + info_p->ip_opt_len,
4831 info_p->ip_pay_len - info_p->ip_opt_len);
4832 }
4833 if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4834 /* RFC 1122 4.1.3.4 */
4835 *csum_p = 0xffff;
4836 }
4837 m0->m_data -= mac_hlen;
4838 m0->m_len += mac_hlen;
4839 m0->m_pkthdr.len += mac_hlen;
4840 proto_csum_stats_increment(info_p->ip_proto,
4841 &stats_p->brms_in_computed_cksum);
4842
4843 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4844 "IPv%c %s set checksum 0x%x",
4845 info_p->ip_is_ipv4 ? '4' : '6',
4846 (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4847 ntohs(*csum_p));
4848 done:
4849 return error;
4850 }
4851
4852 static errno_t
4853 bridge_send(struct ifnet *src_ifp,
4854 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4855 {
4856 switch (cksum_op) {
4857 case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4858 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4859 break;
4860 case CHECKSUM_OPERATION_FINALIZE:
4861 /* the checksum might not be correct, finalize now */
4862 bridge_finalize_cksum(dst_ifp, m);
4863 break;
4864 case CHECKSUM_OPERATION_COMPUTE:
4865 bridge_compute_cksum(src_ifp, dst_ifp, m);
4866 break;
4867 default:
4868 break;
4869 }
4870 #if HAS_IF_CAP
4871 /*
4872 * If underlying interface can not do VLAN tag insertion itself
4873 * then attach a packet tag that holds it.
4874 */
4875 if ((m->m_flags & M_VLANTAG) &&
4876 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4877 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4878 if (m == NULL) {
4879 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4880 "%s: unable to prepend VLAN header",
4881 dst_ifp->if_xname);
4882 (void) ifnet_stat_increment_out(dst_ifp,
4883 0, 0, 1);
4884 return 0;
4885 }
4886 m->m_flags &= ~M_VLANTAG;
4887 }
4888 #endif /* HAS_IF_CAP */
4889 return bridge_transmit(dst_ifp, m);
4890 }
4891
4892 static errno_t
4893 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4894 {
4895 errno_t error;
4896 u_int mac_hlen;
4897
4898 mac_hlen = sizeof(struct ether_header);
4899
4900 #if HAS_IF_CAP
4901 /*
4902 * If underlying interface can not do VLAN tag insertion itself
4903 * then attach a packet tag that holds it.
4904 */
4905 if ((m->m_flags & M_VLANTAG) &&
4906 (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4907 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4908 if (m == NULL) {
4909 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4910 "%s: unable to prepend VLAN header",
4911 dst_ifp->if_xname);
4912 (void) ifnet_stat_increment_out(dst_ifp,
4913 0, 0, 1);
4914 error = ENOBUFS;
4915 goto done;
4916 }
4917 m->m_flags &= ~M_VLANTAG;
4918 mac_hlen += ETHER_VLAN_ENCAP_LEN;
4919 }
4920 #endif /* HAS_IF_CAP */
4921 error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4922 return error;
4923 }
4924
4925 /*
4926 * tso_hwassist:
4927 * - determine whether the destination interface supports TSO offload
4928 * - if the packet is already marked for offload and the hardware supports
4929 * it, just allow the packet to continue on
4930 * - if not, parse the packet headers to verify that this is a large TCP
4931 * packet requiring segmentation; if the hardware doesn't support it
4932 * set need_sw_tso; otherwise, mark the packet for TSO offload
4933 */
4934 static int
4935 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
4936 bool * need_sw_tso, bool * is_large_tcp)
4937 {
4938 int error = 0;
4939 u_int32_t if_csum;
4940 u_int32_t if_tso;
4941 u_int32_t mbuf_tso;
4942 bool supports_cksum = false;
4943
4944 *need_sw_tso = false;
4945 *is_large_tcp = false;
4946 if (is_ipv4) {
4947 /*
4948 * Enable both TCP and IP offload if the hardware supports it.
4949 * If the hardware doesn't support TCP offload, supports_cksum
4950 * will be false so we won't set either offload.
4951 */
4952 if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
4953 supports_cksum = (if_csum & CSUM_TCP) != 0;
4954 if_tso = IFNET_TSO_IPV4;
4955 mbuf_tso = CSUM_TSO_IPV4;
4956 } else {
4957 supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
4958 if_csum = CSUM_TCPIPV6;
4959 if_tso = IFNET_TSO_IPV6;
4960 mbuf_tso = CSUM_TSO_IPV6;
4961 }
4962 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4963 "%s: does%s support checksum 0x%x if_csum 0x%x",
4964 ifp->if_xname, supports_cksum ? "" : " not",
4965 ifp->if_hwassist, if_csum);
4966 if ((ifp->if_hwassist & if_tso) != 0 &&
4967 ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
4968 /* hardware TSO, mbuf already marked */
4969 } else {
4970 /* verify that this is a large TCP frame */
4971 uint32_t csum_flags;
4972 ip_packet_info info;
4973 int mss;
4974 struct bripstats stats;
4975 struct tcphdr * tcp;
4976
4977 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
4978 &info, &stats);
4979 if (error != 0) {
4980 /* bad packet */
4981 goto done;
4982 }
4983 if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
4984 ifp->if_mtu) {
4985 /* not actually a large packet */
4986 goto done;
4987 }
4988 if (info.ip_proto_hdr == NULL) {
4989 /* not a TCP packet */
4990 goto done;
4991 }
4992 if ((ifp->if_hwassist & if_tso) == 0) {
4993 /* hardware does not support TSO, enable sw tso */
4994 *need_sw_tso = if_bridge_segmentation != 0;
4995 goto done;
4996 }
4997 /* use hardware TSO */
4998 (*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
4999 tcp = (struct tcphdr *)info.ip_proto_hdr;
5000 mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
5001 - (tcp->th_off << 2) - if_bridge_tso_reduce_mss_tx;
5002 assert(mss > 0);
5003 csum_flags = mbuf_tso;
5004 if (supports_cksum) {
5005 csum_flags |= if_csum;
5006 }
5007 (*mp)->m_pkthdr.tso_segsz = mss;
5008 (*mp)->m_pkthdr.csum_flags |= csum_flags;
5009 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5010 *is_large_tcp = true;
5011 }
5012 done:
5013 return error;
5014 }
5015
5016 /*
5017 * bridge_enqueue:
5018 *
5019 * Enqueue a packet on a bridge member interface.
5020 *
5021 */
5022 static errno_t
5023 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
5024 struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
5025 {
5026 errno_t error = 0;
5027 int len;
5028
5029 VERIFY(dst_ifp != NULL);
5030
5031 /*
5032 * We may be sending a fragment so traverse the mbuf
5033 *
5034 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
5035 */
5036 for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
5037 bool need_sw_tso = false;
5038 bool is_ipv4 = false;
5039 bool is_large_pkt;
5040 errno_t _error = 0;
5041
5042 len = m->m_pkthdr.len;
5043 m->m_flags |= M_PROTO1; /* set to avoid loops */
5044 next_m = m->m_nextpkt;
5045 m->m_nextpkt = NULL;
5046 /*
5047 * Need to segment the packet if it is a large frame
5048 * and the destination interface does not support TSO.
5049 *
5050 * Note that with trailers, it's possible for a packet to
5051 * be large but not actually require segmentation.
5052 */
5053 is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
5054 if (is_large_pkt) {
5055 struct ether_header *eh;
5056 bool is_large_tcp = false;
5057
5058 eh = mtod(m, struct ether_header *);
5059 if (ether_header_type_is_ip(eh, &is_ipv4)) {
5060 _error = tso_hwassist(&m, is_ipv4,
5061 dst_ifp, sizeof(struct ether_header),
5062 &need_sw_tso, &is_large_tcp);
5063 if (is_large_tcp) {
5064 cksum_op = CHECKSUM_OPERATION_NONE;
5065 }
5066 } else {
5067 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5068 "large non IP packet");
5069 }
5070 }
5071 if (_error != 0) {
5072 if (m != NULL) {
5073 m_freem(m);
5074 }
5075 } else if (need_sw_tso) {
5076 _error = bridge_send_tso(dst_ifp, m, is_ipv4);
5077 } else {
5078 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5079 "%s bridge_send(%s) len %d op %d",
5080 bridge_ifp->if_xname,
5081 dst_ifp->if_xname,
5082 len, cksum_op);
5083 _error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
5084 }
5085
5086 /* Preserve first error value */
5087 if (error == 0 && _error != 0) {
5088 error = _error;
5089 }
5090 if (_error == 0) {
5091 (void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
5092 } else {
5093 (void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
5094 }
5095 }
5096
5097 return error;
5098 }
5099
5100 #if HAS_BRIDGE_DUMMYNET
5101 /*
5102 * bridge_dummynet:
5103 *
5104 * Receive a queued packet from dummynet and pass it on to the output
5105 * interface.
5106 *
5107 * The mbuf has the Ethernet header already attached.
5108 */
5109 static void
5110 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
5111 {
5112 struct bridge_softc *sc;
5113
5114 sc = ifp->if_bridge;
5115
5116 /*
5117 * The packet didn't originate from a member interface. This should only
5118 * ever happen if a member interface is removed while packets are
5119 * queued for it.
5120 */
5121 if (sc == NULL) {
5122 m_freem(m);
5123 return;
5124 }
5125
5126 if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
5127 if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
5128 return;
5129 }
5130 if (m == NULL) {
5131 return;
5132 }
5133 }
5134 (void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
5135 }
5136
5137 #endif /* HAS_BRIDGE_DUMMYNET */
5138
5139 /*
5140 * bridge_member_output:
5141 *
5142 * Send output from a bridge member interface. This
5143 * performs the bridging function for locally originated
5144 * packets.
5145 *
5146 * The mbuf has the Ethernet header already attached.
5147 */
5148 static errno_t
5149 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5150 {
5151 ifnet_t bridge_ifp;
5152 struct ether_header *eh;
5153 struct ifnet *dst_if;
5154 uint16_t vlan;
5155 struct bridge_iflist *mac_nat_bif;
5156 ifnet_t mac_nat_ifp;
5157 mbuf_t m = *data;
5158
5159 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5160 "ifp %s", ifp->if_xname);
5161 if (m->m_len < ETHER_HDR_LEN) {
5162 m = m_pullup(m, ETHER_HDR_LEN);
5163 if (m == NULL) {
5164 *data = NULL;
5165 return EJUSTRETURN;
5166 }
5167 }
5168
5169 eh = mtod(m, struct ether_header *);
5170 vlan = VLANTAGOF(m);
5171
5172 BRIDGE_LOCK(sc);
5173 mac_nat_bif = sc->sc_mac_nat_bif;
5174 mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5175 if (mac_nat_ifp == ifp) {
5176 /* record the IP address used by the MAC NAT interface */
5177 (void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5178 m = *data;
5179 if (m == NULL) {
5180 /* packet was deallocated */
5181 BRIDGE_UNLOCK(sc);
5182 return EJUSTRETURN;
5183 }
5184 }
5185 bridge_ifp = sc->sc_ifp;
5186
5187 /*
5188 * APPLE MODIFICATION
5189 * If the packet is an 802.1X ethertype, then only send on the
5190 * original output interface.
5191 */
5192 if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5193 dst_if = ifp;
5194 goto sendunicast;
5195 }
5196
5197 /*
5198 * If bridge is down, but the original output interface is up,
5199 * go ahead and send out that interface. Otherwise, the packet
5200 * is dropped below.
5201 */
5202 if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5203 dst_if = ifp;
5204 goto sendunicast;
5205 }
5206
5207 /*
5208 * If the packet is a multicast, or we don't know a better way to
5209 * get there, send to all interfaces.
5210 */
5211 if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5212 dst_if = NULL;
5213 } else {
5214 dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5215 }
5216 if (dst_if == NULL) {
5217 struct bridge_iflist *bif;
5218 struct mbuf *mc;
5219 errno_t error;
5220
5221
5222 bridge_span(sc, m);
5223
5224 BRIDGE_LOCK2REF(sc, error);
5225 if (error != 0) {
5226 m_freem(m);
5227 return EJUSTRETURN;
5228 }
5229
5230 /*
5231 * Duplicate and send the packet across all member interfaces
5232 * except the originating interface.
5233 */
5234 TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5235 dst_if = bif->bif_ifp;
5236 if (dst_if == ifp) {
5237 /* skip the originating interface */
5238 continue;
5239 }
5240 /* skip interface with inactive link status */
5241 if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5242 continue;
5243 }
5244 #if 0
5245 if (dst_if->if_type == IFT_GIF) {
5246 continue;
5247 }
5248 #endif
5249 /* skip interface that isn't running */
5250 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5251 continue;
5252 }
5253 /*
5254 * If the interface is participating in spanning
5255 * tree, make sure the port is in a state that
5256 * allows forwarding.
5257 */
5258 if ((bif->bif_ifflags & IFBIF_STP) &&
5259 bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5260 continue;
5261 }
5262 /*
5263 * If the destination is the MAC NAT interface,
5264 * skip sending the packet. The packet can't be sent
5265 * if the source MAC is incorrect.
5266 */
5267 if (dst_if == mac_nat_ifp) {
5268 continue;
5269 }
5270
5271 /* make a deep copy to send on this member interface */
5272 mc = m_dup(m, M_DONTWAIT);
5273 if (mc == NULL) {
5274 (void)ifnet_stat_increment_out(bridge_ifp,
5275 0, 0, 1);
5276 continue;
5277 }
5278 (void)bridge_enqueue(bridge_ifp, ifp, dst_if,
5279 mc, CHECKSUM_OPERATION_COMPUTE);
5280 }
5281 BRIDGE_UNREF(sc);
5282
5283 if ((ifp->if_flags & IFF_RUNNING) == 0) {
5284 m_freem(m);
5285 return EJUSTRETURN;
5286 }
5287 /* allow packet to continue on the originating interface */
5288 return 0;
5289 }
5290
5291 sendunicast:
5292 /*
5293 * XXX Spanning tree consideration here?
5294 */
5295
5296 bridge_span(sc, m);
5297 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5298 m_freem(m);
5299 BRIDGE_UNLOCK(sc);
5300 return EJUSTRETURN;
5301 }
5302
5303 BRIDGE_UNLOCK(sc);
5304 if (dst_if == ifp) {
5305 /* allow packet to continue on the originating interface */
5306 return 0;
5307 }
5308 if (dst_if != mac_nat_ifp) {
5309 (void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5310 CHECKSUM_OPERATION_COMPUTE);
5311 } else {
5312 /*
5313 * This is not the original output interface
5314 * and the destination is the MAC NAT interface.
5315 * Drop the packet because the packet can't be sent
5316 * if the source MAC is incorrect.
5317 */
5318 m_freem(m);
5319 }
5320 return EJUSTRETURN;
5321 }
5322
5323 /*
5324 * Output callback.
5325 *
5326 * This routine is called externally from above only when if_bridge_txstart
5327 * is disabled; otherwise it is called internally by bridge_start().
5328 */
5329 static int
5330 bridge_output(struct ifnet *ifp, struct mbuf *m)
5331 {
5332 struct bridge_softc *sc = ifnet_softc(ifp);
5333 struct ether_header *eh;
5334 struct ifnet *dst_if = NULL;
5335 int error = 0;
5336
5337 eh = mtod(m, struct ether_header *);
5338
5339 BRIDGE_LOCK(sc);
5340
5341 if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5342 dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5343 }
5344
5345 (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5346
5347 #if NBPFILTER > 0
5348 if (sc->sc_bpf_output) {
5349 bridge_bpf_output(ifp, m);
5350 }
5351 #endif
5352
5353 if (dst_if == NULL) {
5354 /* callee will unlock */
5355 bridge_broadcast(sc, NULL, m, 0);
5356 } else {
5357 ifnet_t bridge_ifp;
5358
5359 bridge_ifp = sc->sc_ifp;
5360 BRIDGE_UNLOCK(sc);
5361
5362 error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5363 CHECKSUM_OPERATION_FINALIZE);
5364 }
5365
5366 return error;
5367 }
5368
5369 static void
5370 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5371 {
5372 struct ether_header *eh;
5373 bool is_ipv4;
5374 uint32_t sw_csum, hwcap;
5375 uint32_t did_sw;
5376 uint32_t csum_flags;
5377
5378 eh = mtod(m, struct ether_header *);
5379 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5380 return;
5381 }
5382
5383 /* do in software what the hardware cannot */
5384 hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5385 csum_flags = m->m_pkthdr.csum_flags;
5386 sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5387 sw_csum &= IF_HWASSIST_CSUM_MASK;
5388
5389 if (is_ipv4) {
5390 if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5391 (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5392 if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5393 uint16_t start =
5394 sizeof(*eh) + sizeof(struct ip);
5395 uint16_t ulpoff =
5396 m->m_pkthdr.csum_data & 0xffff;
5397 m->m_pkthdr.csum_flags |=
5398 (CSUM_DATA_VALID | CSUM_PARTIAL);
5399 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5400 m->m_pkthdr.csum_tx_start = start;
5401 } else {
5402 sw_csum |= (CSUM_DELAY_DATA &
5403 m->m_pkthdr.csum_flags);
5404 }
5405 }
5406 did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5407 } else {
5408 if ((hwcap & CSUM_PARTIAL) &&
5409 !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5410 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5411 if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5412 uint16_t start =
5413 sizeof(*eh) + sizeof(struct ip6_hdr);
5414 uint16_t ulpoff =
5415 m->m_pkthdr.csum_data & 0xffff;
5416 m->m_pkthdr.csum_flags |=
5417 (CSUM_DATA_VALID | CSUM_PARTIAL);
5418 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5419 m->m_pkthdr.csum_tx_start = start;
5420 } else {
5421 sw_csum |= (CSUM_DELAY_IPV6_DATA &
5422 m->m_pkthdr.csum_flags);
5423 }
5424 }
5425 did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5426 }
5427 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5428 "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5429 ifp->if_xname, csum_flags, hwcap, sw_csum,
5430 did_sw, m->m_pkthdr.csum_flags);
5431 }
5432
/*
 * bridge_start:
 *
 *	Start output on a bridge: drain the interface send queue and pass
 *	each dequeued packet to bridge_output().
 *
 *	This routine is invoked by the start worker thread; because we never
 *	call it directly, there is no need to deploy any serialization
 *	mechanism other than what's already used by the worker thread, i.e.
 *	this is already single threaded.
 *
 *	This routine is called only when if_bridge_txstart is enabled.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	/* stop as soon as the dequeue reports anything but success */
	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5458
/*
 * bridge_forward:
 *
 *	The forwarding function of the bridge.
 *
 *	sc	bridge instance; its lock must be held on entry
 *	sbif	member interface the frame was received on
 *	m	the frame, Ethernet header included; always consumed
 *		(freed on drop, otherwise handed to bridge_broadcast()
 *		or bridge_enqueue())
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
    struct mbuf *m)
{
	struct bridge_iflist *dbif;
	ifnet_t bridge_ifp;
	struct ifnet *src_if, *dst_if;
	struct ether_header *eh;
	uint16_t vlan;
	uint8_t *dst;
	int error;
	struct mac_nat_record mnr;
	bool translate_mac = FALSE;
	uint32_t sc_filter_flags = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	bridge_ifp = sc->sc_ifp;
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
	    "%s m 0x%llx", bridge_ifp->if_xname,
	    (uint64_t)VM_KERNEL_ADDRPERM(m));

	/* the receive interface recorded in the packet must match sbif */
	src_if = m->m_pkthdr.rcvif;
	if (src_if != sbif->bif_ifp) {
		const char *    src_if_name;

		src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
		BRIDGE_LOG(LOG_NOTICE, 0,
		    "src_if %s != bif_ifp %s",
		    src_if_name, sbif->bif_ifp->if_xname);
		goto drop;
	}

	(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
	vlan = VLANTAGOF(m);


	/* a discarding STP port neither learns nor forwards */
	if ((sbif->bif_ifflags & IFBIF_STP) &&
	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
		goto drop;
	}

	eh = mtod(m, struct ether_header *);
	dst = eh->ether_dhost;

	/* If the interface is learning, record the address. */
	if (sbif->bif_ifflags & IFBIF_LEARNING) {
		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
		    sbif, 0, IFBAF_DYNAMIC);
		/*
		 * If the interface has an address limit then deny any source
		 * that is not in the cache.
		 */
		if (error && sbif->bif_addrmax) {
			goto drop;
		}
	}

	/* an STP port in the learning state learns (above) but does not forward */
	if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
		goto drop;
	}

	/*
	 * At this point, the port either doesn't participate
	 * in spanning tree or it is in the forwarding state.
	 */

	/*
	 * If the packet is unicast, destined for someone on
	 * "this" side of the bridge, drop it.
	 */
	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
		/* unicast */
		dst_if = bridge_rtlookup(sc, dst, vlan);
		if (src_if == dst_if) {
			goto drop;
		}
	} else {
		/* broadcast/multicast */

		/*
		 * Check if it's a reserved multicast address; any address
		 * listed in 802.1D section 7.12.6 may not be forwarded by the
		 * bridge.
		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
		 */
		if (dst[0] == 0x01 && dst[1] == 0x80 &&
		    dst[2] == 0xc2 && dst[3] == 0x00 &&
		    dst[4] == 0x00 && dst[5] <= 0x0f) {
			goto drop;
		}


		/* ...forward it to all interfaces. */
		atomic_add_64(&bridge_ifp->if_imcasts, 1);
		dst_if = NULL;
	}

	/*
	 * If we have a destination interface which is a member of our bridge,
	 * OR this is a unicast packet, push it through the bpf(4) machinery.
	 * For broadcast or multicast packets, don't bother because it will
	 * be reinjected into ether_input. We do this before we pass the packets
	 * through the pfil(9) framework, as it is possible that pfil(9) will
	 * drop the packet, or possibly modify it, making it difficult to debug
	 * firewall issues on the bridge.
	 */
#if NBPFILTER > 0
	if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
	    dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
		m->m_pkthdr.rcvif = bridge_ifp;
		BRIDGE_BPF_MTAP_INPUT(sc, m);
	}
#endif /* NBPFILTER */

	/* no known destination: flood (broadcast, multicast, unknown unicast) */
	if (dst_if == NULL) {
		/* bridge_broadcast will unlock */
		bridge_broadcast(sc, sbif, m, 1);
		return;
	}

	/*
	 * Unicast.
	 */
	/*
	 * At this point, we're dealing with a unicast frame
	 * going to a different interface.
	 */
	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
		goto drop;
	}

	dbif = bridge_lookup_member_if(sc, dst_if);
	if (dbif == NULL) {
		/* Not a member of the bridge (anymore?) */
		goto drop;
	}

	/* Private segments can not talk to each other */
	if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
		goto drop;
	}

	if ((dbif->bif_ifflags & IFBIF_STP) &&
	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
		goto drop;
	}

#if HAS_DHCPRA_MASK
	/* APPLE MODIFICATION <rdar:6985737> */
	if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
		m = ip_xdhcpra_output(dst_if, m);
		if (!m) {
			++bridge_ifp.if_xdhcpra;
			BRIDGE_UNLOCK(sc);
			return;
		}
	}
#endif /* HAS_DHCPRA_MASK */

	if (dbif == sc->sc_mac_nat_bif) {
		/* determine how to translate the packet */
		translate_mac
		        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
		if (m == NULL) {
			/* packet was deallocated */
			BRIDGE_UNLOCK(sc);
			return;
		}
	} else if (bif_has_checksum_offload(dbif) &&
	    !bif_has_checksum_offload(sbif)) {
		/*
		 * If the destination interface has checksum offload enabled,
		 * verify the checksum now, unless the source interface also has
		 * checksum offload enabled. The checksum in that case has
		 * already just been computed and verifying it is unnecessary.
		 */
		error = bridge_verify_checksum(&m, &dbif->bif_stats);
		if (error != 0) {
			BRIDGE_UNLOCK(sc);
			if (m != NULL) {
				m_freem(m);
			}
			return;
		}
	}

	/* snapshot the filter flags while the lock is still held */
	sc_filter_flags = sc->sc_filter_flags;

	BRIDGE_UNLOCK(sc);
	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
		if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
			return;
		}
		if (m == NULL) {
			return;
		}
	}

	/* if we need to, translate the MAC address */
	if (translate_mac) {
		bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
	}
	/*
	 * We're forwarding an inbound packet in which the checksum must
	 * already have been computed and if required, verified.
	 */
	if (m != NULL) {
		(void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
		    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
	}
	return;

drop:
	/* common drop path: release the lock, then free the frame */
	BRIDGE_UNLOCK(sc);
	m_freem(m);
}
5684
5685 static void
5686 inject_input_packet(ifnet_t ifp, mbuf_t m)
5687 {
5688 mbuf_pkthdr_setrcvif(m, ifp);
5689 mbuf_pkthdr_setheader(m, mbuf_data(m));
5690 mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5691 mbuf_len(m) - ETHER_HDR_LEN);
5692 mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5693 m->m_flags |= M_PROTO1; /* set to avoid loops */
5694 dlil_input_packet_list(ifp, m);
5695 return;
5696 }
5697
5698 static bool
5699 in_addr_is_ours(struct in_addr ip)
5700 {
5701 struct in_ifaddr *ia;
5702 bool ours = false;
5703
5704 lck_rw_lock_shared(&in_ifaddr_rwlock);
5705 TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5706 if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5707 ours = true;
5708 break;
5709 }
5710 }
5711 lck_rw_done(&in_ifaddr_rwlock);
5712 return ours;
5713 }
5714
5715 static bool
5716 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5717 {
5718 struct in6_ifaddr *ia6;
5719 bool ours = false;
5720
5721 if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5722 struct in6_addr dst_ip;
5723
5724 /* need to embed scope ID for comparison */
5725 bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5726 dst_ip.s6_addr16[1] = htons(ifscope);
5727 ip6_p = &dst_ip;
5728 }
5729 lck_rw_lock_shared(&in6_ifaddr_rwlock);
5730 TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5731 if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5732 ia6->ia_addr.sin6_scope_id, ifscope)) {
5733 ours = true;
5734 break;
5735 }
5736 }
5737 lck_rw_done(&in6_ifaddr_rwlock);
5738 return ours;
5739 }
5740
5741 static void
5742 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5743 bpf_packet_func bpf_input_func)
5744 {
5745 size_t byte_count;
5746 struct ether_header *eh;
5747 errno_t error;
5748 bool is_ipv4;
5749 int len;
5750 u_int mac_hlen;
5751 int pkt_count;
5752
5753 /* segment large packets before sending them up */
5754 if (if_bridge_segmentation == 0) {
5755 goto done;
5756 }
5757 len = m->m_pkthdr.len;
5758 if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5759 goto done;
5760 }
5761 eh = mtod(m, struct ether_header *);
5762 if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5763 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5764 "large non IPv4/IPv6 packet");
5765 goto done;
5766 }
5767
5768 /*
5769 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5770 *
5771 * If gso_tcp() returns success (0), the packet(s) are
5772 * ready to be passed up. If the destination is a local IP address,
5773 * the packet will be passed up as a large, single packet.
5774 *
5775 * If gso_tcp() returns an error, the packet has already
5776 * been freed.
5777 */
5778 mac_hlen = sizeof(*eh);
5779 error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5780 if (error != 0) {
5781 return;
5782 }
5783
5784 done:
5785 pkt_count = 0;
5786 byte_count = 0;
5787 for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5788 /* Mark the packet as arriving on the bridge interface */
5789 mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5790 mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5791 if (bpf_input_func != NULL) {
5792 (*bpf_input_func)(bridge_ifp, scan);
5793 }
5794 mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5795 mbuf_len(scan) - ETHER_HDR_LEN);
5796 mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5797 byte_count += mbuf_pkthdr_len(scan);
5798 pkt_count++;
5799 }
5800 (void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5801 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5802 "%s %d packet(s) %ld bytes",
5803 bridge_ifp->if_xname, pkt_count, byte_count);
5804 dlil_input_packet_list(bridge_ifp, m);
5805 return;
5806 }
5807
5808 static bool
5809 is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5810 {
5811 bool ours;
5812
5813 if (info_p->ip_is_ipv4) {
5814 struct in_addr dst_ip;
5815
5816 bcopy(&info_p->ip_hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
5817 ours = in_addr_is_ours(dst_ip);
5818 } else {
5819 ours = in6_addr_is_ours(&info_p->ip_hdr.ip6->ip6_dst, ifscope);
5820 }
5821 return ours;
5822 }
5823
5824 static inline errno_t
5825 bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
5826 const u_char * ether_dhost, mbuf_t *mp,
5827 bool is_broadcast, bool is_ip, bool is_ipv4,
5828 ip_packet_info * info_p, struct bripstats * stats_p,
5829 bool *info_initialized)
5830 {
5831 errno_t error = 0;
5832 bool is_local = false;
5833 struct pf_mtag *pf_mtag;
5834 u_int16_t tag = vmnet_tag;
5835
5836 *info_initialized = false;
5837 if (is_broadcast) {
5838 if (_ether_cmp(ether_dhost, etherbroadcastaddr) == 0) {
5839 tag = vmnet_broadcast_tag;
5840 } else {
5841 tag = vmnet_multicast_tag;
5842 }
5843 } else if (is_ip) {
5844 unsigned int mac_hlen = sizeof(struct ether_header);
5845
5846 bzero(stats_p, sizeof(*stats_p));
5847 *info_initialized = true;
5848 error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
5849 stats_p);
5850 if (error != 0) {
5851 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
5852 "%s(%s) bridge_get_ip_proto failed %d",
5853 bridge_ifp->if_xname,
5854 ifp->if_xname, error);
5855 if (*mp == NULL) {
5856 return EJUSTRETURN;
5857 }
5858 } else {
5859 is_local = is_our_ip(info_p, bridge_ifp->if_index);
5860 if (is_local) {
5861 tag = vmnet_local_tag;
5862 }
5863 }
5864 }
5865 pf_mtag = pf_get_mtag(*mp);
5866 if (pf_mtag != NULL) {
5867 pf_mtag->pftag_tag = tag;
5868 }
5869 #if DEBUG || DEVELOPMENT
5870 {
5871 bool forced;
5872
5873 BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
5874 if (forced) {
5875 m_freem(*mp);
5876 *mp = NULL;
5877 error = EJUSTRETURN;
5878 goto done;
5879 }
5880 BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
5881 if (forced) {
5882 error = _EBADIP;
5883 goto done;
5884 }
5885 }
5886 done:
5887 #endif /* DEBUG || DEVELOPMENT */
5888 return error;
5889 }
5890
5891 static void
5892 bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
5893 {
5894 dst_p->bips_ip += src_p->bips_ip;
5895 dst_p->bips_ip6 += src_p->bips_ip6;
5896 dst_p->bips_udp += src_p->bips_udp;
5897 dst_p->bips_tcp += src_p->bips_tcp;
5898
5899 dst_p->bips_bad_ip += src_p->bips_bad_ip;
5900 dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
5901 dst_p->bips_bad_udp += src_p->bips_bad_udp;
5902 dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
5903 }
5904
5905 static void
5906 bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
5907 {
5908 struct bridge_iflist *bif;
5909 struct bridge_softc *sc = ifp->if_bridge;
5910
5911 BRIDGE_LOCK(sc);
5912 bif = bridge_lookup_member_if(sc, ifp);
5913 if (bif == NULL) {
5914 goto done;
5915 }
5916 if (!bif_has_checksum_offload(bif)) {
5917 goto done;
5918 }
5919 bripstats_apply(&bif->bif_stats.brms_in_ip, stats_p);
5920
5921 done:
5922 BRIDGE_UNLOCK(sc);
5923 return;
5924 }
5925
/*
 * bridge_input:
 *
 *	Filter input from a member interface.  Queue the packet for
 *	bridging if it is not for us.
 *
 *	ifp	member interface the packet arrived on
 *	data	in/out pointer to the packet (Ethernet header attached);
 *		helpers called here may replace the mbuf or set it to NULL
 *
 *	Returns 0 to hand the packet back to the caller so it continues as
 *	input on `ifp', or EJUSTRETURN when the packet was consumed here
 *	(dropped, forwarded, or re-injected on another interface).
 */
errno_t
bridge_input(struct ifnet *ifp, mbuf_t *data)
{
	struct bridge_softc *sc = ifp->if_bridge;
	struct bridge_iflist *bif, *bif2;
	struct ether_header eh_in;
	bool is_ip = false;
	bool is_ipv4 = false;
	ifnet_t bridge_ifp;
	struct mbuf *mc, *mc2;
	unsigned int mac_hlen = sizeof(struct ether_header);
	uint16_t vlan;
	errno_t error;
	ip_packet_info info;
	struct bripstats stats;
	bool info_initialized = false;
	errno_t ip_packet_error = 0;
	bool is_broadcast;
	bool is_ip_broadcast = false;
	bool is_ifp_mac = false;
	mbuf_t m = *data;
	uint32_t sc_filter_flags = 0;

	bridge_ifp = sc->sc_ifp;
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s m 0x%llx data 0x%llx",
	    bridge_ifp->if_xname, ifp->if_xname,
	    (uint64_t)VM_KERNEL_ADDRPERM(m),
	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
	/* bridge not running: leave the packet to the member interface */
	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s not running passing along",
		    bridge_ifp->if_xname);
		return 0;
	}

	vlan = VLANTAGOF(m);

#ifdef IFF_MONITOR
	/*
	 * Implement support for bridge monitoring.  If this flag has been
	 * set on this interface, discard the packet once we push it through
	 * the bpf(4) machinery, but before we do, increment the byte and
	 * packet counters associated with this interface.
	 */
	if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
		m->m_pkthdr.rcvif  = bridge_ifp;
		BRIDGE_BPF_MTAP_INPUT(sc, m);
		(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
		*data = NULL;
		m_freem(m);
		return EJUSTRETURN;
	}
#endif /* IFF_MONITOR */

	/* "broadcast" here covers both link-layer broadcast and multicast */
	is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;

	/*
	 * Need to clear the promiscuous flag otherwise it will be
	 * dropped by DLIL after processing filters
	 */
	if ((mbuf_flags(m) & MBUF_PROMISC)) {
		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
	}

	/*
	 * copy the ethernet header; the copy stays valid even if the mbuf
	 * is replaced or modified by the helpers below
	 */
	eh_in = *(mtod(m, struct ether_header *));

	is_ip = ether_header_type_is_ip(&eh_in, &is_ipv4);

	if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
		/* tag packets coming from VMNET interfaces */
		ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
		    eh_in.ether_dhost, data, is_broadcast, is_ip, is_ipv4,
		    &info, &stats, &info_initialized);
		m = *data;
		if (m == NULL) {
			/* packet is gone; still account for what was parsed */
			bridge_bripstats_apply(ifp, &stats);
			return EJUSTRETURN;
		}
	}

	/* member-interface pf filtering runs before the bridge lock is taken */
	sc_filter_flags = sc->sc_filter_flags;
	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
		error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
		m = *data;
		if (error != 0 || m == NULL) {
			return EJUSTRETURN;
		}
	}

	BRIDGE_LOCK(sc);
	bif = bridge_lookup_member_if(sc, ifp);
	if (bif == NULL) {
		BRIDGE_UNLOCK(sc);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s bridge_lookup_member_if failed",
		    bridge_ifp->if_xname);
		return 0;
	}
	if (is_ip && bif_has_checksum_offload(bif)) {
		if (info_initialized) {
			/* IP header already parsed above: just fold in the stats */
			bripstats_apply(&bif->bif_stats.brms_in_ip, &stats);
		} else {
			error = bridge_get_ip_proto(data, mac_hlen, is_ipv4,
			    &info, &bif->bif_stats.brms_in_ip);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s(%s) bridge_get_ip_proto failed %d",
				    bridge_ifp->if_xname,
				    bif->bif_ifp->if_xname, error);
				ip_packet_error = error;
			}
		}
		if (ip_packet_error == 0) {
			/* need to compute IP/UDP/TCP/checksums */
			error = bridge_offload_checksum(data, &info,
			    &bif->bif_stats);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s(%s) bridge_offload_checksum failed %d",
				    bridge_ifp->if_xname,
				    bif->bif_ifp->if_xname, error);
				ip_packet_error = error;
			}
		}
		if (ip_packet_error != 0) {
			/* bad IP packet: drop it */
			BRIDGE_UNLOCK(sc);
			if (*data != NULL) {
				m_freem(*data);
				*data = NULL;
			}
			return EJUSTRETURN;
		}
		m = *data;
	}

	if (bif->bif_flags & BIFF_HOST_FILTER) {
		error = bridge_host_filter(bif, data);
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
			    "%s bridge_host_filter failed",
			    bif->bif_ifp->if_xname);
			BRIDGE_UNLOCK(sc);
			return EJUSTRETURN;
		}
		m = *data;
	}

	if (!is_broadcast &&
	    _ether_cmp(eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
		/* the packet is unicast to the interface's MAC address */
		if (is_ip && sc->sc_mac_nat_bif == bif) {
			/* doing MAC-NAT, check if destination is IP broadcast */
			is_ip_broadcast = is_broadcast_ip_packet(data);
			if (*data == NULL) {
				BRIDGE_UNLOCK(sc);
				return EJUSTRETURN;
			}
			m = *data;
		}
		if (!is_ip_broadcast) {
			is_ifp_mac = TRUE;
		}
	}

	bridge_span(sc, m);

	if (is_broadcast || is_ip_broadcast) {
		if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    " multicast: "
			    "%02x:%02x:%02x:%02x:%02x:%02x",
			    eh_in.ether_dhost[0], eh_in.ether_dhost[1],
			    eh_in.ether_dhost[2], eh_in.ether_dhost[3],
			    eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
		}
		/* Tap off 802.1D packets; they do not get forwarded. */
		if (is_broadcast &&
		    _ether_cmp(eh_in.ether_dhost, bstp_etheraddr) == 0) {
#if BRIDGESTP
			m = bstp_input(&bif->bif_stp, ifp, m);
#else /* !BRIDGESTP */
			m_freem(m);
			m = NULL;
#endif /* !BRIDGESTP */
			if (m == NULL) {
				BRIDGE_UNLOCK(sc);
				return EJUSTRETURN;
			}
		}

		/* discarding STP port: no forwarding, member keeps the packet */
		if ((bif->bif_ifflags & IFBIF_STP) &&
		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			BRIDGE_UNLOCK(sc);
			return 0;
		}

		/*
		 * Make a deep copy of the packet and enqueue the copy
		 * for bridge processing.
		 */
		mc = m_dup(m, M_DONTWAIT);
		if (mc == NULL) {
			BRIDGE_UNLOCK(sc);
			return 0;
		}

		/*
		 * Perform the bridge forwarding function with the copy.
		 *
		 * Note that bridge_forward calls BRIDGE_UNLOCK
		 */
		if (is_ip_broadcast) {
			struct ether_header *eh;

			/* make the copy look like it is actually broadcast */
			mc->m_flags |= M_BCAST;
			eh = mtod(mc, struct ether_header *);
			bcopy(etherbroadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN);
		}
		bridge_forward(sc, bif, mc);

		/*
		 * Reinject the mbuf as arriving on the bridge so we have a
		 * chance at claiming multicast packets. We can not loop back
		 * here from ether_input as a bridge is never a member of a
		 * bridge.
		 */
		VERIFY(bridge_ifp->if_bridge == NULL);
		mc2 = m_dup(m, M_DONTWAIT);
		if (mc2 != NULL) {
			/* Keep the layer3 header aligned */
			int i = min(mc2->m_pkthdr.len, max_protohdr);
			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
		}
		if (mc2 != NULL) {
			/* mark packet as arriving on the bridge */
			mc2->m_pkthdr.rcvif = bridge_ifp;
			mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
			BRIDGE_BPF_MTAP_INPUT(sc, mc2);
			/* step past the Ethernet header before passing it up */
			(void) mbuf_setdata(mc2,
			    (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
			    mbuf_len(mc2) - ETHER_HDR_LEN);
			(void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
			(void) ifnet_stat_increment_in(bridge_ifp, 1,
			    mbuf_pkthdr_len(mc2), 0);
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    "%s mcast for us", bridge_ifp->if_xname);
			dlil_input_packet_list(bridge_ifp, mc2);
		}

		/* Return the original packet for local processing. */
		return 0;
	}

	/* unicast arriving on a discarding STP port is not bridged */
	if ((bif->bif_ifflags & IFBIF_STP) &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
		BRIDGE_UNLOCK(sc);
		return 0;
	}

#ifdef DEV_CARP
#define CARP_CHECK_WE_ARE_DST(iface) \
	((iface)->if_carp &&\
	carp_forus((iface)->if_carp, eh_in.ether_dhost))
#define CARP_CHECK_WE_ARE_SRC(iface) \
	((iface)->if_carp &&\
	carp_forus((iface)->if_carp, eh_in.ether_shost))
#else
#define CARP_CHECK_WE_ARE_DST(iface) 0
#define CARP_CHECK_WE_ARE_SRC(iface) 0
#endif

#define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)

#define PFIL_PHYS(sc, ifp, m)

/*
 * GRAB_OUR_PACKETS(iface): used inside the member loop below.  It contains
 * `continue' and `return' statements and relies on the locals sc, bif, m,
 * eh_in, vlan and error of this function.  It unlocks the bridge on every
 * path that returns.
 */
#define GRAB_OUR_PACKETS(iface)                                         \
	if ((iface)->if_type == IFT_GIF)                                \
	        continue;                                               \
	/* It is destined for us. */                                    \
	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 ||   \
	    CARP_CHECK_WE_ARE_DST((iface))) {                           \
	        if ((iface)->if_type == IFT_BRIDGE) {                   \
	                BRIDGE_BPF_MTAP_INPUT(sc, m);                   \
	/* Filter on the physical interface. */                         \
	                PFIL_PHYS(sc, iface, m);                        \
	        } else {                                                \
	                bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0);      \
	        }                                                       \
	        if (bif->bif_ifflags & IFBIF_LEARNING) {                \
	                error = bridge_rtupdate(sc, eh_in.ether_shost,  \
	                    vlan, bif, 0, IFBAF_DYNAMIC);               \
	                if (error && bif->bif_addrmax) {                \
	                        BRIDGE_UNLOCK(sc);                      \
	                        m_freem(m);                             \
	                        return (EJUSTRETURN);                   \
	                }                                               \
	        }                                                       \
	        BRIDGE_UNLOCK(sc);                                      \
	        inject_input_packet(iface, m);                          \
	        return (EJUSTRETURN);                                   \
	}                                                               \
                                                                        \
	/* We just received a packet that we sent out. */               \
	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 ||   \
	    CARP_CHECK_WE_ARE_SRC((iface))) {                           \
	        BRIDGE_UNLOCK(sc);                                      \
	        m_freem(m);                                             \
	        return (EJUSTRETURN);                                   \
	}

	/*
	 * Unicast.
	 */

	/* handle MAC-NAT if enabled */
	if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
		ifnet_t dst_if;
		boolean_t is_input = FALSE;

		dst_if = bridge_mac_nat_input(sc, data, &is_input);
		m = *data;
		if (dst_if == ifp) {
			/* our input packet */
		} else if (dst_if != NULL || m == NULL) {
			/*
			 * Either the packet belongs to another interface
			 * (inject it there as input, or enqueue it for
			 * output) or it no longer exists.
			 */
			BRIDGE_UNLOCK(sc);
			if (dst_if != NULL) {
				ASSERT(m != NULL);
				if (is_input) {
					inject_input_packet(dst_if, m);
				} else {
					(void)bridge_enqueue(bridge_ifp, NULL,
					    dst_if, m,
					    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
				}
			}
			return EJUSTRETURN;
		}
	}

	/*
	 * If the packet is for the bridge, pass it up for local processing.
	 */
	if (_ether_cmp(eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
	    CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
		/* read the bpf callback while the lock is still held */
		bpf_packet_func bpf_input_func = sc->sc_bpf_input;

		/*
		 * If the interface is learning, and the source
		 * address is valid and not multicast, record
		 * the address.
		 */
		if (bif->bif_ifflags & IFBIF_LEARNING) {
			(void) bridge_rtupdate(sc, eh_in.ether_shost,
			    vlan, bif, 0, IFBAF_DYNAMIC);
		}
		BRIDGE_UNLOCK(sc);

		bridge_interface_input(bridge_ifp, m, bpf_input_func);
		return EJUSTRETURN;
	}

	/*
	 * if the destination of the packet is for the MAC address of
	 * the member interface itself, then we don't need to forward
	 * it -- just pass it back.  Note that it'll likely just be
	 * dropped by the stack, but if something else is bound to
	 * the interface directly (for example, the wireless stats
	 * protocol -- although that actually uses BPF right now),
	 * then it will consume the packet
	 *
	 * ALSO, note that we do this check AFTER checking for the
	 * bridge's own MAC address, because the bridge may be
	 * using the SAME MAC address as one of its interfaces
	 */
	if (is_ifp_mac) {

#ifdef VERY_VERY_VERY_DIAGNOSTIC
		BRIDGE_LOG(LOG_NOTICE, 0,
		    "not forwarding packet bound for member interface");
#endif

		BRIDGE_UNLOCK(sc);
		return 0;
	}

	/* Now check the remaining bridge members. */
	TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
		if (bif2->bif_ifp != ifp) {
			GRAB_OUR_PACKETS(bif2->bif_ifp);
		}
	}

#undef CARP_CHECK_WE_ARE_DST
#undef CARP_CHECK_WE_ARE_SRC
#undef GRAB_OUR_PACKETS

	/*
	 * Perform the bridge forwarding function.
	 *
	 * Note that bridge_forward calls BRIDGE_UNLOCK
	 */
	bridge_forward(sc, bif, m);

	return EJUSTRETURN;
}
6341
/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	sc	- the bridge; the bridge lock is held by the caller.
 *	sbif	- member the frame arrived on, or NULL when the bridge
 *		  interface itself is the sender.
 *	m	- the frame.  Ownership is taken: it is either handed to
 *		  the last member in the list (when that member is
 *		  eligible) or freed before returning.
 *	runfilt	- when non-zero, and PF is enabled with IFBF_FILT_MEMBER
 *		  set, each outgoing copy is passed through bridge_pf().
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
    struct mbuf *m, int runfilt)
{
	ifnet_t bridge_ifp;
	struct bridge_iflist *dbif;
	struct ifnet * src_if;
	struct mbuf *mc;
	struct mbuf *mc_in;
	struct ifnet *dst_if;
	int error = 0, used = 0;
	boolean_t bridge_if_out;
	ChecksumOperation cksum_op;
	struct mac_nat_record mnr;
	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
	boolean_t translate_mac = FALSE;
	uint32_t sc_filter_flags = 0;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		/* frame was received on a member interface */
		bridge_if_out = FALSE;
		src_if = sbif->bif_ifp;
		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			/* get the translation record while holding the lock */
			translate_mac
			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
			if (m == NULL) {
				/* packet was deallocated */
				BRIDGE_UNLOCK(sc);
				return;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast().
		 */
		bridge_if_out = TRUE;
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		sbif = NULL;
		src_if = NULL;
	}

	/*
	 * Trade the lock for a reference before walking the member list;
	 * the matching BRIDGE_UNREF() is at the end of the function.
	 * On failure the frame is dropped.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		m_freem(m);
		return;
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip STP members that are currently discarding */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/*
		 * Frames that are neither broadcast nor multicast only go
		 * to members that have IFBIF_DISCOVER set.
		 */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
			continue;
		}

		/*
		 * The last member in the list gets the original mbuf
		 * (avoiding one copy); everyone else gets a duplicate.
		 * 'used' is only set if the last member passed every
		 * check above; otherwise the original is freed after
		 * the loop.
		 */
		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			mc = m;
			used = 1;
		} else {
			mc = m_dup(m, M_DONTWAIT);
			if (mc == NULL) {
				/* presumably counts an output error - confirm */
				(void) ifnet_stat_increment_out(bridge_ifp,
				    0, 0, 1);
				continue;
			}
		}

		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (!bridge_if_out &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			mc_in = m_dup(mc, M_DONTWAIT);
			/* this could fail, but we continue anyways */
		} else {
			mc_in = NULL;
		}

		/* out */
		if (translate_mac && mac_nat_bif == dbif) {
			/* translate the packet without holding the lock */
			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
			/*
			 * NOTE(review): mc is passed by reference and is
			 * NULL-checked further down, which suggests the
			 * translation may consume it.  If so, the pf path
			 * below dereferences mc (used == 0) or passes a
			 * NULL mbuf to bridge_pf() - confirm.
			 */
		}

		sc_filter_flags = sc->sc_filter_flags;
		if (runfilt &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			if (used == 0) {
				/* Keep the layer3 header aligned */
				int i = min(mc->m_pkthdr.len, max_protohdr);
				mc = m_copyup(mc, i, ETHER_ALIGN);
				if (mc == NULL) {
					(void) ifnet_stat_increment_out(
						sc->sc_ifp, 0, 0, 1);
					if (mc_in != NULL) {
						m_freem(mc_in);
						mc_in = NULL;
					}
					continue;
				}
			}
			/*
			 * A non-zero return or a NULL mc means the copy
			 * must not be sent; the input duplicate is
			 * discarded along with it.
			 */
			if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
			if (mc == NULL) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
		}

		if (mc != NULL) {
			/* verify checksum if necessary */
			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
			    !bif_has_checksum_offload(sbif)) {
				error = bridge_verify_checksum(&mc,
				    &dbif->bif_stats);
				if (error != 0) {
					if (mc != NULL) {
						m_freem(mc);
					}
					mc = NULL;
				}
			}
			if (mc != NULL) {
				(void) bridge_enqueue(bridge_ifp,
				    NULL, dst_if, mc, cksum_op);
			}
		}

		/* in */
		if (mc_in == NULL) {
			continue;
		}
		/*
		 * Deliver the duplicate as input on the member: tap it,
		 * record the Ethernet header pointer, then advance the
		 * data pointer and lengths past the Ethernet header.
		 */
		bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
		mbuf_pkthdr_setrcvif(mc_in, dst_if);
		mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
		mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
		    mbuf_len(mc_in) - ETHER_HDR_LEN);
		mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
		dlil_input_packet_list(dst_if, mc_in);
	}
	/* the original was never handed to a member: release it */
	if (used == 0) {
		m_freem(m);
	}


	BRIDGE_UNREF(sc);
}
6534
6535 /*
6536 * bridge_span:
6537 *
6538 * Duplicate a packet out one or more interfaces that are in span mode,
6539 * the original mbuf is unmodified.
6540 */
6541 static void
6542 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6543 {
6544 struct bridge_iflist *bif;
6545 struct ifnet *dst_if;
6546 struct mbuf *mc;
6547
6548 if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6549 return;
6550 }
6551
6552 TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6553 dst_if = bif->bif_ifp;
6554
6555 if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6556 continue;
6557 }
6558
6559 mc = m_copypacket(m, M_DONTWAIT);
6560 if (mc == NULL) {
6561 (void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6562 continue;
6563 }
6564
6565 (void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6566 CHECKSUM_OPERATION_NONE);
6567 }
6568 }
6569
6570
6571 /*
6572 * bridge_rtupdate:
6573 *
6574 * Add a bridge routing entry.
6575 */
6576 static int
6577 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6578 struct bridge_iflist *bif, int setflags, uint8_t flags)
6579 {
6580 struct bridge_rtnode *brt;
6581 int error;
6582
6583 BRIDGE_LOCK_ASSERT_HELD(sc);
6584
6585 /* Check the source address is valid and not multicast. */
6586 if (ETHER_IS_MULTICAST(dst) ||
6587 (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6588 dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6589 return EINVAL;
6590 }
6591
6592
6593 /* 802.1p frames map to vlan 1 */
6594 if (vlan == 0) {
6595 vlan = 1;
6596 }
6597
6598 /*
6599 * A route for this destination might already exist. If so,
6600 * update it, otherwise create a new one.
6601 */
6602 if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6603 if (sc->sc_brtcnt >= sc->sc_brtmax) {
6604 sc->sc_brtexceeded++;
6605 return ENOSPC;
6606 }
6607 /* Check per interface address limits (if enabled) */
6608 if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6609 bif->bif_addrexceeded++;
6610 return ENOSPC;
6611 }
6612
6613 /*
6614 * Allocate a new bridge forwarding node, and
6615 * initialize the expiration time and Ethernet
6616 * address.
6617 */
6618 brt = zalloc_noblock(bridge_rtnode_pool);
6619 if (brt == NULL) {
6620 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6621 "zalloc_nolock failed");
6622 return ENOMEM;
6623 }
6624 bzero(brt, sizeof(struct bridge_rtnode));
6625
6626 if (bif->bif_ifflags & IFBIF_STICKY) {
6627 brt->brt_flags = IFBAF_STICKY;
6628 } else {
6629 brt->brt_flags = IFBAF_DYNAMIC;
6630 }
6631
6632 memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6633 brt->brt_vlan = vlan;
6634
6635
6636 if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6637 zfree(bridge_rtnode_pool, brt);
6638 return error;
6639 }
6640 brt->brt_dst = bif;
6641 bif->bif_addrcnt++;
6642 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6643 "added %02x:%02x:%02x:%02x:%02x:%02x "
6644 "on %s count %u hashsize %u",
6645 dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6646 sc->sc_ifp->if_xname, sc->sc_brtcnt,
6647 sc->sc_rthash_size);
6648 }
6649
6650 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6651 brt->brt_dst != bif) {
6652 brt->brt_dst->bif_addrcnt--;
6653 brt->brt_dst = bif;
6654 brt->brt_dst->bif_addrcnt++;
6655 }
6656
6657 if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6658 unsigned long now;
6659
6660 now = (unsigned long) net_uptime();
6661 brt->brt_expire = now + sc->sc_brttimeout;
6662 }
6663 if (setflags) {
6664 brt->brt_flags = flags;
6665 }
6666
6667
6668 return 0;
6669 }
6670
6671 /*
6672 * bridge_rtlookup:
6673 *
6674 * Lookup the destination interface for an address.
6675 */
6676 static struct ifnet *
6677 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6678 {
6679 struct bridge_rtnode *brt;
6680
6681 BRIDGE_LOCK_ASSERT_HELD(sc);
6682
6683 if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6684 return NULL;
6685 }
6686
6687 return brt->brt_ifp;
6688 }
6689
6690 /*
6691 * bridge_rttrim:
6692 *
6693 * Trim the routine table so that we have a number
6694 * of routing entries less than or equal to the
6695 * maximum number.
6696 */
6697 static void
6698 bridge_rttrim(struct bridge_softc *sc)
6699 {
6700 struct bridge_rtnode *brt, *nbrt;
6701
6702 BRIDGE_LOCK_ASSERT_HELD(sc);
6703
6704 /* Make sure we actually need to do this. */
6705 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6706 return;
6707 }
6708
6709 /* Force an aging cycle; this might trim enough addresses. */
6710 bridge_rtage(sc);
6711 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6712 return;
6713 }
6714
6715 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6716 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6717 bridge_rtnode_destroy(sc, brt);
6718 if (sc->sc_brtcnt <= sc->sc_brtmax) {
6719 return;
6720 }
6721 }
6722 }
6723 }
6724
6725 /*
6726 * bridge_aging_timer:
6727 *
6728 * Aging periodic timer for the bridge routing table.
6729 */
6730 static void
6731 bridge_aging_timer(struct bridge_softc *sc)
6732 {
6733 BRIDGE_LOCK_ASSERT_HELD(sc);
6734
6735 bridge_rtage(sc);
6736 if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6737 (sc->sc_flags & SCF_DETACHING) == 0) {
6738 sc->sc_aging_timer.bdc_sc = sc;
6739 sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6740 sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6741 bridge_schedule_delayed_call(&sc->sc_aging_timer);
6742 }
6743 }
6744
6745 /*
6746 * bridge_rtage:
6747 *
6748 * Perform an aging cycle.
6749 */
6750 static void
6751 bridge_rtage(struct bridge_softc *sc)
6752 {
6753 struct bridge_rtnode *brt, *nbrt;
6754 unsigned long now;
6755
6756 BRIDGE_LOCK_ASSERT_HELD(sc);
6757
6758 now = (unsigned long) net_uptime();
6759
6760 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6761 if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6762 if (now >= brt->brt_expire) {
6763 bridge_rtnode_destroy(sc, brt);
6764 }
6765 }
6766 }
6767 if (sc->sc_mac_nat_bif != NULL) {
6768 bridge_mac_nat_age_entries(sc, now);
6769 }
6770 }
6771
6772 /*
6773 * bridge_rtflush:
6774 *
6775 * Remove all dynamic addresses from the bridge.
6776 */
6777 static void
6778 bridge_rtflush(struct bridge_softc *sc, int full)
6779 {
6780 struct bridge_rtnode *brt, *nbrt;
6781
6782 BRIDGE_LOCK_ASSERT_HELD(sc);
6783
6784 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6785 if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6786 bridge_rtnode_destroy(sc, brt);
6787 }
6788 }
6789 }
6790
6791 /*
6792 * bridge_rtdaddr:
6793 *
6794 * Remove an address from the table.
6795 */
6796 static int
6797 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6798 {
6799 struct bridge_rtnode *brt;
6800 int found = 0;
6801
6802 BRIDGE_LOCK_ASSERT_HELD(sc);
6803
6804 /*
6805 * If vlan is zero then we want to delete for all vlans so the lookup
6806 * may return more than one.
6807 */
6808 while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6809 bridge_rtnode_destroy(sc, brt);
6810 found = 1;
6811 }
6812
6813 return found ? 0 : ENOENT;
6814 }
6815
6816 /*
6817 * bridge_rtdelete:
6818 *
6819 * Delete routes to a specific member interface.
6820 */
6821 static void
6822 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6823 {
6824 struct bridge_rtnode *brt, *nbrt;
6825
6826 BRIDGE_LOCK_ASSERT_HELD(sc);
6827
6828 LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6829 if (brt->brt_ifp == ifp && (full ||
6830 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6831 bridge_rtnode_destroy(sc, brt);
6832 }
6833 }
6834 }
6835
6836 /*
6837 * bridge_rtable_init:
6838 *
6839 * Initialize the route table for this bridge.
6840 */
6841 static int
6842 bridge_rtable_init(struct bridge_softc *sc)
6843 {
6844 u_int32_t i;
6845
6846 sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6847 BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6848 sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6849
6850 for (i = 0; i < sc->sc_rthash_size; i++) {
6851 LIST_INIT(&sc->sc_rthash[i]);
6852 }
6853
6854 sc->sc_rthash_key = RandomULong();
6855
6856 LIST_INIT(&sc->sc_rtlist);
6857
6858 return 0;
6859 }
6860
/*
 * bridge_rthash_delayed_resize:
 *
 *	Resize the routing table hash on a delayed thread call.
 *
 *	Called with the bridge lock held.  The lock is dropped around
 *	the allocation of the new bucket array and re-taken afterwards;
 *	SCF_RESIZING is set for the duration of that window.  When the
 *	allocation fails or the bridge started detaching in the
 *	meantime, the existing table is left untouched.
 */
static void
bridge_rthash_delayed_resize(struct bridge_softc *sc)
{
	u_int32_t new_rthash_size = 0;
	u_int32_t old_rthash_size = 0;
	struct _bridge_rtnode_list *new_rthash = NULL;
	struct _bridge_rtnode_list *old_rthash = NULL;
	u_int32_t i;
	struct bridge_rtnode *brt;
	int error = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/*
	 * Four entries per hash bucket is our ideal load factor
	 */
	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
		/*
		 * No resize needed.  error is still 0 here, so the
		 * "new size" message below is logged with the unchanged
		 * size and the kfree_type() is given a NULL pointer.
		 */
		goto out;
	}

	/*
	 * Doubling the number of hash buckets may be too simplistic
	 * especially when facing a spike of new entries
	 */
	new_rthash_size = sc->sc_rthash_size * 2;

	/* Flag the resize before releasing the lock for the allocation */
	sc->sc_flags |= SCF_RESIZING;
	BRIDGE_UNLOCK(sc);

	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
	    Z_WAITOK | Z_ZERO);

	BRIDGE_LOCK(sc);
	sc->sc_flags &= ~SCF_RESIZING;

	if (new_rthash == NULL) {
		error = ENOMEM;
		goto out;
	}
	/* The bridge may have started detaching while the lock was dropped */
	if ((sc->sc_flags & SCF_DETACHING)) {
		error = ENODEV;
		goto out;
	}
	/*
	 * Fail safe from here on
	 */
	old_rthash = sc->sc_rthash;
	old_rthash_size = sc->sc_rthash_size;
	sc->sc_rthash = new_rthash;
	sc->sc_rthash_size = new_rthash_size;

	/*
	 * Get a new key to force entries to be shuffled around to reduce
	 * the likelihood they will land in the same buckets
	 */
	sc->sc_rthash_key = RandomULong();

	for (i = 0; i < sc->sc_rthash_size; i++) {
		LIST_INIT(&sc->sc_rthash[i]);
	}

	/* Move every node from its old bucket into the new table */
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
		LIST_REMOVE(brt, brt_hash);
		(void) bridge_rtnode_hash(sc, brt);
	}
out:
	if (error == 0) {
		/* success (or nothing to do): release the old bucket array */
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
		    "%s new size %u",
		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
	} else {
		/* failure: the table is unchanged, release the unused new array */
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
		    "%s failed %d", sc->sc_ifp->if_xname, error);
		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
	}
}
6943
6944 /*
6945 * Resize the number of hash buckets based on the load factor
6946 * Currently only grow
6947 * Failing to resize the hash table is not fatal
6948 */
6949 static void
6950 bridge_rthash_resize(struct bridge_softc *sc)
6951 {
6952 BRIDGE_LOCK_ASSERT_HELD(sc);
6953
6954 if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6955 return;
6956 }
6957
6958 /*
6959 * Four entries per hash bucket is our ideal load factor
6960 */
6961 if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6962 return;
6963 }
6964 /*
6965 * Hard limit on the size of the routing hash table
6966 */
6967 if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6968 return;
6969 }
6970
6971 sc->sc_resize_call.bdc_sc = sc;
6972 sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6973 bridge_schedule_delayed_call(&sc->sc_resize_call);
6974 }
6975
6976 /*
6977 * bridge_rtable_fini:
6978 *
6979 * Deconstruct the route table for this bridge.
6980 */
6981 static void
6982 bridge_rtable_fini(struct bridge_softc *sc)
6983 {
6984 KASSERT(sc->sc_brtcnt == 0,
6985 ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
6986 kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
6987 sc->sc_rthash);
6988 sc->sc_rthash = NULL;
6989 sc->sc_rthash_size = 0;
6990 }
6991
6992 /*
6993 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6994 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6995 */
6996 #define mix(a, b, c) \
6997 do { \
6998 a -= b; a -= c; a ^= (c >> 13); \
6999 b -= c; b -= a; b ^= (a << 8); \
7000 c -= a; c -= b; c ^= (b >> 13); \
7001 a -= b; a -= c; a ^= (c >> 12); \
7002 b -= c; b -= a; b ^= (a << 16); \
7003 c -= a; c -= b; c ^= (b >> 5); \
7004 a -= b; a -= c; a ^= (c >> 3); \
7005 b -= c; b -= a; b ^= (a << 10); \
7006 c -= a; c -= b; c ^= (b >> 15); \
7007 } while ( /*CONSTCOND*/ 0)
7008
7009 static __inline uint32_t
7010 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
7011 {
7012 uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7013
7014 b += addr[5] << 8;
7015 b += addr[4];
7016 a += addr[3] << 24;
7017 a += addr[2] << 16;
7018 a += addr[1] << 8;
7019 a += addr[0];
7020
7021 mix(a, b, c);
7022
7023 return c & BRIDGE_RTHASH_MASK(sc);
7024 }
7025
7026 #undef mix
7027
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns 0 when
 *	they are equal; otherwise the difference (a[i] - b[i]) of the
 *	first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;
	int delta;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		delta = (int)a[idx] - (int)b[idx];
		if (delta != 0) {
			return delta;
		}
	}

	return 0;
}
7039
7040 /*
7041 * bridge_rtnode_lookup:
7042 *
7043 * Look up a bridge route node for the specified destination. Compare the
7044 * vlan id or if zero then just return the first match.
7045 */
7046 static struct bridge_rtnode *
7047 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
7048 uint16_t vlan)
7049 {
7050 struct bridge_rtnode *brt;
7051 uint32_t hash;
7052 int dir;
7053
7054 BRIDGE_LOCK_ASSERT_HELD(sc);
7055
7056 hash = bridge_rthash(sc, addr);
7057 LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7058 dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7059 if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7060 return brt;
7061 }
7062 if (dir > 0) {
7063 return NULL;
7064 }
7065 }
7066
7067 return NULL;
7068 }
7069
7070 /*
7071 * bridge_rtnode_hash:
7072 *
7073 * Insert the specified bridge node into the route hash table.
7074 * This is used when adding a new node or to rehash when resizing
7075 * the hash table
7076 */
7077 static int
7078 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7079 {
7080 struct bridge_rtnode *lbrt;
7081 uint32_t hash;
7082 int dir;
7083
7084 BRIDGE_LOCK_ASSERT_HELD(sc);
7085
7086 hash = bridge_rthash(sc, brt->brt_addr);
7087
7088 lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7089 if (lbrt == NULL) {
7090 LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7091 goto out;
7092 }
7093
7094 do {
7095 dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7096 if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7097 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7098 "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7099 sc->sc_ifp->if_xname,
7100 brt->brt_addr[0], brt->brt_addr[1],
7101 brt->brt_addr[2], brt->brt_addr[3],
7102 brt->brt_addr[4], brt->brt_addr[5]);
7103 return EEXIST;
7104 }
7105 if (dir > 0) {
7106 LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7107 goto out;
7108 }
7109 if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7110 LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7111 goto out;
7112 }
7113 lbrt = LIST_NEXT(lbrt, brt_hash);
7114 } while (lbrt != NULL);
7115
7116 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7117 "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7118 sc->sc_ifp->if_xname,
7119 brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7120 brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7121 out:
7122 return 0;
7123 }
7124
7125 /*
7126 * bridge_rtnode_insert:
7127 *
7128 * Insert the specified bridge node into the route table. We
7129 * assume the entry is not already in the table.
7130 */
7131 static int
7132 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7133 {
7134 int error;
7135
7136 error = bridge_rtnode_hash(sc, brt);
7137 if (error != 0) {
7138 return error;
7139 }
7140
7141 LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7142 sc->sc_brtcnt++;
7143
7144 bridge_rthash_resize(sc);
7145
7146 return 0;
7147 }
7148
7149 /*
7150 * bridge_rtnode_destroy:
7151 *
7152 * Destroy a bridge rtnode.
7153 */
7154 static void
7155 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7156 {
7157 BRIDGE_LOCK_ASSERT_HELD(sc);
7158
7159 LIST_REMOVE(brt, brt_hash);
7160
7161 LIST_REMOVE(brt, brt_list);
7162 sc->sc_brtcnt--;
7163 brt->brt_dst->bif_addrcnt--;
7164 zfree(bridge_rtnode_pool, brt);
7165 }
7166
7167 #if BRIDGESTP
7168 /*
7169 * bridge_rtable_expire:
7170 *
7171 * Set the expiry time for all routes on an interface.
7172 */
7173 static void
7174 bridge_rtable_expire(struct ifnet *ifp, int age)
7175 {
7176 struct bridge_softc *sc = ifp->if_bridge;
7177 struct bridge_rtnode *brt;
7178
7179 BRIDGE_LOCK(sc);
7180
7181 /*
7182 * If the age is zero then flush, otherwise set all the expiry times to
7183 * age for the interface
7184 */
7185 if (age == 0) {
7186 bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7187 } else {
7188 unsigned long now;
7189
7190 now = (unsigned long) net_uptime();
7191
7192 LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7193 /* Cap the expiry time to 'age' */
7194 if (brt->brt_ifp == ifp &&
7195 brt->brt_expire > now + age &&
7196 (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7197 brt->brt_expire = now + age;
7198 }
7199 }
7200 }
7201 BRIDGE_UNLOCK(sc);
7202 }
7203
7204 /*
7205 * bridge_state_change:
7206 *
7207 * Callback from the bridgestp code when a port changes states.
7208 */
7209 static void
7210 bridge_state_change(struct ifnet *ifp, int state)
7211 {
7212 struct bridge_softc *sc = ifp->if_bridge;
7213 static const char *stpstates[] = {
7214 "disabled",
7215 "listening",
7216 "learning",
7217 "forwarding",
7218 "blocking",
7219 "discarding"
7220 };
7221
7222 if (log_stp) {
7223 log(LOG_NOTICE, "%s: state changed to %s on %s",
7224 sc->sc_ifp->if_xname,
7225 stpstates[state], ifp->if_xname);
7226 }
7227 }
7228 #endif /* BRIDGESTP */
7229
7230 /*
7231 * bridge_set_bpf_tap:
7232 *
7233 * Sets ups the BPF callbacks.
7234 */
7235 static errno_t
7236 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7237 {
7238 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7239
7240 /* TBD locking */
7241 if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7242 return ENODEV;
7243 }
7244 switch (mode) {
7245 case BPF_TAP_DISABLE:
7246 sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7247 break;
7248
7249 case BPF_TAP_INPUT:
7250 sc->sc_bpf_input = bpf_callback;
7251 break;
7252
7253 case BPF_TAP_OUTPUT:
7254 sc->sc_bpf_output = bpf_callback;
7255 break;
7256
7257 case BPF_TAP_INPUT_OUTPUT:
7258 sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7259 break;
7260
7261 default:
7262 break;
7263 }
7264
7265 return 0;
7266 }
7267
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached.  Releases everything
 *	the softc owns and finally the softc itself, so 'sc' must not
 *	be referenced after this returns.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* Unlink the bridge from the global list of bridges */
	lck_mtx_lock(&bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(&bridge_list_mtx);

	ifnet_release(ifp);

	/* The mutex lives inside the softc: destroy it before freeing */
	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
	kfree_type(struct bridge_softc, sc);
}
7294
7295 /*
7296 * bridge_bpf_input:
7297 *
7298 * Invoke the input BPF callback if enabled
7299 */
7300 static errno_t
7301 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7302 {
7303 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7304 bpf_packet_func input_func = sc->sc_bpf_input;
7305
7306 if (input_func != NULL) {
7307 if (mbuf_pkthdr_rcvif(m) != ifp) {
7308 BRIDGE_LOG(LOG_NOTICE, 0,
7309 "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7310 (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7311 (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7312 }
7313 (*input_func)(ifp, m);
7314 }
7315 return 0;
7316 }
7317
7318 /*
7319 * bridge_bpf_output:
7320 *
7321 * Invoke the output BPF callback if enabled
7322 */
7323 static errno_t
7324 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7325 {
7326 struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7327 bpf_packet_func output_func = sc->sc_bpf_output;
7328
7329 if (output_func != NULL) {
7330 (*output_func)(ifp, m);
7331 }
7332 return 0;
7333 }
7334
7335 /*
7336 * bridge_link_event:
7337 *
7338 * Report a data link event on an interface
7339 */
7340 static void
7341 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7342 {
7343 struct event {
7344 u_int32_t ifnet_family;
7345 u_int32_t unit;
7346 char if_name[IFNAMSIZ];
7347 };
7348 _Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7349 struct kern_event_msg *header = (struct kern_event_msg*)message;
7350 struct event *data = (struct event *)(header + 1);
7351
7352 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7353 "%s event_code %u - %s", ifp->if_xname,
7354 event_code, dlil_kev_dl_code_str(event_code));
7355 header->total_size = sizeof(message);
7356 header->vendor_code = KEV_VENDOR_APPLE;
7357 header->kev_class = KEV_NETWORK_CLASS;
7358 header->kev_subclass = KEV_DL_SUBCLASS;
7359 header->event_code = event_code;
7360 data->ifnet_family = ifnet_family(ifp);
7361 data->unit = (u_int32_t)ifnet_unit(ifp);
7362 strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7363 ifnet_event(ifp, header);
7364 }
7365
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host-filter drop: bump the named counter in
 *	bridge_hostfilter_stats, log "<func>.<line> <reason>" and set
 *	the enclosing function's local 'error' to EINVAL.  The caller
 *	must have an 'int error' in scope.
 *
 *	The format string carries a space before the reason so that the
 *	line number and the reason name do not run together in the log.
 */
#define BRIDGE_HF_DROP(reason, func, line) {                            \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	            "%s.%d " #reason, func, line);                      \
	        error = EINVAL;                                         \
	}
7372
7373 /*
7374 * Make sure this is a DHCP or Bootp request that match the host filter
7375 */
7376 static int
7377 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7378 {
7379 int error = EINVAL;
7380 struct dhcp dhcp;
7381
7382 /*
7383 * Note: We use the dhcp structure because bootp structure definition
7384 * is larger and some vendors do not pad the request
7385 */
7386 error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7387 if (error != 0) {
7388 BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7389 goto done;
7390 }
7391 if (dhcp.dp_op != BOOTREQUEST) {
7392 BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7393 goto done;
7394 }
7395 /*
7396 * The hardware address must be an exact match
7397 */
7398 if (dhcp.dp_htype != ARPHRD_ETHER) {
7399 BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7400 goto done;
7401 }
7402 if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7403 BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7404 goto done;
7405 }
7406 if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7407 ETHER_ADDR_LEN) != 0) {
7408 BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7409 goto done;
7410 }
7411 /*
7412 * Client address must match the host address or be not specified
7413 */
7414 if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7415 dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7416 BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7417 goto done;
7418 }
7419 error = 0;
7420 done:
7421 return error;
7422 }
7423
7424 static int
7425 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7426 {
7427 int error = EINVAL;
7428 struct ether_header *eh;
7429 static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7430 mbuf_t m = *data;
7431
7432 eh = mtod(m, struct ether_header *);
7433
7434 /*
7435 * Restrict the source hardware address
7436 */
7437 if ((bif->bif_flags & BIFF_HF_HWSRC) == 0 ||
7438 bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7439 ETHER_ADDR_LEN) != 0) {
7440 BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7441 goto done;
7442 }
7443
7444 /*
7445 * Restrict Ethernet protocols to ARP and IP
7446 */
7447 if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7448 struct ether_arp *ea;
7449 size_t minlen = sizeof(struct ether_header) +
7450 sizeof(struct ether_arp);
7451
7452 /*
7453 * Make the Ethernet and ARP headers contiguous
7454 */
7455 if (mbuf_pkthdr_len(m) < minlen) {
7456 BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7457 goto done;
7458 }
7459 if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7460 BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7461 __func__, __LINE__);
7462 goto done;
7463 }
7464 m = *data;
7465
7466 /*
7467 * Verify this is an ethernet/ip arp
7468 */
7469 eh = mtod(m, struct ether_header *);
7470 ea = (struct ether_arp *)(eh + 1);
7471 if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7472 BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7473 __func__, __LINE__);
7474 goto done;
7475 }
7476 if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7477 BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7478 __func__, __LINE__);
7479 goto done;
7480 }
7481 /*
7482 * Verify the address lengths are correct
7483 */
7484 if (ea->arp_hln != ETHER_ADDR_LEN) {
7485 BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7486 goto done;
7487 }
7488 if (ea->arp_pln != sizeof(struct in_addr)) {
7489 BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7490 __func__, __LINE__);
7491 goto done;
7492 }
7493
7494 /*
7495 * Allow only ARP request or ARP reply
7496 */
7497 if (ea->arp_op != htons(ARPOP_REQUEST) &&
7498 ea->arp_op != htons(ARPOP_REPLY)) {
7499 BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7500 goto done;
7501 }
7502 /*
7503 * Verify source hardware address matches
7504 */
7505 if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7506 ETHER_ADDR_LEN) != 0) {
7507 BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7508 goto done;
7509 }
7510 /*
7511 * Verify source protocol address:
7512 * May be null for an ARP probe
7513 */
7514 if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7515 sizeof(struct in_addr)) != 0 &&
7516 bcmp(ea->arp_spa, &inaddr_any,
7517 sizeof(struct in_addr)) != 0) {
7518 BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7519 goto done;
7520 }
7521 bridge_hostfilter_stats.brhf_arp_ok += 1;
7522 error = 0;
7523 } else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7524 size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7525 struct ip iphdr;
7526 size_t offset;
7527
7528 /*
7529 * Make the Ethernet and IP headers contiguous
7530 */
7531 if (mbuf_pkthdr_len(m) < minlen) {
7532 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7533 goto done;
7534 }
7535 offset = sizeof(struct ether_header);
7536 error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7537 if (error != 0) {
7538 BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7539 goto done;
7540 }
7541 /*
7542 * Verify the source IP address
7543 */
7544 if (iphdr.ip_p == IPPROTO_UDP) {
7545 struct udphdr udp;
7546
7547 minlen += sizeof(struct udphdr);
7548 if (mbuf_pkthdr_len(m) < minlen) {
7549 BRIDGE_HF_DROP(brhf_ip_too_small,
7550 __func__, __LINE__);
7551 goto done;
7552 }
7553
7554 /*
7555 * Allow all zero addresses for DHCP requests
7556 */
7557 if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7558 iphdr.ip_src.s_addr != INADDR_ANY) {
7559 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7560 __func__, __LINE__);
7561 goto done;
7562 }
7563 offset = sizeof(struct ether_header) +
7564 (IP_VHL_HL(iphdr.ip_vhl) << 2);
7565 error = mbuf_copydata(m, offset,
7566 sizeof(struct udphdr), &udp);
7567 if (error != 0) {
7568 BRIDGE_HF_DROP(brhf_ip_too_small,
7569 __func__, __LINE__);
7570 goto done;
7571 }
7572 /*
7573 * Either it's a Bootp/DHCP packet that we like or
7574 * it's a UDP packet from the host IP as source address
7575 */
7576 if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7577 udp.uh_dport == htons(IPPORT_BOOTPS)) {
7578 minlen += sizeof(struct dhcp);
7579 if (mbuf_pkthdr_len(m) < minlen) {
7580 BRIDGE_HF_DROP(brhf_ip_too_small,
7581 __func__, __LINE__);
7582 goto done;
7583 }
7584 offset += sizeof(struct udphdr);
7585 error = bridge_dhcp_filter(bif, m, offset);
7586 if (error != 0) {
7587 goto done;
7588 }
7589 } else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7590 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7591 __func__, __LINE__);
7592 goto done;
7593 }
7594 } else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr ||
7595 bif->bif_hf_ipsrc.s_addr == INADDR_ANY) {
7596 BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7597 goto done;
7598 }
7599 /*
7600 * Allow only boring IP protocols
7601 */
7602 if (iphdr.ip_p != IPPROTO_TCP &&
7603 iphdr.ip_p != IPPROTO_UDP &&
7604 iphdr.ip_p != IPPROTO_ICMP &&
7605 iphdr.ip_p != IPPROTO_ESP &&
7606 iphdr.ip_p != IPPROTO_AH &&
7607 iphdr.ip_p != IPPROTO_GRE) {
7608 BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7609 goto done;
7610 }
7611 bridge_hostfilter_stats.brhf_ip_ok += 1;
7612 error = 0;
7613 } else {
7614 BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7615 goto done;
7616 }
7617 done:
7618 if (error != 0) {
7619 if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7620 if (m) {
7621 brlog_mbuf_data(m, 0,
7622 sizeof(struct ether_header) +
7623 sizeof(struct ip));
7624 }
7625 }
7626
7627 if (m != NULL) {
7628 m_freem(m);
7629 }
7630 }
7631 return error;
7632 }
7633
7634 /*
7635 * MAC NAT
7636 */
7637
7638 static errno_t
7639 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7640 {
7641 errno_t error = 0;
7642
7643 BRIDGE_LOCK_ASSERT_HELD(sc);
7644
7645 if (IFNET_IS_VMNET(bif->bif_ifp)) {
7646 error = EINVAL;
7647 goto done;
7648 }
7649 if (sc->sc_mac_nat_bif != NULL) {
7650 if (sc->sc_mac_nat_bif != bif) {
7651 error = EBUSY;
7652 }
7653 goto done;
7654 }
7655 sc->sc_mac_nat_bif = bif;
7656 bif->bif_ifflags |= IFBIF_MAC_NAT;
7657 bridge_mac_nat_populate_entries(sc);
7658
7659 done:
7660 return error;
7661 }
7662
7663 static void
7664 bridge_mac_nat_disable(struct bridge_softc *sc)
7665 {
7666 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7667
7668 assert(mac_nat_bif != NULL);
7669 bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7670 mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7671 sc->sc_mac_nat_bif = NULL;
7672 return;
7673 }
7674
7675 static void
7676 mac_nat_entry_print2(struct mac_nat_entry *mne,
7677 char *ifname, const char *msg1, const char *msg2)
7678 {
7679 int af;
7680 char etopbuf[24];
7681 char ntopbuf[MAX_IPv6_STR_LEN];
7682 const char *space;
7683
7684 af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7685 ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7686 (void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7687 if (msg2 == NULL) {
7688 msg2 = "";
7689 space = "";
7690 } else {
7691 space = " ";
7692 }
7693 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7694 "%s %s%s%s %p (%s, %s, %s)",
7695 ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7696 ntopbuf, etopbuf);
7697 }
7698
7699 static void
7700 mac_nat_entry_print(struct mac_nat_entry *mne,
7701 char *ifname, const char *msg)
7702 {
7703 mac_nat_entry_print2(mne, ifname, msg, NULL);
7704 }
7705
7706 static struct mac_nat_entry *
7707 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7708 {
7709 struct mac_nat_entry *mne;
7710 struct mac_nat_entry *ret_mne = NULL;
7711
7712 if (af == AF_INET) {
7713 in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7714
7715 LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7716 if (mne->mne_ip.s_addr == s_addr) {
7717 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7718 mac_nat_entry_print(mne, sc->sc_if_xname,
7719 "found");
7720 }
7721 ret_mne = mne;
7722 break;
7723 }
7724 }
7725 } else {
7726 const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7727
7728 LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7729 if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7730 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7731 mac_nat_entry_print(mne, sc->sc_if_xname,
7732 "found");
7733 }
7734 ret_mne = mne;
7735 break;
7736 }
7737 }
7738 }
7739 return ret_mne;
7740 }
7741
7742 static void
7743 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7744 struct mac_nat_entry *mne, const char *reason)
7745 {
7746 LIST_REMOVE(mne, mne_list);
7747 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7748 mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7749 }
7750 zfree(bridge_mne_pool, mne);
7751 sc->sc_mne_count--;
7752 }
7753
7754 static struct mac_nat_entry *
7755 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7756 struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7757 {
7758 struct mac_nat_entry_list *list;
7759 struct mac_nat_entry *mne;
7760
7761 if (sc->sc_mne_count >= sc->sc_mne_max) {
7762 sc->sc_mne_allocation_failures++;
7763 return NULL;
7764 }
7765 mne = zalloc_noblock(bridge_mne_pool);
7766 if (mne == NULL) {
7767 sc->sc_mne_allocation_failures++;
7768 return NULL;
7769 }
7770 sc->sc_mne_count++;
7771 bzero(mne, sizeof(*mne));
7772 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7773 mne->mne_bif = bif;
7774 if (af == AF_INET) {
7775 bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7776 list = &sc->sc_mne_list;
7777 } else {
7778 bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7779 mne->mne_flags |= MNE_FLAGS_IPV6;
7780 list = &sc->sc_mne_list_v6;
7781 }
7782 LIST_INSERT_HEAD(list, mne, mne_list);
7783 mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7784 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7785 mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7786 }
7787 return mne;
7788 }
7789
7790 static struct mac_nat_entry *
7791 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7792 struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7793 {
7794 struct mac_nat_entry *mne;
7795
7796 mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7797 if (mne != NULL) {
7798 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7799
7800 if (mne->mne_bif == mac_nat_bif) {
7801 /* the MAC NAT interface takes precedence */
7802 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7803 if (mne->mne_bif != bif) {
7804 mac_nat_entry_print2(mne,
7805 sc->sc_if_xname, "reject",
7806 bif->bif_ifp->if_xname);
7807 }
7808 }
7809 } else if (mne->mne_bif != bif) {
7810 const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7811
7812 mne->mne_bif = bif;
7813 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7814 mac_nat_entry_print2(mne,
7815 sc->sc_if_xname, "replaced",
7816 old_if);
7817 }
7818 bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7819 }
7820 mne->mne_expire = (unsigned long)net_uptime() +
7821 sc->sc_brttimeout;
7822 } else {
7823 mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7824 }
7825 return mne;
7826 }
7827
7828 static void
7829 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7830 struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7831 {
7832 struct mac_nat_entry *mne;
7833 struct mac_nat_entry *tmne;
7834
7835 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7836 if (bif != NULL && mne->mne_bif != bif) {
7837 continue;
7838 }
7839 bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7840 }
7841 }
7842
7843 /*
7844 * bridge_mac_nat_flush_entries:
7845 *
7846 * Flush MAC NAT entries for the specified member. Flush all entries if
7847 * the member is the one that requires MAC NAT, otherwise just flush the
7848 * ones for the specified member.
7849 */
7850 static void
7851 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7852 {
7853 struct bridge_iflist *flush_bif;
7854
7855 flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7856 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7857 bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7858 }
7859
7860 static void
7861 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7862 {
7863 errno_t error;
7864 ifnet_t ifp;
7865 ifaddr_t *list;
7866 struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7867
7868 assert(mac_nat_bif != NULL);
7869 ifp = mac_nat_bif->bif_ifp;
7870 error = ifnet_get_address_list(ifp, &list);
7871 if (error != 0) {
7872 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7873 "ifnet_get_address_list(%s) failed %d",
7874 ifp->if_xname, error);
7875 return;
7876 }
7877 for (ifaddr_t *scan = list; *scan != NULL; scan++) {
7878 sa_family_t af;
7879 void *ip;
7880
7881 union {
7882 struct sockaddr sa;
7883 struct sockaddr_in sin;
7884 struct sockaddr_in6 sin6;
7885 } u;
7886 af = ifaddr_address_family(*scan);
7887 switch (af) {
7888 case AF_INET:
7889 case AF_INET6:
7890 error = ifaddr_address(*scan, &u.sa, sizeof(u));
7891 if (error != 0) {
7892 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7893 "ifaddr_address failed %d",
7894 error);
7895 break;
7896 }
7897 if (af == AF_INET) {
7898 ip = (void *)&u.sin.sin_addr;
7899 } else {
7900 if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
7901 /* remove scope ID */
7902 u.sin6.sin6_addr.s6_addr16[1] = 0;
7903 }
7904 ip = (void *)&u.sin6.sin6_addr;
7905 }
7906 bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
7907 (uint8_t *)IF_LLADDR(ifp));
7908 break;
7909 default:
7910 break;
7911 }
7912 }
7913 ifnet_free_address_list(list);
7914 return;
7915 }
7916
7917 static void
7918 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7919 struct mac_nat_entry_list *list, unsigned long now)
7920 {
7921 struct mac_nat_entry *mne;
7922 struct mac_nat_entry *tmne;
7923
7924 LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7925 if (now >= mne->mne_expire) {
7926 bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7927 }
7928 }
7929 }
7930
7931 static void
7932 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7933 {
7934 if (sc->sc_mac_nat_bif == NULL) {
7935 return;
7936 }
7937 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7938 bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7939 }
7940
7941 static const char *
7942 get_in_out_string(boolean_t is_output)
7943 {
7944 return is_output ? "OUT" : "IN";
7945 }
7946
7947 /*
7948 * is_valid_arp_packet:
7949 * Verify that this is a valid ARP packet.
7950 *
7951 * Returns TRUE if the packet is valid, FALSE otherwise.
7952 */
7953 static boolean_t
7954 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
7955 struct ether_header **eh_p, struct ether_arp **ea_p)
7956 {
7957 struct ether_arp *ea;
7958 struct ether_header *eh;
7959 size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7960 boolean_t is_valid = FALSE;
7961 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7962
7963 if (mbuf_pkthdr_len(*data) < minlen) {
7964 BRIDGE_LOG(LOG_DEBUG, flags,
7965 "ARP %s short frame %lu < %lu",
7966 get_in_out_string(is_output),
7967 mbuf_pkthdr_len(*data), minlen);
7968 goto done;
7969 }
7970 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7971 BRIDGE_LOG(LOG_DEBUG, flags,
7972 "ARP %s size %lu mbuf_pullup fail",
7973 get_in_out_string(is_output),
7974 minlen);
7975 *data = NULL;
7976 goto done;
7977 }
7978
7979 /* validate ARP packet */
7980 eh = mtod(*data, struct ether_header *);
7981 ea = (struct ether_arp *)(eh + 1);
7982 if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
7983 BRIDGE_LOG(LOG_DEBUG, flags,
7984 "ARP %s htype not ethernet",
7985 get_in_out_string(is_output));
7986 goto done;
7987 }
7988 if (ea->arp_hln != ETHER_ADDR_LEN) {
7989 BRIDGE_LOG(LOG_DEBUG, flags,
7990 "ARP %s hlen not ethernet",
7991 get_in_out_string(is_output));
7992 goto done;
7993 }
7994 if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
7995 BRIDGE_LOG(LOG_DEBUG, flags,
7996 "ARP %s ptype not IP",
7997 get_in_out_string(is_output));
7998 goto done;
7999 }
8000 if (ea->arp_pln != sizeof(struct in_addr)) {
8001 BRIDGE_LOG(LOG_DEBUG, flags,
8002 "ARP %s plen not IP",
8003 get_in_out_string(is_output));
8004 goto done;
8005 }
8006 is_valid = TRUE;
8007 *ea_p = ea;
8008 *eh_p = eh;
8009 done:
8010 return is_valid;
8011 }
8012
8013 static struct mac_nat_entry *
8014 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
8015 {
8016 struct ether_arp *ea;
8017 struct ether_header *eh;
8018 struct mac_nat_entry *mne = NULL;
8019 u_short op;
8020 struct in_addr tpa;
8021
8022 if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
8023 goto done;
8024 }
8025 op = ntohs(ea->arp_op);
8026 switch (op) {
8027 case ARPOP_REQUEST:
8028 case ARPOP_REPLY:
8029 /* only care about REQUEST and REPLY */
8030 break;
8031 default:
8032 goto done;
8033 }
8034
8035 /* check the target IP address for a NAT entry */
8036 bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
8037 if (tpa.s_addr != 0) {
8038 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
8039 }
8040 if (mne != NULL) {
8041 if (op == ARPOP_REPLY) {
8042 /* translate the MAC address */
8043 if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8044 char mac_src[24];
8045 char mac_dst[24];
8046
8047 ether_ntop(mac_src, sizeof(mac_src),
8048 ea->arp_tha);
8049 ether_ntop(mac_dst, sizeof(mac_dst),
8050 mne->mne_mac);
8051 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8052 "%s %s ARP %s -> %s",
8053 sc->sc_if_xname,
8054 mne->mne_bif->bif_ifp->if_xname,
8055 mac_src, mac_dst);
8056 }
8057 bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
8058 }
8059 } else {
8060 /* handle conflicting ARP (sender matches mne) */
8061 struct in_addr spa;
8062
8063 bcopy(ea->arp_spa, &spa, sizeof(spa));
8064 if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
8065 /* check the source IP for a NAT entry */
8066 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
8067 }
8068 }
8069
8070 done:
8071 return mne;
8072 }
8073
8074 static boolean_t
8075 bridge_mac_nat_arp_output(struct bridge_softc *sc,
8076 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8077 {
8078 struct ether_arp *ea;
8079 struct ether_header *eh;
8080 struct in_addr ip;
8081 struct mac_nat_entry *mne = NULL;
8082 u_short op;
8083 boolean_t translate = FALSE;
8084
8085 if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
8086 goto done;
8087 }
8088 op = ntohs(ea->arp_op);
8089 switch (op) {
8090 case ARPOP_REQUEST:
8091 case ARPOP_REPLY:
8092 /* only care about REQUEST and REPLY */
8093 break;
8094 default:
8095 goto done;
8096 }
8097
8098 bcopy(ea->arp_spa, &ip, sizeof(ip));
8099 if (ip.s_addr == 0) {
8100 goto done;
8101 }
8102 /* XXX validate IP address: no multicast/broadcast */
8103 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
8104 if (mnr != NULL && mne != NULL) {
8105 /* record the offset to do the replacement */
8106 translate = TRUE;
8107 mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8108 }
8109
8110 done:
8111 return translate;
8112 }
8113
8114 #define ETHER_IPV4_HEADER_LEN (sizeof(struct ether_header) + \
8115 + sizeof(struct ip))
8116 static struct ether_header *
8117 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8118 {
8119 struct ether_header *eh = NULL;
8120 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8121 size_t minlen = ETHER_IPV4_HEADER_LEN;
8122
8123 if (mbuf_pkthdr_len(*data) < minlen) {
8124 BRIDGE_LOG(LOG_DEBUG, flags,
8125 "IP %s short frame %lu < %lu",
8126 get_in_out_string(is_output),
8127 mbuf_pkthdr_len(*data), minlen);
8128 goto done;
8129 }
8130 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8131 BRIDGE_LOG(LOG_DEBUG, flags,
8132 "IP %s size %lu mbuf_pullup fail",
8133 get_in_out_string(is_output),
8134 minlen);
8135 *data = NULL;
8136 goto done;
8137 }
8138 eh = mtod(*data, struct ether_header *);
8139 done:
8140 return eh;
8141 }
8142
8143 static bool
8144 is_broadcast_ip_packet(mbuf_t *data)
8145 {
8146 struct ether_header *eh;
8147 uint16_t ether_type;
8148 bool is_broadcast = FALSE;
8149
8150 eh = mtod(*data, struct ether_header *);
8151 ether_type = ntohs(eh->ether_type);
8152 switch (ether_type) {
8153 case ETHERTYPE_IP:
8154 eh = get_ether_ip_header(data, FALSE);
8155 if (eh != NULL) {
8156 struct in_addr dst;
8157 struct ip *iphdr;
8158
8159 iphdr = (struct ip *)(void *)(eh + 1);
8160 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8161 is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8162 }
8163 break;
8164 default:
8165 break;
8166 }
8167 return is_broadcast;
8168 }
8169
8170 static struct mac_nat_entry *
8171 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8172 {
8173 struct in_addr dst;
8174 struct ether_header *eh;
8175 struct ip *iphdr;
8176 struct mac_nat_entry *mne = NULL;
8177
8178 eh = get_ether_ip_header(data, FALSE);
8179 if (eh == NULL) {
8180 goto done;
8181 }
8182 iphdr = (struct ip *)(void *)(eh + 1);
8183 bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8184 /* XXX validate IP address */
8185 if (dst.s_addr == 0) {
8186 goto done;
8187 }
8188 mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
8189 done:
8190 return mne;
8191 }
8192
8193 static void
8194 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8195 struct bridge_iflist *bif, mbuf_t m,
8196 uint8_t ip_header_len, struct mac_nat_record *mnr)
8197 {
8198 uint16_t dp_flags;
8199 errno_t error;
8200 size_t offset;
8201 struct udphdr udphdr;
8202
8203 /* copy the UDP header */
8204 offset = sizeof(struct ether_header) + ip_header_len;
8205 error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8206 if (error != 0) {
8207 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8208 "mbuf_copydata udphdr failed %d",
8209 error);
8210 return;
8211 }
8212 if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8213 ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8214 /* not a BOOTP/DHCP packet */
8215 return;
8216 }
8217 /* check whether the broadcast bit is already set */
8218 offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8219 error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8220 if (error != 0) {
8221 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8222 "mbuf_copydata dp_flags failed %d",
8223 error);
8224 return;
8225 }
8226 if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8227 /* it's already set, nothing to do */
8228 return;
8229 }
8230 /* broadcast bit needs to be set */
8231 mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8232 mnr->mnr_ip_header_len = ip_header_len;
8233 if (udphdr.uh_sum != 0) {
8234 uint16_t delta;
8235
8236 /* adjust checksum to take modified dp_flags into account */
8237 delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8238 mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8239 }
8240 BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8241 "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8242 sc->sc_if_xname,
8243 bif->bif_ifp->if_xname,
8244 ntohs(mnr->mnr_ip_dhcp_flags),
8245 ntohs(mnr->mnr_ip_udp_csum));
8246 return;
8247 }
8248
8249 static boolean_t
8250 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8251 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8252 {
8253 #pragma unused(mnr)
8254 struct ether_header *eh;
8255 struct in_addr ip;
8256 struct ip *iphdr;
8257 uint8_t ip_header_len;
8258 struct mac_nat_entry *mne = NULL;
8259 boolean_t translate = FALSE;
8260
8261 eh = get_ether_ip_header(data, TRUE);
8262 if (eh == NULL) {
8263 goto done;
8264 }
8265 iphdr = (struct ip *)(void *)(eh + 1);
8266 ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8267 if (ip_header_len < sizeof(ip)) {
8268 /* bogus IP header */
8269 goto done;
8270 }
8271 bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8272 /* XXX validate the source address */
8273 if (ip.s_addr != 0) {
8274 mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
8275 eh->ether_shost);
8276 }
8277 if (mnr != NULL) {
8278 if (iphdr->ip_p == IPPROTO_UDP) {
8279 /* handle DHCP must broadcast */
8280 bridge_mac_nat_udp_output(sc, bif, *data,
8281 ip_header_len, mnr);
8282 }
8283 translate = TRUE;
8284 }
8285 done:
8286 return translate;
8287 }
8288
8289 #define ETHER_IPV6_HEADER_LEN (sizeof(struct ether_header) + \
8290 + sizeof(struct ip6_hdr))
8291 static struct ether_header *
8292 get_ether_ipv6_header(mbuf_t *data, size_t plen, boolean_t is_output)
8293 {
8294 struct ether_header *eh = NULL;
8295 int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8296 size_t minlen = ETHER_IPV6_HEADER_LEN + plen;
8297
8298 if (mbuf_pkthdr_len(*data) < minlen) {
8299 BRIDGE_LOG(LOG_DEBUG, flags,
8300 "IP %s short frame %lu < %lu",
8301 get_in_out_string(is_output),
8302 mbuf_pkthdr_len(*data), minlen);
8303 goto done;
8304 }
8305 if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8306 BRIDGE_LOG(LOG_DEBUG, flags,
8307 "IP %s size %lu mbuf_pullup fail",
8308 get_in_out_string(is_output),
8309 minlen);
8310 *data = NULL;
8311 goto done;
8312 }
8313 eh = mtod(*data, struct ether_header *);
8314 done:
8315 return eh;
8316 }
8317
8318 #include <netinet/icmp6.h>
8319 #include <netinet6/nd6.h>
8320
/*
 * Length in bytes of an ND link-layer address option carrying an Ethernet
 * address: the option header plus the 6-byte MAC address.
 */
#define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))

/*
 * bridge_mac_nat_icmpv6_output:
 *	Inspect an outbound ICMPv6 packet from member 'bif'. Only Neighbor
 *	Solicit/Advert and Router Solicit/Advert messages are of interest;
 *	anything else returns without touching 'mnr'.
 *
 *	If the message carries a link-layer address option of Ethernet size
 *	(source lladdr for NS/RS/RA, target lladdr for NA), the offset of
 *	that address from the start of the frame, the ICMPv6 length and
 *	type, and the IPv6 header length are recorded in 'mnr'.
 *
 *	For a DAD probe (Neighbor Solicit whose source address '*saddrp' is
 *	unspecified), '*saddrp' is overwritten with the solicited target
 *	address.
 *
 *	The ICMPv6 header is taken to immediately follow the fixed IPv6
 *	header (offset sizeof(*ip6h)). The incoming 'ip6h' is only used to
 *	read the payload length; it is re-derived after each pullup because
 *	*data may change (or become NULL if a pullup fails).
 */
static void
bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
    struct bridge_iflist *bif,
    mbuf_t *data, struct ip6_hdr *ip6h,
    struct in6_addr *saddrp,
    struct mac_nat_record *mnr)
{
	struct ether_header *eh;
	struct icmp6_hdr *icmp6;
	uint8_t icmp6_type;
	uint32_t icmp6len;
	int lladdrlen = 0;
	char *lladdr = NULL;
	unsigned int off = sizeof(*ip6h);

	/* the IPv6 payload length is used as the ICMPv6 message length */
	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
	if (icmp6len < sizeof(*icmp6)) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "short IPv6 payload length %d < %lu",
		    icmp6len, sizeof(*icmp6));
		return;
	}

	/* pullup IP6 header + ICMPv6 header */
	eh = get_ether_ipv6_header(data, sizeof(*icmp6), TRUE);
	if (eh == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 header");
		return;
	}
	/* the pullup may have moved the data: re-derive the header pointers */
	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
	icmp6_type = icmp6->icmp6_type;
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
	case ND_NEIGHBOR_ADVERT:
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT:
		break;
	default:
		/* not a neighbor discovery message we need to look at */
		return;
	}

	/* pullup IP6 header + payload */
	eh = get_ether_ipv6_header(data, icmp6len, TRUE);
	if (eh == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 + payload");
		return;
	}
	/* again, refresh the pointers after the second pullup */
	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT: {
		struct nd_neighbor_solicit *nd_ns;
		union nd_opts ndopts;
		boolean_t is_dad_probe;
		struct in6_addr taddr;

		if (icmp6len < sizeof(*nd_ns)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_ns %d < %lu",
			    icmp6len, sizeof(*nd_ns));
			return;
		}

		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			/* the address follows the option header; length is in units of 8 bytes */
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
		}
		/* an unspecified source address marks a DAD probe */
		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
		if (lladdr != NULL) {
			if (is_dad_probe) {
				/* a DAD probe must not carry a source lladdr option */
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "bad ND6 DAD packet");
				return;
			}
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		if (is_dad_probe) {
			/* node is trying to use taddr, create an mne for taddr */
			*saddrp = taddr;
		}
		break;
	}
	case ND_NEIGHBOR_ADVERT: {
		struct nd_neighbor_advert *nd_na;
		union nd_opts ndopts;
		struct in6_addr taddr;


		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;

		if (icmp6len < sizeof(*nd_na)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_na %d < %lu",
			    icmp6len, sizeof(*nd_na));
			return;
		}

		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}
		/* parse options */
		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NA option");
			return;
		}
		if (ndopts.nd_opts_tgt_lladdr == NULL) {
			/* no target link-layer address option, nothing to do */
			return;
		}
		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "target lladdrlen %d != %lu",
			    lladdrlen, ETHER_ND_LLADDR_LEN);
			return;
		}
		break;
	}
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT: {
		union nd_opts ndopts;
		uint32_t type_length;
		const char *description;

		/* RA and RS differ only in the size of the fixed message part */
		if (icmp6_type == ND_ROUTER_ADVERT) {
			type_length = sizeof(struct nd_router_advert);
			description = "RA";
		} else {
			type_length = sizeof(struct nd_router_solicit);
			description = "RS";
		}
		if (icmp6len < type_length) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short ND6 %s %d < %d",
			    description, icmp6len, type_length);
			return;
		}
		/* parse options */
		nd6_option_init(((uint8_t *)icmp6) + type_length,
		    icmp6len - type_length, &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 %s option", description);
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		break;
	}
	default:
		break;
	}
	if (lladdr != NULL) {
		/* record where the link-layer address lives, relative to the frame start */
		mnr->mnr_ip6_lladdr_offset = (uint16_t)
		    ((uintptr_t)lladdr - (uintptr_t)eh);
		mnr->mnr_ip6_icmp6_len = icmp6len;
		mnr->mnr_ip6_icmp6_type = icmp6_type;
		mnr->mnr_ip6_header_len = off;
		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
			const char *str;

			switch (mnr->mnr_ip6_icmp6_type) {
			case ND_ROUTER_ADVERT:
				str = "ROUTER ADVERT";
				break;
			case ND_ROUTER_SOLICIT:
				str = "ROUTER SOLICIT";
				break;
			case ND_NEIGHBOR_ADVERT:
				str = "NEIGHBOR ADVERT";
				break;
			case ND_NEIGHBOR_SOLICIT:
				str = "NEIGHBOR SOLICIT";
				break;
			default:
				str = "";
				break;
			}
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
			    mnr->mnr_ip6_header_len,
			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
		}
	}
}
8547
8548 static struct mac_nat_entry *
8549 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8550 {
8551 struct in6_addr dst;
8552 struct ether_header *eh;
8553 struct ip6_hdr *ip6h;
8554 struct mac_nat_entry *mne = NULL;
8555
8556 eh = get_ether_ipv6_header(data, 0, FALSE);
8557 if (eh == NULL) {
8558 goto done;
8559 }
8560 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8561 bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8562 /* XXX validate IPv6 address */
8563 if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8564 goto done;
8565 }
8566 mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8567
8568 done:
8569 return mne;
8570 }
8571
8572 static boolean_t
8573 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8574 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8575 {
8576 struct ether_header *eh;
8577 ether_addr_t ether_shost;
8578 struct ip6_hdr *ip6h;
8579 struct in6_addr saddr;
8580 boolean_t translate;
8581
8582 translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8583 eh = get_ether_ipv6_header(data, 0, TRUE);
8584 if (eh == NULL) {
8585 translate = FALSE;
8586 goto done;
8587 }
8588 bcopy(eh->ether_shost, ðer_shost, sizeof(ether_shost));
8589 ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8590 bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8591 if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8592 bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8593 }
8594 if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8595 goto done;
8596 }
8597 (void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8598 ether_shost.octet);
8599
8600 done:
8601 return translate;
8602 }
8603
8604 /*
8605 * bridge_mac_nat_input:
8606 * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8607 * This interface is the "external" interface with respect to NAT.
8608 * The interface is only capable of receiving a single MAC address
8609 * (e.g. a Wi-Fi STA interface).
8610 *
8611 * When a packet arrives on the external interface, look up the destination
8612 * IP address in the mac_nat_entry table. If there is a match, *is_input
8613 * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8614 * is set to FALSE and translate the MAC address if necessary.
8615 *
8616 * Returns:
8617 * The internal interface to direct the packet to, or NULL if the packet
8618 * should not be redirected.
8619 *
8620 * *data may be updated to point at a different mbuf chain, or set to NULL
8621 * if the chain was deallocated during processing.
8622 */
8623 static ifnet_t
8624 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8625 boolean_t *is_input)
8626 {
8627 ifnet_t dst_if = NULL;
8628 struct ether_header *eh;
8629 uint16_t ether_type;
8630 boolean_t is_unicast;
8631 mbuf_t m = *data;
8632 struct mac_nat_entry *mne = NULL;
8633
8634 BRIDGE_LOCK_ASSERT_HELD(sc);
8635 *is_input = FALSE;
8636 assert(sc->sc_mac_nat_bif != NULL);
8637 is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8638 eh = mtod(m, struct ether_header *);
8639 ether_type = ntohs(eh->ether_type);
8640 switch (ether_type) {
8641 case ETHERTYPE_ARP:
8642 mne = bridge_mac_nat_arp_input(sc, data);
8643 break;
8644 case ETHERTYPE_IP:
8645 if (is_unicast) {
8646 mne = bridge_mac_nat_ip_input(sc, data);
8647 }
8648 break;
8649 case ETHERTYPE_IPV6:
8650 if (is_unicast) {
8651 mne = bridge_mac_nat_ipv6_input(sc, data);
8652 }
8653 break;
8654 default:
8655 break;
8656 }
8657 if (mne != NULL) {
8658 if (is_unicast) {
8659 if (m != *data) {
8660 /* it may have changed */
8661 eh = mtod(*data, struct ether_header *);
8662 }
8663 bcopy(mne->mne_mac, eh->ether_dhost,
8664 sizeof(eh->ether_dhost));
8665 }
8666 dst_if = mne->mne_bif->bif_ifp;
8667 *is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8668 }
8669 return dst_if;
8670 }
8671
8672 /*
8673 * bridge_mac_nat_output:
8674 * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8675 * from the interface 'bif'.
8676 *
8677 * Create a mac_nat_entry containing the source IP address and MAC address
8678 * from the packet. Populate a mac_nat_record with information detailing
8679 * how to translate the packet. Translation takes place later when
8680 * the bridge lock is no longer held.
8681 *
8682 * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8683 * interface is generating an output packet. No translation is required in this
8684 * case, we just record the IP address used to prevent another bif from
8685 * claiming our IP address.
8686 *
8687 * Returns:
8688 * TRUE if the packet should be translated (*mnr updated as well),
8689 * FALSE otherwise.
8690 *
8691 * *data may be updated to point at a different mbuf chain or NULL if
8692 * the chain was deallocated during processing.
8693 */
8694
8695 static boolean_t
8696 bridge_mac_nat_output(struct bridge_softc *sc,
8697 struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8698 {
8699 struct ether_header *eh;
8700 uint16_t ether_type;
8701 boolean_t translate = FALSE;
8702
8703 BRIDGE_LOCK_ASSERT_HELD(sc);
8704 assert(sc->sc_mac_nat_bif != NULL);
8705
8706 eh = mtod(*data, struct ether_header *);
8707 ether_type = ntohs(eh->ether_type);
8708 if (mnr != NULL) {
8709 bzero(mnr, sizeof(*mnr));
8710 mnr->mnr_ether_type = ether_type;
8711 }
8712 switch (ether_type) {
8713 case ETHERTYPE_ARP:
8714 translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8715 break;
8716 case ETHERTYPE_IP:
8717 translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8718 break;
8719 case ETHERTYPE_IPV6:
8720 translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8721 break;
8722 default:
8723 break;
8724 }
8725 return translate;
8726 }
8727
8728 static void
8729 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8730 const caddr_t eaddr)
8731 {
8732 errno_t error;
8733
8734 if (mnr->mnr_arp_offset == 0) {
8735 return;
8736 }
8737 /* replace the source hardware address */
8738 error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8739 ETHER_ADDR_LEN, eaddr,
8740 MBUF_DONTWAIT);
8741 if (error != 0) {
8742 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8743 "mbuf_copyback failed");
8744 m_freem(*data);
8745 *data = NULL;
8746 }
8747 return;
8748 }
8749
8750 static void
8751 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8752 {
8753 errno_t error;
8754 size_t offset;
8755
8756 if (mnr->mnr_ip_header_len == 0) {
8757 return;
8758 }
8759 /* update the UDP checksum */
8760 offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8761 error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8762 sizeof(mnr->mnr_ip_udp_csum),
8763 &mnr->mnr_ip_udp_csum,
8764 MBUF_DONTWAIT);
8765 if (error != 0) {
8766 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8767 "mbuf_copyback uh_sum failed");
8768 m_freem(*data);
8769 *data = NULL;
8770 }
8771 /* update the DHCP must broadcast flag */
8772 offset += sizeof(struct udphdr);
8773 error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8774 sizeof(mnr->mnr_ip_dhcp_flags),
8775 &mnr->mnr_ip_dhcp_flags,
8776 MBUF_DONTWAIT);
8777 if (error != 0) {
8778 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8779 "mbuf_copyback dp_flags failed");
8780 m_freem(*data);
8781 *data = NULL;
8782 }
8783 }
8784
8785 static void
8786 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8787 const caddr_t eaddr)
8788 {
8789 uint16_t cksum;
8790 errno_t error;
8791 mbuf_t m = *data;
8792
8793 if (mnr->mnr_ip6_header_len == 0) {
8794 return;
8795 }
8796 switch (mnr->mnr_ip6_icmp6_type) {
8797 case ND_ROUTER_ADVERT:
8798 case ND_ROUTER_SOLICIT:
8799 case ND_NEIGHBOR_SOLICIT:
8800 case ND_NEIGHBOR_ADVERT:
8801 if (mnr->mnr_ip6_lladdr_offset == 0) {
8802 /* nothing to do */
8803 return;
8804 }
8805 break;
8806 default:
8807 return;
8808 }
8809
8810 /*
8811 * replace the lladdr
8812 */
8813 error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8814 ETHER_ADDR_LEN, eaddr,
8815 MBUF_DONTWAIT);
8816 if (error != 0) {
8817 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8818 "mbuf_copyback lladdr failed");
8819 m_freem(m);
8820 *data = NULL;
8821 return;
8822 }
8823
8824 /*
8825 * recompute the icmp6 checksum
8826 */
8827
8828 /* skip past the ethernet header */
8829 mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8830 mbuf_len(m) - ETHER_HDR_LEN);
8831 mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8832
8833 #define CKSUM_OFFSET_ICMP6 offsetof(struct icmp6_hdr, icmp6_cksum)
8834 /* set the checksum to zero */
8835 cksum = 0;
8836 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8837 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8838 if (error != 0) {
8839 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8840 "mbuf_copyback cksum=0 failed");
8841 m_freem(m);
8842 *data = NULL;
8843 return;
8844 }
8845 /* compute and set the new checksum */
8846 cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8847 mnr->mnr_ip6_icmp6_len);
8848 error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8849 sizeof(cksum), &cksum, MBUF_DONTWAIT);
8850 if (error != 0) {
8851 BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8852 "mbuf_copyback cksum failed");
8853 m_freem(m);
8854 *data = NULL;
8855 return;
8856 }
8857 /* restore the ethernet header */
8858 mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
8859 mbuf_len(m) + ETHER_HDR_LEN);
8860 mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
8861 return;
8862 }
8863
8864 static void
8865 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8866 const caddr_t eaddr)
8867 {
8868 struct ether_header *eh;
8869
8870 /* replace the source ethernet address with the single MAC */
8871 eh = mtod(*data, struct ether_header *);
8872 bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8873 switch (mnr->mnr_ether_type) {
8874 case ETHERTYPE_ARP:
8875 bridge_mac_nat_arp_translate(data, mnr, eaddr);
8876 break;
8877
8878 case ETHERTYPE_IP:
8879 bridge_mac_nat_ip_translate(data, mnr);
8880 break;
8881
8882 case ETHERTYPE_IPV6:
8883 bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8884 break;
8885
8886 default:
8887 break;
8888 }
8889 return;
8890 }
8891
8892 /*
8893 * bridge packet filtering
8894 */
8895
8896 /*
8897 * Perform basic checks on header size since
8898 * pfil assumes ip_input has already processed
8899 * it for it. Cut-and-pasted from ip_input.c.
8900 * Given how simple the IPv6 version is,
8901 * does the IPv4 version really need to be
8902 * this complicated?
8903 *
8904 * XXX Should we update ipstat here, or not?
8905 * XXX Right now we update ipstat but not
8906 * XXX csum_counter.
8907 */
8908 static int
8909 bridge_ip_checkbasic(struct mbuf **mp)
8910 {
8911 struct mbuf *m = *mp;
8912 struct ip *ip;
8913 int len, hlen;
8914 u_short sum;
8915
8916 if (*mp == NULL) {
8917 return -1;
8918 }
8919
8920 if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8921 /* max_linkhdr is already rounded up to nearest 4-byte */
8922 if ((m = m_copyup(m, sizeof(struct ip),
8923 max_linkhdr)) == NULL) {
8924 /* XXXJRT new stat, please */
8925 ipstat.ips_toosmall++;
8926 goto bad;
8927 }
8928 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8929 if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8930 ipstat.ips_toosmall++;
8931 goto bad;
8932 }
8933 }
8934 ip = mtod(m, struct ip *);
8935 if (ip == NULL) {
8936 goto bad;
8937 }
8938
8939 if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8940 ipstat.ips_badvers++;
8941 goto bad;
8942 }
8943 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8944 if (hlen < (int)sizeof(struct ip)) { /* minimum header length */
8945 ipstat.ips_badhlen++;
8946 goto bad;
8947 }
8948 if (hlen > m->m_len) {
8949 if ((m = m_pullup(m, hlen)) == 0) {
8950 ipstat.ips_badhlen++;
8951 goto bad;
8952 }
8953 ip = mtod(m, struct ip *);
8954 if (ip == NULL) {
8955 goto bad;
8956 }
8957 }
8958
8959 if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8960 sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8961 } else {
8962 if (hlen == sizeof(struct ip)) {
8963 sum = in_cksum_hdr(ip);
8964 } else {
8965 sum = in_cksum(m, hlen);
8966 }
8967 }
8968 if (sum) {
8969 ipstat.ips_badsum++;
8970 goto bad;
8971 }
8972
8973 /* Retrieve the packet length. */
8974 len = ntohs(ip->ip_len);
8975
8976 /*
8977 * Check for additional length bogosity
8978 */
8979 if (len < hlen) {
8980 ipstat.ips_badlen++;
8981 goto bad;
8982 }
8983
8984 /*
8985 * Check that the amount of data in the buffers
8986 * is as at least much as the IP header would have us expect.
8987 * Drop packet if shorter than we expect.
8988 */
8989 if (m->m_pkthdr.len < len) {
8990 ipstat.ips_tooshort++;
8991 goto bad;
8992 }
8993
8994 /* Checks out, proceed */
8995 *mp = m;
8996 return 0;
8997
8998 bad:
8999 *mp = m;
9000 return -1;
9001 }
9002
9003 /*
9004 * Same as above, but for IPv6.
9005 * Cut-and-pasted from ip6_input.c.
9006 * XXX Should we update ip6stat, or not?
9007 */
9008 static int
9009 bridge_ip6_checkbasic(struct mbuf **mp)
9010 {
9011 struct mbuf *m = *mp;
9012 struct ip6_hdr *ip6;
9013
9014 /*
9015 * If the IPv6 header is not aligned, slurp it up into a new
9016 * mbuf with space for link headers, in the event we forward
9017 * it. Otherwise, if it is aligned, make sure the entire base
9018 * IPv6 header is in the first mbuf of the chain.
9019 */
9020 if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9021 struct ifnet *inifp = m->m_pkthdr.rcvif;
9022 /* max_linkhdr is already rounded up to nearest 4-byte */
9023 if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9024 max_linkhdr)) == NULL) {
9025 /* XXXJRT new stat, please */
9026 ip6stat.ip6s_toosmall++;
9027 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9028 goto bad;
9029 }
9030 } else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9031 struct ifnet *inifp = m->m_pkthdr.rcvif;
9032 if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9033 ip6stat.ip6s_toosmall++;
9034 in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9035 goto bad;
9036 }
9037 }
9038
9039 ip6 = mtod(m, struct ip6_hdr *);
9040
9041 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9042 ip6stat.ip6s_badvers++;
9043 in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9044 goto bad;
9045 }
9046
9047 /* Checks out, proceed */
9048 *mp = m;
9049 return 0;
9050
9051 bad:
9052 *mp = m;
9053 return -1;
9054 }
9055
/*
 * bridge_pf:
 *	Run the ethernet frame in *mp through the packet filter on behalf
 *	of interface 'ifp'.
 *
 * the PF routines expect to be called from ip_input, so we
 * need to do and undo here some of the same processing.
 *
 *	The ethernet header (and the LLC/SNAP header, if present) is saved
 *	and stripped, the IP header is validated, the filter hook is called,
 *	and the stripped headers are finally prepended again.
 *
 * Parameters:
 *	mp		in/out packet; may be replaced, or set to NULL when
 *			the packet is consumed or dropped
 *	ifp		interface handed to the filter hook; when NULL the
 *			hook is not called
 *	sc_filter_flags	IFBF_FILT_* flags; nothing is filtered unless
 *			IFBF_FILT_MEMBER is set
 *	input		passed through unchanged to pf_af_hook()
 *
 * Returns:
 *	0 if the packet may proceed (*mp is valid), non-zero otherwise.
 *	On every non-zero return taken through the 'bad' label the chain has
 *	been freed and *mp is NULL.
 *
 * XXX : this is heavily inspired on bridge_pfil()
 */
static int
bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
    int input)
{
	/*
	 * XXX : mpetit : heavily inspired by bridge_pfil()
	 */

	int snap, error, i, hlen;
	struct ether_header *eh1, eh2;
	struct ip *ip;
	struct llc llc1;
	u_int16_t ether_type;

	snap = 0;
	error = -1;     /* Default error if not error == 0 */

	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
		return 0; /* filtering is disabled */
	}
	/* make the leading headers contiguous, up to max_protohdr bytes */
	i = min((*mp)->m_pkthdr.len, max_protohdr);
	if ((*mp)->m_len < i) {
		*mp = m_pullup(*mp, i);
		if (*mp == NULL) {
			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
			return -1;
		}
	}

	eh1 = mtod(*mp, struct ether_header *);
	ether_type = ntohs(eh1->ether_type);

	/*
	 * Check for SNAP/LLC.
	 */
	if (ether_type < ETHERMTU) {
		struct llc *llc2 = (struct llc *)(eh1 + 1);

		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
		    llc2->llc_dsap == LLC_SNAP_LSAP &&
		    llc2->llc_ssap == LLC_SNAP_LSAP &&
		    llc2->llc_control == LLC_UI) {
			/*
			 * NOTE(review): htons() is used to convert a value
			 * read from the packet; ntohs() would express the
			 * intent, the two perform the same byte swap.
			 */
			ether_type = htons(llc2->llc_un.type_snap.ether_type);
			snap = 1;
		}
	}

	/*
	 * If we're trying to filter bridge traffic, don't look at anything
	 * other than IP and ARP traffic.  If the filter doesn't understand
	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
	 * but of course we don't have an AppleTalk filter to begin with.
	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
	 * ARP traffic.)
	 */
	switch (ether_type) {
	case ETHERTYPE_ARP:
	case ETHERTYPE_REVARP:
		return 0; /* Automatically pass */

	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		break;
	default:
		/*
		 * Check to see if the user wants to pass non-ip
		 * packets, these will not be checked by pf and
		 * passed unconditionally so the default is to drop.
		 */
		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
			goto bad;
		}
		break;
	}

	/* Strip off the Ethernet header and keep a copy. */
	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
	m_adj(*mp, ETHER_HDR_LEN);

	/* Strip off snap header, if present */
	if (snap) {
		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
		m_adj(*mp, sizeof(struct llc));
	}

	/*
	 * Check the IP header for alignment and errors
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		error = bridge_ip_checkbasic(mp);
		break;
	case ETHERTYPE_IPV6:
		error = bridge_ip6_checkbasic(mp);
		break;
	default:
		/* not IP: nothing to validate */
		error = 0;
		break;
	}
	if (error) {
		goto bad;
	}

	error = 0;

	/*
	 * Run the packet through pf rules
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		/*
		 * before calling the firewall, swap fields the same as
		 * IP does. here we assume the header is contiguous
		 */
		ip = mtod(*mp, struct ip *);

		ip->ip_len = ntohs(ip->ip_len);
		ip->ip_off = ntohs(ip->ip_off);

		/* without an interface the hook is skipped entirely */
		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}

		/* Recalculate the ip checksum and restore byte ordering */
		ip = mtod(*mp, struct ip *);
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
		if (hlen < (int)sizeof(struct ip)) {
			goto bad;
		}
		if (hlen > (*mp)->m_len) {
			if ((*mp = m_pullup(*mp, hlen)) == 0) {
				goto bad;
			}
			ip = mtod(*mp, struct ip *);
			if (ip == NULL) {
				goto bad;
			}
		}
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);
		/* the checksum field must be zero while it is recomputed */
		ip->ip_sum = 0;
		if (hlen == sizeof(struct ip)) {
			ip->ip_sum = in_cksum_hdr(ip);
		} else {
			ip->ip_sum = in_cksum(*mp, hlen);
		}
		break;

	case ETHERTYPE_IPV6:
		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}
		break;
	default:
		error = 0;
		break;
	}

	/* the filter took the packet: report its verdict as is */
	if (*mp == NULL) {
		return error;
	}
	/* the filter rejected the packet but left it to us to free */
	if (error != 0) {
		goto bad;
	}

	/* from here on, any failure is reported as -1 */
	error = -1;

	/*
	 * Finally, put everything back the way it was and return
	 */
	if (snap) {
		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
		if (*mp == NULL) {
			return error;
		}
		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
	}

	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
	if (*mp == NULL) {
		return error;
	}
	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);

	return 0;

bad:
	/* drop the packet; the caller sees *mp == NULL */
	m_freem(*mp);
	*mp = NULL;
	return error;
}
9262
9263 /*
9264 * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9265 * All rights reserved.
9266 *
9267 * Redistribution and use in source and binary forms, with or without
9268 * modification, are permitted provided that the following conditions
9269 * are met:
9270 * 1. Redistributions of source code must retain the above copyright
9271 * notice, this list of conditions and the following disclaimer.
9272 * 2. Redistributions in binary form must reproduce the above copyright
9273 * notice, this list of conditions and the following disclaimer in the
9274 * documentation and/or other materials provided with the distribution.
9275 *
9276 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9277 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9278 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9279 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9280 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9281 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9282 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9283 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9284 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9285 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9286 * SUCH DAMAGE.
9287 */
9288
9289 /*
9290 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9291 *
9292 * Create a queue of packets/segments which fit the given mss + hdr_len.
9293 * m0 points to mbuf chain to be segmented.
9294 * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
9295 * into segments of length MSS bytes and then copy the first hdr_len bytes
9296 * from m0 at the top of each segment.
9297 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9298 * in each segment after the first hdr_len bytes
9299 *
9300 * Return the new queue with the segments on success, NULL on failure.
9301 * (the mbuf queue is freed in this case).
9302 * nsegs contains the number of segments generated.
9303 */
9304
9305 static struct mbuf *
9306 m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
9307 char * hdr2_buf, int hdr2_len)
9308 {
9309 int off = 0, n, firstlen;
9310 struct mbuf **mnext, *mseg;
9311 int total_len = m0->m_pkthdr.len;
9312
9313 /*
9314 * Segmentation useless
9315 */
9316 if (total_len <= hdr_len + mss) {
9317 return m0;
9318 }
9319
9320 if (hdr2_buf == NULL || hdr2_len <= 0) {
9321 hdr2_buf = NULL;
9322 hdr2_len = 0;
9323 }
9324
9325 off = hdr_len + mss;
9326 firstlen = mss; /* first segment stored in the original mbuf */
9327
9328 mnext = &(m0->m_nextpkt); /* pointer to next packet */
9329
9330 for (n = 1; off < total_len; off += mss, n++) {
9331 struct mbuf *m;
9332 /*
9333 * Copy the header from the original packet
9334 * and create a new mbuf chain
9335 */
9336 if (MHLEN < hdr_len) {
9337 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9338 } else {
9339 m = m_gethdr(M_NOWAIT, MT_DATA);
9340 }
9341
9342 if (m == NULL) {
9343 #ifdef GSO_DEBUG
9344 D("MGETHDR error\n");
9345 #endif
9346 goto err;
9347 }
9348
9349 m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9350
9351 m->m_len = hdr_len;
9352 /*
9353 * if the optional header is present, copy it
9354 */
9355 if (hdr2_buf != NULL) {
9356 m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9357 }
9358
9359 m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9360 if (off + mss >= total_len) { /* last segment */
9361 mss = total_len - off;
9362 }
9363 /*
9364 * Copy the payload from original packet
9365 */
9366 mseg = m_copym(m0, off, mss, M_NOWAIT);
9367 if (mseg == NULL) {
9368 m_freem(m);
9369 #ifdef GSO_DEBUG
9370 D("m_copym error\n");
9371 #endif
9372 goto err;
9373 }
9374 m_cat(m, mseg);
9375
9376 m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9377 m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9378 /*
9379 * Copy the checksum flags and data (in_cksum() need this)
9380 */
9381 m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9382 m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9383 m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9384
9385 *mnext = m;
9386 mnext = &(m->m_nextpkt);
9387 }
9388
9389 /*
9390 * Update first segment.
9391 * If the optional header is present, is necessary
9392 * to insert it into the first segment.
9393 */
9394 if (hdr2_buf == NULL) {
9395 m_adj(m0, hdr_len + firstlen - total_len);
9396 m0->m_pkthdr.len = hdr_len + firstlen;
9397 } else {
9398 mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9399 if (mseg == NULL) {
9400 #ifdef GSO_DEBUG
9401 D("m_copym error\n");
9402 #endif
9403 goto err;
9404 }
9405 m_adj(m0, hdr_len - total_len);
9406 m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9407 m_cat(m0, mseg);
9408 m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9409 }
9410
9411 if (nsegs != NULL) {
9412 *nsegs = n;
9413 }
9414 return m0;
9415 err:
9416 while (m0 != NULL) {
9417 mseg = m0->m_nextpkt;
9418 m0->m_nextpkt = NULL;
9419 m_freem(m0);
9420 m0 = mseg;
9421 }
9422 return NULL;
9423 }
9424
9425 /*
9426 * Wrappers of IPv4 checksum functions
9427 */
9428 static inline void
9429 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9430 {
9431 m->m_data += mac_hlen;
9432 m->m_len -= mac_hlen;
9433 m->m_pkthdr.len -= mac_hlen;
9434 #if __FreeBSD_version < 1000000
9435 ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9436 #endif
9437
9438 in_delayed_cksum(m);
9439
9440 #if __FreeBSD_version < 1000000
9441 ip->ip_len = htons(ip->ip_len);
9442 #endif
9443 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9444 m->m_len += mac_hlen;
9445 m->m_pkthdr.len += mac_hlen;
9446 m->m_data -= mac_hlen;
9447 }
9448
9449 static inline void
9450 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9451 {
9452 m->m_data += mac_hlen;
9453
9454 ip->ip_sum = in_cksum(m, ip_hlen);
9455
9456 m->m_pkthdr.csum_flags &= ~CSUM_IP;
9457 m->m_data -= mac_hlen;
9458 }
9459
/*
 * Structure that contains the state during the TCP segmentation
 *
 * It is filled in by gso_ip_tcp_init_state() and then carried from one
 * segment to the next through the 'update' and 'internal' callbacks.
 */
struct gso_ip_tcp_state {
	/* re-point hdr/tcp at a segment and recompute pay_len */
	void    (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* fix up the IP and TCP headers of a segment */
	void    (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	union iphdr hdr;        /* IPv4 or IPv6 header of the current segment */
	struct tcphdr *tcp;     /* TCP header of the current segment */
	int mac_hlen;           /* link-layer header length */
	int ip_hlen;            /* IP header length */
	int tcp_hlen;           /* TCP header length (th_off << 2) */
	int hlen;               /* mac_hlen + ip_hlen + tcp_hlen */
	int pay_len;            /* segment length minus hlen */
	int sw_csum;            /* CSUM_* flags of checksums done in software */
	uint32_t tcp_seq;       /* host order; advanced by pay_len per segment */
	uint16_t ip_id;         /* host order; IPv4 only, incremented per segment */
	boolean_t is_tx;        /* NOTE(review): not set by the init routine - confirm usage */
};
9480
9481 /*
9482 * Update the pointers to TCP and IPv4 headers
9483 */
9484 static inline void
9485 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9486 {
9487 state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9488 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9489 state->pay_len = m->m_pkthdr.len - state->hlen;
9490 }
9491
9492 /*
9493 * Set properly the TCP and IPv4 headers
9494 */
9495 static inline void
9496 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9497 {
9498 /*
9499 * Update IP header
9500 */
9501 state->hdr.ip->ip_id = htons((state->ip_id)++);
9502 state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9503 /*
9504 * TCP Checksum
9505 */
9506 state->tcp->th_sum = 0;
9507 state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9508 state->hdr.ip->ip_dst.s_addr,
9509 htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9510 /*
9511 * Checksum HW not supported (TCP)
9512 */
9513 if (state->sw_csum & CSUM_DELAY_DATA) {
9514 gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9515 }
9516
9517 state->tcp_seq += state->pay_len;
9518 /*
9519 * IP Checksum
9520 */
9521 state->hdr.ip->ip_sum = 0;
9522 /*
9523 * Checksum HW not supported (IP)
9524 */
9525 if (state->sw_csum & CSUM_IP) {
9526 gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9527 }
9528 }
9529
9530
9531 /*
9532 * Updates the pointers to TCP and IPv6 headers
9533 */
9534 static inline void
9535 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9536 {
9537 state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9538 state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9539 state->pay_len = m->m_pkthdr.len - state->hlen;
9540 }
9541
9542 /*
9543 * Sets properly the TCP and IPv6 headers
9544 */
9545 static inline void
9546 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9547 {
9548 state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9549 state->mac_hlen - state->ip_hlen);
9550 /*
9551 * TCP Checksum
9552 */
9553 state->tcp->th_sum = 0;
9554 state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9555 &state->hdr.ip6->ip6_dst,
9556 htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9557 /*
9558 * Checksum HW not supported (TCP)
9559 */
9560 if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9561 (void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9562 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9563 }
9564 state->tcp_seq += state->pay_len;
9565 }
9566
9567 /*
9568 * Init the state during the TCP segmentation
9569 */
9570 static void
9571 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9572 bool is_ipv4, int mac_hlen, int ip_hlen,
9573 void * ip_hdr, struct tcphdr * tcp_hdr)
9574 {
9575 #pragma unused(ifp)
9576
9577 state->hdr.ptr = ip_hdr;
9578 state->tcp = tcp_hdr;
9579 if (is_ipv4) {
9580 state->ip_id = ntohs(state->hdr.ip->ip_id);
9581 state->update = gso_ipv4_tcp_update;
9582 state->internal = gso_ipv4_tcp_internal;
9583 state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9584 } else {
9585 state->update = gso_ipv6_tcp_update;
9586 state->internal = gso_ipv6_tcp_internal;
9587 state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9588 }
9589 state->mac_hlen = mac_hlen;
9590 state->ip_hlen = ip_hlen;
9591 state->tcp_hlen = state->tcp->th_off << 2;
9592 state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9593 state->tcp_seq = ntohl(state->tcp->th_seq);
9594 //state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9595 return;
9596 }
9597
/*
 * GSO on TCP/IP (v4 or v6)
 *
 * Splits the packet in *mp into segments with m_seg() and fixes up the
 * IP and TCP headers of every segment through the callbacks in 'state'.
 *
 * If is_tx is TRUE, segmented packets are transmitted after they are
 * segmented.
 *
 * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
 *
 * Returns:
 *	0 on success, ENOBUFS if the segmentation failed (*mp is NULL in that
 *	case), or the error returned by bridge_transmit().
 *
 * NOTE(review): in the TX case *mp is left pointing at the first segment
 * after it has been handed to bridge_transmit(); callers presumably must
 * not use *mp afterwards - confirm against the callers.
 */
static int
gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
    boolean_t is_tx)
{
	struct mbuf *m, *m_tx;
	int error = 0;
	int mss = 0;
	int nsegs = 0;
	struct mbuf *m0 = *mp;
#ifdef GSO_STATS
	int total_len = m0->m_pkthdr.len;
#endif /* GSO_STATS */

#if 1
	u_int reduce_mss;

	/*
	 * the segment size is derived from the interface MTU, less the IP
	 * and TCP headers and a tunable safety margin
	 */
	reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
	    : if_bridge_tso_reduce_mss_forwarding;
	mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen - reduce_mss;
	assert(mss > 0);
#else
	if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
		mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
	} else {
		mss = m0->m_pkthdr.tso_segsz;
	}
#endif

	*mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
	if (m0 == NULL) {
		return ENOBUFS; /* XXX ok? */
	}
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "%s %s mss %d nsegs %d",
	    ifp->if_xname,
	    is_tx ? "TX" : "RX",
	    mss, nsegs);
	/*
	 * XXX-ste: can this happen?
	 */
	if (m0->m_nextpkt == NULL) {
#ifdef GSO_DEBUG
		D("only 1 segment");
#endif
		/* nothing was split: send or return the packet unchanged */
		if (is_tx) {
			error = bridge_transmit(ifp, m0);
		}
		return error;
	}
#ifdef GSO_STATS
	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
	GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
#endif /* GSO_STATS */

	/* first pkt */
	m = m0;

	state->update(state, m);

	/*
	 * Each iteration finalizes segment 'm_tx' and prepares the next one
	 * ('m'); the loop stops when 'm' is the last segment.
	 */
	do {
		/* FIN and PUSH may only be carried by the last segment */
		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);

		state->internal(state, m);
		m_tx = m;
		m = m->m_nextpkt;
		if (is_tx) {
			/* detach the segment before handing it off */
			m_tx->m_nextpkt = NULL;
			if ((error = bridge_transmit(ifp, m_tx)) != 0) {
				/*
				 * XXX: If a segment can not be sent, discard the following
				 * segments and propagate the error to the upper levels.
				 * In this way the TCP retransmits all the initial packet.
				 */
#ifdef GSO_DEBUG
				D("if_transmit error\n");
#endif
				goto err;
			}
		}
		state->update(state, m);

		/* CWR is only kept on the first segment */
		state->tcp->th_flags &= ~TH_CWR;
		state->tcp->th_seq = htonl(state->tcp_seq);
	} while (m->m_nextpkt);

	/* last pkt */
	state->internal(state, m);

	if (is_tx) {
		error = bridge_transmit(ifp, m);
#ifdef GSO_DEBUG
		if (error) {
			D("last if_transmit error\n");
			D("error - type = %d \n", error);
		}
#endif
	}
#ifdef GSO_STATS
	if (!error) {
		GSOSTAT_INC(tcp.gsos_segmented);
		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
	}
#endif /* GSO_STATS */
	return error;

err:
#ifdef GSO_DEBUG
	D("error - type = %d \n", error);
#endif
	/* free the segments that were not handed to bridge_transmit() */
	while (m != NULL) {
		m_tx = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m_freem(m);
		m = m_tx;
	}
	return error;
}
9726
9727 /*
9728 * GSO for TCP/IPv[46]
9729 */
9730 static int
9731 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9732 boolean_t is_tx)
9733 {
9734 int error;
9735 ip_packet_info info;
9736 uint32_t csum_flags;
9737 struct gso_ip_tcp_state state;
9738 struct bripstats stats; /* XXX ignored */
9739 struct tcphdr *tcp;
9740
9741 if (!is_tx && ipforwarding == 0) {
9742 /* no need to segment if the packet will not be forwarded */
9743 return 0;
9744 }
9745 error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9746 if (error != 0) {
9747 if (*mp != NULL) {
9748 m_freem(*mp);
9749 *mp = NULL;
9750 }
9751 return error;
9752 }
9753 if (info.ip_proto_hdr == NULL) {
9754 /* not a TCP packet */
9755 return 0;
9756 }
9757 tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9758 gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9759 info.ip_hlen, info.ip_hdr.ptr, tcp);
9760 if (is_ipv4) {
9761 csum_flags = CSUM_DELAY_DATA; /* XXX */
9762 if (!is_tx) {
9763 /* if RX to our local IP address, don't segment */
9764 struct in_addr dst_ip;
9765
9766 bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9767 if (in_addr_is_ours(dst_ip)) {
9768 return 0;
9769 }
9770 }
9771 } else {
9772 csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9773 if (!is_tx) {
9774 /* if RX to our local IP address, don't segment */
9775 if (in6_addr_is_ours(&state.hdr.ip6->ip6_dst,
9776 ifp->if_index)) {
9777 /* local IP address, no need to segment */
9778 return 0;
9779 }
9780 }
9781 }
9782 (*mp)->m_pkthdr.csum_flags = csum_flags;
9783 (*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9784 return gso_ip_tcp(ifp, mp, &state, is_tx);
9785 }
9786