xref: /xnu-10002.81.5/bsd/net/if_bridge.c (revision 5e3eaea39dcf651e66cb99ba7d70e32cc4a99587)
1 /*
2  * Copyright (c) 2004-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
66  * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
101  *	  consider heterogeneous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/zalloc.h>
131 
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142 
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162 
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166 
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169 
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172 
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176 
177 #include <os/log.h>
178 
179 /*
180  * if_bridge_debug, BR_DBGF_*
181  * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182  *   to enable additional logs for the corresponding bridge function
183  * - "sysctl net.link.bridge.debug" controls the value of
184  *   'if_bridge_debug'
185  */
186 static uint32_t if_bridge_debug = 0;
187 #define BR_DBGF_LIFECYCLE       0x0001
188 #define BR_DBGF_INPUT           0x0002
189 #define BR_DBGF_OUTPUT          0x0004
190 #define BR_DBGF_RT_TABLE        0x0008
191 #define BR_DBGF_DELAYED_CALL    0x0010
192 #define BR_DBGF_IOCTL           0x0020
193 #define BR_DBGF_MBUF            0x0040
194 #define BR_DBGF_MCAST           0x0080
195 #define BR_DBGF_HOSTFILTER      0x0100
196 #define BR_DBGF_CHECKSUM        0x0200
197 #define BR_DBGF_MAC_NAT         0x0400
198 
199 /*
200  * if_bridge_log_level
201  * - 'if_bridge_log_level' ensures that by default important logs are
202  *   logged regardless of if_bridge_debug by comparing the log level
203  *   in BRIDGE_LOG to if_bridge_log_level
204  * - "sysctl net.link.bridge.log_level" controls the value of
205  *   'if_bridge_log_level'
206  * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
207  *   logs must use LOG_NOTICE to ensure they appear by default
208  */
209 static int if_bridge_log_level = LOG_NOTICE;
210 
/*
 * BRIDGE_DBGF_ENABLED
 * - true if any of the BR_DBGF_* bits in '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
212 
/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - emit the log if '__level' is at or below if_bridge_log_level, or if
 *   BRIDGE_DBGF_ENABLED() reports '__dbgf' as enabled
 * - BRIDGE_LOG prefixes the message with the calling function's name;
 *   BRIDGE_LOG_SIMPLE does not
 * - '__level' and '__dbgf' are parenthesized so that expression
 *   arguments are evaluated as a unit
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)                      \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                    __func__, ## __VA_ARGS__);                  \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, __string,                \
	                    ## __VA_ARGS__);                            \
	        }                                                       \
	} while (0)
234 
235 #define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
236 #define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
237 #define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
238 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
239 #define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
240 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
241 
/*
 * BRIDGE_LOCK_DEBUG
 * - when non-zero, the BRIDGE_* locking macros call the bridge_*()
 *   functions of the same name instead of expanding inline
 */
#define BRIDGE_LOCK_DEBUG      1
#if BRIDGE_LOCK_DEBUG

/* number of entries in the lock_lr[] / unlock_lr[] history arrays */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
/* '_err' receives the result of bridge_lock2ref() */
#define BRIDGE_LOCK2REF(_sc, _err)      ((_err) = bridge_lock2ref(_sc))
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF
 * - called with the bridge lock held; always drops it
 * - sets '_err' to EBUSY if an exclusive request is pending
 *   (sc_iflist_xcnt > 0), otherwise takes a reference on the member
 *   list and sets '_err' to 0
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0) {                                \
	        (_err) = EBUSY;                                         \
	} else {                                                        \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
/*
 * BRIDGE_UNREF
 * - drops a member list reference; if that was the last reference and
 *   an exclusive request is pending, wakes up sleepers on sc_cv after
 *   releasing the lock
 */
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else {                                                        \
	        _BRIDGE_UNLOCK(_sc);                                    \
	}                                                               \
} while (0)
/*
 * BRIDGE_XLOCK
 * - called with the bridge lock held; registers an exclusive request
 *   and sleeps on sc_cv until all member list references are dropped
 */
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0) {                              \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
	}                                                               \
} while (0)
/* BRIDGE_XDROP - withdraw an exclusive request; bridge lock must be held */
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
290 
/*
 * BRIDGE_BPF_MTAP_INPUT
 * - if sc->sc_bpf_input is set, pass 'm' to bridge_bpf_input() along
 *   with the calling function name and line number
 * - wrapped in do { } while (0) so that it forms a single statement and
 *   cannot capture the 'else' of an enclosing unbraced if
 * - expands to nothing when NBPFILTER is not greater than zero
 */
#if NBPFILTER > 0
#define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
	do {                                                            \
	        if ((sc)->sc_bpf_input != NULL) {                       \
	                bridge_bpf_input((sc)->sc_ifp, (m),             \
	                    __func__, __LINE__);                        \
	        }                                                       \
	} while (0)
#else /* NBPFILTER */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)
#endif /* NBPFILTER */
298 
299 /*
300  * Initial size of the route hash table.  Must be a power of two.
301  */
302 #ifndef BRIDGE_RTHASH_SIZE
303 #define BRIDGE_RTHASH_SIZE              16
304 #endif
305 
306 /*
307  * Maximum size of the routing hash table
308  */
309 #define BRIDGE_RTHASH_SIZE_MAX          2048
310 
311 #define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)
312 
313 /*
314  * Maximum number of addresses to cache.
315  */
316 #ifndef BRIDGE_RTABLE_MAX
317 #define BRIDGE_RTABLE_MAX               100
318 #endif
319 
320 
321 /*
322  * Timeout (in seconds) for entries learned dynamically.
323  */
324 #ifndef BRIDGE_RTABLE_TIMEOUT
325 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
326 #endif
327 
328 /*
329  * Number of seconds between walks of the route list.
330  */
331 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
332 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
333 #endif
334 
335 /*
336  * Number of MAC NAT entries
337  * - sized based on 16 clients (including MAC NAT interface)
338  *   each with 4 addresses
339  */
340 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
341 #define BRIDGE_MAC_NAT_ENTRY_MAX        64
342 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
343 
344 /*
345  * List of capabilities to possibly mask on the member interface.
346  */
347 #define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
348 /*
349  * List of capabilities to disable on the member interface.
350  */
351 #define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
352 
353 /*
354  * Bridge interface list entry.
355  */
356 struct bridge_iflist {
357 	TAILQ_ENTRY(bridge_iflist) bif_next;
358 	struct ifnet            *bif_ifp;       /* member if */
359 	struct bstp_port        bif_stp;        /* STP state */
360 	uint32_t                bif_ifflags;    /* member if flags */
361 	int                     bif_savedcaps;  /* saved capabilities */
362 	uint32_t                bif_addrmax;    /* max # of addresses */
363 	uint32_t                bif_addrcnt;    /* cur. # of addresses */
364 	uint32_t                bif_addrexceeded; /* # of address violations */
365 
366 	interface_filter_t      bif_iff_ref;
367 	struct bridge_softc     *bif_sc;
368 	uint32_t                bif_flags;
369 
370 	/* host filter */
371 	struct in_addr          bif_hf_ipsrc;
372 	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN];
373 
374 	struct ifbrmstats       bif_stats;
375 };
376 
377 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 	return (bif->bif_ifflags & flags) == flags;
381 }
382 
383 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388 
389 /* fake errors to make the code clearer */
390 #define _EBADIP                 EJUSTRETURN
391 #define _EBADIPCHECKSUM         EJUSTRETURN
392 #define _EBADIPV6               EJUSTRETURN
393 #define _EBADUDP                EJUSTRETURN
394 #define _EBADTCP                EJUSTRETURN
395 #define _EBADUDPCHECKSUM        EJUSTRETURN
396 #define _EBADTCPCHECKSUM        EJUSTRETURN
397 
398 #define BIFF_PROMISC            0x01    /* promiscuous mode set */
399 #define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
400 #define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
401 #define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
402 #define BIFF_HOST_FILTER        0x10    /* host filter enabled */
403 #define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
404 #define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
405 #define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
406 #define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
407 #define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
408 #define BIFF_ALL_MULTI          0x400   /* allmulti set */
409 #define BIFF_LRO_DISABLED       0x800   /* LRO was disabled */
410 #if SKYWALK
411 #define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
412 #define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
413 #endif /* SKYWALK */
414 
415 /*
416  * mac_nat_entry
417  * - translates between an IP address and MAC address on a specific
418  *   bridge interface member
419  */
420 struct mac_nat_entry {
421 	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
422 	struct bridge_iflist    *mne_bif;       /* originating interface */
423 	unsigned long           mne_expire;     /* expiration time */
424 	union {
425 		struct in_addr  mneu_ip;        /* originating IPv4 address */
426 		struct in6_addr mneu_ip6;       /* originating IPv6 address */
427 	} mne_u;
428 	uint8_t                 mne_mac[ETHER_ADDR_LEN];
429 	uint8_t                 mne_flags;
430 	uint8_t                 mne_reserved;
431 };
432 #define mne_ip  mne_u.mneu_ip
433 #define mne_ip6 mne_u.mneu_ip6
434 
435 #define MNE_FLAGS_IPV6          0x01    /* IPv6 address */
436 
437 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
438 
/*
 * mac_nat_record
 * - used by bridge_mac_nat_output() to convey the translation that needs
 *   to take place in bridge_mac_nat_translate
 * - holds enough information so that the translation can be done later
 *   without holding the bridge lock
 * - NOTE(review): which arm of mnr_u is valid is presumably selected by
 *   mnr_ether_type; confirm against bridge_mac_nat_translate()
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		/* ARP */
		uint16_t        mnru_arp_offset;
		/* IPv4 */
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t  mnruip_header_len;
		} mnru_ip;
		/* IPv6 */
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessors for the members of mnr_u */
#define mnr_arp_offset  mnr_u.mnru_arp_offset

#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
474 
475 /*
476  * Bridge route node.
477  */
478 struct bridge_rtnode {
479 	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
480 	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
481 	struct bridge_iflist    *brt_dst;       /* destination if */
482 	unsigned long           brt_expire;     /* expiration time */
483 	uint8_t                 brt_flags;      /* address flags */
484 	uint8_t                 brt_addr[ETHER_ADDR_LEN];
485 	uint16_t                brt_vlan;       /* vlan id */
486 
487 };
488 #define brt_ifp                 brt_dst->bif_ifp
489 
490 /*
491  * Bridge delayed function call context
492  */
493 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
494 
495 struct bridge_delayed_call {
496 	struct bridge_softc     *bdc_sc;
497 	bridge_delayed_func_t   bdc_func; /* Function to call */
498 	struct timespec         bdc_ts; /* Time to call */
499 	u_int32_t               bdc_flags;
500 	thread_call_t           bdc_thread_call;
501 };
502 
503 #define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
504 #define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
505 
506 /*
507  * Software state for each bridge.
508  */
509 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
510 
511 struct bridge_softc {
512 	struct ifnet            *sc_ifp;        /* make this an interface */
513 	u_int32_t               sc_flags;
514 	LIST_ENTRY(bridge_softc) sc_list;
515 	decl_lck_mtx_data(, sc_mtx);
516 	struct _bridge_rtnode_list *sc_rthash;  /* our forwarding table */
517 	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
518 	uint32_t                sc_rthash_key;  /* key for hash */
519 	uint32_t                sc_rthash_size; /* size of the hash table */
520 	struct bridge_delayed_call sc_aging_timer;
521 	struct bridge_delayed_call sc_resize_call;
522 	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
523 	struct bstp_state       sc_stp;         /* STP state */
524 	bpf_packet_func         sc_bpf_input;
525 	bpf_packet_func         sc_bpf_output;
526 	void                    *sc_cv;
527 	uint32_t                sc_brtmax;      /* max # of addresses */
528 	uint32_t                sc_brtcnt;      /* cur. # of addresses */
529 	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
530 	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
531 	uint32_t                sc_iflist_xcnt; /* refcount for sc_iflist */
532 	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
533 	uint32_t                sc_brtexceeded; /* # of cache drops */
534 	uint32_t                sc_filter_flags; /* ipf and flags */
535 	struct ifnet            *sc_ifaddr;     /* member mac copied from */
536 	u_char                  sc_defaddr[6];  /* Default MAC address */
537 	char                    sc_if_xname[IFNAMSIZ];
538 
539 	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
540 	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
541 	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
542 	uint32_t                sc_mne_max;      /* max # of entries */
543 	uint32_t                sc_mne_count;    /* cur. # of entries */
544 	uint32_t                sc_mne_allocation_failures;
545 #if BRIDGE_LOCK_DEBUG
546 	/*
547 	 * Locking and unlocking calling history
548 	 */
549 	void                    *lock_lr[BR_LCKDBG_MAX];
550 	int                     next_lock_lr;
551 	void                    *unlock_lr[BR_LCKDBG_MAX];
552 	int                     next_unlock_lr;
553 #endif /* BRIDGE_LOCK_DEBUG */
554 };
555 
556 #define SCF_DETACHING            0x01
557 #define SCF_RESIZING             0x02
558 #define SCF_MEDIA_ACTIVE         0x04
559 
/*
 * ChecksumOperation
 * - selects the checksum handling requested for a packet; passed as the
 *   last argument of bridge_enqueue()
 * - values are explicit and must not be renumbered
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
566 
/*
 * union iphdr
 * - a single IP header pointer viewable as an IPv4 header, an IPv6
 *   header, or an untyped pointer
 */
union iphdr {
	struct ip *ip;
	struct ip6_hdr *ip6;
	void * ptr;
};
572 
573 typedef struct {
574 	u_int           ip_hlen;        /* IP header length */
575 	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
576 	u_int           ip_opt_len;     /* IPv6 options headers length */
577 	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
578 	bool            ip_is_ipv4;
579 	bool            ip_is_fragmented;
580 	union iphdr     ip_hdr;         /* pointer to IP header */
581 	void *          ip_proto_hdr;   /* ptr to protocol header (TCP) */
582 } ip_packet_info, *ip_packet_info_t;
583 
584 struct bridge_hostfilter_stats bridge_hostfilter_stats;
585 
586 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
587 #if BRIDGE_LOCK_DEBUG
588 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
589 #else
590 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
591 #endif
592 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
593 
594 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
595 
596 static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
597 static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
598 
599 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
600 static int      bridge_clone_destroy(struct ifnet *);
601 
602 static errno_t  bridge_ioctl(struct ifnet *, u_long, void *);
603 #if HAS_IF_CAP
604 static void     bridge_mutecaps(struct bridge_softc *);
605 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
606     int);
607 #endif
608 static errno_t bridge_set_tso(struct bridge_softc *);
609 static void     bridge_proto_attach_changed(struct ifnet *);
610 static int      bridge_init(struct ifnet *);
611 #if HAS_BRIDGE_DUMMYNET
612 static void     bridge_dummynet(struct mbuf *, struct ifnet *);
613 #endif
614 static void     bridge_ifstop(struct ifnet *, int);
615 static int      bridge_output(struct ifnet *, struct mbuf *);
616 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
617 static void     bridge_start(struct ifnet *);
618 static errno_t  bridge_input(struct ifnet *, mbuf_t *);
619 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
620     mbuf_t *, char **);
621 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
622     mbuf_t *);
623 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
624     mbuf_t *m);
625 
626 static int      bridge_enqueue(ifnet_t, struct ifnet *,
627     struct ifnet *, struct mbuf *, ChecksumOperation);
628 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
629 
630 static void     bridge_forward(struct bridge_softc *, struct bridge_iflist *,
631     struct mbuf *);
632 
633 static void     bridge_aging_timer(struct bridge_softc *sc);
634 
635 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
636     struct mbuf *, int);
637 static void     bridge_span(struct bridge_softc *, struct mbuf *);
638 
639 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
640     uint16_t, struct bridge_iflist *, int, uint8_t);
641 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
642     uint16_t);
643 static void     bridge_rttrim(struct bridge_softc *);
644 static void     bridge_rtage(struct bridge_softc *);
645 static void     bridge_rtflush(struct bridge_softc *, int);
646 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
647     uint16_t);
648 
649 static int      bridge_rtable_init(struct bridge_softc *);
650 static void     bridge_rtable_fini(struct bridge_softc *);
651 
652 static void     bridge_rthash_resize(struct bridge_softc *);
653 
654 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
655 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
656     const uint8_t *, uint16_t);
657 static int      bridge_rtnode_hash(struct bridge_softc *,
658     struct bridge_rtnode *);
659 static int      bridge_rtnode_insert(struct bridge_softc *,
660     struct bridge_rtnode *);
661 static void     bridge_rtnode_destroy(struct bridge_softc *,
662     struct bridge_rtnode *);
663 #if BRIDGESTP
664 static void     bridge_rtable_expire(struct ifnet *, int);
665 static void     bridge_state_change(struct ifnet *, int);
666 #endif /* BRIDGESTP */
667 
668 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
669     const char *name);
670 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
671     struct ifnet *ifp);
672 static void     bridge_delete_member(struct bridge_softc *,
673     struct bridge_iflist *);
674 static void     bridge_delete_span(struct bridge_softc *,
675     struct bridge_iflist *);
676 
677 static int      bridge_ioctl_add(struct bridge_softc *, void *);
678 static int      bridge_ioctl_del(struct bridge_softc *, void *);
679 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
680 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
681 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
682 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
683 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *);
684 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *);
685 static int      bridge_ioctl_rts32(struct bridge_softc *, void *);
686 static int      bridge_ioctl_rts64(struct bridge_softc *, void *);
687 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *);
688 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *);
689 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
690 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
691 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *);
692 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *);
693 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
694 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
695 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
696 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
697 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
698 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
699 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
700 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
701 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
702 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
703 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
704 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
705 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
706 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
707 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *);
708 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *);
709 static int      bridge_ioctl_grte(struct bridge_softc *, void *);
710 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
711 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
712 static int      bridge_ioctl_sproto(struct bridge_softc *, void *);
713 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *);
714 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *);
715 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *);
716 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *);
717 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
718 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *);
719 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
720 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
721 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *);
722 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *);
723 
724 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
725 static int bridge_ip_checkbasic(struct mbuf **);
726 static int bridge_ip6_checkbasic(struct mbuf **);
727 
728 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
729 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
730 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
731 
732 static void bridge_detach(ifnet_t);
733 static void bridge_link_event(struct ifnet *, u_int32_t);
734 static void bridge_iflinkevent(struct ifnet *);
735 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
736 static int interface_media_active(struct ifnet *);
737 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
738 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
739 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
740 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
741 
742 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
743     struct bridge_iflist *);
744 static void bridge_mac_nat_disable(struct bridge_softc *sc);
745 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
746 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
747 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
748     struct bridge_iflist *);
749 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
750     boolean_t *);
751 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
752     struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
753 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
754     const caddr_t);
755 static bool is_broadcast_ip_packet(mbuf_t *);
756 static bool in_addr_is_ours(const struct in_addr);
757 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
758 
759 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
760 
761 static int
762 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
763     boolean_t is_tx);
764 
/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2); note that
 * VLANTAGOF() below ignores its argument and always evaluates to 0.
 */
766 #define VLANTAGOF(_m)   0
767 
768 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
769 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
770 
771 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
772 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
773 
774 #if BRIDGESTP
775 static struct bstp_cb_ops bridge_ops = {
776 	.bcb_state = bridge_state_change,
777 	.bcb_rtage = bridge_rtable_expire
778 };
779 #endif /* BRIDGESTP */
780 
781 SYSCTL_DECL(_net_link);
782 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
783     "Bridge");
784 
785 static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
786 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
787     CTLFLAG_RW | CTLFLAG_LOCKED,
788     &bridge_inherit_mac, 0,
789     "Inherit MAC address from the first bridge member");
790 
791 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
792     CTLFLAG_RW | CTLFLAG_LOCKED,
793     &bridge_rtable_prune_period, 0,
794     "Interval between pruning of routing table");
795 
796 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
797 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
798     CTLFLAG_RW | CTLFLAG_LOCKED,
799     &bridge_rtable_hash_size_max, 0,
800     "Maximum size of the routing hash table");
801 
802 #if BRIDGE_DELAYED_CALLBACK_DEBUG
803 static int bridge_delayed_callback_delay = 0;
804 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
805     CTLFLAG_RW | CTLFLAG_LOCKED,
806     &bridge_delayed_callback_delay, 0,
807     "Delay before calling delayed function");
808 #endif
809 
810 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
811     hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
812     &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
813 
814 #if BRIDGESTP
815 static int log_stp   = 0;   /* log STP state changes */
816 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
817     &log_stp, 0, "Log STP state changes");
818 #endif /* BRIDGESTP */
819 
/*
 * One entry of a bridge control dispatch table (see
 * bridge_control_table32 / bridge_control_table64).
 */
struct bridge_control {
	/* handler, called with the bridge softc and the argument buffer */
	int             (*bc_func)(struct bridge_softc *, void *);
	/* size of the handler's argument; the tables set it with sizeof() */
	unsigned int    bc_argsize;
	/* bitwise OR of BC_F_* flags */
	unsigned int    bc_flags;
};
825 
826 #define VMNET_TAG               "com.apple.vmnet"
827 #define VMNET_LOCAL_TAG         VMNET_TAG ".local"
828 #define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
829 #define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"
830 
831 static u_int16_t vmnet_tag;
832 static u_int16_t vmnet_local_tag;
833 static u_int16_t vmnet_broadcast_tag;
834 static u_int16_t vmnet_multicast_tag;
835 
836 static u_int16_t
allocate_pf_tag(char * name)837 allocate_pf_tag(char * name)
838 {
839 	u_int16_t       tag;
840 
841 	tag = pf_tagname2tag_ext(name);
842 	BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
843 	return tag;
844 }
845 
846 static void
allocate_vmnet_pf_tags(void)847 allocate_vmnet_pf_tags(void)
848 {
849 	/* allocate tags to use with PF */
850 	if (vmnet_tag == 0) {
851 		vmnet_tag = allocate_pf_tag(VMNET_TAG);
852 	}
853 	if (vmnet_local_tag == 0) {
854 		vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
855 	}
856 	if (vmnet_broadcast_tag == 0) {
857 		vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
858 	}
859 	if (vmnet_multicast_tag == 0) {
860 		vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
861 	}
862 }
863 
864 #define BC_F_COPYIN             0x01    /* copy arguments in */
865 #define BC_F_COPYOUT            0x02    /* copy arguments out */
866 #define BC_F_SUSER              0x04    /* do super-user check */
867 
868 static const struct bridge_control bridge_control_table32[] = {
869 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
870 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
871 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
872 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
873 
874 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
875 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
876 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
877 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
878 
879 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
880 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
881 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
882 	  .bc_flags = BC_F_COPYOUT },
883 
884 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
885 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
886 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
887 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
888 
889 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
890 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
891 
892 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
893 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
894 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
895 	  .bc_flags = BC_F_COPYOUT },
896 
897 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
898 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
899 
900 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
901 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
902 
903 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
904 	  .bc_flags = BC_F_COPYOUT },
905 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
906 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
907 
908 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
909 	  .bc_flags = BC_F_COPYOUT },
910 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
911 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
912 
913 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
914 	  .bc_flags = BC_F_COPYOUT },
915 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
916 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
917 
918 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
919 	  .bc_flags = BC_F_COPYOUT },
920 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
921 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
922 
923 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
924 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
925 
926 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
927 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
928 
929 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
930 	  .bc_flags = BC_F_COPYOUT },
931 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
932 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
933 
934 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
935 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
936 
937 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
938 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
939 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
940 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
941 
942 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
943 	  .bc_flags = BC_F_COPYOUT },
944 
945 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
946 	  .bc_flags = BC_F_COPYOUT },
947 
948 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
949 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
950 
951 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
952 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
953 
954 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
955 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
956 
957 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
958 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
959 
960 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
961 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
962 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
963 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
964 
965 	{ .bc_func = bridge_ioctl_gmnelist32,
966 	  .bc_argsize = sizeof(struct ifbrmnelist32),
967 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
968 	{ .bc_func = bridge_ioctl_gifstats32,
969 	  .bc_argsize = sizeof(struct ifbrmreq32),
970 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
971 };
972 
973 static const struct bridge_control bridge_control_table64[] = {
974 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
975 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
976 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
977 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
978 
979 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
980 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
981 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
982 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
983 
984 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
985 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
986 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
987 	  .bc_flags = BC_F_COPYOUT },
988 
989 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
990 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
991 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
992 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
993 
994 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
995 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
996 
997 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
998 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
999 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
1000 	  .bc_flags = BC_F_COPYOUT },
1001 
1002 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1003 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1004 
1005 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1006 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1007 
1008 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1009 	  .bc_flags = BC_F_COPYOUT },
1010 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1011 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1012 
1013 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1014 	  .bc_flags = BC_F_COPYOUT },
1015 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1016 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1017 
1018 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1019 	  .bc_flags = BC_F_COPYOUT },
1020 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1021 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1022 
1023 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1024 	  .bc_flags = BC_F_COPYOUT },
1025 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
1026 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1027 
1028 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1029 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1030 
1031 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1032 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1033 
1034 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1035 	  .bc_flags = BC_F_COPYOUT },
1036 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1037 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1038 
1039 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1040 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1041 
1042 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1043 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1044 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1045 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1046 
1047 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1048 	  .bc_flags = BC_F_COPYOUT },
1049 
1050 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1051 	  .bc_flags = BC_F_COPYOUT },
1052 
1053 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1054 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1055 
1056 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1057 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1058 
1059 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1060 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1061 
1062 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1063 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1064 
1065 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1066 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1067 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1068 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1069 
1070 	{ .bc_func = bridge_ioctl_gmnelist64,
1071 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1072 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1073 	{ .bc_func = bridge_ioctl_gifstats64,
1074 	  .bc_argsize = sizeof(struct ifbrmreq64),
1075 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1076 };
1077 
1078 static const unsigned int bridge_control_table_size =
1079     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1080 
1081 static LIST_HEAD(, bridge_softc) bridge_list =
1082     LIST_HEAD_INITIALIZER(bridge_list);
1083 
1084 #define BRIDGENAME      "bridge"
1085 #define BRIDGES_MAX     IF_MAXUNIT
1086 #define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)
1087 
1088 static struct if_clone bridge_cloner =
1089     IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
1090     0, BRIDGES_MAX);
1091 
1092 static int if_bridge_txstart = 0;
1093 SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
1094     &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
1095 
1096 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1097     &if_bridge_debug, 0, "Bridge debug flags");
1098 
1099 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
1100     CTLFLAG_RW | CTLFLAG_LOCKED,
1101     &if_bridge_log_level, 0, "Bridge log level");
1102 
1103 static int if_bridge_segmentation = 1;
1104 SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
1105     CTLFLAG_RW | CTLFLAG_LOCKED,
1106     &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1107 
1108 static int if_bridge_vmnet_pf_tagging = 1;
1109 SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1110     CTLFLAG_RW | CTLFLAG_LOCKED,
1111     &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1112 
1113 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
1114 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
1115 #define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
1116 #define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0
1117 
1118 static u_int if_bridge_tso_reduce_mss_forwarding
1119         = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
1120 static u_int if_bridge_tso_reduce_mss_tx
1121         = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1122 
1123 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1124 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1125 {
1126 	int     changed;
1127 	int     error;
1128 	u_int   new_value;
1129 
1130 	error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1131 	    &changed);
1132 	if (error == 0 && changed != 0) {
1133 		if (new_value > val_max) {
1134 			return EINVAL;
1135 		}
1136 		*val = new_value;
1137 	}
1138 	return error;
1139 }
1140 
1141 static int
1142 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1143 {
1144 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1145     BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1146 }
1147 
1148 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1149     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1150     0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1151     "Bridge tso reduce mss when forwarding");
1152 
1153 static int
1154 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1155 {
1156 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1157     BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1158 }
1159 
1160 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1161     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1162     0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1163     "Bridge tso reduce mss on transmit");
1164 
1165 
1166 #if DEBUG || DEVELOPMENT
1167 #define BRIDGE_FORCE_ONE        0x00000001
1168 #define BRIDGE_FORCE_TWO        0x00000002
1169 static u_int32_t if_bridge_force_errors = 0;
1170 SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
1171     CTLFLAG_RW | CTLFLAG_LOCKED,
1172     &if_bridge_force_errors, 0, "Bridge interface force errors");
1173 static inline bool
bridge_error_is_forced(u_int32_t flags)1174 bridge_error_is_forced(u_int32_t flags)
1175 {
1176 	return (if_bridge_force_errors & flags) != 0;
1177 }
1178 
1179 #define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
1180 	do {                                                            \
1181 	        __is_forced = bridge_error_is_forced(__flags);          \
1182 	        if (__is_forced) {                                      \
1183 	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
1184 	        }                                                       \
1185 	} while (0)
1186 #endif /* DEBUG || DEVELOPMENT */
1187 
1188 
1189 static void brlog_ether_header(struct ether_header *);
1190 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1191 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1192 static void brlog_mbuf(mbuf_t, const char *, const char *);
1193 static void brlog_link(struct bridge_softc * sc);
1194 
1195 #if BRIDGE_LOCK_DEBUG
1196 static void bridge_lock(struct bridge_softc *);
1197 static void bridge_unlock(struct bridge_softc *);
1198 static int bridge_lock2ref(struct bridge_softc *);
1199 static void bridge_unref(struct bridge_softc *);
1200 static void bridge_xlock(struct bridge_softc *);
1201 static void bridge_xdrop(struct bridge_softc *);
1202 
1203 static void
bridge_lock(struct bridge_softc * sc)1204 bridge_lock(struct bridge_softc *sc)
1205 {
1206 	void *lr_saved = __builtin_return_address(0);
1207 
1208 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1209 
1210 	_BRIDGE_LOCK(sc);
1211 
1212 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1213 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1214 }
1215 
1216 static void
bridge_unlock(struct bridge_softc * sc)1217 bridge_unlock(struct bridge_softc *sc)
1218 {
1219 	void *lr_saved = __builtin_return_address(0);
1220 
1221 	BRIDGE_LOCK_ASSERT_HELD(sc);
1222 
1223 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1224 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1225 
1226 	_BRIDGE_UNLOCK(sc);
1227 }
1228 
1229 static int
bridge_lock2ref(struct bridge_softc * sc)1230 bridge_lock2ref(struct bridge_softc *sc)
1231 {
1232 	int error = 0;
1233 	void *lr_saved = __builtin_return_address(0);
1234 
1235 	BRIDGE_LOCK_ASSERT_HELD(sc);
1236 
1237 	if (sc->sc_iflist_xcnt > 0) {
1238 		error = EBUSY;
1239 	} else {
1240 		sc->sc_iflist_ref++;
1241 	}
1242 
1243 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1244 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1245 
1246 	_BRIDGE_UNLOCK(sc);
1247 
1248 	return error;
1249 }
1250 
1251 static void
bridge_unref(struct bridge_softc * sc)1252 bridge_unref(struct bridge_softc *sc)
1253 {
1254 	void *lr_saved = __builtin_return_address(0);
1255 
1256 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1257 
1258 	_BRIDGE_LOCK(sc);
1259 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1260 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1261 
1262 	sc->sc_iflist_ref--;
1263 
1264 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1265 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1266 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1267 		_BRIDGE_UNLOCK(sc);
1268 		wakeup(&sc->sc_cv);
1269 	} else {
1270 		_BRIDGE_UNLOCK(sc);
1271 	}
1272 }
1273 
1274 static void
bridge_xlock(struct bridge_softc * sc)1275 bridge_xlock(struct bridge_softc *sc)
1276 {
1277 	void *lr_saved = __builtin_return_address(0);
1278 
1279 	BRIDGE_LOCK_ASSERT_HELD(sc);
1280 
1281 	sc->sc_iflist_xcnt++;
1282 	while (sc->sc_iflist_ref > 0) {
1283 		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1284 		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1285 
1286 		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1287 
1288 		sc->lock_lr[sc->next_lock_lr] = lr_saved;
1289 		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1290 	}
1291 }
1292 
1293 static void
bridge_xdrop(struct bridge_softc * sc)1294 bridge_xdrop(struct bridge_softc *sc)
1295 {
1296 	BRIDGE_LOCK_ASSERT_HELD(sc);
1297 
1298 	sc->sc_iflist_xcnt--;
1299 }
1300 
1301 #endif /* BRIDGE_LOCK_DEBUG */
1302 
1303 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1304 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1305 {
1306 	if (m) {
1307 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1308 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1309 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1310 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1311 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1312 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1313 		    suffix ? suffix : "");
1314 	} else {
1315 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1316 	}
1317 }
1318 
1319 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1320 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1321 {
1322 	if (m) {
1323 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1324 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1325 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1326 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1327 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1328 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1329 		    (unsigned int)mbuf_maxlen(m),
1330 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1331 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1332 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1333 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1334 			brlog_mbuf_pkthdr(m, "", suffix);
1335 		}
1336 	} else {
1337 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1338 	}
1339 }
1340 
1341 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1342 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1343 {
1344 	mbuf_t                  n;
1345 	size_t                  i, j;
1346 	size_t                  pktlen, mlen, maxlen;
1347 	unsigned char   *ptr;
1348 
1349 	pktlen = mbuf_pkthdr_len(m);
1350 
1351 	if (offset > pktlen) {
1352 		return;
1353 	}
1354 
1355 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1356 	n = m;
1357 	mlen = mbuf_len(n);
1358 	ptr = mbuf_data(n);
1359 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1360 		if (j >= mlen) {
1361 			n = mbuf_next(n);
1362 			if (n == 0) {
1363 				break;
1364 			}
1365 			ptr = mbuf_data(n);
1366 			mlen = mbuf_len(n);
1367 			j = 0;
1368 		}
1369 		if (i >= offset) {
1370 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1371 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1372 		}
1373 	}
1374 }
1375 
1376 static void
brlog_ether_header(struct ether_header * eh)1377 brlog_ether_header(struct ether_header *eh)
1378 {
1379 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1380 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1381 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1382 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1383 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1384 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1385 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1386 	    ntohs(eh->ether_type));
1387 }
1388 
/*
 * ether_ntop:
 *
 *	Format the six bytes at `ap' as colon-separated, two-digit
 *	lowercase hex ("xx:xx:xx:xx:xx:xx") into `buf', writing at most
 *	`len' bytes as bounded by snprintf().  Returns `buf'.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	const u_char *octet = ap;

	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
	    octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);

	return buf;
}
1397 
1398 static void
brlog_link(struct bridge_softc * sc)1399 brlog_link(struct bridge_softc * sc)
1400 {
1401 	int i;
1402 	uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
1403 	IFNAMSIZ + ETHER_ADDR_LEN];
1404 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
1405 	const u_char * lladdr;
1406 	char lladdr_str[48];
1407 
1408 	memset(sdl, 0, sizeof(sdl_buffer));
1409 	sdl->sdl_family = AF_LINK;
1410 	sdl->sdl_nlen = strlen(sc->sc_if_xname);
1411 	sdl->sdl_alen = ETHER_ADDR_LEN;
1412 	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1413 	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1414 	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1415 	lladdr_str[0] = '\0';
1416 	for (i = 0, lladdr = CONST_LLADDR(sdl);
1417 	    i < sdl->sdl_alen;
1418 	    i++, lladdr++) {
1419 		char    byte_str[4];
1420 
1421 		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1422 		    *lladdr);
1423 		strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
1424 	}
1425 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1426 	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1427 	    " slen %d addr %s", sc->sc_if_xname,
1428 	    sdl->sdl_len, sdl->sdl_index,
1429 	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1430 	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1431 }
1432 
1433 
1434 /*
1435  * bridgeattach:
1436  *
1437  *	Pseudo-device attach routine.
1438  */
1439 __private_extern__ int
bridgeattach(int n)1440 bridgeattach(int n)
1441 {
1442 #pragma unused(n)
1443 	int error;
1444 
1445 	LIST_INIT(&bridge_list);
1446 
1447 #if BRIDGESTP
1448 	bstp_sys_init();
1449 #endif /* BRIDGESTP */
1450 
1451 	error = if_clone_attach(&bridge_cloner);
1452 	if (error != 0) {
1453 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1454 	}
1455 	return error;
1456 }
1457 
1458 
1459 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1460 bridge_ifnet_set_attrs(struct ifnet * ifp)
1461 {
1462 	errno_t         error;
1463 
1464 	error = ifnet_set_mtu(ifp, ETHERMTU);
1465 	if (error != 0) {
1466 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1467 		goto done;
1468 	}
1469 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1470 	if (error != 0) {
1471 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1472 		goto done;
1473 	}
1474 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1475 	if (error != 0) {
1476 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1477 		goto done;
1478 	}
1479 	error = ifnet_set_flags(ifp,
1480 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1481 	    0xffff);
1482 
1483 	if (error != 0) {
1484 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1485 		goto done;
1486 	}
1487 done:
1488 	return error;
1489 }
1490 
1491 /*
1492  * bridge_clone_create:
1493  *
1494  *	Create a new bridge instance.
1495  */
1496 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1497 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1498 {
1499 #pragma unused(params)
1500 	struct ifnet *ifp = NULL;
1501 	struct bridge_softc *sc = NULL;
1502 	struct bridge_softc *sc2 = NULL;
1503 	struct ifnet_init_eparams init_params;
1504 	errno_t error = 0;
1505 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1506 	int fb, retry, has_hostid;
1507 
1508 	sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1509 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1510 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1511 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1512 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1513 	sc->sc_filter_flags = 0;
1514 
1515 	TAILQ_INIT(&sc->sc_iflist);
1516 
1517 	/* use the interface name as the unique id for ifp recycle */
1518 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1519 	    ifc->ifc_name, unit);
1520 	bzero(&init_params, sizeof(init_params));
1521 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1522 	init_params.len                 = sizeof(init_params);
1523 	/* Initialize our routing table. */
1524 	error = bridge_rtable_init(sc);
1525 	if (error != 0) {
1526 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1527 		goto done;
1528 	}
1529 	TAILQ_INIT(&sc->sc_spanlist);
1530 	if (if_bridge_txstart) {
1531 		init_params.start = bridge_start;
1532 	} else {
1533 		init_params.flags = IFNET_INIT_LEGACY;
1534 		init_params.output = bridge_output;
1535 	}
1536 	init_params.set_bpf_tap = bridge_set_bpf_tap;
1537 	init_params.uniqueid            = sc->sc_if_xname;
1538 	init_params.uniqueid_len        = strlen(sc->sc_if_xname);
1539 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1540 	init_params.name                = ifc->ifc_name;
1541 	init_params.unit                = unit;
1542 	init_params.family              = IFNET_FAMILY_ETHERNET;
1543 	init_params.type                = IFT_BRIDGE;
1544 	init_params.demux               = ether_demux;
1545 	init_params.add_proto           = ether_add_proto;
1546 	init_params.del_proto           = ether_del_proto;
1547 	init_params.check_multi         = ether_check_multi;
1548 	init_params.framer_extended     = ether_frameout_extended;
1549 	init_params.softc               = sc;
1550 	init_params.ioctl               = bridge_ioctl;
1551 	init_params.detach              = bridge_detach;
1552 	init_params.broadcast_addr      = etherbroadcastaddr;
1553 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1554 
1555 	error = ifnet_allocate_extended(&init_params, &ifp);
1556 	if (error != 0) {
1557 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1558 		goto done;
1559 	}
1560 	LIST_INIT(&sc->sc_mne_list);
1561 	LIST_INIT(&sc->sc_mne_list_v6);
1562 	sc->sc_ifp = ifp;
1563 	error = bridge_ifnet_set_attrs(ifp);
1564 	if (error != 0) {
1565 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1566 		    error);
1567 		goto done;
1568 	}
1569 	/*
1570 	 * Generate an ethernet address with a locally administered address.
1571 	 *
1572 	 * Since we are using random ethernet addresses for the bridge, it is
1573 	 * possible that we might have address collisions, so make sure that
1574 	 * this hardware address isn't already in use on another bridge.
1575 	 * The first try uses the "hostid" and falls back to read_frandom();
1576 	 * for "hostid", we use the MAC address of the first-encountered
1577 	 * Ethernet-type interface that is currently configured.
1578 	 */
1579 	fb = 0;
1580 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1581 	for (retry = 1; retry != 0;) {
1582 		if (fb || has_hostid == 0) {
1583 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1584 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1585 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1586 		} else {
1587 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1588 			    ETHER_ADDR_LEN);
1589 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1590 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1591 			sc->sc_defaddr[3] =     /* stir it up a bit */
1592 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1593 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1594 			/*
1595 			 * Mix in the LSB as it's actually pretty significant,
1596 			 * see rdar://14076061
1597 			 */
1598 			sc->sc_defaddr[4] =
1599 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1600 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1601 			    sc->sc_defaddr[5];
1602 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1603 		}
1604 
1605 		fb = 1;
1606 		retry = 0;
1607 		lck_mtx_lock(&bridge_list_mtx);
1608 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1609 			if (_ether_cmp(sc->sc_defaddr,
1610 			    IF_LLADDR(sc2->sc_ifp)) == 0) {
1611 				retry = 1;
1612 			}
1613 		}
1614 		lck_mtx_unlock(&bridge_list_mtx);
1615 	}
1616 
1617 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1618 
1619 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1620 		brlog_link(sc);
1621 	}
1622 	error = ifnet_attach(ifp, NULL);
1623 	if (error != 0) {
1624 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1625 		goto done;
1626 	}
1627 
1628 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1629 	    IFT_ETHER);
1630 	if (error != 0) {
1631 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1632 		    error);
1633 		goto done;
1634 	}
1635 
1636 	ifnet_set_offload(ifp,
1637 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1638 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1639 	error = bridge_set_tso(sc);
1640 	if (error != 0) {
1641 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1642 		goto done;
1643 	}
1644 #if BRIDGESTP
1645 	bstp_attach(&sc->sc_stp, &bridge_ops);
1646 #endif /* BRIDGESTP */
1647 
1648 	lck_mtx_lock(&bridge_list_mtx);
1649 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1650 	lck_mtx_unlock(&bridge_list_mtx);
1651 
1652 	/* attach as ethernet */
1653 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1654 	    NULL, NULL);
1655 
1656 done:
1657 	if (error != 0) {
1658 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1659 		/* TBD: Clean up: sc, sc_rthash etc */
1660 	}
1661 
1662 	return error;
1663 }
1664 
1665 /*
1666  * bridge_clone_destroy:
1667  *
1668  *	Destroy a bridge instance.
1669  */
1670 static int
bridge_clone_destroy(struct ifnet * ifp)1671 bridge_clone_destroy(struct ifnet *ifp)
1672 {
1673 	struct bridge_softc *sc = ifp->if_softc;
1674 	struct bridge_iflist *bif;
1675 	errno_t error;
1676 
1677 	BRIDGE_LOCK(sc);
1678 	if ((sc->sc_flags & SCF_DETACHING)) {
1679 		BRIDGE_UNLOCK(sc);
1680 		return 0;
1681 	}
1682 	sc->sc_flags |= SCF_DETACHING;
1683 
1684 	bridge_ifstop(ifp, 1);
1685 
1686 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1687 
1688 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1689 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1690 
1691 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1692 	if (error != 0) {
1693 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1694 	}
1695 
1696 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1697 		bridge_delete_member(sc, bif);
1698 	}
1699 
1700 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1701 		bridge_delete_span(sc, bif);
1702 	}
1703 	BRIDGE_UNLOCK(sc);
1704 
1705 	error = ifnet_detach(ifp);
1706 	if (error != 0) {
1707 		panic("%s (%d): ifnet_detach(%p) failed %d",
1708 		    __func__, __LINE__, ifp, error);
1709 	}
1710 	return 0;
1711 }
1712 
/*
 * DRVSPEC:
 *
 *	Shared body of the driver-specific get/set ioctl cases.  It expands
 *	in a scope that must provide: `ifd' (the request header), `cmd',
 *	`bc', `args' (argument union), `error', `sc' and
 *	`bridge_control_table'.
 *
 *	Steps, each of which bails out of the macro with `error' set:
 *	  1. range-check ifd_cmd and pick the control table entry;
 *	  2. reject a get on a command that does not copy out, and a set
 *	     on a command that does;
 *	  3. require super-user when the entry has BC_F_SUSER;
 *	  4. require ifd_len to match the entry's argument size and to fit
 *	     in `args';
 *	  5. zero `args', copy the argument in when BC_F_COPYIN is set;
 *	  6. run the handler with the bridge lock held;
 *	  7. copy the argument back out when BC_F_COPYOUT is set.
 *
 *	NOTE(review): step 2 compares `cmd' with the un-suffixed
 *	SIOCGDRVSPEC/SIOCSDRVSPEC while the users of this macro switch on
 *	the 32/64 variants -- confirm the comparison holds for both ABIs.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
		error = EINVAL;                                         \
		break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
	                                                                \
	if ((cmd == SIOCGDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) ||                      \
	    (cmd == SIOCSDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) {                      \
		error = EINVAL;                                         \
		break;                                                  \
	}                                                               \
	                                                                \
	if ((bc->bc_flags & BC_F_SUSER) != 0) {                         \
		error = kauth_authorize_generic(kauth_cred_get(),       \
		    KAUTH_GENERIC_ISSUSER);                             \
		if (error != 0) {                                       \
			break;                                          \
		}                                                       \
	}                                                               \
	                                                                \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
		error = EINVAL;                                         \
		break;                                                  \
	}                                                               \
	                                                                \
	bzero(&args, sizeof (args));                                    \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) {                        \
		error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
		if (error != 0) {                                       \
			break;                                          \
		}                                                       \
	}                                                               \
	                                                                \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args);                              \
	BRIDGE_UNLOCK(sc);                                              \
	if (error != 0) {                                               \
		break;                                                  \
	}                                                               \
	                                                                \
	if ((bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
		error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
	}                                                               \
} while (0)
1759 
1760 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1761 interface_needs_input_broadcast(struct ifnet * ifp)
1762 {
1763 	/*
1764 	 * Selectively enable input broadcast only when necessary.
1765 	 * The bridge interface itself attaches a fake protocol
1766 	 * so checking for at least two protocols means that the
1767 	 * interface is being used for something besides bridging
1768 	 * and needs to see broadcast packets from other members.
1769 	 */
1770 	return if_get_protolist(ifp, NULL, 0) >= 2;
1771 }
1772 
1773 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1774 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1775 {
1776 	boolean_t       old_input_broadcast;
1777 
1778 	old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1779 	if (input_broadcast) {
1780 		bif->bif_flags |= BIFF_INPUT_BROADCAST;
1781 	} else {
1782 		bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1783 	}
1784 	return old_input_broadcast != input_broadcast;
1785 }
1786 
1787 /*
1788  * bridge_ioctl:
1789  *
1790  *	Handle a control request from the operator.
1791  */
1792 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * data)1793 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1794 {
1795 	struct bridge_softc *sc = ifp->if_softc;
1796 	struct ifreq *ifr = (struct ifreq *)data;
1797 	struct bridge_iflist *bif;
1798 	int error = 0;
1799 
1800 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1801 
1802 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1803 	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1804 	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1805 	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1806 	    (char)IOCGROUP(cmd), cmd & 0xff);
1807 
1808 	switch (cmd) {
1809 	case SIOCSIFADDR:
1810 	case SIOCAIFADDR:
1811 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1812 		break;
1813 
1814 	case SIOCGIFMEDIA32:
1815 	case SIOCGIFMEDIA64: {
1816 		struct ifmediareq *ifmr = (struct ifmediareq *)data;
1817 		user_addr_t user_addr;
1818 
1819 		user_addr = (cmd == SIOCGIFMEDIA64) ?
1820 		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
1821 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
1822 
1823 		ifmr->ifm_status = IFM_AVALID;
1824 		ifmr->ifm_mask = 0;
1825 		ifmr->ifm_count = 1;
1826 
1827 		BRIDGE_LOCK(sc);
1828 		if (!(sc->sc_flags & SCF_DETACHING) &&
1829 		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1830 			ifmr->ifm_status |= IFM_ACTIVE;
1831 			ifmr->ifm_active = ifmr->ifm_current =
1832 			    IFM_ETHER | IFM_AUTO;
1833 		} else {
1834 			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1835 		}
1836 		BRIDGE_UNLOCK(sc);
1837 
1838 		if (user_addr != USER_ADDR_NULL) {
1839 			error = copyout(&ifmr->ifm_current, user_addr,
1840 			    sizeof(int));
1841 		}
1842 		break;
1843 	}
1844 
1845 	case SIOCADDMULTI:
1846 	case SIOCDELMULTI:
1847 		break;
1848 
1849 	case SIOCSDRVSPEC32:
1850 	case SIOCGDRVSPEC32: {
1851 		union {
1852 			struct ifbreq ifbreq;
1853 			struct ifbifconf32 ifbifconf;
1854 			struct ifbareq32 ifbareq;
1855 			struct ifbaconf32 ifbaconf;
1856 			struct ifbrparam ifbrparam;
1857 			struct ifbropreq32 ifbropreq;
1858 		} args;
1859 		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1860 		const struct bridge_control *bridge_control_table =
1861 		    bridge_control_table32, *bc;
1862 
1863 		DRVSPEC;
1864 
1865 		break;
1866 	}
1867 	case SIOCSDRVSPEC64:
1868 	case SIOCGDRVSPEC64: {
1869 		union {
1870 			struct ifbreq ifbreq;
1871 			struct ifbifconf64 ifbifconf;
1872 			struct ifbareq64 ifbareq;
1873 			struct ifbaconf64 ifbaconf;
1874 			struct ifbrparam ifbrparam;
1875 			struct ifbropreq64 ifbropreq;
1876 		} args;
1877 		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1878 		const struct bridge_control *bridge_control_table =
1879 		    bridge_control_table64, *bc;
1880 
1881 		DRVSPEC;
1882 
1883 		break;
1884 	}
1885 
1886 	case SIOCSIFFLAGS:
1887 		if (!(ifp->if_flags & IFF_UP) &&
1888 		    (ifp->if_flags & IFF_RUNNING)) {
1889 			/*
1890 			 * If interface is marked down and it is running,
1891 			 * then stop and disable it.
1892 			 */
1893 			BRIDGE_LOCK(sc);
1894 			bridge_ifstop(ifp, 1);
1895 			BRIDGE_UNLOCK(sc);
1896 		} else if ((ifp->if_flags & IFF_UP) &&
1897 		    !(ifp->if_flags & IFF_RUNNING)) {
1898 			/*
1899 			 * If interface is marked up and it is stopped, then
1900 			 * start it.
1901 			 */
1902 			BRIDGE_LOCK(sc);
1903 			error = bridge_init(ifp);
1904 			BRIDGE_UNLOCK(sc);
1905 		}
1906 		break;
1907 
1908 	case SIOCSIFLLADDR:
1909 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1910 		    ifr->ifr_addr.sa_len);
1911 		if (error != 0) {
1912 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1913 			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1914 			    error);
1915 		}
1916 		break;
1917 
1918 	case SIOCSIFMTU:
1919 		if (ifr->ifr_mtu < 576) {
1920 			error = EINVAL;
1921 			break;
1922 		}
1923 		BRIDGE_LOCK(sc);
1924 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
1925 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1926 			BRIDGE_UNLOCK(sc);
1927 			break;
1928 		}
1929 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1930 			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1931 				BRIDGE_LOG(LOG_NOTICE, 0,
1932 				    "%s invalid MTU: %u(%s) != %d",
1933 				    sc->sc_ifp->if_xname,
1934 				    bif->bif_ifp->if_mtu,
1935 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
1936 				error = EINVAL;
1937 				break;
1938 			}
1939 		}
1940 		if (!error) {
1941 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1942 		}
1943 		BRIDGE_UNLOCK(sc);
1944 		break;
1945 
1946 	default:
1947 		error = ether_ioctl(ifp, cmd, data);
1948 		if (error != 0 && error != EOPNOTSUPP) {
1949 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1950 			    "ifp %s cmd 0x%08lx "
1951 			    "(%c%c [%lu] %c %lu) failed error: %d",
1952 			    ifp->if_xname, cmd,
1953 			    (cmd & IOC_IN) ? 'I' : ' ',
1954 			    (cmd & IOC_OUT) ? 'O' : ' ',
1955 			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
1956 			    cmd & 0xff, error);
1957 		}
1958 		break;
1959 	}
1960 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1961 
1962 	return error;
1963 }
1964 
1965 #if HAS_IF_CAP
1966 /*
1967  * bridge_mutecaps:
1968  *
1969  *	Clear or restore unwanted capabilities on the member interface
1970  */
1971 static void
bridge_mutecaps(struct bridge_softc * sc)1972 bridge_mutecaps(struct bridge_softc *sc)
1973 {
1974 	struct bridge_iflist *bif;
1975 	int enabled, mask;
1976 
1977 	/* Initial bitmask of capabilities to test */
1978 	mask = BRIDGE_IFCAPS_MASK;
1979 
1980 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1981 		/* Every member must support it or its disabled */
1982 		mask &= bif->bif_savedcaps;
1983 	}
1984 
1985 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1986 		enabled = bif->bif_ifp->if_capenable;
1987 		enabled &= ~BRIDGE_IFCAPS_STRIP;
1988 		/* strip off mask bits and enable them again if allowed */
1989 		enabled &= ~BRIDGE_IFCAPS_MASK;
1990 		enabled |= mask;
1991 
1992 		bridge_set_ifcap(sc, bif, enabled);
1993 	}
1994 }
1995 
1996 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)1997 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1998 {
1999 	struct ifnet *ifp = bif->bif_ifp;
2000 	struct ifreq ifr;
2001 	int error;
2002 
2003 	bzero(&ifr, sizeof(ifr));
2004 	ifr.ifr_reqcap = set;
2005 
2006 	if (ifp->if_capenable != set) {
2007 		IFF_LOCKGIANT(ifp);
2008 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2009 		IFF_UNLOCKGIANT(ifp);
2010 		if (error) {
2011 			BRIDGE_LOG(LOG_NOTICE, 0,
2012 			    "%s error setting interface capabilities on %s",
2013 			    sc->sc_ifp->if_xname, ifp->if_xname);
2014 		}
2015 	}
2016 }
2017 #endif /* HAS_IF_CAP */
2018 
2019 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2020 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2021 {
2022 	struct ifreq    ifr;
2023 
2024 	bzero(&ifr, sizeof(ifr));
2025 	ifr.ifr_reqcap = cap_enable;
2026 	return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2027 }
2028 
2029 static const char *
enable_disable_str(boolean_t enable)2030 enable_disable_str(boolean_t enable)
2031 {
2032 	return enable ? "enable" : "disable";
2033 }
2034 
2035 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2036 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2037 {
2038 	uint32_t        cap_enable;
2039 	uint32_t        cap_supported;
2040 	boolean_t       changed = FALSE;
2041 	boolean_t       lro_enabled;
2042 
2043 	cap_supported = ifnet_capabilities_supported(ifp);
2044 	if ((cap_supported & IFCAP_LRO) == 0) {
2045 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2046 		    "%s doesn't support LRO",
2047 		    ifp->if_xname);
2048 		goto done;
2049 	}
2050 	cap_enable = ifnet_capabilities_enabled(ifp);
2051 	lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2052 	if (lro_enabled != enable) {
2053 		errno_t         error;
2054 
2055 		if (enable) {
2056 			cap_enable |= IFCAP_LRO;
2057 		} else {
2058 			cap_enable &= ~IFCAP_LRO;
2059 		}
2060 		error = siocsifcap(ifp, cap_enable);
2061 		if (error != 0) {
2062 			BRIDGE_LOG(LOG_NOTICE, 0,
2063 			    "%s %s failed (cap 0x%x) %d",
2064 			    ifp->if_xname,
2065 			    enable_disable_str(enable),
2066 			    cap_enable,
2067 			    error);
2068 		} else {
2069 			changed = TRUE;
2070 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2071 			    "%s %s success (cap 0x%x)",
2072 			    ifp->if_xname,
2073 			    enable_disable_str(enable),
2074 			    cap_enable);
2075 		}
2076 	}
2077 done:
2078 	return changed;
2079 }
2080 
2081 static errno_t
bridge_set_tso(struct bridge_softc * sc)2082 bridge_set_tso(struct bridge_softc *sc)
2083 {
2084 	struct bridge_iflist *bif;
2085 	u_int32_t tso_v4_mtu;
2086 	u_int32_t tso_v6_mtu;
2087 	ifnet_offload_t offload;
2088 	errno_t error = 0;
2089 
2090 	/* By default, support TSO */
2091 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2092 	tso_v4_mtu = IP_MAXPACKET;
2093 	tso_v6_mtu = IP_MAXPACKET;
2094 
2095 	/* Use the lowest common denominator of the members */
2096 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2097 		ifnet_t ifp = bif->bif_ifp;
2098 
2099 		if (ifp == NULL) {
2100 			continue;
2101 		}
2102 
2103 		if (offload & IFNET_TSO_IPV4) {
2104 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2105 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2106 					tso_v4_mtu = ifp->if_tso_v4_mtu;
2107 				}
2108 			} else {
2109 				offload &= ~IFNET_TSO_IPV4;
2110 				tso_v4_mtu = 0;
2111 			}
2112 		}
2113 		if (offload & IFNET_TSO_IPV6) {
2114 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2115 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2116 					tso_v6_mtu = ifp->if_tso_v6_mtu;
2117 				}
2118 			} else {
2119 				offload &= ~IFNET_TSO_IPV6;
2120 				tso_v6_mtu = 0;
2121 			}
2122 		}
2123 	}
2124 
2125 	if (offload != sc->sc_ifp->if_hwassist) {
2126 		error = ifnet_set_offload(sc->sc_ifp, offload);
2127 		if (error != 0) {
2128 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2129 			    "ifnet_set_offload(%s, 0x%x) failed %d",
2130 			    sc->sc_ifp->if_xname, offload, error);
2131 			goto done;
2132 		}
2133 		/*
2134 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2135 		 * as large as the interface MTU
2136 		 */
2137 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2138 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2139 				tso_v4_mtu = sc->sc_ifp->if_mtu;
2140 			}
2141 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2142 			    tso_v4_mtu);
2143 			if (error != 0) {
2144 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2145 				    "ifnet_set_tso_mtu(%s, "
2146 				    "AF_INET, %u) failed %d",
2147 				    sc->sc_ifp->if_xname,
2148 				    tso_v4_mtu, error);
2149 				goto done;
2150 			}
2151 		}
2152 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2153 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2154 				tso_v6_mtu = sc->sc_ifp->if_mtu;
2155 			}
2156 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2157 			    tso_v6_mtu);
2158 			if (error != 0) {
2159 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2160 				    "ifnet_set_tso_mtu(%s, "
2161 				    "AF_INET6, %u) failed %d",
2162 				    sc->sc_ifp->if_xname,
2163 				    tso_v6_mtu, error);
2164 				goto done;
2165 			}
2166 		}
2167 	}
2168 done:
2169 	return error;
2170 }
2171 
2172 /*
2173  * bridge_lookup_member:
2174  *
2175  *	Lookup a bridge member interface.
2176  */
2177 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)2178 bridge_lookup_member(struct bridge_softc *sc, const char *name)
2179 {
2180 	struct bridge_iflist *bif;
2181 	struct ifnet *ifp;
2182 
2183 	BRIDGE_LOCK_ASSERT_HELD(sc);
2184 
2185 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2186 		ifp = bif->bif_ifp;
2187 		if (strcmp(ifp->if_xname, name) == 0) {
2188 			return bif;
2189 		}
2190 	}
2191 
2192 	return NULL;
2193 }
2194 
2195 /*
2196  * bridge_lookup_member_if:
2197  *
2198  *	Lookup a bridge member interface by ifnet*.
2199  */
2200 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2201 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2202 {
2203 	struct bridge_iflist *bif;
2204 
2205 	BRIDGE_LOCK_ASSERT_HELD(sc);
2206 
2207 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2208 		if (bif->bif_ifp == member_ifp) {
2209 			return bif;
2210 		}
2211 	}
2212 
2213 	return NULL;
2214 }
2215 
2216 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2217 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2218     mbuf_t *data, char **frame_ptr)
2219 {
2220 #pragma unused(protocol)
2221 	errno_t error = 0;
2222 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2223 	struct bridge_softc *sc = bif->bif_sc;
2224 	int included = 0;
2225 	size_t frmlen = 0;
2226 	mbuf_t m = *data;
2227 
2228 	if ((m->m_flags & M_PROTO1)) {
2229 		goto out;
2230 	}
2231 
2232 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2233 	    *frame_ptr <= (char *)mbuf_data(m)) {
2234 		included = 1;
2235 		frmlen = (char *)mbuf_data(m) - *frame_ptr;
2236 	}
2237 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2238 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2239 	    "frmlen %lu", sc->sc_ifp->if_xname,
2240 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2241 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
2242 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2243 	    included ? "inside" : "outside", frmlen);
2244 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2245 		brlog_mbuf(m, "bridge_iff_input[", "");
2246 		brlog_ether_header((struct ether_header *)
2247 		    (void *)*frame_ptr);
2248 		brlog_mbuf_data(m, 0, 20);
2249 	}
2250 	if (included == 0) {
2251 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2252 		goto out;
2253 	}
2254 
2255 	/* Move data pointer to start of frame to the link layer header */
2256 	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
2257 	    mbuf_len(m) + frmlen);
2258 	(void) mbuf_pkthdr_adjustlen(m, frmlen);
2259 
2260 	/* make sure we can access the ethernet header */
2261 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2262 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2263 		    "short frame %lu < %lu",
2264 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2265 		goto out;
2266 	}
2267 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2268 		error = mbuf_pullup(data, sizeof(struct ether_header));
2269 		if (error != 0) {
2270 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2271 			    "mbuf_pullup(%lu) failed %d",
2272 			    sizeof(struct ether_header),
2273 			    error);
2274 			error = EJUSTRETURN;
2275 			goto out;
2276 		}
2277 		if (m != *data) {
2278 			m = *data;
2279 			*frame_ptr = mbuf_data(m);
2280 		}
2281 	}
2282 
2283 	error = bridge_input(ifp, data);
2284 
2285 	/* Adjust packet back to original */
2286 	if (error == 0) {
2287 		/* bridge_input might have modified *data */
2288 		if (*data != m) {
2289 			m = *data;
2290 			*frame_ptr = mbuf_data(m);
2291 		}
2292 		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
2293 		    mbuf_len(m) - frmlen);
2294 		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
2295 	}
2296 
2297 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2298 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2299 		brlog_mbuf(m, "bridge_iff_input]", "");
2300 	}
2301 
2302 out:
2303 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2304 
2305 	return error;
2306 }
2307 
2308 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2309 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2310     mbuf_t *data)
2311 {
2312 #pragma unused(protocol)
2313 	errno_t error = 0;
2314 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2315 	struct bridge_softc *sc = bif->bif_sc;
2316 	mbuf_t m = *data;
2317 
2318 	if ((m->m_flags & M_PROTO1)) {
2319 		goto out;
2320 	}
2321 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2322 	    "%s from %s m 0x%llx data 0x%llx",
2323 	    sc->sc_ifp->if_xname, ifp->if_xname,
2324 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2325 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2326 
2327 	error = bridge_member_output(sc, ifp, data);
2328 	if (error != 0 && error != EJUSTRETURN) {
2329 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2330 		    "bridge_member_output failed error %d",
2331 		    error);
2332 	}
2333 out:
2334 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2335 
2336 	return error;
2337 }
2338 
2339 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2340 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2341     const struct kev_msg *event_msg)
2342 {
2343 #pragma unused(protocol)
2344 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2345 	struct bridge_softc *sc = bif->bif_sc;
2346 
2347 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2348 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2349 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2350 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2351 		    "%s event_code %u - %s",
2352 		    ifp->if_xname, event_msg->event_code,
2353 		    dlil_kev_dl_code_str(event_msg->event_code));
2354 
2355 		switch (event_msg->event_code) {
2356 		case KEV_DL_LINK_OFF:
2357 		case KEV_DL_LINK_ON: {
2358 			bridge_iflinkevent(ifp);
2359 #if BRIDGESTP
2360 			bstp_linkstate(ifp, event_msg->event_code);
2361 #endif /* BRIDGESTP */
2362 			break;
2363 		}
2364 		case KEV_DL_SIFFLAGS: {
2365 			if ((ifp->if_flags & IFF_UP) == 0) {
2366 				break;
2367 			}
2368 			if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2369 				errno_t error;
2370 
2371 				error = ifnet_set_promiscuous(ifp, 1);
2372 				if (error != 0) {
2373 					BRIDGE_LOG(LOG_NOTICE, 0,
2374 					    "ifnet_set_promiscuous (%s)"
2375 					    " failed %d", ifp->if_xname,
2376 					    error);
2377 				} else {
2378 					bif->bif_flags |= BIFF_PROMISC;
2379 				}
2380 			}
2381 			if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2382 			    (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2383 				errno_t error;
2384 
2385 				error = if_allmulti(ifp, 1);
2386 				if (error != 0) {
2387 					BRIDGE_LOG(LOG_NOTICE, 0,
2388 					    "if_allmulti (%s)"
2389 					    " failed %d", ifp->if_xname,
2390 					    error);
2391 				} else {
2392 					bif->bif_flags |= BIFF_ALL_MULTI;
2393 #ifdef XNU_PLATFORM_AppleTVOS
2394 					ip6_forwarding = 1;
2395 #endif /* XNU_PLATFORM_AppleTVOS */
2396 				}
2397 			}
2398 			break;
2399 		}
2400 		case KEV_DL_IFCAP_CHANGED: {
2401 			BRIDGE_LOCK(sc);
2402 			bridge_set_tso(sc);
2403 			BRIDGE_UNLOCK(sc);
2404 			break;
2405 		}
2406 		case KEV_DL_PROTO_DETACHED:
2407 		case KEV_DL_PROTO_ATTACHED: {
2408 			bridge_proto_attach_changed(ifp);
2409 			break;
2410 		}
2411 		default:
2412 			break;
2413 		}
2414 	}
2415 }
2416 
2417 /*
2418  * bridge_iff_detached:
2419  *
2420  *      Called when our interface filter has been detached from a
2421  *      member interface.
2422  */
2423 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2424 bridge_iff_detached(void *cookie, ifnet_t ifp)
2425 {
2426 #pragma unused(cookie)
2427 	struct bridge_iflist *bif;
2428 	struct bridge_softc *sc = ifp->if_bridge;
2429 
2430 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2431 
2432 	/* Check if the interface is a bridge member */
2433 	if (sc != NULL) {
2434 		BRIDGE_LOCK(sc);
2435 		bif = bridge_lookup_member_if(sc, ifp);
2436 		if (bif != NULL) {
2437 			bridge_delete_member(sc, bif);
2438 		}
2439 		BRIDGE_UNLOCK(sc);
2440 		return;
2441 	}
2442 	/* Check if the interface is a span port */
2443 	lck_mtx_lock(&bridge_list_mtx);
2444 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2445 		BRIDGE_LOCK(sc);
2446 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2447 		if (ifp == bif->bif_ifp) {
2448 			bridge_delete_span(sc, bif);
2449 			break;
2450 		}
2451 		BRIDGE_UNLOCK(sc);
2452 	}
2453 	lck_mtx_unlock(&bridge_list_mtx);
2454 }
2455 
2456 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2457 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2458     char *header)
2459 {
2460 #pragma unused(protocol, packet, header)
2461 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2462 	    ifp->if_xname);
2463 	return 0;
2464 }
2465 
2466 static int
bridge_attach_protocol(struct ifnet * ifp)2467 bridge_attach_protocol(struct ifnet *ifp)
2468 {
2469 	int     error;
2470 	struct ifnet_attach_proto_param reg;
2471 
2472 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2473 	bzero(&reg, sizeof(reg));
2474 	reg.input = bridge_proto_input;
2475 
2476 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2477 	if (error) {
2478 		BRIDGE_LOG(LOG_NOTICE, 0,
2479 		    "ifnet_attach_protocol(%s) failed, %d",
2480 		    ifp->if_xname, error);
2481 	}
2482 
2483 	return error;
2484 }
2485 
2486 static int
bridge_detach_protocol(struct ifnet * ifp)2487 bridge_detach_protocol(struct ifnet *ifp)
2488 {
2489 	int     error;
2490 
2491 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2492 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2493 	if (error) {
2494 		BRIDGE_LOG(LOG_NOTICE, 0,
2495 		    "ifnet_detach_protocol(%s) failed, %d",
2496 		    ifp->if_xname, error);
2497 	}
2498 
2499 	return error;
2500 }
2501 
/*
 * bridge_delete_member:
 *
 *	Delete the specified member interface.
 *
 *	The bridge lock must be held on entry (asserted below).  The lock
 *	is dropped part-way through, so that the per-interface teardown
 *	calls run without it, and is taken again just before returning.
 *
 *	`bif' is freed and the ifnet reference it carried is released, so
 *	the caller must not touch `bif' after this function returns.
 */
static void
bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
{
#if SKYWALK
	boolean_t add_netagent = FALSE;
#endif /* SKYWALK */
	uint32_t    bif_flags;
	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
	int lladdr_changed = 0, error;
	uint8_t eaddr[ETHER_ADDR_LEN];
	u_int32_t event_code = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);
	VERIFY(ifs != NULL);

	/*
	 * Remove the member from the list first so it cannot be found anymore
	 * when we release the bridge lock below
	 */
	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
		BRIDGE_XLOCK(sc);
		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
		BRIDGE_XDROP(sc);
	}
	/*
	 * MAC NAT: turn it off entirely if this member is the MAC NAT
	 * interface, otherwise only flush the entries tied to this member.
	 */
	if (sc->sc_mac_nat_bif != NULL) {
		if (bif == sc->sc_mac_nat_bif) {
			bridge_mac_nat_disable(sc);
		} else {
			bridge_mac_nat_flush_entries(sc, bif);
		}
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
		bstp_disable(&bif->bif_stp);
	}
#endif /* BRIDGESTP */

	/*
	 * If removing the interface that gave the bridge its mac address, set
	 * the mac address of the bridge to the address of the next member, or
	 * to its default address if no members are left.
	 *
	 * Only the new address is computed here (into eaddr); it is applied
	 * with ifnet_set_lladdr() further down, after the bridge lock has
	 * been dropped.
	 */
	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
		ifnet_release(sc->sc_ifaddr);
		if (TAILQ_EMPTY(&sc->sc_iflist)) {
			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = NULL;
		} else {
			struct ifnet *fif =
			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
			sc->sc_ifaddr = fif;
			ifnet_reference(fif);   /* for sc_ifaddr */
		}
		lladdr_changed = 1;
	}

#if HAS_IF_CAP
	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
#endif /* HAS_IF_CAP */

	/* a TSO failure is logged but does not stop the removal */
	error = bridge_set_tso(sc);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
	}

	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);

	KASSERT(bif->bif_addrcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));

	/*
	 * Update link status of the bridge based on its remaining members
	 */
	event_code = bridge_updatelinkstatus(sc);
	/* snapshot the flags while still locked; they drive the teardown below */
	bif_flags = bif->bif_flags;
	BRIDGE_UNLOCK(sc);

	/*
	 * only perform these steps if the interface is still attached
	 *
	 * Each step undoes a piece of setup recorded in bif_flags.  The
	 * successful ifnet_is_attached(ifs, 1) is balanced by the
	 * ifnet_decr_iorefcnt() at the end of this block.
	 */
	if (ifnet_is_attached(ifs, 1)) {
#if SKYWALK
		/* remember whether the netagent has to be added back later */
		add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;

		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
			ifnet_detach_flowswitch_nexus(ifs);
		}
#endif /* SKYWALK */
		/* disable promiscuous mode */
		if ((bif_flags & BIFF_PROMISC) != 0) {
			(void) ifnet_set_promiscuous(ifs, 0);
		}
		/* disable all multi */
		if ((bif_flags & BIFF_ALL_MULTI) != 0) {
			(void)if_allmulti(ifs, 0);
		}
#if HAS_IF_CAP
		/* re-enable any interface capabilities */
		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
#endif
		/* detach bridge "protocol" */
		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
			(void)bridge_detach_protocol(ifs);
		}
		/* detach interface filter */
		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
			iflt_detach(bif->bif_iff_ref);
		}
		/* re-enable LRO */
		if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
			(void)bridge_set_lro(ifs, TRUE);
		}
		ifnet_decr_iorefcnt(ifs);
	}

	/* apply the bridge MAC address chosen above, if it changed */
	if (lladdr_changed &&
	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
	}

	/* post the link event computed while the lock was held */
	if (event_code != 0) {
		bridge_link_event(bifp, event_code);
	}

#if BRIDGESTP
	bstp_destroy(&bif->bif_stp);    /* prepare to free */
#endif /* BRIDGESTP */

	/* bif is gone from here on; only ifs may still be used */
	kfree_type(struct bridge_iflist, bif);
	ifs->if_bridge = NULL;
#if SKYWALK
	if (add_netagent && ifnet_is_attached(ifs, 1)) {
		(void)ifnet_add_netagent(ifs);
		ifnet_decr_iorefcnt(ifs);
	}
#endif /* SKYWALK */

	ifnet_release(ifs);

	/* return to the caller with the bridge lock held again */
	BRIDGE_LOCK(sc);
}
2647 
2648 /*
2649  * bridge_delete_span:
2650  *
2651  *	Delete the specified span interface.
2652  */
2653 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2654 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2655 {
2656 	BRIDGE_LOCK_ASSERT_HELD(sc);
2657 
2658 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2659 	    ("%s: not a span interface", __func__));
2660 
2661 	ifnet_release(bif->bif_ifp);
2662 
2663 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2664 	kfree_type(struct bridge_iflist, bif);
2665 }
2666 
2667 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)2668 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2669 {
2670 	struct ifbreq *req = arg;
2671 	struct bridge_iflist *bif = NULL;
2672 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2673 	int error = 0, lladdr_changed = 0;
2674 	uint8_t eaddr[ETHER_ADDR_LEN];
2675 	struct iff_filter iff;
2676 	u_int32_t event_code = 0;
2677 	boolean_t input_broadcast;
2678 	int media_active;
2679 	boolean_t wifi_infra = FALSE;
2680 
2681 	ifs = ifunit(req->ifbr_ifsname);
2682 	if (ifs == NULL) {
2683 		return ENOENT;
2684 	}
2685 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2686 		return EINVAL;
2687 	}
2688 
2689 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2690 		return EINVAL;
2691 	}
2692 
2693 	/* If it's in the span list, it can't be a member. */
2694 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2695 		if (ifs == bif->bif_ifp) {
2696 			return EBUSY;
2697 		}
2698 	}
2699 
2700 	if (ifs->if_bridge == sc) {
2701 		return EEXIST;
2702 	}
2703 
2704 	if (ifs->if_bridge != NULL) {
2705 		return EBUSY;
2706 	}
2707 
2708 	switch (ifs->if_type) {
2709 	case IFT_ETHER:
2710 		if (strcmp(ifs->if_name, "en") == 0 &&
2711 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2712 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2713 			/* XXX is there a better way to identify Wi-Fi STA? */
2714 			wifi_infra = TRUE;
2715 		}
2716 		break;
2717 	case IFT_L2VLAN:
2718 	case IFT_IEEE8023ADLAG:
2719 		break;
2720 	case IFT_GIF:
2721 	/* currently not supported */
2722 	/* FALLTHRU */
2723 	default:
2724 		return EINVAL;
2725 	}
2726 
2727 	/* fail to add the interface if the MTU doesn't match */
2728 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2729 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2730 		    sc->sc_ifp->if_xname,
2731 		    ifs->if_xname);
2732 		return EINVAL;
2733 	}
2734 
2735 	/* there's already an interface that's doing MAC NAT */
2736 	if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2737 		return EBUSY;
2738 	}
2739 
2740 	/* prevent the interface from detaching while we add the member */
2741 	if (!ifnet_is_attached(ifs, 1)) {
2742 		return ENXIO;
2743 	}
2744 
2745 	/* allocate a new member */
2746 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2747 	bif->bif_ifp = ifs;
2748 	ifnet_reference(ifs);
2749 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2750 #if HAS_IF_CAP
2751 	bif->bif_savedcaps = ifs->if_capenable;
2752 #endif /* HAS_IF_CAP */
2753 	bif->bif_sc = sc;
2754 	if (wifi_infra) {
2755 		(void)bridge_mac_nat_enable(sc, bif);
2756 	}
2757 
2758 	if (IFNET_IS_VMNET(ifs)) {
2759 		allocate_vmnet_pf_tags();
2760 	}
2761 	/* Allow the first Ethernet member to define the MTU */
2762 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2763 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2764 	}
2765 
2766 	/*
2767 	 * Assign the interface's MAC address to the bridge if it's the first
2768 	 * member and the MAC address of the bridge has not been changed from
2769 	 * the default (randomly) generated one.
2770 	 */
2771 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2772 	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2773 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2774 		sc->sc_ifaddr = ifs;
2775 		ifnet_reference(ifs);   /* for sc_ifaddr */
2776 		lladdr_changed = 1;
2777 	}
2778 
2779 	ifs->if_bridge = sc;
2780 #if BRIDGESTP
2781 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2782 #endif /* BRIDGESTP */
2783 
2784 #if HAS_IF_CAP
2785 	/* Set interface capabilities to the intersection set of all members */
2786 	bridge_mutecaps(sc);
2787 #endif /* HAS_IF_CAP */
2788 
2789 
2790 	/*
2791 	 * Respect lock ordering with DLIL lock for the following operations
2792 	 */
2793 	BRIDGE_UNLOCK(sc);
2794 
2795 	/* enable promiscuous mode */
2796 	error = ifnet_set_promiscuous(ifs, 1);
2797 	switch (error) {
2798 	case 0:
2799 		bif->bif_flags |= BIFF_PROMISC;
2800 		break;
2801 	case ENETDOWN:
2802 	case EPWROFF:
2803 		BRIDGE_LOG(LOG_NOTICE, 0,
2804 		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2805 		    ifs->if_xname, error);
2806 		/* Ignore error when device is not up */
2807 		error = 0;
2808 		break;
2809 	default:
2810 		BRIDGE_LOG(LOG_NOTICE, 0,
2811 		    "ifnet_set_promiscuous(%s) failed %d",
2812 		    ifs->if_xname, error);
2813 		BRIDGE_LOCK(sc);
2814 		goto out;
2815 	}
2816 	if (wifi_infra) {
2817 		int this_error;
2818 
2819 		/* Wi-Fi doesn't really support promiscuous, set allmulti */
2820 		bif->bif_flags |= BIFF_WIFI_INFRA;
2821 		this_error = if_allmulti(ifs, 1);
2822 		if (this_error == 0) {
2823 			bif->bif_flags |= BIFF_ALL_MULTI;
2824 #ifdef XNU_PLATFORM_AppleTVOS
2825 			ip6_forwarding = 1;
2826 #endif /* XNU_PLATFORM_AppleTVOS */
2827 		} else {
2828 			BRIDGE_LOG(LOG_NOTICE, 0,
2829 			    "if_allmulti(%s) failed %d, ignoring",
2830 			    ifs->if_xname, this_error);
2831 		}
2832 	}
2833 #if SKYWALK
2834 	/* ensure that the flowswitch is present for native interface */
2835 	if (SKYWALK_NATIVE(ifs)) {
2836 		if (ifnet_attach_flowswitch_nexus(ifs)) {
2837 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2838 		}
2839 	}
2840 	/* remove the netagent on the flowswitch (rdar://75050182) */
2841 	if (if_is_fsw_netagent_enabled()) {
2842 		(void)ifnet_remove_netagent(ifs);
2843 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2844 	}
2845 #endif /* SKYWALK */
2846 
2847 	/*
2848 	 * install an interface filter
2849 	 */
2850 	memset(&iff, 0, sizeof(struct iff_filter));
2851 	iff.iff_cookie = bif;
2852 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2853 	iff.iff_input = bridge_iff_input;
2854 	iff.iff_output = bridge_iff_output;
2855 	iff.iff_event = bridge_iff_event;
2856 	iff.iff_detached = bridge_iff_detached;
2857 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2858 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2859 	if (error != 0) {
2860 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2861 		BRIDGE_LOCK(sc);
2862 		goto out;
2863 	}
2864 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
2865 
2866 	/*
2867 	 * install a dummy "bridge" protocol
2868 	 */
2869 	if ((error = bridge_attach_protocol(ifs)) != 0) {
2870 		if (error != 0) {
2871 			BRIDGE_LOG(LOG_NOTICE, 0,
2872 			    "bridge_attach_protocol failed %d", error);
2873 			BRIDGE_LOCK(sc);
2874 			goto out;
2875 		}
2876 	}
2877 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
2878 
2879 	if (lladdr_changed &&
2880 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2881 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2882 	}
2883 
2884 	media_active = interface_media_active(ifs);
2885 
2886 	/* disable LRO */
2887 	if (bridge_set_lro(ifs, FALSE)) {
2888 		bif->bif_flags |= BIFF_LRO_DISABLED;
2889 	}
2890 
2891 	/*
2892 	 * No failures past this point. Add the member to the list.
2893 	 */
2894 	BRIDGE_LOCK(sc);
2895 	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2896 	BRIDGE_XLOCK(sc);
2897 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2898 	BRIDGE_XDROP(sc);
2899 
2900 	/* cache the member link status */
2901 	if (media_active != 0) {
2902 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2903 	} else {
2904 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2905 	}
2906 
2907 	/* the new member may change the link status of the bridge interface */
2908 	event_code = bridge_updatelinkstatus(sc);
2909 
2910 	/* check whether we need input broadcast or not */
2911 	input_broadcast = interface_needs_input_broadcast(ifs);
2912 	bif_set_input_broadcast(bif, input_broadcast);
2913 	BRIDGE_UNLOCK(sc);
2914 
2915 	if (event_code != 0) {
2916 		bridge_link_event(bifp, event_code);
2917 	}
2918 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2919 	    "%s input broadcast %s", ifs->if_xname,
2920 	    input_broadcast ? "ENABLED" : "DISABLED");
2921 
2922 	BRIDGE_LOCK(sc);
2923 	bridge_set_tso(sc);
2924 
2925 out:
2926 	/* allow the interface to detach */
2927 	ifnet_decr_iorefcnt(ifs);
2928 
2929 	if (error != 0) {
2930 		if (bif != NULL) {
2931 			bridge_delete_member(sc, bif);
2932 		}
2933 	} else if (IFNET_IS_VMNET(ifs)) {
2934 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2935 	}
2936 
2937 	return error;
2938 }
2939 
2940 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2941 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2942 {
2943 	struct ifbreq *req = arg;
2944 	struct bridge_iflist *bif;
2945 
2946 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2947 	if (bif == NULL) {
2948 		return ENOENT;
2949 	}
2950 
2951 	bridge_delete_member(sc, bif);
2952 
2953 	return 0;
2954 }
2955 
/*
 * bridge_ioctl_purge:
 *
 *	No-op handler: both arguments are ignored and 0 is always
 *	returned.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc)
#pragma unused(arg)
	return 0;
}
2962 
2963 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2964 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2965 {
2966 	struct ifbreq *req = arg;
2967 	struct bridge_iflist *bif;
2968 
2969 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2970 	if (bif == NULL) {
2971 		return ENOENT;
2972 	}
2973 
2974 	struct bstp_port *bp;
2975 
2976 	bp = &bif->bif_stp;
2977 	req->ifbr_state = bp->bp_state;
2978 	req->ifbr_priority = bp->bp_priority;
2979 	req->ifbr_path_cost = bp->bp_path_cost;
2980 	req->ifbr_proto = bp->bp_protover;
2981 	req->ifbr_role = bp->bp_role;
2982 	req->ifbr_stpflags = bp->bp_flags;
2983 	req->ifbr_ifsflags = bif->bif_ifflags;
2984 
2985 	/* Copy STP state options as flags */
2986 	if (bp->bp_operedge) {
2987 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2988 	}
2989 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2990 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2991 	}
2992 	if (bp->bp_ptp_link) {
2993 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2994 	}
2995 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2996 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2997 	}
2998 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2999 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3000 	}
3001 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3002 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3003 	}
3004 
3005 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3006 	req->ifbr_addrcnt = bif->bif_addrcnt;
3007 	req->ifbr_addrmax = bif->bif_addrmax;
3008 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
3009 
3010 	return 0;
3011 }
3012 
3013 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)3014 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
3015 {
3016 	struct ifbreq *req = arg;
3017 	struct bridge_iflist *bif;
3018 #if BRIDGESTP
3019 	struct bstp_port *bp;
3020 	int error;
3021 #endif /* BRIDGESTP */
3022 
3023 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3024 	if (bif == NULL) {
3025 		return ENOENT;
3026 	}
3027 
3028 	if (req->ifbr_ifsflags & IFBIF_SPAN) {
3029 		/* SPAN is readonly */
3030 		return EINVAL;
3031 	}
3032 #define _EXCLUSIVE_FLAGS        (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
3033 	if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
3034 		/* can't specify both MAC-NAT and checksum offload */
3035 		return EINVAL;
3036 	}
3037 	if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
3038 		errno_t error;
3039 
3040 		error = bridge_mac_nat_enable(sc, bif);
3041 		if (error != 0) {
3042 			return error;
3043 		}
3044 	} else if (sc->sc_mac_nat_bif == bif) {
3045 		bridge_mac_nat_disable(sc);
3046 	}
3047 
3048 
3049 #if BRIDGESTP
3050 	if (req->ifbr_ifsflags & IFBIF_STP) {
3051 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3052 			error = bstp_enable(&bif->bif_stp);
3053 			if (error) {
3054 				return error;
3055 			}
3056 		}
3057 	} else {
3058 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3059 			bstp_disable(&bif->bif_stp);
3060 		}
3061 	}
3062 
3063 	/* Pass on STP flags */
3064 	bp = &bif->bif_stp;
3065 	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3066 	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3067 	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3068 	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3069 #else /* !BRIDGESTP */
3070 	if (req->ifbr_ifsflags & IFBIF_STP) {
3071 		return EOPNOTSUPP;
3072 	}
3073 #endif /* !BRIDGESTP */
3074 
3075 	/* Save the bits relating to the bridge */
3076 	bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
3077 
3078 
3079 	return 0;
3080 }
3081 
3082 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)3083 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
3084 {
3085 	struct ifbrparam *param = arg;
3086 
3087 	sc->sc_brtmax = param->ifbrp_csize;
3088 	bridge_rttrim(sc);
3089 	return 0;
3090 }
3091 
3092 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)3093 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
3094 {
3095 	struct ifbrparam *param = arg;
3096 
3097 	param->ifbrp_csize = sc->sc_brtmax;
3098 
3099 	return 0;
3100 }
3101 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32() and bridge_ioctl_gifs64().
 *	The expanding function must have `sc', `bifc' and `error' in
 *	scope.
 *
 *	If bifc->ifbic_len is 0 the required buffer size is stored there
 *	and the expanding function returns 0.  Otherwise one ifbreq per
 *	member and per span interface is copied out to bifc->ifbic_req,
 *	as many as fit, and bifc->ifbic_len is set to the number of bytes
 *	produced.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout, so the lists may change in between; `len' is therefore
 *	clamped to the allocated size before the lists are walked again.
 *	The allocation does not use Z_NOFAIL, so a NULL result is handled
 *	by returning ENOMEM from the expanding function instead of
 *	writing through a NULL buffer.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
	        count++;                                                \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
	        count++;                                                \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* allocation failed; the bridge lock is held again here */     \
	if (outbuf == NULL && buflen != 0) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	/* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq);               \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);      \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
3164 
3165 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * arg)3166 bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
3167 {
3168 	struct ifbifconf64 *bifc = arg;
3169 	int error = 0;
3170 
3171 	BRIDGE_IOCTL_GIFS;
3172 
3173 	return error;
3174 }
3175 
3176 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * arg)3177 bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
3178 {
3179 	struct ifbifconf32 *bifc = arg;
3180 	int error = 0;
3181 
3182 	BRIDGE_IOCTL_GIFS;
3183 
3184 	return error;
3185 }
3186 
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32() and bridge_ioctl_rts64().
 *	The expanding function must have `sc', `bac', `bareq' and `error'
 *	in scope.  The macro always returns from the expanding function.
 *
 *	One `bareq' record per entry of sc->sc_rtlist is copied out to
 *	bac->ifbac_req, as many as fit in bac->ifbac_len bytes, and
 *	bac->ifbac_len is set to the number of bytes produced.
 *
 *	ifba_expire is reset for every entry, so a dynamic entry whose
 *	expiry time has already passed reports 0 rather than the value
 *	left over from the previous entry.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout.  The allocation does not use Z_NOFAIL, so a NULL result
 *	is handled by returning ENOMEM instead of writing through a NULL
 *	buffer.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);            \
	BRIDGE_LOCK(sc);                                                    \
	/* allocation failed; the bridge lock is held again here */         \
	if (outbuf == NULL && buflen != 0)                                  \
	        return (ENOMEM);                                            \
                                                                            \
	count = 0;                                                          \
	buf = outbuf;                                                       \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	        /* only a dynamic entry still in the future has a lifetime */ \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire)                          \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);    \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3241 
3242 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * arg)3243 bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3244 {
3245 	struct ifbaconf64 *bac = arg;
3246 	struct ifbareq64 bareq;
3247 	int error = 0;
3248 
3249 	BRIDGE_IOCTL_RTS;
3250 	return error;
3251 }
3252 
3253 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * arg)3254 bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3255 {
3256 	struct ifbaconf32 *bac = arg;
3257 	struct ifbareq32 bareq;
3258 	int error = 0;
3259 
3260 	BRIDGE_IOCTL_RTS;
3261 	return error;
3262 }
3263 
3264 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3265 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3266 {
3267 	struct ifbareq32 *req = arg;
3268 	struct bridge_iflist *bif;
3269 	int error;
3270 
3271 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3272 	if (bif == NULL) {
3273 		return ENOENT;
3274 	}
3275 
3276 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3277 	    req->ifba_flags);
3278 
3279 	return error;
3280 }
3281 
3282 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3283 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3284 {
3285 	struct ifbareq64 *req = arg;
3286 	struct bridge_iflist *bif;
3287 	int error;
3288 
3289 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3290 	if (bif == NULL) {
3291 		return ENOENT;
3292 	}
3293 
3294 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3295 	    req->ifba_flags);
3296 
3297 	return error;
3298 }
3299 
3300 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3301 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3302 {
3303 	struct ifbrparam *param = arg;
3304 
3305 	sc->sc_brttimeout = param->ifbrp_ctime;
3306 	return 0;
3307 }
3308 
3309 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3310 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3311 {
3312 	struct ifbrparam *param = arg;
3313 
3314 	param->ifbrp_ctime = sc->sc_brttimeout;
3315 	return 0;
3316 }
3317 
3318 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3319 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3320 {
3321 	struct ifbareq32 *req = arg;
3322 
3323 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3324 }
3325 
3326 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3327 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3328 {
3329 	struct ifbareq64 *req = arg;
3330 
3331 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3332 }
3333 
3334 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3335 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3336 {
3337 	struct ifbreq *req = arg;
3338 
3339 	bridge_rtflush(sc, req->ifbr_ifsflags);
3340 	return 0;
3341 }
3342 
3343 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3344 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3345 {
3346 	struct ifbrparam *param = arg;
3347 	struct bstp_state *bs = &sc->sc_stp;
3348 
3349 	param->ifbrp_prio = bs->bs_bridge_priority;
3350 	return 0;
3351 }
3352 
3353 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)3354 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3355 {
3356 #if BRIDGESTP
3357 	struct ifbrparam *param = arg;
3358 
3359 	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3360 #else /* !BRIDGESTP */
3361 #pragma unused(sc, arg)
3362 	return EOPNOTSUPP;
3363 #endif /* !BRIDGESTP */
3364 }
3365 
3366 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3367 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3368 {
3369 	struct ifbrparam *param = arg;
3370 	struct bstp_state *bs = &sc->sc_stp;
3371 
3372 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3373 	return 0;
3374 }
3375 
3376 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)3377 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3378 {
3379 #if BRIDGESTP
3380 	struct ifbrparam *param = arg;
3381 
3382 	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3383 #else /* !BRIDGESTP */
3384 #pragma unused(sc, arg)
3385 	return EOPNOTSUPP;
3386 #endif /* !BRIDGESTP */
3387 }
3388 
3389 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3390 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3391 {
3392 	struct ifbrparam *param;
3393 	struct bstp_state *bs;
3394 
3395 	param = arg;
3396 	bs = &sc->sc_stp;
3397 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3398 	return 0;
3399 }
3400 
3401 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)3402 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3403 {
3404 #if BRIDGESTP
3405 	struct ifbrparam *param = arg;
3406 
3407 	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3408 #else /* !BRIDGESTP */
3409 #pragma unused(sc, arg)
3410 	return EOPNOTSUPP;
3411 #endif /* !BRIDGESTP */
3412 }
3413 
3414 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3415 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3416 {
3417 	struct ifbrparam *param;
3418 	struct bstp_state *bs;
3419 
3420 	param = arg;
3421 	bs = &sc->sc_stp;
3422 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3423 	return 0;
3424 }
3425 
3426 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)3427 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3428 {
3429 #if BRIDGESTP
3430 	struct ifbrparam *param = arg;
3431 
3432 	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3433 #else /* !BRIDGESTP */
3434 #pragma unused(sc, arg)
3435 	return EOPNOTSUPP;
3436 #endif /* !BRIDGESTP */
3437 }
3438 
/*
 * bridge_ioctl_sifprio:
 *
 *	Set the spanning tree port priority of the member interface named
 *	in the request.  Returns ENOENT if the interface is not a member,
 *	EOPNOTSUPP when spanning tree support is compiled out, otherwise
 *	whatever bstp_set_port_priority() returns.
 */
static int
bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
{
#if !BRIDGESTP
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#else /* BRIDGESTP */
	struct ifbreq *req = arg;
	struct bridge_iflist *member;

	member = bridge_lookup_member(sc, req->ifbr_ifsname);
	if (member != NULL) {
		return bstp_set_port_priority(&member->bif_stp,
		           req->ifbr_priority);
	}
	return ENOENT;
#endif /* BRIDGESTP */
}
3457 
/*
 * bridge_ioctl_sifcost:
 *
 *	Set the spanning tree path cost of the member interface named in
 *	the request.  Returns ENOENT if the interface is not a member,
 *	EOPNOTSUPP when spanning tree support is compiled out, otherwise
 *	whatever bstp_set_path_cost() returns.
 */
static int
bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
{
#if !BRIDGESTP
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#else /* BRIDGESTP */
	struct ifbreq *req = arg;
	struct bridge_iflist *member;

	member = bridge_lookup_member(sc, req->ifbr_ifsname);
	if (member != NULL) {
		return bstp_set_path_cost(&member->bif_stp,
		           req->ifbr_path_cost);
	}
	return ENOENT;
#endif /* BRIDGESTP */
}
3476 
3477 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3478 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3479 {
3480 	struct ifbrparam *param = arg;
3481 
3482 	param->ifbrp_filter = sc->sc_filter_flags;
3483 
3484 	return 0;
3485 }
3486 
3487 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3488 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3489 {
3490 	struct ifbrparam *param = arg;
3491 
3492 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3493 		return EINVAL;
3494 	}
3495 
3496 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3497 		return EINVAL;
3498 	}
3499 
3500 	sc->sc_filter_flags = param->ifbrp_filter;
3501 
3502 	return 0;
3503 }
3504 
3505 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3506 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3507 {
3508 	struct ifbreq *req = arg;
3509 	struct bridge_iflist *bif;
3510 
3511 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3512 	if (bif == NULL) {
3513 		return ENOENT;
3514 	}
3515 
3516 	bif->bif_addrmax = req->ifbr_addrmax;
3517 	return 0;
3518 }
3519 
3520 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3521 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3522 {
3523 	struct ifbreq *req = arg;
3524 	struct bridge_iflist *bif = NULL;
3525 	struct ifnet *ifs;
3526 
3527 	ifs = ifunit(req->ifbr_ifsname);
3528 	if (ifs == NULL) {
3529 		return ENOENT;
3530 	}
3531 
3532 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3533 		return EINVAL;
3534 	}
3535 
3536 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3537 	if (ifs == bif->bif_ifp) {
3538 		return EBUSY;
3539 	}
3540 
3541 	if (ifs->if_bridge != NULL) {
3542 		return EBUSY;
3543 	}
3544 
3545 	switch (ifs->if_type) {
3546 	case IFT_ETHER:
3547 	case IFT_L2VLAN:
3548 	case IFT_IEEE8023ADLAG:
3549 		break;
3550 	case IFT_GIF:
3551 	/* currently not supported */
3552 	/* FALLTHRU */
3553 	default:
3554 		return EINVAL;
3555 	}
3556 
3557 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3558 
3559 	bif->bif_ifp = ifs;
3560 	bif->bif_ifflags = IFBIF_SPAN;
3561 
3562 	ifnet_reference(bif->bif_ifp);
3563 
3564 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3565 
3566 	return 0;
3567 }
3568 
3569 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3570 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3571 {
3572 	struct ifbreq *req = arg;
3573 	struct bridge_iflist *bif;
3574 	struct ifnet *ifs;
3575 
3576 	ifs = ifunit(req->ifbr_ifsname);
3577 	if (ifs == NULL) {
3578 		return ENOENT;
3579 	}
3580 
3581 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3582 	if (ifs == bif->bif_ifp) {
3583 		break;
3584 	}
3585 
3586 	if (bif == NULL) {
3587 		return ENOENT;
3588 	}
3589 
3590 	bridge_delete_span(sc, bif);
3591 
3592 	return 0;
3593 }
3594 
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Common body of the 32-bit and 64-bit "get bridge parameters"
 *	ioctl handlers.  Expects `sc' (the bridge softc) and `req' (the
 *	request structure to fill in) to be in scope at the expansion
 *	site.  The max age, hello time and forward delay are reported
 *	shifted right by 8 bits; the root port is reported as 0 when
 *	there is none.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *stp_state = &sc->sc_stp;                     \
	struct bstp_port *rport = stp_state->bs_root_port;              \
                                                                        \
	req->ifbop_maxage = stp_state->bs_bridge_max_age >> 8;          \
	req->ifbop_hellotime = stp_state->bs_bridge_htime >> 8;         \
	req->ifbop_fwddelay = stp_state->bs_bridge_fdelay >> 8;         \
	req->ifbop_root_port =                                          \
	    (rport != NULL) ? rport->bp_ifp->if_index : 0;              \
                                                                        \
	req->ifbop_holdcount = stp_state->bs_txholdcount;               \
	req->ifbop_priority = stp_state->bs_bridge_priority;            \
	req->ifbop_protocol = stp_state->bs_protover;                   \
	req->ifbop_root_path_cost = stp_state->bs_root_pv.pv_cost;      \
	req->ifbop_bridgeid = stp_state->bs_bridge_pv.pv_dbridge_id;    \
	req->ifbop_designated_root = stp_state->bs_root_pv.pv_root_id;  \
	req->ifbop_designated_bridge =                                  \
	    stp_state->bs_root_pv.pv_dbridge_id;                        \
	req->ifbop_last_tc_time.tv_sec =                                \
	    stp_state->bs_last_tc_time.tv_sec;                          \
	req->ifbop_last_tc_time.tv_usec =                               \
	    stp_state->bs_last_tc_time.tv_usec;                         \
} while (0)
3619 
3620 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * arg)3621 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3622 {
3623 	struct ifbropreq32 *req = arg;
3624 
3625 	BRIDGE_IOCTL_GBPARAM;
3626 	return 0;
3627 }
3628 
3629 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * arg)3630 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3631 {
3632 	struct ifbropreq64 *req = arg;
3633 
3634 	BRIDGE_IOCTL_GBPARAM;
3635 	return 0;
3636 }
3637 
3638 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3639 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3640 {
3641 	struct ifbrparam *param = arg;
3642 
3643 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3644 	return 0;
3645 }
3646 
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Common body of the 32-bit and 64-bit "get member STP state" ioctl
 *	handlers.  Expects `sc' (the bridge softc), `bifstp' (the request
 *	structure) and `error' (an int) to be in scope at the expansion
 *	site, and returns from the enclosing function on every path.
 *
 *	If the caller passes a zero ifbpstp_len, only the required buffer
 *	size is reported.  Otherwise one ifbpstpreq per IFBIF_STP member
 *	is staged in a kernel buffer and copied out to ifbpstp_req.
 *
 *	The bridge lock is dropped around the allocation and the copyout,
 *	so the member list may change in between; the fill loop is bounded
 *	by the size computed before the lock was dropped.
 *
 *	Returns ENOMEM if the staging buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0)                \
	                count++;                                        \
	}                                                               \
                                                                        \
	buflen = sizeof (bpreq) * count;                                \
	if (bifstp->ifbpstp_len == 0) {                                 \
	        bifstp->ifbpstp_len = buflen;                           \
	        return (0);                                             \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* Z_WAITOK without Z_NOFAIL may fail; never write via NULL */  \
	if (outbuf == NULL && buflen != 0) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (bpreq))                               \
	                break;                                          \
                                                                        \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0)                \
	                continue;                                       \
                                                                        \
	        bp = &bif->bif_stp;                                     \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
                                                                        \
	        memcpy(buf, &bpreq, sizeof (bpreq));                    \
	        count++;                                                \
	        buf += sizeof (bpreq);                                  \
	        len -= sizeof (bpreq);                                  \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3702 
3703 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * arg)3704 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3705 {
3706 	struct ifbpstpconf32 *bifstp = arg;
3707 	int error = 0;
3708 
3709 	BRIDGE_IOCTL_GIFSSTP;
3710 	return error;
3711 }
3712 
3713 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * arg)3714 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3715 {
3716 	struct ifbpstpconf64 *bifstp = arg;
3717 	int error = 0;
3718 
3719 	BRIDGE_IOCTL_GIFSSTP;
3720 	return error;
3721 }
3722 
/*
 * bridge_ioctl_sproto:
 *
 *	Set the spanning tree protocol from the caller's ifbrparam.
 *	Returns EOPNOTSUPP when spanning tree support is compiled out,
 *	otherwise whatever bstp_set_protocol() returns.
 */
static int
bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
{
#if !BRIDGESTP
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#else /* BRIDGESTP */
	struct ifbrparam *req = arg;

	return bstp_set_protocol(&sc->sc_stp, req->ifbrp_proto);
#endif /* BRIDGESTP */
}
3735 
/*
 * bridge_ioctl_stxhc:
 *
 *	Set the spanning tree transmit hold count from the caller's
 *	ifbrparam.  Returns EOPNOTSUPP when spanning tree support is
 *	compiled out, otherwise whatever bstp_set_holdcount() returns.
 */
static int
bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
{
#if !BRIDGESTP
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#else /* BRIDGESTP */
	struct ifbrparam *req = arg;

	return bstp_set_holdcount(&sc->sc_stp, req->ifbrp_txhc);
#endif /* BRIDGESTP */
}
3748 
3749 
3750 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3751 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3752 {
3753 	struct ifbrhostfilter *req = arg;
3754 	struct bridge_iflist *bif;
3755 
3756 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3757 	if (bif == NULL) {
3758 		return ENOENT;
3759 	}
3760 
3761 	bzero(req, sizeof(struct ifbrhostfilter));
3762 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3763 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3764 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3765 		    ETHER_ADDR_LEN);
3766 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3767 	}
3768 	return 0;
3769 }
3770 
3771 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3772 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3773 {
3774 	struct ifbrhostfilter *req = arg;
3775 	struct bridge_iflist *bif;
3776 
3777 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3778 	if (bif == NULL) {
3779 		return ENOENT;
3780 	}
3781 
3782 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3783 		bif->bif_flags |= BIFF_HOST_FILTER;
3784 
3785 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3786 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3787 			    ETHER_ADDR_LEN);
3788 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3789 			    ETHER_ADDR_LEN) != 0) {
3790 				bif->bif_flags |= BIFF_HF_HWSRC;
3791 			} else {
3792 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3793 			}
3794 		}
3795 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3796 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3797 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3798 				bif->bif_flags |= BIFF_HF_IPSRC;
3799 			} else {
3800 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3801 			}
3802 		}
3803 	} else {
3804 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3805 		    BIFF_HF_IPSRC);
3806 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3807 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3808 	}
3809 
3810 	return 0;
3811 }
3812 
3813 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3814 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3815     unsigned int * count_p, char *buf, unsigned int *len_p)
3816 {
3817 	unsigned int            count = *count_p;
3818 	struct ifbrmne          ifbmne;
3819 	unsigned int            len = *len_p;
3820 	struct mac_nat_entry    *mne;
3821 	unsigned long           now;
3822 
3823 	bzero(&ifbmne, sizeof(ifbmne));
3824 	LIST_FOREACH(mne, list, mne_list) {
3825 		if (len < sizeof(ifbmne)) {
3826 			break;
3827 		}
3828 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3829 		    "%s", mne->mne_bif->bif_ifp->if_xname);
3830 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3831 		    sizeof(ifbmne.ifbmne_mac));
3832 		now = (unsigned long) net_uptime();
3833 		if (now < mne->mne_expire) {
3834 			ifbmne.ifbmne_expire = mne->mne_expire - now;
3835 		} else {
3836 			ifbmne.ifbmne_expire = 0;
3837 		}
3838 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3839 			ifbmne.ifbmne_af = AF_INET6;
3840 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3841 		} else {
3842 			ifbmne.ifbmne_af = AF_INET;
3843 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
3844 		}
3845 		memcpy(buf, &ifbmne, sizeof(ifbmne));
3846 		count++;
3847 		buf += sizeof(ifbmne);
3848 		len -= sizeof(ifbmne);
3849 	}
3850 	*count_p = count;
3851 	*len_p = len;
3852 	return buf;
3853 }
3854 
3855 /*
3856  * bridge_ioctl_gmnelist()
3857  *   Perform the get mac_nat_entry list ioctl.
3858  *
3859  * Note:
3860  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3861  *   field size/layout except for the last field ifbml_buf, the user-supplied
3862  *   buffer pointer. That is passed in separately via the 'user_addr'
3863  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3864  */
3865 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3866 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3867     user_addr_t user_addr)
3868 {
3869 	unsigned int            count;
3870 	char                    *buf;
3871 	int                     error = 0;
3872 	char                    *outbuf = NULL;
3873 	struct mac_nat_entry    *mne;
3874 	unsigned int            buflen;
3875 	unsigned int            len;
3876 
3877 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
3878 	count = 0;
3879 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3880 		count++;
3881 	}
3882 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3883 		count++;
3884 	}
3885 	buflen = sizeof(struct ifbrmne) * count;
3886 	if (buflen == 0 || mnl->ifbml_len == 0) {
3887 		mnl->ifbml_len = buflen;
3888 		return error;
3889 	}
3890 	BRIDGE_UNLOCK(sc);
3891 	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3892 	BRIDGE_LOCK(sc);
3893 	count = 0;
3894 	buf = outbuf;
3895 	len = min(mnl->ifbml_len, buflen);
3896 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3897 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3898 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
3899 	BRIDGE_UNLOCK(sc);
3900 	error = copyout(outbuf, user_addr, mnl->ifbml_len);
3901 	kfree_data(outbuf, buflen);
3902 	BRIDGE_LOCK(sc);
3903 	return error;
3904 }
3905 
3906 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3907 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3908 {
3909 	struct ifbrmnelist64 *mnl = arg;
3910 
3911 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3912 }
3913 
3914 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3915 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3916 {
3917 	struct ifbrmnelist32 *mnl = arg;
3918 
3919 	return bridge_ioctl_gmnelist(sc, arg,
3920 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
3921 }
3922 
3923 /*
3924  * bridge_ioctl_gifstats()
3925  *   Return per-member stats.
3926  *
3927  * Note:
3928  *   The ifbrmreq32 and ifbrmreq64 structures have the same
3929  *   field size/layout except for the last field brmr_buf, the user-supplied
3930  *   buffer pointer. That is passed in separately via the 'user_addr'
3931  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3932  */
3933 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3934 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3935     user_addr_t user_addr)
3936 {
3937 	struct bridge_iflist    *bif;
3938 	int                     error = 0;
3939 	unsigned int            buflen;
3940 
3941 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3942 	if (bif == NULL) {
3943 		error = ENOENT;
3944 		goto done;
3945 	}
3946 
3947 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3948 	if (buflen == 0 || mreq->brmr_len == 0) {
3949 		mreq->brmr_len = buflen;
3950 		goto done;
3951 	}
3952 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3953 		error = ENOBUFS;
3954 		goto done;
3955 	}
3956 	mreq->brmr_len = buflen;
3957 	error = copyout(&bif->bif_stats, user_addr, buflen);
3958 done:
3959 	return error;
3960 }
3961 
3962 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3963 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3964 {
3965 	struct ifbrmreq32 *mreq = arg;
3966 
3967 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3968 }
3969 
3970 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3971 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3972 {
3973 	struct ifbrmreq64 *mreq = arg;
3974 
3975 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3976 }
3977 
3978 /*
3979  * bridge_proto_attach_changed
3980  *
3981  *	Called when protocol attachment on the interface changes.
3982  */
3983 static void
bridge_proto_attach_changed(struct ifnet * ifp)3984 bridge_proto_attach_changed(struct ifnet *ifp)
3985 {
3986 	boolean_t changed = FALSE;
3987 	struct bridge_iflist *bif;
3988 	boolean_t input_broadcast;
3989 	struct bridge_softc *sc = ifp->if_bridge;
3990 
3991 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3992 	if (sc == NULL) {
3993 		return;
3994 	}
3995 	input_broadcast = interface_needs_input_broadcast(ifp);
3996 	BRIDGE_LOCK(sc);
3997 	bif = bridge_lookup_member_if(sc, ifp);
3998 	if (bif != NULL) {
3999 		changed = bif_set_input_broadcast(bif, input_broadcast);
4000 	}
4001 	BRIDGE_UNLOCK(sc);
4002 	if (changed) {
4003 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4004 		    "%s input broadcast %s", ifp->if_xname,
4005 		    input_broadcast ? "ENABLED" : "DISABLED");
4006 	}
4007 	return;
4008 }
4009 
4010 /*
4011  * interface_media_active:
4012  *
4013  *	Tells if an interface media is active.
4014  */
4015 static int
interface_media_active(struct ifnet * ifp)4016 interface_media_active(struct ifnet *ifp)
4017 {
4018 	struct ifmediareq   ifmr;
4019 	int status = 0;
4020 
4021 	bzero(&ifmr, sizeof(ifmr));
4022 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4023 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4024 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4025 		}
4026 	}
4027 
4028 	return status;
4029 }
4030 
4031 /*
4032  * bridge_updatelinkstatus:
4033  *
4034  *      Update the media active status of the bridge based on the
4035  *	media active status of its member.
4036  *	If changed, return the corresponding onf/off link event.
4037  */
4038 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4039 bridge_updatelinkstatus(struct bridge_softc *sc)
4040 {
4041 	struct bridge_iflist *bif;
4042 	int active_member = 0;
4043 	u_int32_t event_code = 0;
4044 
4045 	BRIDGE_LOCK_ASSERT_HELD(sc);
4046 
4047 	/*
4048 	 * Find out if we have an active interface
4049 	 */
4050 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4051 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4052 			active_member = 1;
4053 			break;
4054 		}
4055 	}
4056 
4057 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4058 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
4059 		event_code = KEV_DL_LINK_ON;
4060 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4061 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4062 		event_code = KEV_DL_LINK_OFF;
4063 	}
4064 
4065 	return event_code;
4066 }
4067 
4068 /*
4069  * bridge_iflinkevent:
4070  */
4071 static void
bridge_iflinkevent(struct ifnet * ifp)4072 bridge_iflinkevent(struct ifnet *ifp)
4073 {
4074 	struct bridge_softc *sc = ifp->if_bridge;
4075 	struct bridge_iflist *bif;
4076 	u_int32_t event_code = 0;
4077 	int media_active;
4078 
4079 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4080 
4081 	/* Check if the interface is a bridge member */
4082 	if (sc == NULL) {
4083 		return;
4084 	}
4085 
4086 	media_active = interface_media_active(ifp);
4087 	BRIDGE_LOCK(sc);
4088 	bif = bridge_lookup_member_if(sc, ifp);
4089 	if (bif != NULL) {
4090 		if (media_active) {
4091 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4092 		} else {
4093 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4094 		}
4095 		if (sc->sc_mac_nat_bif != NULL) {
4096 			bridge_mac_nat_flush_entries(sc, bif);
4097 		}
4098 
4099 		event_code = bridge_updatelinkstatus(sc);
4100 	}
4101 	BRIDGE_UNLOCK(sc);
4102 
4103 	if (event_code != 0) {
4104 		bridge_link_event(sc->sc_ifp, event_code);
4105 	}
4106 }
4107 
4108 /*
4109  * bridge_delayed_callback:
4110  *
4111  *	Makes a delayed call
4112  */
4113 static void
bridge_delayed_callback(void * param,__unused void * param2)4114 bridge_delayed_callback(void *param, __unused void *param2)
4115 {
4116 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4117 	struct bridge_softc *sc = call->bdc_sc;
4118 
4119 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4120 	if (bridge_delayed_callback_delay > 0) {
4121 		struct timespec ts;
4122 
4123 		ts.tv_sec = bridge_delayed_callback_delay;
4124 		ts.tv_nsec = 0;
4125 
4126 		BRIDGE_LOG(LOG_NOTICE, 0,
4127 		    "sleeping for %d seconds",
4128 		    bridge_delayed_callback_delay);
4129 
4130 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4131 		    __func__, &ts);
4132 
4133 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4134 	}
4135 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4136 
4137 	BRIDGE_LOCK(sc);
4138 
4139 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4140 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4141 	    "%s call 0x%llx flags 0x%x",
4142 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4143 	    call->bdc_flags);
4144 }
4145 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4146 
4147 	if (call->bdc_flags & BDCF_CANCELLING) {
4148 		wakeup(call);
4149 	} else {
4150 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
4151 			(*call->bdc_func)(sc);
4152 		}
4153 	}
4154 	call->bdc_flags &= ~BDCF_OUTSTANDING;
4155 	BRIDGE_UNLOCK(sc);
4156 }
4157 
4158 /*
4159  * bridge_schedule_delayed_call:
4160  *
4161  *	Schedule a function to be called on a separate thread
4162  *      The actual call may be scheduled to run at a given time or ASAP.
4163  */
4164 static void
4165 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4166 {
4167 	uint64_t deadline = 0;
4168 	struct bridge_softc *sc = call->bdc_sc;
4169 
4170 	BRIDGE_LOCK_ASSERT_HELD(sc);
4171 
4172 	if ((sc->sc_flags & SCF_DETACHING) ||
4173 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4174 		return;
4175 	}
4176 
4177 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4178 		nanoseconds_to_absolutetime(
4179 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4180 			call->bdc_ts.tv_nsec, &deadline);
4181 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
4182 	}
4183 
4184 	call->bdc_flags = BDCF_OUTSTANDING;
4185 
4186 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4187 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4188 	    "%s call 0x%llx flags 0x%x",
4189 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4190 	    call->bdc_flags);
4191 }
4192 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4193 
4194 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4195 		thread_call_func_delayed(
4196 			(thread_call_func_t)bridge_delayed_callback,
4197 			call, deadline);
4198 	} else {
4199 		if (call->bdc_thread_call == NULL) {
4200 			call->bdc_thread_call = thread_call_allocate(
4201 				(thread_call_func_t)bridge_delayed_callback,
4202 				call);
4203 		}
4204 		thread_call_enter(call->bdc_thread_call);
4205 	}
4206 }
4207 
4208 /*
4209  * bridge_cancel_delayed_call:
4210  *
4211  *	Cancel a queued or running delayed call.
4212  *	If call is running, does not return until the call is done to
4213  *	prevent race condition with the brigde interface getting destroyed
4214  */
4215 static void
4216 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4217 {
4218 	boolean_t result;
4219 	struct bridge_softc *sc = call->bdc_sc;
4220 
4221 	/*
4222 	 * The call was never scheduled
4223 	 */
4224 	if (sc == NULL) {
4225 		return;
4226 	}
4227 
4228 	BRIDGE_LOCK_ASSERT_HELD(sc);
4229 
4230 	call->bdc_flags |= BDCF_CANCELLING;
4231 
4232 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4233 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4234 		    "%s call 0x%llx flags 0x%x",
4235 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4236 		    call->bdc_flags);
4237 		result = thread_call_func_cancel(
4238 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4239 
4240 		if (result) {
4241 			/*
4242 			 * We managed to dequeue the delayed call
4243 			 */
4244 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4245 		} else {
4246 			/*
4247 			 * Wait for delayed call do be done running
4248 			 */
4249 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4250 		}
4251 	}
4252 	call->bdc_flags &= ~BDCF_CANCELLING;
4253 }
4254 
4255 /*
4256  * bridge_cleanup_delayed_call:
4257  *
4258  *	Dispose resource allocated for a delayed call
4259  *	Assume the delayed call is not queued or running .
4260  */
4261 static void
4262 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4263 {
4264 	boolean_t result;
4265 	struct bridge_softc *sc = call->bdc_sc;
4266 
4267 	/*
4268 	 * The call was never scheduled
4269 	 */
4270 	if (sc == NULL) {
4271 		return;
4272 	}
4273 
4274 	BRIDGE_LOCK_ASSERT_HELD(sc);
4275 
4276 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4277 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4278 
4279 	if (call->bdc_thread_call != NULL) {
4280 		result = thread_call_free(call->bdc_thread_call);
4281 		if (result == FALSE) {
4282 			panic("%s thread_call_free() failed for call %p",
4283 			    __func__, call);
4284 		}
4285 		call->bdc_thread_call = NULL;
4286 	}
4287 }
4288 
4289 /*
4290  * bridge_init:
4291  *
4292  *	Initialize a bridge interface.
4293  */
4294 static int
4295 bridge_init(struct ifnet *ifp)
4296 {
4297 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4298 	errno_t error;
4299 
4300 	BRIDGE_LOCK_ASSERT_HELD(sc);
4301 
4302 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4303 		return 0;
4304 	}
4305 
4306 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4307 
4308 	/*
4309 	 * Calling bridge_aging_timer() is OK as there are no entries to
4310 	 * age so we're just going to arm the timer
4311 	 */
4312 	bridge_aging_timer(sc);
4313 #if BRIDGESTP
4314 	if (error == 0) {
4315 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4316 	}
4317 #endif /* BRIDGESTP */
4318 	return error;
4319 }
4320 
4321 /*
4322  * bridge_ifstop:
4323  *
4324  *	Stop the bridge interface.
4325  */
4326 static void
4327 bridge_ifstop(struct ifnet *ifp, int disable)
4328 {
4329 #pragma unused(disable)
4330 	struct bridge_softc *sc = ifp->if_softc;
4331 
4332 	BRIDGE_LOCK_ASSERT_HELD(sc);
4333 
4334 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4335 		return;
4336 	}
4337 
4338 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4339 
4340 #if BRIDGESTP
4341 	bstp_stop(&sc->sc_stp);
4342 #endif /* BRIDGESTP */
4343 
4344 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4345 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4346 }
4347 
4348 /*
4349  * bridge_compute_cksum:
4350  *
4351  *	If the packet has checksum flags, compare the hardware checksum
4352  *	capabilities of the source and destination interfaces. If they
4353  *	are the same, there's nothing to do. If they are different,
4354  *	finalize the checksum so that it can be sent on the destination
4355  *	interface.
4356  */
4357 static void
4358 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4359 {
4360 	uint32_t csum_flags;
4361 	uint16_t dst_hw_csum;
4362 	uint32_t did_sw = 0;
4363 	struct ether_header *eh;
4364 	uint16_t src_hw_csum;
4365 
4366 	if (src_if == dst_if) {
4367 		return;
4368 	}
4369 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4370 	if (csum_flags == 0) {
4371 		/* no checksum offload */
4372 		return;
4373 	}
4374 
4375 	/*
4376 	 * if destination/source differ in checksum offload
4377 	 * capabilities, finalize/compute the checksum
4378 	 */
4379 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4380 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4381 	if (dst_hw_csum == src_hw_csum) {
4382 		return;
4383 	}
4384 	eh = mtod(m, struct ether_header *);
4385 	switch (ntohs(eh->ether_type)) {
4386 	case ETHERTYPE_IP:
4387 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4388 		break;
4389 	case ETHERTYPE_IPV6:
4390 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4391 		break;
4392 	}
4393 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4394 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4395 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4396 	    m->m_pkthdr.csum_flags);
4397 }
4398 
4399 static errno_t
4400 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4401 {
4402 	struct flowadv  adv = { .code = FADV_SUCCESS };
4403 	errno_t         error;
4404 
4405 	error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4406 	if (error == 0) {
4407 		if (adv.code == FADV_FLOW_CONTROLLED) {
4408 			error = EQFULL;
4409 		} else if (adv.code == FADV_SUSPENDED) {
4410 			error = EQSUSPENDED;
4411 		}
4412 	}
4413 	return error;
4414 }
4415 
4416 static int
4417 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4418     bool *is_fragmented)
4419 {
4420 	int newoff;
4421 
4422 	*is_fragmented = false;
4423 	while (1) {
4424 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4425 		if (newoff < 0) {
4426 			return off;
4427 		} else if (newoff < off) {
4428 			return -1;    /* invalid */
4429 		} else if (newoff == off) {
4430 			return newoff;
4431 		}
4432 		off = newoff;
4433 		proto = *nxtp;
4434 		if (proto == IPPROTO_FRAGMENT) {
4435 			*is_fragmented = true;
4436 		}
4437 	}
4438 }
4439 
4440 static int
4441 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4442     ip_packet_info_t info_p, struct bripstats * stats_p)
4443 {
4444 	int             error = 0;
4445 	u_int           hlen;
4446 	u_int           ip_hlen;
4447 	u_int           ip_pay_len;
4448 	struct mbuf *   m0 = *mp;
4449 	int             off;
4450 	int             opt_len = 0;
4451 	int             proto = 0;
4452 
4453 	bzero(info_p, sizeof(*info_p));
4454 	if (is_ipv4) {
4455 		struct ip *     ip;
4456 		u_int           ip_total_len;
4457 
4458 		/* IPv4 */
4459 		hlen = mac_hlen + sizeof(struct ip);
4460 		if (m0->m_pkthdr.len < hlen) {
4461 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4462 			    "Short IP packet %d < %d",
4463 			    m0->m_pkthdr.len, hlen);
4464 			error = _EBADIP;
4465 			stats_p->bips_bad_ip++;
4466 			goto done;
4467 		}
4468 		if (m0->m_len < hlen) {
4469 			*mp = m0 = m_pullup(m0, hlen);
4470 			if (m0 == NULL) {
4471 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4472 				    "m_pullup failed hlen %d",
4473 				    hlen);
4474 				error = ENOBUFS;
4475 				stats_p->bips_bad_ip++;
4476 				goto done;
4477 			}
4478 		}
4479 		ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4480 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4481 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4482 			    "bad IP version");
4483 			error = _EBADIP;
4484 			stats_p->bips_bad_ip++;
4485 			goto done;
4486 		}
4487 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4488 		if (ip_hlen < sizeof(struct ip)) {
4489 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4490 			    "bad IP header length %d < %d",
4491 			    ip_hlen,
4492 			    (int)sizeof(struct ip));
4493 			error = _EBADIP;
4494 			stats_p->bips_bad_ip++;
4495 			goto done;
4496 		}
4497 		hlen = mac_hlen + ip_hlen;
4498 		if (m0->m_len < hlen) {
4499 			*mp = m0 = m_pullup(m0, hlen);
4500 			if (m0 == NULL) {
4501 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4502 				    "m_pullup failed hlen %d",
4503 				    hlen);
4504 				error = ENOBUFS;
4505 				stats_p->bips_bad_ip++;
4506 				goto done;
4507 			}
4508 		}
4509 
4510 		ip_total_len = ntohs(ip->ip_len);
4511 		if (ip_total_len < ip_hlen) {
4512 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4513 			    "IP total len %d < header len %d",
4514 			    ip_total_len, ip_hlen);
4515 			error = _EBADIP;
4516 			stats_p->bips_bad_ip++;
4517 			goto done;
4518 		}
4519 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4520 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4521 			    "invalid IP payload length %d > %d",
4522 			    ip_total_len,
4523 			    (m0->m_pkthdr.len - mac_hlen));
4524 			error = _EBADIP;
4525 			stats_p->bips_bad_ip++;
4526 			goto done;
4527 		}
4528 		ip_pay_len = ip_total_len - ip_hlen;
4529 		info_p->ip_proto = ip->ip_p;
4530 		info_p->ip_hdr.ip = ip;
4531 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4532 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4533 			info_p->ip_is_fragmented = true;
4534 		}
4535 		stats_p->bips_ip++;
4536 	} else {
4537 		struct ip6_hdr *ip6;
4538 
4539 		/* IPv6 */
4540 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4541 		if (m0->m_pkthdr.len < hlen) {
4542 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4543 			    "short IPv6 packet %d < %d",
4544 			    m0->m_pkthdr.len, hlen);
4545 			error = _EBADIPV6;
4546 			stats_p->bips_bad_ip6++;
4547 			goto done;
4548 		}
4549 		if (m0->m_len < hlen) {
4550 			*mp = m0 = m_pullup(m0, hlen);
4551 			if (m0 == NULL) {
4552 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4553 				    "m_pullup failed hlen %d",
4554 				    hlen);
4555 				error = ENOBUFS;
4556 				stats_p->bips_bad_ip6++;
4557 				goto done;
4558 			}
4559 		}
4560 		ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4561 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4562 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4563 			    "bad IPv6 version");
4564 			error = _EBADIPV6;
4565 			stats_p->bips_bad_ip6++;
4566 			goto done;
4567 		}
4568 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4569 		    &info_p->ip_is_fragmented);
4570 		if (off < 0 || m0->m_pkthdr.len < off) {
4571 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4572 			    "ip6_lasthdr() returned %d",
4573 			    off);
4574 			error = _EBADIPV6;
4575 			stats_p->bips_bad_ip6++;
4576 			goto done;
4577 		}
4578 		ip_hlen = sizeof(*ip6);
4579 		opt_len = off - mac_hlen - ip_hlen;
4580 		if (opt_len < 0) {
4581 			error = _EBADIPV6;
4582 			stats_p->bips_bad_ip6++;
4583 			goto done;
4584 		}
4585 		info_p->ip_proto = proto;
4586 		info_p->ip_hdr.ip6 = ip6;
4587 		ip_pay_len = ntohs(ip6->ip6_plen);
4588 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4589 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4590 			    "invalid IPv6 payload length %d > %d",
4591 			    ip_pay_len,
4592 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4593 			error = _EBADIPV6;
4594 			stats_p->bips_bad_ip6++;
4595 			goto done;
4596 		}
4597 		stats_p->bips_ip6++;
4598 	}
4599 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4600 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4601 	    is_ipv4 ? '4' : '6',
4602 	    proto, ip_hlen, ip_pay_len, opt_len,
4603 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4604 	info_p->ip_hlen = ip_hlen;
4605 	info_p->ip_pay_len = ip_pay_len;
4606 	info_p->ip_opt_len = opt_len;
4607 	info_p->ip_is_ipv4 = is_ipv4;
4608 done:
4609 	return error;
4610 }
4611 
4612 static int
4613 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4614     ip_packet_info_t info_p, struct bripstats * stats_p)
4615 {
4616 	int             error;
4617 	u_int           hlen;
4618 
4619 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4620 	if (error != 0) {
4621 		goto done;
4622 	}
4623 	if (info_p->ip_proto != IPPROTO_TCP) {
4624 		/* not a TCP frame, not an error, just a bad guess */
4625 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4626 		    "non-TCP (%d) IPv%c frame %d bytes",
4627 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4628 		    (*mp)->m_pkthdr.len);
4629 		goto done;
4630 	}
4631 	if (info_p->ip_is_fragmented) {
4632 		/* both TSO and IP fragmentation don't make sense */
4633 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4634 		    "fragmented TSO packet?");
4635 		stats_p->bips_bad_tcp++;
4636 		error = _EBADTCP;
4637 		goto done;
4638 	}
4639 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4640 	    info_p->ip_opt_len;
4641 	if ((*mp)->m_len < hlen) {
4642 		*mp = m_pullup(*mp, hlen);
4643 		if (*mp == NULL) {
4644 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4645 			    "m_pullup %d failed",
4646 			    hlen);
4647 			stats_p->bips_bad_tcp++;
4648 			error = _EBADTCP;
4649 			goto done;
4650 		}
4651 	}
4652 	info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4653 	    info_p->ip_hlen + info_p->ip_opt_len;
4654 done:
4655 	return error;
4656 }
4657 
4658 static inline void
4659 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4660 {
4661 	if (proto == IPPROTO_TCP) {
4662 		stats_p->brcs_tcp_checksum++;
4663 	} else {
4664 		stats_p->brcs_udp_checksum++;
4665 	}
4666 	return;
4667 }
4668 
4669 static bool
4670 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4671 {
4672 	uint16_t        ether_type;
4673 	bool            is_ip = TRUE;
4674 
4675 	ether_type = ntohs(eh->ether_type);
4676 	switch (ether_type) {
4677 	case ETHERTYPE_IP:
4678 		*is_ipv4 = TRUE;
4679 		break;
4680 	case ETHERTYPE_IPV6:
4681 		*is_ipv4 = FALSE;
4682 		break;
4683 	default:
4684 		is_ip = FALSE;
4685 		break;
4686 	}
4687 	return is_ip;
4688 }
4689 
4690 static errno_t
4691 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4692 {
4693 	struct brcsumstats *csum_stats_p;
4694 	struct ether_header     *eh;
4695 	errno_t         error = 0;
4696 	ip_packet_info  info;
4697 	bool            is_ipv4;
4698 	struct mbuf *   m;
4699 	u_int           mac_hlen = sizeof(struct ether_header);
4700 	uint16_t        sum;
4701 	bool            valid;
4702 
4703 	eh = mtod(*mp, struct ether_header *);
4704 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4705 		goto done;
4706 	}
4707 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4708 	    &stats_p->brms_out_ip);
4709 	m = *mp;
4710 	if (error != 0) {
4711 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4712 		    "bridge_get_ip_proto failed %d",
4713 		    error);
4714 		goto done;
4715 	}
4716 	if (is_ipv4) {
4717 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4718 			/* hardware offloaded IP header checksum */
4719 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4720 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4721 			    "IP checksum HW %svalid",
4722 			    valid ? "" : "in");
4723 			if (!valid) {
4724 				stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4725 				error = _EBADIPCHECKSUM;
4726 				goto done;
4727 			}
4728 			stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4729 		} else {
4730 			/* verify */
4731 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4732 			valid = (sum == 0);
4733 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4734 			    "IP checksum SW %svalid",
4735 			    valid ? "" : "in");
4736 			if (!valid) {
4737 				stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4738 				error = _EBADIPCHECKSUM;
4739 				goto done;
4740 			}
4741 			stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4742 		}
4743 	}
4744 	if (info.ip_is_fragmented) {
4745 		/* can't verify checksum on fragmented packets */
4746 		goto done;
4747 	}
4748 	switch (info.ip_proto) {
4749 	case IPPROTO_TCP:
4750 		stats_p->brms_out_ip.bips_tcp++;
4751 		break;
4752 	case IPPROTO_UDP:
4753 		stats_p->brms_out_ip.bips_udp++;
4754 		break;
4755 	default:
4756 		goto done;
4757 	}
4758 	/* check for hardware offloaded UDP/TCP checksum */
4759 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4760 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4761 		/* checksum verified by hardware */
4762 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4763 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4764 		    "IPv%c %s checksum HW 0x%x %svalid",
4765 		    is_ipv4 ? '4' : '6',
4766 		    (info.ip_proto == IPPROTO_TCP)
4767 		    ? "TCP" : "UDP",
4768 		    m->m_pkthdr.csum_data,
4769 		    valid ? "" : "in" );
4770 		if (!valid) {
4771 			/* bad checksum */
4772 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4773 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4774 			    : _EBADTCPCHECKSUM;
4775 		} else {
4776 			/* good checksum */
4777 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4778 		}
4779 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4780 		goto done;
4781 	}
4782 	m->m_data += mac_hlen;
4783 	m->m_len -= mac_hlen;
4784 	m->m_pkthdr.len -= mac_hlen;
4785 	if (is_ipv4) {
4786 		sum = inet_cksum(m, info.ip_proto,
4787 		    info.ip_hlen,
4788 		    info.ip_pay_len);
4789 	} else {
4790 		sum = inet6_cksum(m, info.ip_proto,
4791 		    info.ip_hlen + info.ip_opt_len,
4792 		    info.ip_pay_len - info.ip_opt_len);
4793 	}
4794 	valid = (sum == 0);
4795 	if (valid) {
4796 		csum_stats_p = &stats_p->brms_out_cksum_good;
4797 	} else {
4798 		csum_stats_p = &stats_p->brms_out_cksum_bad;
4799 		error = (info.ip_proto == IPPROTO_TCP)
4800 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4801 	}
4802 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4803 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4804 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4805 	    is_ipv4 ? '4' : '6',
4806 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4807 	    valid ? "" : "in",
4808 	    sum, info.ip_hlen, info.ip_pay_len);
4809 	m->m_data -= mac_hlen;
4810 	m->m_len += mac_hlen;
4811 	m->m_pkthdr.len += mac_hlen;
4812 done:
4813 	return error;
4814 }
4815 
4816 static errno_t
4817 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4818     struct ifbrmstats * stats_p)
4819 {
4820 	uint16_t *      csum_p;
4821 	errno_t         error = 0;
4822 	u_int           hlen;
4823 	struct mbuf *   m0 = *mp;
4824 	u_int           mac_hlen = sizeof(struct ether_header);
4825 	u_int           pkt_hdr_len;
4826 	struct tcphdr * tcp;
4827 	u_int           tcp_hlen;
4828 	struct udphdr * udp;
4829 
4830 	if (info_p->ip_is_ipv4) {
4831 		/* compute IP header checksum */
4832 		info_p->ip_hdr.ip->ip_sum = 0;
4833 		info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4834 		    info_p->ip_hlen);
4835 		stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4836 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4837 		    "IPv4 checksum 0x%x",
4838 		    ntohs(info_p->ip_hdr.ip->ip_sum));
4839 	}
4840 	if (info_p->ip_is_fragmented) {
4841 		/* can't compute checksum on fragmented packets */
4842 		goto done;
4843 	}
4844 	pkt_hdr_len = m0->m_pkthdr.len;
4845 	switch (info_p->ip_proto) {
4846 	case IPPROTO_TCP:
4847 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4848 		    + sizeof(struct tcphdr);
4849 		if (m0->m_len < hlen) {
4850 			*mp = m0 = m_pullup(m0, hlen);
4851 			if (m0 == NULL) {
4852 				stats_p->brms_in_ip.bips_bad_tcp++;
4853 				error = _EBADTCP;
4854 				goto done;
4855 			}
4856 		}
4857 		tcp = (struct tcphdr *)(void *)
4858 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4859 		    + info_p->ip_opt_len);
4860 		tcp_hlen = tcp->th_off << 2;
4861 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4862 		if (hlen > pkt_hdr_len) {
4863 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4864 			    "bad tcp header length %u",
4865 			    tcp_hlen);
4866 			stats_p->brms_in_ip.bips_bad_tcp++;
4867 			error = _EBADTCP;
4868 			goto done;
4869 		}
4870 		csum_p = &tcp->th_sum;
4871 		stats_p->brms_in_ip.bips_tcp++;
4872 		break;
4873 	case IPPROTO_UDP:
4874 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4875 		if (m0->m_len < hlen) {
4876 			*mp = m0 = m_pullup(m0, hlen);
4877 			if (m0 == NULL) {
4878 				stats_p->brms_in_ip.bips_bad_udp++;
4879 				error = ENOBUFS;
4880 				goto done;
4881 			}
4882 		}
4883 		udp = (struct udphdr *)(void *)
4884 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4885 		    + info_p->ip_opt_len);
4886 		csum_p = &udp->uh_sum;
4887 		stats_p->brms_in_ip.bips_udp++;
4888 		break;
4889 	default:
4890 		/* not TCP or UDP */
4891 		goto done;
4892 	}
4893 	*csum_p = 0;
4894 	m0->m_data += mac_hlen;
4895 	m0->m_len -= mac_hlen;
4896 	m0->m_pkthdr.len -= mac_hlen;
4897 	if (info_p->ip_is_ipv4) {
4898 		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4899 		    info_p->ip_pay_len);
4900 	} else {
4901 		*csum_p = inet6_cksum(m0, info_p->ip_proto,
4902 		    info_p->ip_hlen + info_p->ip_opt_len,
4903 		    info_p->ip_pay_len - info_p->ip_opt_len);
4904 	}
4905 	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4906 		/* RFC 1122 4.1.3.4 */
4907 		*csum_p = 0xffff;
4908 	}
4909 	m0->m_data -= mac_hlen;
4910 	m0->m_len += mac_hlen;
4911 	m0->m_pkthdr.len += mac_hlen;
4912 	proto_csum_stats_increment(info_p->ip_proto,
4913 	    &stats_p->brms_in_computed_cksum);
4914 
4915 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4916 	    "IPv%c %s set checksum 0x%x",
4917 	    info_p->ip_is_ipv4 ? '4' : '6',
4918 	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4919 	    ntohs(*csum_p));
4920 done:
4921 	return error;
4922 }
4923 
4924 static errno_t
4925 bridge_send(struct ifnet *src_ifp,
4926     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4927 {
4928 	switch (cksum_op) {
4929 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4930 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4931 		break;
4932 	case CHECKSUM_OPERATION_FINALIZE:
4933 		/* the checksum might not be correct, finalize now */
4934 		bridge_finalize_cksum(dst_ifp, m);
4935 		break;
4936 	case CHECKSUM_OPERATION_COMPUTE:
4937 		bridge_compute_cksum(src_ifp, dst_ifp, m);
4938 		break;
4939 	default:
4940 		break;
4941 	}
4942 #if HAS_IF_CAP
4943 	/*
4944 	 * If underlying interface can not do VLAN tag insertion itself
4945 	 * then attach a packet tag that holds it.
4946 	 */
4947 	if ((m->m_flags & M_VLANTAG) &&
4948 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4949 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4950 		if (m == NULL) {
4951 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4952 			    "%s: unable to prepend VLAN header",
4953 			    dst_ifp->if_xname);
4954 			(void) ifnet_stat_increment_out(dst_ifp,
4955 			    0, 0, 1);
4956 			return 0;
4957 		}
4958 		m->m_flags &= ~M_VLANTAG;
4959 	}
4960 #endif /* HAS_IF_CAP */
4961 	return bridge_transmit(dst_ifp, m);
4962 }
4963 
4964 static errno_t
4965 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4966 {
4967 	errno_t                 error;
4968 	u_int                   mac_hlen;
4969 
4970 	mac_hlen = sizeof(struct ether_header);
4971 
4972 #if HAS_IF_CAP
4973 	/*
4974 	 * If underlying interface can not do VLAN tag insertion itself
4975 	 * then attach a packet tag that holds it.
4976 	 */
4977 	if ((m->m_flags & M_VLANTAG) &&
4978 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4979 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4980 		if (m == NULL) {
4981 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4982 			    "%s: unable to prepend VLAN header",
4983 			    dst_ifp->if_xname);
4984 			(void) ifnet_stat_increment_out(dst_ifp,
4985 			    0, 0, 1);
4986 			error = ENOBUFS;
4987 			goto done;
4988 		}
4989 		m->m_flags &= ~M_VLANTAG;
4990 		mac_hlen += ETHER_VLAN_ENCAP_LEN;
4991 	}
4992 #endif /* HAS_IF_CAP */
4993 	error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4994 	return error;
4995 }
4996 
4997 /*
4998  * tso_hwassist:
4999  * - determine whether the destination interface supports TSO offload
5000  * - if the packet is already marked for offload and the hardware supports
5001  *   it, just allow the packet to continue on
5002  * - if not, parse the packet headers to verify that this is a large TCP
5003  *   packet requiring segmentation; if the hardware doesn't support it
5004  *   set need_sw_tso; otherwise, mark the packet for TSO offload
5005  */
5006 static int
5007 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5008     bool * need_sw_tso, bool * is_large_tcp)
5009 {
5010 	int             error = 0;
5011 	u_int32_t       if_csum;
5012 	u_int32_t       if_tso;
5013 	u_int32_t       mbuf_tso;
5014 	bool            supports_cksum = false;
5015 
5016 	*need_sw_tso = false;
5017 	*is_large_tcp = false;
5018 	if (is_ipv4) {
5019 		/*
5020 		 * Enable both TCP and IP offload if the hardware supports it.
5021 		 * If the hardware doesn't support TCP offload, supports_cksum
5022 		 * will be false so we won't set either offload.
5023 		 */
5024 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5025 		supports_cksum = (if_csum & CSUM_TCP) != 0;
5026 		if_tso = IFNET_TSO_IPV4;
5027 		mbuf_tso = CSUM_TSO_IPV4;
5028 	} else {
5029 		supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
5030 		if_csum = CSUM_TCPIPV6;
5031 		if_tso = IFNET_TSO_IPV6;
5032 		mbuf_tso = CSUM_TSO_IPV6;
5033 	}
5034 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5035 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
5036 	    ifp->if_xname, supports_cksum ? "" : " not",
5037 	    ifp->if_hwassist, if_csum);
5038 	if ((ifp->if_hwassist & if_tso) != 0 &&
5039 	    ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
5040 		/* hardware TSO, mbuf already marked */
5041 	} else {
5042 		/* verify that this is a large TCP frame */
5043 		uint32_t                csum_flags;
5044 		ip_packet_info          info;
5045 		int                     mss;
5046 		struct bripstats        stats;
5047 		struct tcphdr *         tcp;
5048 
5049 		error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5050 		    &info, &stats);
5051 		if (error != 0) {
5052 			/* bad packet */
5053 			goto done;
5054 		}
5055 		if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
5056 		    ifp->if_mtu) {
5057 			/* not actually a large packet */
5058 			goto done;
5059 		}
5060 		if (info.ip_proto_hdr == NULL) {
5061 			/* not a TCP packet */
5062 			goto done;
5063 		}
5064 		if ((ifp->if_hwassist & if_tso) == 0) {
5065 			/* hardware does not support TSO, enable sw tso */
5066 			*need_sw_tso = if_bridge_segmentation != 0;
5067 			goto done;
5068 		}
5069 		/* use hardware TSO */
5070 		(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5071 		tcp = (struct tcphdr *)info.ip_proto_hdr;
5072 		mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
5073 		    - (tcp->th_off << 2) - if_bridge_tso_reduce_mss_tx;
5074 		assert(mss > 0);
5075 		csum_flags = mbuf_tso;
5076 		if (supports_cksum) {
5077 			csum_flags |= if_csum;
5078 		}
5079 		(*mp)->m_pkthdr.tso_segsz = mss;
5080 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
5081 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5082 		*is_large_tcp = true;
5083 	}
5084 done:
5085 	return error;
5086 }
5087 
5088 /*
5089  * bridge_enqueue:
5090  *
5091  *	Enqueue a packet on a bridge member interface.
5092  *
5093  */
5094 static errno_t
5095 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
5096     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
5097 {
5098 	errno_t         error = 0;
5099 	int             len;
5100 
5101 	VERIFY(dst_ifp != NULL);
5102 
5103 	/*
5104 	 * We may be sending a fragment so traverse the mbuf
5105 	 *
5106 	 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
5107 	 */
5108 	for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
5109 		bool            need_sw_tso = false;
5110 		bool            is_ipv4 = false;
5111 		bool            is_large_pkt;
5112 		errno_t         _error = 0;
5113 
5114 		len = m->m_pkthdr.len;
5115 		m->m_flags |= M_PROTO1; /* set to avoid loops */
5116 		next_m = m->m_nextpkt;
5117 		m->m_nextpkt = NULL;
5118 		/*
5119 		 * Need to segment the packet if it is a large frame
5120 		 * and the destination interface does not support TSO.
5121 		 *
5122 		 * Note that with trailers, it's possible for a packet to
5123 		 * be large but not actually require segmentation.
5124 		 */
5125 		is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
5126 		if (is_large_pkt) {
5127 			struct ether_header     *eh;
5128 			bool                    is_large_tcp = false;
5129 
5130 			eh = mtod(m, struct ether_header *);
5131 			if (ether_header_type_is_ip(eh, &is_ipv4)) {
5132 				_error = tso_hwassist(&m, is_ipv4,
5133 				    dst_ifp, sizeof(struct ether_header),
5134 				    &need_sw_tso, &is_large_tcp);
5135 				if (is_large_tcp) {
5136 					cksum_op = CHECKSUM_OPERATION_NONE;
5137 				}
5138 			} else {
5139 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5140 				    "large non IP packet");
5141 			}
5142 		}
5143 		if (_error != 0) {
5144 			if (m != NULL) {
5145 				m_freem(m);
5146 			}
5147 		} else if (need_sw_tso) {
5148 			_error = bridge_send_tso(dst_ifp, m, is_ipv4);
5149 		} else {
5150 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5151 			    "%s bridge_send(%s) len %d op %d",
5152 			    bridge_ifp->if_xname,
5153 			    dst_ifp->if_xname,
5154 			    len, cksum_op);
5155 			_error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
5156 		}
5157 
5158 		/* Preserve first error value */
5159 		if (error == 0 && _error != 0) {
5160 			error = _error;
5161 		}
5162 		if (_error == 0) {
5163 			(void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
5164 		} else {
5165 			(void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
5166 		}
5167 	}
5168 
5169 	return error;
5170 }
5171 
5172 #if HAS_BRIDGE_DUMMYNET
5173 /*
5174  * bridge_dummynet:
5175  *
5176  *	Receive a queued packet from dummynet and pass it on to the output
5177  *	interface.
5178  *
5179  *	The mbuf has the Ethernet header already attached.
5180  */
5181 static void
5182 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
5183 {
5184 	struct bridge_softc *sc;
5185 
5186 	sc = ifp->if_bridge;
5187 
5188 	/*
5189 	 * The packet didn't originate from a member interface. This should only
5190 	 * ever happen if a member interface is removed while packets are
5191 	 * queued for it.
5192 	 */
5193 	if (sc == NULL) {
5194 		m_freem(m);
5195 		return;
5196 	}
5197 
5198 	if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
5199 		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
5200 			return;
5201 		}
5202 		if (m == NULL) {
5203 			return;
5204 		}
5205 	}
5206 	(void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
5207 }
5208 
5209 #endif /* HAS_BRIDGE_DUMMYNET */
5210 
5211 /*
5212  * bridge_member_output:
5213  *
5214  *	Send output from a bridge member interface.  This
5215  *	performs the bridging function for locally originated
5216  *	packets.
5217  *
5218  *	The mbuf has the Ethernet header already attached.
5219  */
5220 static errno_t
5221 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5222 {
5223 	ifnet_t bridge_ifp;
5224 	struct ether_header *eh;
5225 	struct ifnet *dst_if;
5226 	uint16_t vlan;
5227 	struct bridge_iflist *mac_nat_bif;
5228 	ifnet_t mac_nat_ifp;
5229 	mbuf_t m = *data;
5230 
5231 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5232 	    "ifp %s", ifp->if_xname);
5233 	if (m->m_len < ETHER_HDR_LEN) {
5234 		m = m_pullup(m, ETHER_HDR_LEN);
5235 		if (m == NULL) {
5236 			*data = NULL;
5237 			return EJUSTRETURN;
5238 		}
5239 	}
5240 
5241 	eh = mtod(m, struct ether_header *);
5242 	vlan = VLANTAGOF(m);
5243 
5244 	BRIDGE_LOCK(sc);
5245 	mac_nat_bif = sc->sc_mac_nat_bif;
5246 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5247 	if (mac_nat_ifp == ifp) {
5248 		/* record the IP address used by the MAC NAT interface */
5249 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5250 		m = *data;
5251 		if (m == NULL) {
5252 			/* packet was deallocated */
5253 			BRIDGE_UNLOCK(sc);
5254 			return EJUSTRETURN;
5255 		}
5256 	}
5257 	bridge_ifp = sc->sc_ifp;
5258 
5259 	/*
5260 	 * APPLE MODIFICATION
5261 	 * If the packet is an 802.1X ethertype, then only send on the
5262 	 * original output interface.
5263 	 */
5264 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5265 		dst_if = ifp;
5266 		goto sendunicast;
5267 	}
5268 
5269 	/*
5270 	 * If bridge is down, but the original output interface is up,
5271 	 * go ahead and send out that interface.  Otherwise, the packet
5272 	 * is dropped below.
5273 	 */
5274 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5275 		dst_if = ifp;
5276 		goto sendunicast;
5277 	}
5278 
5279 	/*
5280 	 * If the packet is a multicast, or we don't know a better way to
5281 	 * get there, send to all interfaces.
5282 	 */
5283 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5284 		dst_if = NULL;
5285 	} else {
5286 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5287 	}
5288 	if (dst_if == NULL) {
5289 		struct bridge_iflist *bif;
5290 		struct mbuf *mc;
5291 		errno_t error;
5292 
5293 
5294 		bridge_span(sc, m);
5295 
5296 		BRIDGE_LOCK2REF(sc, error);
5297 		if (error != 0) {
5298 			m_freem(m);
5299 			return EJUSTRETURN;
5300 		}
5301 
5302 		/*
5303 		 * Duplicate and send the packet across all member interfaces
5304 		 * except the originating interface.
5305 		 */
5306 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5307 			dst_if = bif->bif_ifp;
5308 			if (dst_if == ifp) {
5309 				/* skip the originating interface */
5310 				continue;
5311 			}
5312 			/* skip interface with inactive link status */
5313 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5314 				continue;
5315 			}
5316 #if 0
5317 			if (dst_if->if_type == IFT_GIF) {
5318 				continue;
5319 			}
5320 #endif
5321 			/* skip interface that isn't running */
5322 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5323 				continue;
5324 			}
5325 			/*
5326 			 * If the interface is participating in spanning
5327 			 * tree, make sure the port is in a state that
5328 			 * allows forwarding.
5329 			 */
5330 			if ((bif->bif_ifflags & IFBIF_STP) &&
5331 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5332 				continue;
5333 			}
5334 			/*
5335 			 * If the destination is the MAC NAT interface,
5336 			 * skip sending the packet. The packet can't be sent
5337 			 * if the source MAC is incorrect.
5338 			 */
5339 			if (dst_if == mac_nat_ifp) {
5340 				continue;
5341 			}
5342 
5343 			/* make a deep copy to send on this member interface */
5344 			mc = m_dup(m, M_DONTWAIT);
5345 			if (mc == NULL) {
5346 				(void)ifnet_stat_increment_out(bridge_ifp,
5347 				    0, 0, 1);
5348 				continue;
5349 			}
5350 			(void)bridge_enqueue(bridge_ifp, ifp, dst_if,
5351 			    mc, CHECKSUM_OPERATION_COMPUTE);
5352 		}
5353 		BRIDGE_UNREF(sc);
5354 
5355 		if ((ifp->if_flags & IFF_RUNNING) == 0) {
5356 			m_freem(m);
5357 			return EJUSTRETURN;
5358 		}
5359 		/* allow packet to continue on the originating interface */
5360 		return 0;
5361 	}
5362 
5363 sendunicast:
5364 	/*
5365 	 * XXX Spanning tree consideration here?
5366 	 */
5367 
5368 	bridge_span(sc, m);
5369 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5370 		m_freem(m);
5371 		BRIDGE_UNLOCK(sc);
5372 		return EJUSTRETURN;
5373 	}
5374 
5375 	BRIDGE_UNLOCK(sc);
5376 	if (dst_if == ifp) {
5377 		/* allow packet to continue on the originating interface */
5378 		return 0;
5379 	}
5380 	if (dst_if != mac_nat_ifp) {
5381 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5382 		    CHECKSUM_OPERATION_COMPUTE);
5383 	} else {
5384 		/*
5385 		 * This is not the original output interface
5386 		 * and the destination is the MAC NAT interface.
5387 		 * Drop the packet because the packet can't be sent
5388 		 * if the source MAC is incorrect.
5389 		 */
5390 		m_freem(m);
5391 	}
5392 	return EJUSTRETURN;
5393 }
5394 
5395 /*
5396  * Output callback.
5397  *
5398  * This routine is called externally from above only when if_bridge_txstart
5399  * is disabled; otherwise it is called internally by bridge_start().
5400  */
5401 static int
5402 bridge_output(struct ifnet *ifp, struct mbuf *m)
5403 {
5404 	struct bridge_softc *sc = ifnet_softc(ifp);
5405 	struct ether_header *eh;
5406 	struct ifnet *dst_if = NULL;
5407 	int error = 0;
5408 
5409 	eh = mtod(m, struct ether_header *);
5410 
5411 	BRIDGE_LOCK(sc);
5412 
5413 	if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5414 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5415 	}
5416 
5417 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5418 
5419 #if NBPFILTER > 0
5420 	if (sc->sc_bpf_output) {
5421 		bridge_bpf_output(ifp, m);
5422 	}
5423 #endif
5424 
5425 	if (dst_if == NULL) {
5426 		/* callee will unlock */
5427 		bridge_broadcast(sc, NULL, m, 0);
5428 	} else {
5429 		ifnet_t bridge_ifp;
5430 
5431 		bridge_ifp = sc->sc_ifp;
5432 		BRIDGE_UNLOCK(sc);
5433 
5434 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5435 		    CHECKSUM_OPERATION_FINALIZE);
5436 	}
5437 
5438 	return error;
5439 }
5440 
5441 static void
5442 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5443 {
5444 	struct ether_header *eh;
5445 	bool is_ipv4;
5446 	uint32_t sw_csum, hwcap;
5447 	uint32_t did_sw;
5448 	uint32_t csum_flags;
5449 
5450 	eh = mtod(m, struct ether_header *);
5451 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5452 		return;
5453 	}
5454 
5455 	/* do in software what the hardware cannot */
5456 	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5457 	csum_flags = m->m_pkthdr.csum_flags;
5458 	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5459 	sw_csum &= IF_HWASSIST_CSUM_MASK;
5460 
5461 	if (is_ipv4) {
5462 		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5463 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5464 			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5465 				uint16_t start =
5466 				    sizeof(*eh) + sizeof(struct ip);
5467 				uint16_t ulpoff =
5468 				    m->m_pkthdr.csum_data & 0xffff;
5469 				m->m_pkthdr.csum_flags |=
5470 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5471 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5472 				m->m_pkthdr.csum_tx_start = start;
5473 			} else {
5474 				sw_csum |= (CSUM_DELAY_DATA &
5475 				    m->m_pkthdr.csum_flags);
5476 			}
5477 		}
5478 		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5479 	} else {
5480 		if ((hwcap & CSUM_PARTIAL) &&
5481 		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5482 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5483 			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5484 				uint16_t start =
5485 				    sizeof(*eh) + sizeof(struct ip6_hdr);
5486 				uint16_t ulpoff =
5487 				    m->m_pkthdr.csum_data & 0xffff;
5488 				m->m_pkthdr.csum_flags |=
5489 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5490 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5491 				m->m_pkthdr.csum_tx_start = start;
5492 			} else {
5493 				sw_csum |= (CSUM_DELAY_IPV6_DATA &
5494 				    m->m_pkthdr.csum_flags);
5495 			}
5496 		}
5497 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5498 	}
5499 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5500 	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5501 	    ifp->if_xname, csum_flags, hwcap, sw_csum,
5502 	    did_sw, m->m_pkthdr.csum_flags);
5503 }
5504 
/*
 * bridge_start:
 *
 *	Start output on a bridge.
 *
 * This routine is invoked by the start worker thread; because we never call
 * it directly, there is no need to deploy any serialization mechanism other
 * than what's already used by the worker thread, i.e. this is already single
 * threaded.
 *
 * This routine is called only when if_bridge_txstart is enabled.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	/* keep sending until ifnet_dequeue() stops handing out packets */
	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5530 
5531 /*
5532  * bridge_forward:
5533  *
5534  *	The forwarding function of the bridge.
5535  *
5536  *	NOTE: Releases the lock on return.
5537  */
5538 static void
5539 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5540     struct mbuf *m)
5541 {
5542 	struct bridge_iflist *dbif;
5543 	ifnet_t bridge_ifp;
5544 	struct ifnet *src_if, *dst_if;
5545 	struct ether_header *eh;
5546 	uint16_t vlan;
5547 	uint8_t *dst;
5548 	int error;
5549 	struct mac_nat_record mnr;
5550 	bool translate_mac = FALSE;
5551 	uint32_t sc_filter_flags = 0;
5552 
5553 	BRIDGE_LOCK_ASSERT_HELD(sc);
5554 
5555 	bridge_ifp = sc->sc_ifp;
5556 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5557 	    "%s m 0x%llx", bridge_ifp->if_xname,
5558 	    (uint64_t)VM_KERNEL_ADDRPERM(m));
5559 
5560 	src_if = m->m_pkthdr.rcvif;
5561 	if (src_if != sbif->bif_ifp) {
5562 		const char *    src_if_name;
5563 
5564 		src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5565 		BRIDGE_LOG(LOG_NOTICE, 0,
5566 		    "src_if %s != bif_ifp %s",
5567 		    src_if_name, sbif->bif_ifp->if_xname);
5568 		goto drop;
5569 	}
5570 
5571 	(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5572 	vlan = VLANTAGOF(m);
5573 
5574 
5575 	if ((sbif->bif_ifflags & IFBIF_STP) &&
5576 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5577 		goto drop;
5578 	}
5579 
5580 	eh = mtod(m, struct ether_header *);
5581 	dst = eh->ether_dhost;
5582 
5583 	/* If the interface is learning, record the address. */
5584 	if (sbif->bif_ifflags & IFBIF_LEARNING) {
5585 		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5586 		    sbif, 0, IFBAF_DYNAMIC);
5587 		/*
5588 		 * If the interface has addresses limits then deny any source
5589 		 * that is not in the cache.
5590 		 */
5591 		if (error && sbif->bif_addrmax) {
5592 			goto drop;
5593 		}
5594 	}
5595 
5596 	if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5597 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5598 		goto drop;
5599 	}
5600 
5601 	/*
5602 	 * At this point, the port either doesn't participate
5603 	 * in spanning tree or it is in the forwarding state.
5604 	 */
5605 
5606 	/*
5607 	 * If the packet is unicast, destined for someone on
5608 	 * "this" side of the bridge, drop it.
5609 	 */
5610 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5611 		/* unicast */
5612 		dst_if = bridge_rtlookup(sc, dst, vlan);
5613 		if (src_if == dst_if) {
5614 			goto drop;
5615 		}
5616 	} else {
5617 		/* broadcast/multicast */
5618 
5619 		/*
5620 		 * Check if its a reserved multicast address, any address
5621 		 * listed in 802.1D section 7.12.6 may not be forwarded by the
5622 		 * bridge.
5623 		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5624 		 */
5625 		if (dst[0] == 0x01 && dst[1] == 0x80 &&
5626 		    dst[2] == 0xc2 && dst[3] == 0x00 &&
5627 		    dst[4] == 0x00 && dst[5] <= 0x0f) {
5628 			goto drop;
5629 		}
5630 
5631 
5632 		/* ...forward it to all interfaces. */
5633 		os_atomic_inc(&bridge_ifp->if_imcasts, relaxed);
5634 		dst_if = NULL;
5635 	}
5636 
5637 	/*
5638 	 * If we have a destination interface which is a member of our bridge,
5639 	 * OR this is a unicast packet, push it through the bpf(4) machinery.
5640 	 * For broadcast or multicast packets, don't bother because it will
5641 	 * be reinjected into ether_input. We do this before we pass the packets
5642 	 * through the pfil(9) framework, as it is possible that pfil(9) will
5643 	 * drop the packet, or possibly modify it, making it difficult to debug
5644 	 * firewall issues on the bridge.
5645 	 */
5646 #if NBPFILTER > 0
5647 	if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5648 	    dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5649 		m->m_pkthdr.rcvif = bridge_ifp;
5650 		BRIDGE_BPF_MTAP_INPUT(sc, m);
5651 	}
5652 #endif /* NBPFILTER */
5653 
5654 	if (dst_if == NULL) {
5655 		/* bridge_broadcast will unlock */
5656 		bridge_broadcast(sc, sbif, m, 1);
5657 		return;
5658 	}
5659 
5660 	/*
5661 	 * Unicast.
5662 	 */
5663 	/*
5664 	 * At this point, we're dealing with a unicast frame
5665 	 * going to a different interface.
5666 	 */
5667 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5668 		goto drop;
5669 	}
5670 
5671 	dbif = bridge_lookup_member_if(sc, dst_if);
5672 	if (dbif == NULL) {
5673 		/* Not a member of the bridge (anymore?) */
5674 		goto drop;
5675 	}
5676 
5677 	/* Private segments can not talk to each other */
5678 	if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5679 		goto drop;
5680 	}
5681 
5682 	if ((dbif->bif_ifflags & IFBIF_STP) &&
5683 	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5684 		goto drop;
5685 	}
5686 
5687 #if HAS_DHCPRA_MASK
5688 	/* APPLE MODIFICATION <rdar:6985737> */
5689 	if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5690 		m = ip_xdhcpra_output(dst_if, m);
5691 		if (!m) {
5692 			++bridge_ifp.if_xdhcpra;
5693 			BRIDGE_UNLOCK(sc);
5694 			return;
5695 		}
5696 	}
5697 #endif /* HAS_DHCPRA_MASK */
5698 
5699 	if (dbif == sc->sc_mac_nat_bif) {
5700 		/* determine how to translate the packet */
5701 		translate_mac
5702 		        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5703 		if (m == NULL) {
5704 			/* packet was deallocated */
5705 			BRIDGE_UNLOCK(sc);
5706 			return;
5707 		}
5708 	} else if (bif_has_checksum_offload(dbif) &&
5709 	    !bif_has_checksum_offload(sbif)) {
5710 		/*
5711 		 * If the destination interface has checksum offload enabled,
5712 		 * verify the checksum now, unless the source interface also has
5713 		 * checksum offload enabled. The checksum in that case has
5714 		 * already just been computed and verifying it is unnecessary.
5715 		 */
5716 		error = bridge_verify_checksum(&m, &dbif->bif_stats);
5717 		if (error != 0) {
5718 			BRIDGE_UNLOCK(sc);
5719 			if (m != NULL) {
5720 				m_freem(m);
5721 			}
5722 			return;
5723 		}
5724 	}
5725 
5726 	sc_filter_flags = sc->sc_filter_flags;
5727 
5728 	BRIDGE_UNLOCK(sc);
5729 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5730 		if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5731 			return;
5732 		}
5733 		if (m == NULL) {
5734 			return;
5735 		}
5736 	}
5737 
5738 	/* if we need to, translate the MAC address */
5739 	if (translate_mac) {
5740 		bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5741 	}
5742 	/*
5743 	 * We're forwarding an inbound packet in which the checksum must
5744 	 * already have been computed and if required, verified.
5745 	 */
5746 	if (m != NULL) {
5747 		(void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5748 		    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5749 	}
5750 	return;
5751 
5752 drop:
5753 	BRIDGE_UNLOCK(sc);
5754 	m_freem(m);
5755 }
5756 
5757 static void
5758 inject_input_packet(ifnet_t ifp, mbuf_t m)
5759 {
5760 	mbuf_pkthdr_setrcvif(m, ifp);
5761 	mbuf_pkthdr_setheader(m, mbuf_data(m));
5762 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5763 	    mbuf_len(m) - ETHER_HDR_LEN);
5764 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5765 	m->m_flags |= M_PROTO1; /* set to avoid loops */
5766 	dlil_input_packet_list(ifp, m);
5767 	return;
5768 }
5769 
5770 static bool
5771 in_addr_is_ours(struct in_addr ip)
5772 {
5773 	struct in_ifaddr *ia;
5774 	bool             ours = false;
5775 
5776 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5777 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5778 		if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5779 			ours = true;
5780 			break;
5781 		}
5782 	}
5783 	lck_rw_done(&in_ifaddr_rwlock);
5784 	return ours;
5785 }
5786 
5787 static bool
5788 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5789 {
5790 	struct in6_ifaddr       *ia6;
5791 	bool                    ours = false;
5792 
5793 	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5794 		struct in6_addr         dst_ip;
5795 
5796 		/* need to embed scope ID for comparison */
5797 		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5798 		dst_ip.s6_addr16[1] = htons(ifscope);
5799 		ip6_p = &dst_ip;
5800 	}
5801 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5802 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5803 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5804 		    ia6->ia_addr.sin6_scope_id, ifscope)) {
5805 			ours = true;
5806 			break;
5807 		}
5808 	}
5809 	lck_rw_done(&in6_ifaddr_rwlock);
5810 	return ours;
5811 }
5812 
5813 static void
5814 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5815     bpf_packet_func bpf_input_func)
5816 {
5817 	size_t                  byte_count;
5818 	struct ether_header     *eh;
5819 	errno_t                 error;
5820 	bool                    is_ipv4;
5821 	int                     len;
5822 	u_int                   mac_hlen;
5823 	int                     pkt_count;
5824 
5825 	/* segment large packets before sending them up */
5826 	if (if_bridge_segmentation == 0) {
5827 		goto done;
5828 	}
5829 	len = m->m_pkthdr.len;
5830 	if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5831 		goto done;
5832 	}
5833 	eh = mtod(m, struct ether_header *);
5834 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5835 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5836 		    "large non IPv4/IPv6 packet");
5837 		goto done;
5838 	}
5839 
5840 	/*
5841 	 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5842 	 *
5843 	 * If gso_tcp() returns success (0), the packet(s) are
5844 	 * ready to be passed up. If the destination is a local IP address,
5845 	 * the packet will be passed up as a large, single packet.
5846 	 *
5847 	 * If gso_tcp() returns an error, the packet has already
5848 	 * been freed.
5849 	 */
5850 	mac_hlen = sizeof(*eh);
5851 	error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5852 	if (error != 0) {
5853 		return;
5854 	}
5855 
5856 done:
5857 	pkt_count = 0;
5858 	byte_count = 0;
5859 	for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5860 		/* Mark the packet as arriving on the bridge interface */
5861 		mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5862 		mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5863 		if (bpf_input_func != NULL) {
5864 			(*bpf_input_func)(bridge_ifp, scan);
5865 		}
5866 		mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5867 		    mbuf_len(scan) - ETHER_HDR_LEN);
5868 		mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5869 		byte_count += mbuf_pkthdr_len(scan);
5870 		pkt_count++;
5871 	}
5872 	(void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5873 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5874 	    "%s %d packet(s) %ld bytes",
5875 	    bridge_ifp->if_xname, pkt_count, byte_count);
5876 	dlil_input_packet_list(bridge_ifp, m);
5877 	return;
5878 }
5879 
5880 static bool
5881 is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5882 {
5883 	bool    ours;
5884 
5885 	if (info_p->ip_is_ipv4) {
5886 		struct in_addr  dst_ip;
5887 
5888 		bcopy(&info_p->ip_hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
5889 		ours = in_addr_is_ours(dst_ip);
5890 	} else {
5891 		ours = in6_addr_is_ours(&info_p->ip_hdr.ip6->ip6_dst, ifscope);
5892 	}
5893 	return ours;
5894 }
5895 
5896 static inline errno_t
5897 bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
5898     const u_char * ether_dhost, mbuf_t *mp,
5899     bool is_broadcast, bool is_ip, bool is_ipv4,
5900     ip_packet_info * info_p, struct bripstats * stats_p,
5901     bool *info_initialized)
5902 {
5903 	errno_t         error = 0;
5904 	bool            is_local = false;
5905 	struct pf_mtag *pf_mtag;
5906 	u_int16_t       tag = vmnet_tag;
5907 
5908 	*info_initialized = false;
5909 	if (is_broadcast) {
5910 		if (_ether_cmp(ether_dhost, etherbroadcastaddr) == 0) {
5911 			tag = vmnet_broadcast_tag;
5912 		} else {
5913 			tag = vmnet_multicast_tag;
5914 		}
5915 	} else if (is_ip) {
5916 		unsigned int    mac_hlen = sizeof(struct ether_header);
5917 
5918 		bzero(stats_p, sizeof(*stats_p));
5919 		*info_initialized = true;
5920 		error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
5921 		    stats_p);
5922 		if (error != 0) {
5923 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
5924 			    "%s(%s) bridge_get_ip_proto failed %d",
5925 			    bridge_ifp->if_xname,
5926 			    ifp->if_xname, error);
5927 			if (*mp == NULL) {
5928 				return EJUSTRETURN;
5929 			}
5930 		} else {
5931 			is_local = is_our_ip(info_p, bridge_ifp->if_index);
5932 			if (is_local) {
5933 				tag = vmnet_local_tag;
5934 			}
5935 		}
5936 	}
5937 	pf_mtag = pf_get_mtag(*mp);
5938 	if (pf_mtag != NULL) {
5939 		pf_mtag->pftag_tag = tag;
5940 	}
5941 #if DEBUG || DEVELOPMENT
5942 	{
5943 		bool forced;
5944 
5945 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
5946 		if (forced) {
5947 			m_freem(*mp);
5948 			*mp = NULL;
5949 			error = EJUSTRETURN;
5950 			goto done;
5951 		}
5952 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
5953 		if (forced) {
5954 			error = _EBADIP;
5955 			goto done;
5956 		}
5957 	}
5958 done:
5959 #endif /* DEBUG || DEVELOPMENT */
5960 	return error;
5961 }
5962 
5963 static void
5964 bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
5965 {
5966 	dst_p->bips_ip += src_p->bips_ip;
5967 	dst_p->bips_ip6 += src_p->bips_ip6;
5968 	dst_p->bips_udp += src_p->bips_udp;
5969 	dst_p->bips_tcp += src_p->bips_tcp;
5970 
5971 	dst_p->bips_bad_ip += src_p->bips_bad_ip;
5972 	dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
5973 	dst_p->bips_bad_udp += src_p->bips_bad_udp;
5974 	dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
5975 }
5976 
5977 static void
5978 bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
5979 {
5980 	struct bridge_iflist *bif;
5981 	struct bridge_softc *sc = ifp->if_bridge;
5982 
5983 	BRIDGE_LOCK(sc);
5984 	bif = bridge_lookup_member_if(sc, ifp);
5985 	if (bif == NULL) {
5986 		goto done;
5987 	}
5988 	if (!bif_has_checksum_offload(bif)) {
5989 		goto done;
5990 	}
5991 	bripstats_apply(&bif->bif_stats.brms_in_ip, stats_p);
5992 
5993 done:
5994 	BRIDGE_UNLOCK(sc);
5995 	return;
5996 }
5997 
5998 /*
5999  * bridge_input:
6000  *
6001  *	Filter input from a member interface.  Queue the packet for
6002  *	bridging if it is not for us.
6003  */
6004 errno_t
6005 bridge_input(struct ifnet *ifp, mbuf_t *data)
6006 {
6007 	struct bridge_softc *sc = ifp->if_bridge;
6008 	struct bridge_iflist *bif, *bif2;
6009 	struct ether_header eh_in;
6010 	bool is_ip = false;
6011 	bool is_ipv4 = false;
6012 	ifnet_t bridge_ifp;
6013 	struct mbuf *mc, *mc2;
6014 	unsigned int mac_hlen = sizeof(struct ether_header);
6015 	uint16_t vlan;
6016 	errno_t error;
6017 	ip_packet_info info;
6018 	struct bripstats stats;
6019 	bool info_initialized = false;
6020 	errno_t ip_packet_error = 0;
6021 	bool is_broadcast;
6022 	bool is_ip_broadcast = false;
6023 	bool is_ifp_mac = false;
6024 	mbuf_t m = *data;
6025 	uint32_t sc_filter_flags = 0;
6026 
6027 	bridge_ifp = sc->sc_ifp;
6028 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6029 	    "%s from %s m 0x%llx data 0x%llx",
6030 	    bridge_ifp->if_xname, ifp->if_xname,
6031 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
6032 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
6033 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
6034 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6035 		    "%s not running passing along",
6036 		    bridge_ifp->if_xname);
6037 		return 0;
6038 	}
6039 
6040 	vlan = VLANTAGOF(m);
6041 
6042 #ifdef IFF_MONITOR
6043 	/*
6044 	 * Implement support for bridge monitoring. If this flag has been
6045 	 * set on this interface, discard the packet once we push it through
6046 	 * the bpf(4) machinery, but before we do, increment the byte and
6047 	 * packet counters associated with this interface.
6048 	 */
6049 	if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
6050 		m->m_pkthdr.rcvif = bridge_ifp;
6051 		BRIDGE_BPF_MTAP_INPUT(sc, m);
6052 		(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
6053 		*data = NULL;
6054 		m_freem(m);
6055 		return EJUSTRETURN;
6056 	}
6057 #endif /* IFF_MONITOR */
6058 
6059 	is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
6060 
6061 	/*
6062 	 * Need to clear the promiscuous flag otherwise it will be
6063 	 * dropped by DLIL after processing filters
6064 	 */
6065 	if ((mbuf_flags(m) & MBUF_PROMISC)) {
6066 		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
6067 	}
6068 
6069 	/* copy the ethernet header */
6070 	eh_in = *(mtod(m, struct ether_header *));
6071 
6072 	is_ip = ether_header_type_is_ip(&eh_in, &is_ipv4);
6073 
6074 	if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
6075 		/* tag packets coming from VMNET interfaces */
6076 		ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
6077 		    eh_in.ether_dhost, data, is_broadcast, is_ip, is_ipv4,
6078 		    &info, &stats, &info_initialized);
6079 		m = *data;
6080 		if (m == NULL) {
6081 			bridge_bripstats_apply(ifp, &stats);
6082 			return EJUSTRETURN;
6083 		}
6084 	}
6085 
6086 	sc_filter_flags = sc->sc_filter_flags;
6087 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6088 		error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
6089 		m = *data;
6090 		if (error != 0 || m == NULL) {
6091 			return EJUSTRETURN;
6092 		}
6093 	}
6094 
6095 	BRIDGE_LOCK(sc);
6096 	bif = bridge_lookup_member_if(sc, ifp);
6097 	if (bif == NULL) {
6098 		BRIDGE_UNLOCK(sc);
6099 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6100 		    "%s bridge_lookup_member_if failed",
6101 		    bridge_ifp->if_xname);
6102 		return 0;
6103 	}
6104 	if (is_ip && bif_has_checksum_offload(bif)) {
6105 		if (info_initialized) {
6106 			bripstats_apply(&bif->bif_stats.brms_in_ip, &stats);
6107 		} else {
6108 			error = bridge_get_ip_proto(data, mac_hlen, is_ipv4,
6109 			    &info, &bif->bif_stats.brms_in_ip);
6110 			if (error != 0) {
6111 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6112 				    "%s(%s) bridge_get_ip_proto failed %d",
6113 				    bridge_ifp->if_xname,
6114 				    bif->bif_ifp->if_xname, error);
6115 				ip_packet_error = error;
6116 			}
6117 		}
6118 		if (ip_packet_error == 0) {
6119 			/* need to compute IP/UDP/TCP/checksums */
6120 			error = bridge_offload_checksum(data, &info,
6121 			    &bif->bif_stats);
6122 			if (error != 0) {
6123 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6124 				    "%s(%s) bridge_offload_checksum failed %d",
6125 				    bridge_ifp->if_xname,
6126 				    bif->bif_ifp->if_xname, error);
6127 				ip_packet_error = error;
6128 			}
6129 		}
6130 		if (ip_packet_error != 0) {
6131 			BRIDGE_UNLOCK(sc);
6132 			if (*data != NULL) {
6133 				m_freem(*data);
6134 				*data = NULL;
6135 			}
6136 			return EJUSTRETURN;
6137 		}
6138 		m = *data;
6139 	}
6140 
6141 	if (bif->bif_flags & BIFF_HOST_FILTER) {
6142 		error = bridge_host_filter(bif, data);
6143 		if (error != 0) {
6144 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6145 			    "%s bridge_host_filter failed",
6146 			    bif->bif_ifp->if_xname);
6147 			BRIDGE_UNLOCK(sc);
6148 			return EJUSTRETURN;
6149 		}
6150 		m = *data;
6151 	}
6152 
6153 	if (!is_broadcast &&
6154 	    _ether_cmp(eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
6155 		/* the packet is unicast to the interface's MAC address */
6156 		if (is_ip && sc->sc_mac_nat_bif == bif) {
6157 			/* doing MAC-NAT, check if destination is IP broadcast */
6158 			is_ip_broadcast = is_broadcast_ip_packet(data);
6159 			if (*data == NULL) {
6160 				BRIDGE_UNLOCK(sc);
6161 				return EJUSTRETURN;
6162 			}
6163 			m = *data;
6164 		}
6165 		if (!is_ip_broadcast) {
6166 			is_ifp_mac = TRUE;
6167 		}
6168 	}
6169 
6170 	bridge_span(sc, m);
6171 
6172 	if (is_broadcast || is_ip_broadcast) {
6173 		if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
6174 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6175 			    " multicast: "
6176 			    "%02x:%02x:%02x:%02x:%02x:%02x",
6177 			    eh_in.ether_dhost[0], eh_in.ether_dhost[1],
6178 			    eh_in.ether_dhost[2], eh_in.ether_dhost[3],
6179 			    eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
6180 		}
6181 		/* Tap off 802.1D packets; they do not get forwarded. */
6182 		if (is_broadcast &&
6183 		    _ether_cmp(eh_in.ether_dhost, bstp_etheraddr) == 0) {
6184 #if BRIDGESTP
6185 			m = bstp_input(&bif->bif_stp, ifp, m);
6186 #else /* !BRIDGESTP */
6187 			m_freem(m);
6188 			m = NULL;
6189 #endif /* !BRIDGESTP */
6190 			if (m == NULL) {
6191 				BRIDGE_UNLOCK(sc);
6192 				return EJUSTRETURN;
6193 			}
6194 		}
6195 
6196 		if ((bif->bif_ifflags & IFBIF_STP) &&
6197 		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6198 			BRIDGE_UNLOCK(sc);
6199 			return 0;
6200 		}
6201 
6202 		/*
6203 		 * Make a deep copy of the packet and enqueue the copy
6204 		 * for bridge processing.
6205 		 */
6206 		mc = m_dup(m, M_DONTWAIT);
6207 		if (mc == NULL) {
6208 			BRIDGE_UNLOCK(sc);
6209 			return 0;
6210 		}
6211 
6212 		/*
6213 		 * Perform the bridge forwarding function with the copy.
6214 		 *
6215 		 * Note that bridge_forward calls BRIDGE_UNLOCK
6216 		 */
6217 		if (is_ip_broadcast) {
6218 			struct ether_header *eh;
6219 
6220 			/* make the copy look like it is actually broadcast */
6221 			mc->m_flags |= M_BCAST;
6222 			eh = mtod(mc, struct ether_header *);
6223 			bcopy(etherbroadcastaddr, eh->ether_dhost,
6224 			    ETHER_ADDR_LEN);
6225 		}
6226 		bridge_forward(sc, bif, mc);
6227 
6228 		/*
6229 		 * Reinject the mbuf as arriving on the bridge so we have a
6230 		 * chance at claiming multicast packets. We can not loop back
6231 		 * here from ether_input as a bridge is never a member of a
6232 		 * bridge.
6233 		 */
6234 		VERIFY(bridge_ifp->if_bridge == NULL);
6235 		mc2 = m_dup(m, M_DONTWAIT);
6236 		if (mc2 != NULL) {
6237 			/* Keep the layer3 header aligned */
6238 			int i = min(mc2->m_pkthdr.len, max_protohdr);
6239 			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
6240 		}
6241 		if (mc2 != NULL) {
6242 			/* mark packet as arriving on the bridge */
6243 			mc2->m_pkthdr.rcvif = bridge_ifp;
6244 			mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
6245 			BRIDGE_BPF_MTAP_INPUT(sc, mc2);
6246 			(void) mbuf_setdata(mc2,
6247 			    (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
6248 			    mbuf_len(mc2) - ETHER_HDR_LEN);
6249 			(void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
6250 			(void) ifnet_stat_increment_in(bridge_ifp, 1,
6251 			    mbuf_pkthdr_len(mc2), 0);
6252 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6253 			    "%s mcast for us", bridge_ifp->if_xname);
6254 			dlil_input_packet_list(bridge_ifp, mc2);
6255 		}
6256 
6257 		/* Return the original packet for local processing. */
6258 		return 0;
6259 	}
6260 
6261 	if ((bif->bif_ifflags & IFBIF_STP) &&
6262 	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6263 		BRIDGE_UNLOCK(sc);
6264 		return 0;
6265 	}
6266 
6267 #ifdef DEV_CARP
6268 #define CARP_CHECK_WE_ARE_DST(iface) \
6269 	((iface)->if_carp &&\
6270 	        carp_forus((iface)->if_carp, eh_in.ether_dhost))
6271 #define CARP_CHECK_WE_ARE_SRC(iface) \
6272 	((iface)->if_carp &&\
6273 	        carp_forus((iface)->if_carp, eh_in.ether_shost))
6274 #else
6275 #define CARP_CHECK_WE_ARE_DST(iface) 0
6276 #define CARP_CHECK_WE_ARE_SRC(iface) 0
6277 #endif
6278 
6279 #define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)
6280 
6281 #define PFIL_PHYS(sc, ifp, m)
6282 
6283 #define GRAB_OUR_PACKETS(iface)                                         \
6284 	if ((iface)->if_type == IFT_GIF)                                \
6285 	        continue;                                               \
6286 	/* It is destined for us. */                                    \
6287 	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 ||   \
6288 	    CARP_CHECK_WE_ARE_DST((iface))) {                           \
6289 	        if ((iface)->if_type == IFT_BRIDGE) {                   \
6290 	                BRIDGE_BPF_MTAP_INPUT(sc, m);                   \
6291 	/* Filter on the physical interface. */         \
6292 	                PFIL_PHYS(sc, iface, m);                        \
6293 	        } else {                                                \
6294 	                bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0);      \
6295 	        }                                                       \
6296 	        if (bif->bif_ifflags & IFBIF_LEARNING) {                \
6297 	                error = bridge_rtupdate(sc, eh_in.ether_shost,  \
6298 	                    vlan, bif, 0, IFBAF_DYNAMIC);               \
6299 	                if (error && bif->bif_addrmax) {                \
6300 	                        BRIDGE_UNLOCK(sc);                      \
6301 	                        m_freem(m);                             \
6302 	                        return (EJUSTRETURN);                   \
6303 	                }                                               \
6304 	        }                                                       \
6305 	        BRIDGE_UNLOCK(sc);                                      \
6306 	        inject_input_packet(iface, m);                          \
6307 	        return (EJUSTRETURN);                                   \
6308 	}                                                               \
6309                                                                         \
6310 	/* We just received a packet that we sent out. */               \
6311 	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 ||   \
6312 	    CARP_CHECK_WE_ARE_SRC((iface))) {                           \
6313 	        BRIDGE_UNLOCK(sc);                                      \
6314 	        m_freem(m);                                             \
6315 	        return (EJUSTRETURN);                                   \
6316 	}
6317 
6318 	/*
6319 	 * Unicast.
6320 	 */
6321 
6322 	/* handle MAC-NAT if enabled */
6323 	if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
6324 		ifnet_t dst_if;
6325 		boolean_t is_input = FALSE;
6326 
6327 		dst_if = bridge_mac_nat_input(sc, data, &is_input);
6328 		m = *data;
6329 		if (dst_if == ifp) {
6330 			/* our input packet */
6331 		} else if (dst_if != NULL || m == NULL) {
6332 			BRIDGE_UNLOCK(sc);
6333 			if (dst_if != NULL) {
6334 				ASSERT(m != NULL);
6335 				if (is_input) {
6336 					inject_input_packet(dst_if, m);
6337 				} else {
6338 					(void)bridge_enqueue(bridge_ifp, NULL,
6339 					    dst_if, m,
6340 					    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
6341 				}
6342 			}
6343 			return EJUSTRETURN;
6344 		}
6345 	}
6346 
6347 	/*
6348 	 * If the packet is for the bridge, pass it up for local processing.
6349 	 */
6350 	if (_ether_cmp(eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
6351 	    CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
6352 		bpf_packet_func     bpf_input_func = sc->sc_bpf_input;
6353 
6354 		/*
6355 		 * If the interface is learning, and the source
6356 		 * address is valid and not multicast, record
6357 		 * the address.
6358 		 */
6359 		if (bif->bif_ifflags & IFBIF_LEARNING) {
6360 			(void) bridge_rtupdate(sc, eh_in.ether_shost,
6361 			    vlan, bif, 0, IFBAF_DYNAMIC);
6362 		}
6363 		BRIDGE_UNLOCK(sc);
6364 
6365 		bridge_interface_input(bridge_ifp, m, bpf_input_func);
6366 		return EJUSTRETURN;
6367 	}
6368 
6369 	/*
6370 	 * if the destination of the packet is for the MAC address of
6371 	 * the member interface itself, then we don't need to forward
6372 	 * it -- just pass it back.  Note that it'll likely just be
6373 	 * dropped by the stack, but if something else is bound to
6374 	 * the interface directly (for example, the wireless stats
6375 	 * protocol -- although that actually uses BPF right now),
6376 	 * then it will consume the packet
6377 	 *
6378 	 * ALSO, note that we do this check AFTER checking for the
6379 	 * bridge's own MAC address, because the bridge may be
6380 	 * using the SAME MAC address as one of its interfaces
6381 	 */
6382 	if (is_ifp_mac) {
6383 
6384 #ifdef VERY_VERY_VERY_DIAGNOSTIC
6385 		BRIDGE_LOG(LOG_NOTICE, 0,
6386 		    "not forwarding packet bound for member interface");
6387 #endif
6388 
6389 		BRIDGE_UNLOCK(sc);
6390 		return 0;
6391 	}
6392 
6393 	/* Now check the remaining bridge members. */
6394 	TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
6395 		if (bif2->bif_ifp != ifp) {
6396 			GRAB_OUR_PACKETS(bif2->bif_ifp);
6397 		}
6398 	}
6399 
6400 #undef CARP_CHECK_WE_ARE_DST
6401 #undef CARP_CHECK_WE_ARE_SRC
6402 #undef GRAB_OUR_PACKETS
6403 
6404 	/*
6405 	 * Perform the bridge forwarding function.
6406 	 *
6407 	 * Note that bridge_forward calls BRIDGE_UNLOCK
6408 	 */
6409 	bridge_forward(sc, bif, m);
6410 
6411 	return EJUSTRETURN;
6412 }
6413 
/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	sc       bridge whose member list is walked
 *	sbif     member the frame arrived on, or NULL when the bridge
 *	         interface itself is the sender
 *	m        frame to flood; every visible exit path either frees it
 *	         or hands it to the last eligible member
 *	runfilt  when non-zero (and PF is enabled and IFBF_FILT_MEMBER is
 *	         set on the bridge) each outgoing copy is passed through
 *	         bridge_pf()
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
    struct mbuf *m, int runfilt)
{
	ifnet_t bridge_ifp;
	struct bridge_iflist *dbif;
	struct ifnet * src_if;
	struct mbuf *mc;
	struct mbuf *mc_in;
	struct ifnet *dst_if;
	int error = 0, used = 0;
	boolean_t bridge_if_out;
	ChecksumOperation cksum_op;
	struct mac_nat_record mnr;
	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
	boolean_t translate_mac = FALSE;
	uint32_t sc_filter_flags = 0;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		/* frame was received on a member interface */
		bridge_if_out = FALSE;
		src_if = sbif->bif_ifp;
		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			/* get the translation record while holding the lock */
			translate_mac
			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
			if (m == NULL) {
				/* packet was deallocated */
				BRIDGE_UNLOCK(sc);
				return;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast().
		 */
		bridge_if_out = TRUE;
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		sbif = NULL;
		src_if = NULL;
	}

	/*
	 * NOTE(review): presumably trades the held lock for a reference so
	 * the member list can be walked unlocked; the matching release is
	 * the BRIDGE_UNREF() at the end. On failure the frame is dropped.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		m_freem(m);
		return;
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip STP members that are currently discarding */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/* unicast frames are only flooded to IFBIF_DISCOVER members */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
			continue;
		}

		/*
		 * The last list entry gets the original mbuf instead of a
		 * duplicate; `used' records that so it is not freed again
		 * after the loop. This is only reached when the last entry
		 * passed all of the eligibility checks above.
		 */
		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			mc = m;
			used = 1;
		} else {
			mc = m_dup(m, M_DONTWAIT);
			if (mc == NULL) {
				/* count the failed duplication as an output error */
				(void) ifnet_stat_increment_out(bridge_ifp,
				    0, 0, 1);
				continue;
			}
		}

		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (!bridge_if_out &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			mc_in = m_dup(mc, M_DONTWAIT);
			/* this could fail, but we continue anyways */
		} else {
			mc_in = NULL;
		}

		/* out */
		if (translate_mac && mac_nat_bif == dbif) {
			/* translate the packet without holding the lock */
			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
		}

		sc_filter_flags = sc->sc_filter_flags;
		if (runfilt &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			if (used == 0) {
				/* Keep the layer3 header aligned */
				int i = min(mc->m_pkthdr.len, max_protohdr);
				mc = m_copyup(mc, i, ETHER_ALIGN);
				if (mc == NULL) {
					(void) ifnet_stat_increment_out(
						sc->sc_ifp, 0, 0, 1);
					if (mc_in != NULL) {
						m_freem(mc_in);
						mc_in = NULL;
					}
					continue;
				}
			}
			/*
			 * NOTE(review): mc is not freed here when bridge_pf()
			 * fails, so bridge_pf() presumably consumes it on
			 * error - confirm against its definition.
			 */
			if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
			if (mc == NULL) {
				/* the filter took the packet without reporting an error */
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
		}

		if (mc != NULL) {
			/* verify checksum if necessary */
			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
			    !bif_has_checksum_offload(sbif)) {
				error = bridge_verify_checksum(&mc,
				    &dbif->bif_stats);
				if (error != 0) {
					if (mc != NULL) {
						m_freem(mc);
					}
					mc = NULL;
				}
			}
			if (mc != NULL) {
				(void) bridge_enqueue(bridge_ifp,
				    NULL, dst_if, mc, cksum_op);
			}
		}

		/* in */
		if (mc_in == NULL) {
			continue;
		}
		/*
		 * Deliver the extra copy as input on dst_if: tap it, record
		 * dst_if as the receive interface, remember where the
		 * Ethernet header starts and then advance the data pointer
		 * past it before handing the packet to DLIL.
		 */
		bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
		mbuf_pkthdr_setrcvif(mc_in, dst_if);
		mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
		mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
		    mbuf_len(mc_in) - ETHER_HDR_LEN);
		mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
		dlil_input_packet_list(dst_if, mc_in);
	}
	/* nobody took ownership of the original frame */
	if (used == 0) {
		m_freem(m);
	}


	BRIDGE_UNREF(sc);
}
6606 
6607 /*
6608  * bridge_span:
6609  *
6610  *	Duplicate a packet out one or more interfaces that are in span mode,
6611  *	the original mbuf is unmodified.
6612  */
6613 static void
6614 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6615 {
6616 	struct bridge_iflist *bif;
6617 	struct ifnet *dst_if;
6618 	struct mbuf *mc;
6619 
6620 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6621 		return;
6622 	}
6623 
6624 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6625 		dst_if = bif->bif_ifp;
6626 
6627 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6628 			continue;
6629 		}
6630 
6631 		mc = m_copypacket(m, M_DONTWAIT);
6632 		if (mc == NULL) {
6633 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6634 			continue;
6635 		}
6636 
6637 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6638 		    CHECKSUM_OPERATION_NONE);
6639 	}
6640 }
6641 
6642 
6643 /*
6644  * bridge_rtupdate:
6645  *
6646  *	Add a bridge routing entry.
6647  */
6648 static int
6649 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6650     struct bridge_iflist *bif, int setflags, uint8_t flags)
6651 {
6652 	struct bridge_rtnode *brt;
6653 	int error;
6654 
6655 	BRIDGE_LOCK_ASSERT_HELD(sc);
6656 
6657 	/* Check the source address is valid and not multicast. */
6658 	if (ETHER_IS_MULTICAST(dst) ||
6659 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6660 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6661 		return EINVAL;
6662 	}
6663 
6664 
6665 	/* 802.1p frames map to vlan 1 */
6666 	if (vlan == 0) {
6667 		vlan = 1;
6668 	}
6669 
6670 	/*
6671 	 * A route for this destination might already exist.  If so,
6672 	 * update it, otherwise create a new one.
6673 	 */
6674 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6675 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6676 			sc->sc_brtexceeded++;
6677 			return ENOSPC;
6678 		}
6679 		/* Check per interface address limits (if enabled) */
6680 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6681 			bif->bif_addrexceeded++;
6682 			return ENOSPC;
6683 		}
6684 
6685 		/*
6686 		 * Allocate a new bridge forwarding node, and
6687 		 * initialize the expiration time and Ethernet
6688 		 * address.
6689 		 */
6690 		brt = zalloc_noblock(bridge_rtnode_pool);
6691 		if (brt == NULL) {
6692 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6693 			    "zalloc_nolock failed");
6694 			return ENOMEM;
6695 		}
6696 		bzero(brt, sizeof(struct bridge_rtnode));
6697 
6698 		if (bif->bif_ifflags & IFBIF_STICKY) {
6699 			brt->brt_flags = IFBAF_STICKY;
6700 		} else {
6701 			brt->brt_flags = IFBAF_DYNAMIC;
6702 		}
6703 
6704 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6705 		brt->brt_vlan = vlan;
6706 
6707 
6708 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6709 			zfree(bridge_rtnode_pool, brt);
6710 			return error;
6711 		}
6712 		brt->brt_dst = bif;
6713 		bif->bif_addrcnt++;
6714 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6715 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6716 		    "on %s count %u hashsize %u",
6717 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6718 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6719 		    sc->sc_rthash_size);
6720 	}
6721 
6722 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6723 	    brt->brt_dst != bif) {
6724 		brt->brt_dst->bif_addrcnt--;
6725 		brt->brt_dst = bif;
6726 		brt->brt_dst->bif_addrcnt++;
6727 	}
6728 
6729 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6730 		unsigned long now;
6731 
6732 		now = (unsigned long) net_uptime();
6733 		brt->brt_expire = now + sc->sc_brttimeout;
6734 	}
6735 	if (setflags) {
6736 		brt->brt_flags = flags;
6737 	}
6738 
6739 
6740 	return 0;
6741 }
6742 
6743 /*
6744  * bridge_rtlookup:
6745  *
6746  *	Lookup the destination interface for an address.
6747  */
6748 static struct ifnet *
6749 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6750 {
6751 	struct bridge_rtnode *brt;
6752 
6753 	BRIDGE_LOCK_ASSERT_HELD(sc);
6754 
6755 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6756 		return NULL;
6757 	}
6758 
6759 	return brt->brt_ifp;
6760 }
6761 
6762 /*
6763  * bridge_rttrim:
6764  *
6765  *	Trim the routine table so that we have a number
6766  *	of routing entries less than or equal to the
6767  *	maximum number.
6768  */
6769 static void
6770 bridge_rttrim(struct bridge_softc *sc)
6771 {
6772 	struct bridge_rtnode *brt, *nbrt;
6773 
6774 	BRIDGE_LOCK_ASSERT_HELD(sc);
6775 
6776 	/* Make sure we actually need to do this. */
6777 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6778 		return;
6779 	}
6780 
6781 	/* Force an aging cycle; this might trim enough addresses. */
6782 	bridge_rtage(sc);
6783 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6784 		return;
6785 	}
6786 
6787 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6788 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6789 			bridge_rtnode_destroy(sc, brt);
6790 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6791 				return;
6792 			}
6793 		}
6794 	}
6795 }
6796 
6797 /*
6798  * bridge_aging_timer:
6799  *
6800  *	Aging periodic timer for the bridge routing table.
6801  */
6802 static void
6803 bridge_aging_timer(struct bridge_softc *sc)
6804 {
6805 	BRIDGE_LOCK_ASSERT_HELD(sc);
6806 
6807 	bridge_rtage(sc);
6808 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6809 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6810 		sc->sc_aging_timer.bdc_sc = sc;
6811 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6812 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6813 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6814 	}
6815 }
6816 
6817 /*
6818  * bridge_rtage:
6819  *
6820  *	Perform an aging cycle.
6821  */
6822 static void
6823 bridge_rtage(struct bridge_softc *sc)
6824 {
6825 	struct bridge_rtnode *brt, *nbrt;
6826 	unsigned long now;
6827 
6828 	BRIDGE_LOCK_ASSERT_HELD(sc);
6829 
6830 	now = (unsigned long) net_uptime();
6831 
6832 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6833 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6834 			if (now >= brt->brt_expire) {
6835 				bridge_rtnode_destroy(sc, brt);
6836 			}
6837 		}
6838 	}
6839 	if (sc->sc_mac_nat_bif != NULL) {
6840 		bridge_mac_nat_age_entries(sc, now);
6841 	}
6842 }
6843 
6844 /*
6845  * bridge_rtflush:
6846  *
6847  *	Remove all dynamic addresses from the bridge.
6848  */
6849 static void
6850 bridge_rtflush(struct bridge_softc *sc, int full)
6851 {
6852 	struct bridge_rtnode *brt, *nbrt;
6853 
6854 	BRIDGE_LOCK_ASSERT_HELD(sc);
6855 
6856 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6857 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6858 			bridge_rtnode_destroy(sc, brt);
6859 		}
6860 	}
6861 }
6862 
6863 /*
6864  * bridge_rtdaddr:
6865  *
6866  *	Remove an address from the table.
6867  */
6868 static int
6869 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6870 {
6871 	struct bridge_rtnode *brt;
6872 	int found = 0;
6873 
6874 	BRIDGE_LOCK_ASSERT_HELD(sc);
6875 
6876 	/*
6877 	 * If vlan is zero then we want to delete for all vlans so the lookup
6878 	 * may return more than one.
6879 	 */
6880 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6881 		bridge_rtnode_destroy(sc, brt);
6882 		found = 1;
6883 	}
6884 
6885 	return found ? 0 : ENOENT;
6886 }
6887 
6888 /*
6889  * bridge_rtdelete:
6890  *
6891  *	Delete routes to a specific member interface.
6892  */
6893 static void
6894 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6895 {
6896 	struct bridge_rtnode *brt, *nbrt;
6897 
6898 	BRIDGE_LOCK_ASSERT_HELD(sc);
6899 
6900 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6901 		if (brt->brt_ifp == ifp && (full ||
6902 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6903 			bridge_rtnode_destroy(sc, brt);
6904 		}
6905 	}
6906 }
6907 
6908 /*
6909  * bridge_rtable_init:
6910  *
6911  *	Initialize the route table for this bridge.
6912  */
6913 static int
6914 bridge_rtable_init(struct bridge_softc *sc)
6915 {
6916 	u_int32_t i;
6917 
6918 	sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6919 	    BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6920 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6921 
6922 	for (i = 0; i < sc->sc_rthash_size; i++) {
6923 		LIST_INIT(&sc->sc_rthash[i]);
6924 	}
6925 
6926 	sc->sc_rthash_key = RandomULong();
6927 
6928 	LIST_INIT(&sc->sc_rtlist);
6929 
6930 	return 0;
6931 }
6932 
/*
 * bridge_rthash_delayed_resize:
 *
 *	Resize the routing table hash on a delayed thread call.
 *
 *	Entered with the bridge lock held.  The lock is dropped around
 *	the allocation of the new bucket array and re-taken afterwards;
 *	SCF_RESIZING is set for the duration of that window.  When the
 *	new array is installed, a fresh hash key is chosen and every
 *	entry on sc_rtlist is re-inserted into the new buckets.
 */
static void
bridge_rthash_delayed_resize(struct bridge_softc *sc)
{
	u_int32_t new_rthash_size = 0;
	u_int32_t old_rthash_size = 0;
	struct _bridge_rtnode_list *new_rthash = NULL;
	struct _bridge_rtnode_list *old_rthash = NULL;
	u_int32_t i;
	struct bridge_rtnode *brt;
	int error = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/*
	 * Four entries per hash bucket is our ideal load factor
	 *
	 * NOTE(review): this early exit reaches `out' with error == 0, so
	 * the "new size" debug message is logged even though nothing was
	 * resized, and kfree_type() is called with a NULL pointer and a
	 * size of zero.
	 */
	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
		goto out;
	}

	/*
	 * Doubling the number of hash buckets may be too simplistic
	 * especially when facing a spike of new entries
	 */
	new_rthash_size = sc->sc_rthash_size * 2;

	/* Flag the resize in progress before letting go of the lock. */
	sc->sc_flags |= SCF_RESIZING;
	BRIDGE_UNLOCK(sc);

	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
	    Z_WAITOK | Z_ZERO);

	BRIDGE_LOCK(sc);
	sc->sc_flags &= ~SCF_RESIZING;

	if (new_rthash == NULL) {
		error = ENOMEM;
		goto out;
	}
	/* The bridge may have started detaching while the lock was dropped. */
	if ((sc->sc_flags & SCF_DETACHING)) {
		error = ENODEV;
		goto out;
	}
	/*
	 * Fail safe from here on
	 */
	old_rthash = sc->sc_rthash;
	old_rthash_size = sc->sc_rthash_size;
	sc->sc_rthash = new_rthash;
	sc->sc_rthash_size = new_rthash_size;

	/*
	 * Get a new key to force entries to be shuffled around to reduce
	 * the likelihood they will land in the same buckets
	 */
	sc->sc_rthash_key = RandomULong();

	for (i = 0; i < sc->sc_rthash_size; i++) {
		LIST_INIT(&sc->sc_rthash[i]);
	}

	/* Unlink each entry from its old bucket and hash it into the new array. */
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
		LIST_REMOVE(brt, brt_hash);
		(void) bridge_rtnode_hash(sc, brt);
	}
out:
	/* Success releases the old array, failure releases the unused new one. */
	if (error == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
		    "%s new size %u",
		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
	} else {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
		    "%s failed %d", sc->sc_ifp->if_xname, error);
		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
	}
}
7015 
7016 /*
7017  * Resize the number of hash buckets based on the load factor
7018  * Currently only grow
7019  * Failing to resize the hash table is not fatal
7020  */
7021 static void
7022 bridge_rthash_resize(struct bridge_softc *sc)
7023 {
7024 	BRIDGE_LOCK_ASSERT_HELD(sc);
7025 
7026 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
7027 		return;
7028 	}
7029 
7030 	/*
7031 	 * Four entries per hash bucket is our ideal load factor
7032 	 */
7033 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
7034 		return;
7035 	}
7036 	/*
7037 	 * Hard limit on the size of the routing hash table
7038 	 */
7039 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
7040 		return;
7041 	}
7042 
7043 	sc->sc_resize_call.bdc_sc = sc;
7044 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
7045 	bridge_schedule_delayed_call(&sc->sc_resize_call);
7046 }
7047 
7048 /*
7049  * bridge_rtable_fini:
7050  *
7051  *	Deconstruct the route table for this bridge.
7052  */
7053 static void
7054 bridge_rtable_fini(struct bridge_softc *sc)
7055 {
7056 	KASSERT(sc->sc_brtcnt == 0,
7057 	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
7058 	kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
7059 	    sc->sc_rthash);
7060 	sc->sc_rthash = NULL;
7061 	sc->sc_rthash_size = 0;
7062 }
7063 
7064 /*
7065  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
7066  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
7067  */
7068 #define mix(a, b, c)                                                    \
7069 do {                                                                    \
7070 	a -= b; a -= c; a ^= (c >> 13);                                 \
7071 	b -= c; b -= a; b ^= (a << 8);                                  \
7072 	c -= a; c -= b; c ^= (b >> 13);                                 \
7073 	a -= b; a -= c; a ^= (c >> 12);                                 \
7074 	b -= c; b -= a; b ^= (a << 16);                                 \
7075 	c -= a; c -= b; c ^= (b >> 5);                                  \
7076 	a -= b; a -= c; a ^= (c >> 3);                                  \
7077 	b -= c; b -= a; b ^= (a << 10);                                 \
7078 	c -= a; c -= b; c ^= (b >> 15);                                 \
7079 } while ( /*CONSTCOND*/ 0)
7080 
7081 static __inline uint32_t
7082 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
7083 {
7084 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7085 
7086 	b += addr[5] << 8;
7087 	b += addr[4];
7088 	a += addr[3] << 24;
7089 	a += addr[2] << 16;
7090 	a += addr[1] << 8;
7091 	a += addr[0];
7092 
7093 	mix(a, b, c);
7094 
7095 	return c & BRIDGE_RTHASH_MASK(sc);
7096 }
7097 
7098 #undef mix
7099 
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns zero when
 *	all ETHER_ADDR_LEN bytes match; otherwise the difference
 *	a[i] - b[i] at the first position where they differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}

	return 0;
}
7111 
7112 /*
7113  * bridge_rtnode_lookup:
7114  *
7115  *	Look up a bridge route node for the specified destination. Compare the
7116  *	vlan id or if zero then just return the first match.
7117  */
7118 static struct bridge_rtnode *
7119 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
7120     uint16_t vlan)
7121 {
7122 	struct bridge_rtnode *brt;
7123 	uint32_t hash;
7124 	int dir;
7125 
7126 	BRIDGE_LOCK_ASSERT_HELD(sc);
7127 
7128 	hash = bridge_rthash(sc, addr);
7129 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7130 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7131 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7132 			return brt;
7133 		}
7134 		if (dir > 0) {
7135 			return NULL;
7136 		}
7137 	}
7138 
7139 	return NULL;
7140 }
7141 
7142 /*
7143  * bridge_rtnode_hash:
7144  *
7145  *	Insert the specified bridge node into the route hash table.
7146  *	This is used when adding a new node or to rehash when resizing
7147  *	the hash table
7148  */
7149 static int
7150 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7151 {
7152 	struct bridge_rtnode *lbrt;
7153 	uint32_t hash;
7154 	int dir;
7155 
7156 	BRIDGE_LOCK_ASSERT_HELD(sc);
7157 
7158 	hash = bridge_rthash(sc, brt->brt_addr);
7159 
7160 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7161 	if (lbrt == NULL) {
7162 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7163 		goto out;
7164 	}
7165 
7166 	do {
7167 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7168 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7169 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7170 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7171 			    sc->sc_ifp->if_xname,
7172 			    brt->brt_addr[0], brt->brt_addr[1],
7173 			    brt->brt_addr[2], brt->brt_addr[3],
7174 			    brt->brt_addr[4], brt->brt_addr[5]);
7175 			return EEXIST;
7176 		}
7177 		if (dir > 0) {
7178 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7179 			goto out;
7180 		}
7181 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7182 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7183 			goto out;
7184 		}
7185 		lbrt = LIST_NEXT(lbrt, brt_hash);
7186 	} while (lbrt != NULL);
7187 
7188 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7189 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7190 	    sc->sc_ifp->if_xname,
7191 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7192 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7193 out:
7194 	return 0;
7195 }
7196 
7197 /*
7198  * bridge_rtnode_insert:
7199  *
7200  *	Insert the specified bridge node into the route table.  We
7201  *	assume the entry is not already in the table.
7202  */
7203 static int
7204 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7205 {
7206 	int error;
7207 
7208 	error = bridge_rtnode_hash(sc, brt);
7209 	if (error != 0) {
7210 		return error;
7211 	}
7212 
7213 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7214 	sc->sc_brtcnt++;
7215 
7216 	bridge_rthash_resize(sc);
7217 
7218 	return 0;
7219 }
7220 
7221 /*
7222  * bridge_rtnode_destroy:
7223  *
7224  *	Destroy a bridge rtnode.
7225  */
7226 static void
7227 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7228 {
7229 	BRIDGE_LOCK_ASSERT_HELD(sc);
7230 
7231 	LIST_REMOVE(brt, brt_hash);
7232 
7233 	LIST_REMOVE(brt, brt_list);
7234 	sc->sc_brtcnt--;
7235 	brt->brt_dst->bif_addrcnt--;
7236 	zfree(bridge_rtnode_pool, brt);
7237 }
7238 
7239 #if BRIDGESTP
7240 /*
7241  * bridge_rtable_expire:
7242  *
7243  *	Set the expiry time for all routes on an interface.
7244  */
7245 static void
7246 bridge_rtable_expire(struct ifnet *ifp, int age)
7247 {
7248 	struct bridge_softc *sc = ifp->if_bridge;
7249 	struct bridge_rtnode *brt;
7250 
7251 	BRIDGE_LOCK(sc);
7252 
7253 	/*
7254 	 * If the age is zero then flush, otherwise set all the expiry times to
7255 	 * age for the interface
7256 	 */
7257 	if (age == 0) {
7258 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7259 	} else {
7260 		unsigned long now;
7261 
7262 		now = (unsigned long) net_uptime();
7263 
7264 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7265 			/* Cap the expiry time to 'age' */
7266 			if (brt->brt_ifp == ifp &&
7267 			    brt->brt_expire > now + age &&
7268 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7269 				brt->brt_expire = now + age;
7270 			}
7271 		}
7272 	}
7273 	BRIDGE_UNLOCK(sc);
7274 }
7275 
7276 /*
7277  * bridge_state_change:
7278  *
7279  *	Callback from the bridgestp code when a port changes states.
7280  */
7281 static void
7282 bridge_state_change(struct ifnet *ifp, int state)
7283 {
7284 	struct bridge_softc *sc = ifp->if_bridge;
7285 	static const char *stpstates[] = {
7286 		"disabled",
7287 		"listening",
7288 		"learning",
7289 		"forwarding",
7290 		"blocking",
7291 		"discarding"
7292 	};
7293 
7294 	if (log_stp) {
7295 		log(LOG_NOTICE, "%s: state changed to %s on %s",
7296 		    sc->sc_ifp->if_xname,
7297 		    stpstates[state], ifp->if_xname);
7298 	}
7299 }
7300 #endif /* BRIDGESTP */
7301 
7302 /*
7303  * bridge_set_bpf_tap:
7304  *
7305  *	Sets ups the BPF callbacks.
7306  */
7307 static errno_t
7308 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7309 {
7310 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7311 
7312 	/* TBD locking */
7313 	if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7314 		return ENODEV;
7315 	}
7316 	switch (mode) {
7317 	case BPF_TAP_DISABLE:
7318 		sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7319 		break;
7320 
7321 	case BPF_TAP_INPUT:
7322 		sc->sc_bpf_input = bpf_callback;
7323 		break;
7324 
7325 	case BPF_TAP_OUTPUT:
7326 		sc->sc_bpf_output = bpf_callback;
7327 		break;
7328 
7329 	case BPF_TAP_INPUT_OUTPUT:
7330 		sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7331 		break;
7332 
7333 	default:
7334 		break;
7335 	}
7336 
7337 	return 0;
7338 }
7339 
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached.  Releases everything
 *	that hangs off the bridge softc and finally the softc itself, so
 *	`sc' must not be referenced once this returns.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* Unlink this bridge from the list protected by bridge_list_mtx. */
	lck_mtx_lock(&bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(&bridge_list_mtx);

	/* NOTE(review): presumably drops a reference taken at attach time - confirm */
	ifnet_release(ifp);

	/* The per-bridge mutex lives inside sc; destroy it before freeing sc. */
	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
	kfree_type(struct bridge_softc, sc);
}
7366 
7367 /*
7368  * bridge_bpf_input:
7369  *
7370  *	Invoke the input BPF callback if enabled
7371  */
7372 static errno_t
7373 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7374 {
7375 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7376 	bpf_packet_func     input_func = sc->sc_bpf_input;
7377 
7378 	if (input_func != NULL) {
7379 		if (mbuf_pkthdr_rcvif(m) != ifp) {
7380 			BRIDGE_LOG(LOG_NOTICE, 0,
7381 			    "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7382 			    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7383 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7384 		}
7385 		(*input_func)(ifp, m);
7386 	}
7387 	return 0;
7388 }
7389 
7390 /*
7391  * bridge_bpf_output:
7392  *
7393  *	Invoke the output BPF callback if enabled
7394  */
7395 static errno_t
7396 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7397 {
7398 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7399 	bpf_packet_func     output_func = sc->sc_bpf_output;
7400 
7401 	if (output_func != NULL) {
7402 		(*output_func)(ifp, m);
7403 	}
7404 	return 0;
7405 }
7406 
7407 /*
7408  * bridge_link_event:
7409  *
7410  *	Report a data link event on an interface
7411  */
7412 static void
7413 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7414 {
7415 	struct event {
7416 		u_int32_t ifnet_family;
7417 		u_int32_t unit;
7418 		char if_name[IFNAMSIZ];
7419 	};
7420 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7421 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7422 	struct event *data = (struct event *)(header + 1);
7423 
7424 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7425 	    "%s event_code %u - %s", ifp->if_xname,
7426 	    event_code, dlil_kev_dl_code_str(event_code));
7427 	header->total_size   = sizeof(message);
7428 	header->vendor_code  = KEV_VENDOR_APPLE;
7429 	header->kev_class    = KEV_NETWORK_CLASS;
7430 	header->kev_subclass = KEV_DL_SUBCLASS;
7431 	header->event_code   = event_code;
7432 	data->ifnet_family   = ifnet_family(ifp);
7433 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7434 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7435 	ifnet_event(ifp, header);
7436 }
7437 
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter rejection: increment the
 *	bridge_hostfilter_stats counter named by `reason', emit a debug
 *	log carrying the supplied function name and line followed by the
 *	counter name, and set the variable `error' to EINVAL.
 *
 *	The expansion is a bare brace block that assigns to `error' by
 *	name, so it must be used as a complete statement inside a
 *	function that declares an `error' variable.
 */
#define BRIDGE_HF_DROP(reason, func, line) {                            \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d" #reason, func, line);                \
	        error = EINVAL;                                         \
	}
7444 
7445 /*
7446  * Make sure this is a DHCP or Bootp request that match the host filter
7447  */
7448 static int
7449 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7450 {
7451 	int error = EINVAL;
7452 	struct dhcp dhcp;
7453 
7454 	/*
7455 	 * Note: We use the dhcp structure because bootp structure definition
7456 	 * is larger and some vendors do not pad the request
7457 	 */
7458 	error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7459 	if (error != 0) {
7460 		BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7461 		goto done;
7462 	}
7463 	if (dhcp.dp_op != BOOTREQUEST) {
7464 		BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7465 		goto done;
7466 	}
7467 	/*
7468 	 * The hardware address must be an exact match
7469 	 */
7470 	if (dhcp.dp_htype != ARPHRD_ETHER) {
7471 		BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7472 		goto done;
7473 	}
7474 	if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7475 		BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7476 		goto done;
7477 	}
7478 	if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7479 	    ETHER_ADDR_LEN) != 0) {
7480 		BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7481 		goto done;
7482 	}
7483 	/*
7484 	 * Client address must match the host address or be not specified
7485 	 */
7486 	if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7487 	    dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7488 		BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7489 		goto done;
7490 	}
7491 	error = 0;
7492 done:
7493 	return error;
7494 }
7495 
7496 static int
7497 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7498 {
7499 	int error = EINVAL;
7500 	struct ether_header *eh;
7501 	static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7502 	mbuf_t m = *data;
7503 
7504 	eh = mtod(m, struct ether_header *);
7505 
7506 	/*
7507 	 * Restrict the source hardware address
7508 	 */
7509 	if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
7510 	    bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7511 	    ETHER_ADDR_LEN) != 0) {
7512 		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7513 		goto done;
7514 	}
7515 
7516 	/*
7517 	 * Restrict Ethernet protocols to ARP and IP/IPv6
7518 	 */
7519 	if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7520 		struct ether_arp *ea;
7521 		size_t minlen = sizeof(struct ether_header) +
7522 		    sizeof(struct ether_arp);
7523 
7524 		/*
7525 		 * Make the Ethernet and ARP headers contiguous
7526 		 */
7527 		if (mbuf_pkthdr_len(m) < minlen) {
7528 			BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7529 			goto done;
7530 		}
7531 		if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7532 			BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7533 			    __func__, __LINE__);
7534 			goto done;
7535 		}
7536 		m = *data;
7537 
7538 		/*
7539 		 * Verify this is an ethernet/ip arp
7540 		 */
7541 		eh = mtod(m, struct ether_header *);
7542 		ea = (struct ether_arp *)(eh + 1);
7543 		if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7544 			BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7545 			    __func__, __LINE__);
7546 			goto done;
7547 		}
7548 		if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7549 			BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7550 			    __func__, __LINE__);
7551 			goto done;
7552 		}
7553 		/*
7554 		 * Verify the address lengths are correct
7555 		 */
7556 		if (ea->arp_hln != ETHER_ADDR_LEN) {
7557 			BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7558 			goto done;
7559 		}
7560 		if (ea->arp_pln != sizeof(struct in_addr)) {
7561 			BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7562 			    __func__, __LINE__);
7563 			goto done;
7564 		}
7565 		/*
7566 		 * Allow only ARP request or ARP reply
7567 		 */
7568 		if (ea->arp_op != htons(ARPOP_REQUEST) &&
7569 		    ea->arp_op != htons(ARPOP_REPLY)) {
7570 			BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7571 			goto done;
7572 		}
7573 		if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7574 			/*
7575 			 * Verify source hardware address matches
7576 			 */
7577 			if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7578 			    ETHER_ADDR_LEN) != 0) {
7579 				BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7580 				goto done;
7581 			}
7582 		}
7583 		if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7584 			/*
7585 			 * Verify source protocol address:
7586 			 * May be null for an ARP probe
7587 			 */
7588 			if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7589 			    sizeof(struct in_addr)) != 0 &&
7590 			    bcmp(ea->arp_spa, &inaddr_any,
7591 			    sizeof(struct in_addr)) != 0) {
7592 				BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7593 				goto done;
7594 			}
7595 		}
7596 		bridge_hostfilter_stats.brhf_arp_ok += 1;
7597 		error = 0;
7598 	} else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7599 		size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7600 		struct ip iphdr;
7601 		size_t offset;
7602 
7603 		/*
7604 		 * Make the Ethernet and IP headers contiguous
7605 		 */
7606 		if (mbuf_pkthdr_len(m) < minlen) {
7607 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7608 			goto done;
7609 		}
7610 		offset = sizeof(struct ether_header);
7611 		error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7612 		if (error != 0) {
7613 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7614 			goto done;
7615 		}
7616 		if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7617 			/*
7618 			 * Verify the source IP address
7619 			 */
7620 			if (iphdr.ip_p == IPPROTO_UDP) {
7621 				struct udphdr udp;
7622 
7623 				minlen += sizeof(struct udphdr);
7624 				if (mbuf_pkthdr_len(m) < minlen) {
7625 					BRIDGE_HF_DROP(brhf_ip_too_small,
7626 					    __func__, __LINE__);
7627 					goto done;
7628 				}
7629 
7630 				/*
7631 				 * Allow all zero addresses for DHCP requests
7632 				 */
7633 				if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7634 				    iphdr.ip_src.s_addr != INADDR_ANY) {
7635 					BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7636 					    __func__, __LINE__);
7637 					goto done;
7638 				}
7639 				offset = sizeof(struct ether_header) +
7640 				    (IP_VHL_HL(iphdr.ip_vhl) << 2);
7641 				error = mbuf_copydata(m, offset,
7642 				    sizeof(struct udphdr), &udp);
7643 				if (error != 0) {
7644 					BRIDGE_HF_DROP(brhf_ip_too_small,
7645 					    __func__, __LINE__);
7646 					goto done;
7647 				}
7648 				/*
7649 				 * Either it's a Bootp/DHCP packet that we like or
7650 				 * it's a UDP packet from the host IP as source address
7651 				 */
7652 				if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7653 				    udp.uh_dport == htons(IPPORT_BOOTPS)) {
7654 					minlen += sizeof(struct dhcp);
7655 					if (mbuf_pkthdr_len(m) < minlen) {
7656 						BRIDGE_HF_DROP(brhf_ip_too_small,
7657 						    __func__, __LINE__);
7658 						goto done;
7659 					}
7660 					offset += sizeof(struct udphdr);
7661 					error = bridge_dhcp_filter(bif, m, offset);
7662 					if (error != 0) {
7663 						goto done;
7664 					}
7665 				} else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7666 					BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7667 					    __func__, __LINE__);
7668 					goto done;
7669 				}
7670 			} else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr) {
7671 				assert(bif->bif_hf_ipsrc.s_addr != INADDR_ANY);
7672 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7673 				goto done;
7674 			}
7675 		}
7676 		/*
7677 		 * Allow only boring IP protocols
7678 		 */
7679 		if (iphdr.ip_p != IPPROTO_TCP &&
7680 		    iphdr.ip_p != IPPROTO_UDP &&
7681 		    iphdr.ip_p != IPPROTO_ICMP &&
7682 		    iphdr.ip_p != IPPROTO_IGMP) {
7683 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7684 			goto done;
7685 		}
7686 		bridge_hostfilter_stats.brhf_ip_ok += 1;
7687 		error = 0;
7688 	} else if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
7689 		size_t minlen = sizeof(struct ether_header) + sizeof(struct ip6_hdr);
7690 		struct ip6_hdr ip6hdr;
7691 		size_t offset;
7692 
7693 		/*
7694 		 * Make the Ethernet and IP headers contiguous
7695 		 */
7696 		if (mbuf_pkthdr_len(m) < minlen) {
7697 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7698 			goto done;
7699 		}
7700 		offset = sizeof(struct ether_header);
7701 		error = mbuf_copydata(m, offset, sizeof(struct ip6_hdr), &ip6hdr);
7702 		if (error != 0) {
7703 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7704 			goto done;
7705 		}
7706 		/*
7707 		 * Allow only boring IPv6 protocols
7708 		 */
7709 		if (ip6hdr.ip6_nxt != IPPROTO_TCP &&
7710 		    ip6hdr.ip6_nxt != IPPROTO_UDP &&
7711 		    ip6hdr.ip6_nxt != IPPROTO_ICMPV6) {
7712 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7713 			goto done;
7714 		}
7715 		bridge_hostfilter_stats.brhf_ip_ok += 1;
7716 		error = 0;
7717 	} else {
7718 		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7719 		goto done;
7720 	}
7721 done:
7722 	if (error != 0) {
7723 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7724 			if (m) {
7725 				brlog_mbuf_data(m, 0,
7726 				    sizeof(struct ether_header) +
7727 				    sizeof(struct ip));
7728 			}
7729 		}
7730 
7731 		if (m != NULL) {
7732 			m_freem(m);
7733 		}
7734 	}
7735 	return error;
7736 }
7737 
7738 /*
7739  * MAC NAT
7740  */
7741 
7742 static errno_t
7743 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7744 {
7745 	errno_t         error = 0;
7746 
7747 	BRIDGE_LOCK_ASSERT_HELD(sc);
7748 
7749 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7750 		error = EINVAL;
7751 		goto done;
7752 	}
7753 	if (sc->sc_mac_nat_bif != NULL) {
7754 		if (sc->sc_mac_nat_bif != bif) {
7755 			error = EBUSY;
7756 		}
7757 		goto done;
7758 	}
7759 	sc->sc_mac_nat_bif = bif;
7760 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7761 	bridge_mac_nat_populate_entries(sc);
7762 
7763 done:
7764 	return error;
7765 }
7766 
7767 static void
7768 bridge_mac_nat_disable(struct bridge_softc *sc)
7769 {
7770 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7771 
7772 	assert(mac_nat_bif != NULL);
7773 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7774 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7775 	sc->sc_mac_nat_bif = NULL;
7776 	return;
7777 }
7778 
7779 static void
7780 mac_nat_entry_print2(struct mac_nat_entry *mne,
7781     char *ifname, const char *msg1, const char *msg2)
7782 {
7783 	int             af;
7784 	char            etopbuf[24];
7785 	char            ntopbuf[MAX_IPv6_STR_LEN];
7786 	const char      *space;
7787 
7788 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7789 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7790 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7791 	if (msg2 == NULL) {
7792 		msg2 = "";
7793 		space = "";
7794 	} else {
7795 		space = " ";
7796 	}
7797 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7798 	    "%s %s%s%s %p (%s, %s, %s)",
7799 	    ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7800 	    ntopbuf, etopbuf);
7801 }
7802 
7803 static void
7804 mac_nat_entry_print(struct mac_nat_entry *mne,
7805     char *ifname, const char *msg)
7806 {
7807 	mac_nat_entry_print2(mne, ifname, msg, NULL);
7808 }
7809 
7810 static struct mac_nat_entry *
7811 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7812 {
7813 	struct mac_nat_entry    *mne;
7814 	struct mac_nat_entry    *ret_mne = NULL;
7815 
7816 	if (af == AF_INET) {
7817 		in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7818 
7819 		LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7820 			if (mne->mne_ip.s_addr == s_addr) {
7821 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7822 					mac_nat_entry_print(mne, sc->sc_if_xname,
7823 					    "found");
7824 				}
7825 				ret_mne = mne;
7826 				break;
7827 			}
7828 		}
7829 	} else {
7830 		const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7831 
7832 		LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7833 			if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7834 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7835 					mac_nat_entry_print(mne, sc->sc_if_xname,
7836 					    "found");
7837 				}
7838 				ret_mne = mne;
7839 				break;
7840 			}
7841 		}
7842 	}
7843 	return ret_mne;
7844 }
7845 
7846 static void
7847 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7848     struct mac_nat_entry *mne, const char *reason)
7849 {
7850 	LIST_REMOVE(mne, mne_list);
7851 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7852 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7853 	}
7854 	zfree(bridge_mne_pool, mne);
7855 	sc->sc_mne_count--;
7856 }
7857 
7858 static struct mac_nat_entry *
7859 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7860     struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7861 {
7862 	struct mac_nat_entry_list *list;
7863 	struct mac_nat_entry *mne;
7864 
7865 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7866 		sc->sc_mne_allocation_failures++;
7867 		return NULL;
7868 	}
7869 	mne = zalloc_noblock(bridge_mne_pool);
7870 	if (mne == NULL) {
7871 		sc->sc_mne_allocation_failures++;
7872 		return NULL;
7873 	}
7874 	sc->sc_mne_count++;
7875 	bzero(mne, sizeof(*mne));
7876 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7877 	mne->mne_bif = bif;
7878 	if (af == AF_INET) {
7879 		bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7880 		list = &sc->sc_mne_list;
7881 	} else {
7882 		bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7883 		mne->mne_flags |= MNE_FLAGS_IPV6;
7884 		list = &sc->sc_mne_list_v6;
7885 	}
7886 	LIST_INSERT_HEAD(list, mne, mne_list);
7887 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7888 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7889 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7890 	}
7891 	return mne;
7892 }
7893 
7894 static struct mac_nat_entry *
7895 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7896     struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7897 {
7898 	struct mac_nat_entry *mne;
7899 
7900 	mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7901 	if (mne != NULL) {
7902 		struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7903 
7904 		if (mne->mne_bif == mac_nat_bif) {
7905 			/* the MAC NAT interface takes precedence */
7906 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7907 				if (mne->mne_bif != bif) {
7908 					mac_nat_entry_print2(mne,
7909 					    sc->sc_if_xname, "reject",
7910 					    bif->bif_ifp->if_xname);
7911 				}
7912 			}
7913 		} else if (mne->mne_bif != bif) {
7914 			const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7915 
7916 			mne->mne_bif = bif;
7917 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7918 				mac_nat_entry_print2(mne,
7919 				    sc->sc_if_xname, "replaced",
7920 				    old_if);
7921 			}
7922 			bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7923 		}
7924 		mne->mne_expire = (unsigned long)net_uptime() +
7925 		    sc->sc_brttimeout;
7926 	} else {
7927 		mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7928 	}
7929 	return mne;
7930 }
7931 
7932 static void
7933 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7934     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7935 {
7936 	struct mac_nat_entry *mne;
7937 	struct mac_nat_entry *tmne;
7938 
7939 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7940 		if (bif != NULL && mne->mne_bif != bif) {
7941 			continue;
7942 		}
7943 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7944 	}
7945 }
7946 
7947 /*
7948  * bridge_mac_nat_flush_entries:
7949  *
7950  * Flush MAC NAT entries for the specified member. Flush all entries if
7951  * the member is the one that requires MAC NAT, otherwise just flush the
7952  * ones for the specified member.
7953  */
7954 static void
7955 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7956 {
7957 	struct bridge_iflist *flush_bif;
7958 
7959 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7960 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7961 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7962 }
7963 
7964 static void
7965 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7966 {
7967 	errno_t                 error;
7968 	ifnet_t                 ifp;
7969 	ifaddr_t                *list;
7970 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
7971 
7972 	assert(mac_nat_bif != NULL);
7973 	ifp = mac_nat_bif->bif_ifp;
7974 	error = ifnet_get_address_list(ifp, &list);
7975 	if (error != 0) {
7976 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7977 		    "ifnet_get_address_list(%s) failed %d",
7978 		    ifp->if_xname, error);
7979 		return;
7980 	}
7981 	for (ifaddr_t *scan = list; *scan != NULL; scan++) {
7982 		sa_family_t     af;
7983 		void            *ip;
7984 
7985 		union {
7986 			struct sockaddr         sa;
7987 			struct sockaddr_in      sin;
7988 			struct sockaddr_in6     sin6;
7989 		} u;
7990 		af = ifaddr_address_family(*scan);
7991 		switch (af) {
7992 		case AF_INET:
7993 		case AF_INET6:
7994 			error = ifaddr_address(*scan, &u.sa, sizeof(u));
7995 			if (error != 0) {
7996 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7997 				    "ifaddr_address failed %d",
7998 				    error);
7999 				break;
8000 			}
8001 			if (af == AF_INET) {
8002 				ip = (void *)&u.sin.sin_addr;
8003 			} else {
8004 				if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
8005 					/* remove scope ID */
8006 					u.sin6.sin6_addr.s6_addr16[1] = 0;
8007 				}
8008 				ip = (void *)&u.sin6.sin6_addr;
8009 			}
8010 			bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
8011 			    (uint8_t *)IF_LLADDR(ifp));
8012 			break;
8013 		default:
8014 			break;
8015 		}
8016 	}
8017 	ifnet_free_address_list(list);
8018 	return;
8019 }
8020 
8021 static void
8022 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
8023     struct mac_nat_entry_list *list, unsigned long now)
8024 {
8025 	struct mac_nat_entry *mne;
8026 	struct mac_nat_entry *tmne;
8027 
8028 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
8029 		if (now >= mne->mne_expire) {
8030 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
8031 		}
8032 	}
8033 }
8034 
8035 static void
8036 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
8037 {
8038 	if (sc->sc_mac_nat_bif == NULL) {
8039 		return;
8040 	}
8041 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
8042 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
8043 }
8044 
8045 static const char *
8046 get_in_out_string(boolean_t is_output)
8047 {
8048 	return is_output ? "OUT" : "IN";
8049 }
8050 
8051 /*
8052  * is_valid_arp_packet:
8053  *	Verify that this is a valid ARP packet.
8054  *
8055  *	Returns TRUE if the packet is valid, FALSE otherwise.
8056  */
8057 static boolean_t
8058 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
8059     struct ether_header **eh_p, struct ether_arp **ea_p)
8060 {
8061 	struct ether_arp *ea;
8062 	struct ether_header *eh;
8063 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
8064 	boolean_t is_valid = FALSE;
8065 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8066 
8067 	if (mbuf_pkthdr_len(*data) < minlen) {
8068 		BRIDGE_LOG(LOG_DEBUG, flags,
8069 		    "ARP %s short frame %lu < %lu",
8070 		    get_in_out_string(is_output),
8071 		    mbuf_pkthdr_len(*data), minlen);
8072 		goto done;
8073 	}
8074 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8075 		BRIDGE_LOG(LOG_DEBUG, flags,
8076 		    "ARP %s size %lu mbuf_pullup fail",
8077 		    get_in_out_string(is_output),
8078 		    minlen);
8079 		*data = NULL;
8080 		goto done;
8081 	}
8082 
8083 	/* validate ARP packet */
8084 	eh = mtod(*data, struct ether_header *);
8085 	ea = (struct ether_arp *)(eh + 1);
8086 	if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
8087 		BRIDGE_LOG(LOG_DEBUG, flags,
8088 		    "ARP %s htype not ethernet",
8089 		    get_in_out_string(is_output));
8090 		goto done;
8091 	}
8092 	if (ea->arp_hln != ETHER_ADDR_LEN) {
8093 		BRIDGE_LOG(LOG_DEBUG, flags,
8094 		    "ARP %s hlen not ethernet",
8095 		    get_in_out_string(is_output));
8096 		goto done;
8097 	}
8098 	if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
8099 		BRIDGE_LOG(LOG_DEBUG, flags,
8100 		    "ARP %s ptype not IP",
8101 		    get_in_out_string(is_output));
8102 		goto done;
8103 	}
8104 	if (ea->arp_pln != sizeof(struct in_addr)) {
8105 		BRIDGE_LOG(LOG_DEBUG, flags,
8106 		    "ARP %s plen not IP",
8107 		    get_in_out_string(is_output));
8108 		goto done;
8109 	}
8110 	is_valid = TRUE;
8111 	*ea_p = ea;
8112 	*eh_p = eh;
8113 done:
8114 	return is_valid;
8115 }
8116 
8117 static struct mac_nat_entry *
8118 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
8119 {
8120 	struct ether_arp        *ea;
8121 	struct ether_header     *eh;
8122 	struct mac_nat_entry    *mne = NULL;
8123 	u_short                 op;
8124 	struct in_addr          tpa;
8125 
8126 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
8127 		goto done;
8128 	}
8129 	op = ntohs(ea->arp_op);
8130 	switch (op) {
8131 	case ARPOP_REQUEST:
8132 	case ARPOP_REPLY:
8133 		/* only care about REQUEST and REPLY */
8134 		break;
8135 	default:
8136 		goto done;
8137 	}
8138 
8139 	/* check the target IP address for a NAT entry */
8140 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
8141 	if (tpa.s_addr != 0) {
8142 		mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
8143 	}
8144 	if (mne != NULL) {
8145 		if (op == ARPOP_REPLY) {
8146 			/* translate the MAC address */
8147 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8148 				char    mac_src[24];
8149 				char    mac_dst[24];
8150 
8151 				ether_ntop(mac_src, sizeof(mac_src),
8152 				    ea->arp_tha);
8153 				ether_ntop(mac_dst, sizeof(mac_dst),
8154 				    mne->mne_mac);
8155 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8156 				    "%s %s ARP %s -> %s",
8157 				    sc->sc_if_xname,
8158 				    mne->mne_bif->bif_ifp->if_xname,
8159 				    mac_src, mac_dst);
8160 			}
8161 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
8162 		}
8163 	} else {
8164 		/* handle conflicting ARP (sender matches mne) */
8165 		struct in_addr spa;
8166 
8167 		bcopy(ea->arp_spa, &spa, sizeof(spa));
8168 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
8169 			/* check the source IP for a NAT entry */
8170 			mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
8171 		}
8172 	}
8173 
8174 done:
8175 	return mne;
8176 }
8177 
8178 static boolean_t
8179 bridge_mac_nat_arp_output(struct bridge_softc *sc,
8180     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8181 {
8182 	struct ether_arp        *ea;
8183 	struct ether_header     *eh;
8184 	struct in_addr          ip;
8185 	struct mac_nat_entry    *mne = NULL;
8186 	u_short                 op;
8187 	boolean_t               translate = FALSE;
8188 
8189 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
8190 		goto done;
8191 	}
8192 	op = ntohs(ea->arp_op);
8193 	switch (op) {
8194 	case ARPOP_REQUEST:
8195 	case ARPOP_REPLY:
8196 		/* only care about REQUEST and REPLY */
8197 		break;
8198 	default:
8199 		goto done;
8200 	}
8201 
8202 	bcopy(ea->arp_spa, &ip, sizeof(ip));
8203 	if (ip.s_addr == 0) {
8204 		goto done;
8205 	}
8206 	/* XXX validate IP address: no multicast/broadcast */
8207 	mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
8208 	if (mnr != NULL && mne != NULL) {
8209 		/* record the offset to do the replacement */
8210 		translate = TRUE;
8211 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8212 	}
8213 
8214 done:
8215 	return translate;
8216 }
8217 
8218 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
8219 	                         + sizeof(struct ip))
8220 static struct ether_header *
8221 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8222 {
8223 	struct ether_header     *eh = NULL;
8224 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8225 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
8226 
8227 	if (mbuf_pkthdr_len(*data) < minlen) {
8228 		BRIDGE_LOG(LOG_DEBUG, flags,
8229 		    "IP %s short frame %lu < %lu",
8230 		    get_in_out_string(is_output),
8231 		    mbuf_pkthdr_len(*data), minlen);
8232 		goto done;
8233 	}
8234 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8235 		BRIDGE_LOG(LOG_DEBUG, flags,
8236 		    "IP %s size %lu mbuf_pullup fail",
8237 		    get_in_out_string(is_output),
8238 		    minlen);
8239 		*data = NULL;
8240 		goto done;
8241 	}
8242 	eh = mtod(*data, struct ether_header *);
8243 done:
8244 	return eh;
8245 }
8246 
8247 static bool
8248 is_broadcast_ip_packet(mbuf_t *data)
8249 {
8250 	struct ether_header     *eh;
8251 	uint16_t                ether_type;
8252 	bool                    is_broadcast = FALSE;
8253 
8254 	eh = mtod(*data, struct ether_header *);
8255 	ether_type = ntohs(eh->ether_type);
8256 	switch (ether_type) {
8257 	case ETHERTYPE_IP:
8258 		eh = get_ether_ip_header(data, FALSE);
8259 		if (eh != NULL) {
8260 			struct in_addr  dst;
8261 			struct ip       *iphdr;
8262 
8263 			iphdr = (struct ip *)(void *)(eh + 1);
8264 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8265 			is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8266 		}
8267 		break;
8268 	default:
8269 		break;
8270 	}
8271 	return is_broadcast;
8272 }
8273 
8274 static struct mac_nat_entry *
8275 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8276 {
8277 	struct in_addr          dst;
8278 	struct ether_header     *eh;
8279 	struct ip               *iphdr;
8280 	struct mac_nat_entry    *mne = NULL;
8281 
8282 	eh = get_ether_ip_header(data, FALSE);
8283 	if (eh == NULL) {
8284 		goto done;
8285 	}
8286 	iphdr = (struct ip *)(void *)(eh + 1);
8287 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8288 	/* XXX validate IP address */
8289 	if (dst.s_addr == 0) {
8290 		goto done;
8291 	}
8292 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
8293 done:
8294 	return mne;
8295 }
8296 
8297 static void
8298 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8299     struct bridge_iflist *bif, mbuf_t m,
8300     uint8_t ip_header_len, struct mac_nat_record *mnr)
8301 {
8302 	uint16_t        dp_flags;
8303 	errno_t         error;
8304 	size_t          offset;
8305 	struct udphdr   udphdr;
8306 
8307 	/* copy the UDP header */
8308 	offset = sizeof(struct ether_header) + ip_header_len;
8309 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8310 	if (error != 0) {
8311 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8312 		    "mbuf_copydata udphdr failed %d",
8313 		    error);
8314 		return;
8315 	}
8316 	if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8317 	    ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8318 		/* not a BOOTP/DHCP packet */
8319 		return;
8320 	}
8321 	/* check whether the broadcast bit is already set */
8322 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8323 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8324 	if (error != 0) {
8325 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8326 		    "mbuf_copydata dp_flags failed %d",
8327 		    error);
8328 		return;
8329 	}
8330 	if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8331 		/* it's already set, nothing to do */
8332 		return;
8333 	}
8334 	/* broadcast bit needs to be set */
8335 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8336 	mnr->mnr_ip_header_len = ip_header_len;
8337 	if (udphdr.uh_sum != 0) {
8338 		uint16_t        delta;
8339 
8340 		/* adjust checksum to take modified dp_flags into account */
8341 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8342 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8343 	}
8344 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8345 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8346 	    sc->sc_if_xname,
8347 	    bif->bif_ifp->if_xname,
8348 	    ntohs(mnr->mnr_ip_dhcp_flags),
8349 	    ntohs(mnr->mnr_ip_udp_csum));
8350 	return;
8351 }
8352 
8353 static boolean_t
8354 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8355     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8356 {
8357 #pragma unused(mnr)
8358 	struct ether_header     *eh;
8359 	struct in_addr          ip;
8360 	struct ip               *iphdr;
8361 	uint8_t                 ip_header_len;
8362 	struct mac_nat_entry    *mne = NULL;
8363 	boolean_t               translate = FALSE;
8364 
8365 	eh = get_ether_ip_header(data, TRUE);
8366 	if (eh == NULL) {
8367 		goto done;
8368 	}
8369 	iphdr = (struct ip *)(void *)(eh + 1);
8370 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8371 	if (ip_header_len < sizeof(ip)) {
8372 		/* bogus IP header */
8373 		goto done;
8374 	}
8375 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8376 	/* XXX validate the source address */
8377 	if (ip.s_addr != 0) {
8378 		mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
8379 		    eh->ether_shost);
8380 	}
8381 	if (mnr != NULL) {
8382 		if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8383 			/* handle DHCP must broadcast */
8384 			bridge_mac_nat_udp_output(sc, bif, *data,
8385 			    ip_header_len, mnr);
8386 		}
8387 		translate = TRUE;
8388 	}
8389 done:
8390 	return translate;
8391 }
8392 
8393 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8394 	                         + sizeof(struct ip6_hdr))
8395 static struct ether_header *
8396 get_ether_ipv6_header(mbuf_t *data, size_t plen, boolean_t is_output)
8397 {
8398 	struct ether_header     *eh = NULL;
8399 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8400 	size_t          minlen = ETHER_IPV6_HEADER_LEN + plen;
8401 
8402 	if (mbuf_pkthdr_len(*data) < minlen) {
8403 		BRIDGE_LOG(LOG_DEBUG, flags,
8404 		    "IP %s short frame %lu < %lu",
8405 		    get_in_out_string(is_output),
8406 		    mbuf_pkthdr_len(*data), minlen);
8407 		goto done;
8408 	}
8409 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8410 		BRIDGE_LOG(LOG_DEBUG, flags,
8411 		    "IP %s size %lu mbuf_pullup fail",
8412 		    get_in_out_string(is_output),
8413 		    minlen);
8414 		*data = NULL;
8415 		goto done;
8416 	}
8417 	eh = mtod(*data, struct ether_header *);
8418 done:
8419 	return eh;
8420 }
8421 
8422 #include <netinet/icmp6.h>
8423 #include <netinet6/nd6.h>
8424 
8425 #define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8426 
8427 static void
8428 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
8429     struct bridge_iflist *bif,
8430     mbuf_t *data, struct ip6_hdr *ip6h,
8431     struct in6_addr *saddrp,
8432     struct mac_nat_record *mnr)
8433 {
8434 	struct ether_header *eh;
8435 	struct icmp6_hdr *icmp6;
8436 	uint8_t         icmp6_type;
8437 	uint32_t        icmp6len;
8438 	int             lladdrlen = 0;
8439 	char            *lladdr = NULL;
8440 	unsigned int    off = sizeof(*ip6h);
8441 
8442 	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
8443 	if (icmp6len < sizeof(*icmp6)) {
8444 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8445 		    "short IPv6 payload length %d < %lu",
8446 		    icmp6len, sizeof(*icmp6));
8447 		return;
8448 	}
8449 
8450 	/* pullup IP6 header + ICMPv6 header */
8451 	eh = get_ether_ipv6_header(data, sizeof(*icmp6), TRUE);
8452 	if (eh == NULL) {
8453 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8454 		    "failed to pullup icmp6 header");
8455 		return;
8456 	}
8457 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8458 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8459 	icmp6_type = icmp6->icmp6_type;
8460 	switch (icmp6_type) {
8461 	case ND_NEIGHBOR_SOLICIT:
8462 	case ND_NEIGHBOR_ADVERT:
8463 	case ND_ROUTER_ADVERT:
8464 	case ND_ROUTER_SOLICIT:
8465 		break;
8466 	default:
8467 		return;
8468 	}
8469 
8470 	/* pullup IP6 header + payload */
8471 	eh = get_ether_ipv6_header(data, icmp6len, TRUE);
8472 	if (eh == NULL) {
8473 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8474 		    "failed to pullup icmp6 + payload");
8475 		return;
8476 	}
8477 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8478 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8479 	switch (icmp6_type) {
8480 	case ND_NEIGHBOR_SOLICIT: {
8481 		struct nd_neighbor_solicit *nd_ns;
8482 		union nd_opts ndopts;
8483 		boolean_t is_dad_probe;
8484 		struct in6_addr taddr;
8485 
8486 		if (icmp6len < sizeof(*nd_ns)) {
8487 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8488 			    "short nd_ns %d < %lu",
8489 			    icmp6len, sizeof(*nd_ns));
8490 			return;
8491 		}
8492 
8493 		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8494 		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8495 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8496 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8497 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8498 			    "invalid target ignored");
8499 			return;
8500 		}
8501 		/* parse options */
8502 		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8503 		if (nd6_options(&ndopts) < 0) {
8504 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8505 			    "invalid ND6 NS option");
8506 			return;
8507 		}
8508 		if (ndopts.nd_opts_src_lladdr != NULL) {
8509 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8510 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8511 		}
8512 		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8513 		if (lladdr != NULL) {
8514 			if (is_dad_probe) {
8515 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8516 				    "bad ND6 DAD packet");
8517 				return;
8518 			}
8519 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8520 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8521 				    "source lladdrlen %d != %lu",
8522 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8523 				return;
8524 			}
8525 		}
8526 		if (is_dad_probe) {
8527 			/* node is trying use taddr, create an mne for taddr */
8528 			*saddrp = taddr;
8529 		}
8530 		break;
8531 	}
8532 	case ND_NEIGHBOR_ADVERT: {
8533 		struct nd_neighbor_advert *nd_na;
8534 		union nd_opts ndopts;
8535 		struct in6_addr taddr;
8536 
8537 
8538 		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8539 
8540 		if (icmp6len < sizeof(*nd_na)) {
8541 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8542 			    "short nd_na %d < %lu",
8543 			    icmp6len, sizeof(*nd_na));
8544 			return;
8545 		}
8546 
8547 		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8548 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8549 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8550 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8551 			    "invalid target ignored");
8552 			return;
8553 		}
8554 		/* parse options */
8555 		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8556 		if (nd6_options(&ndopts) < 0) {
8557 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8558 			    "invalid ND6 NA option");
8559 			return;
8560 		}
8561 		if (ndopts.nd_opts_tgt_lladdr == NULL) {
8562 			/* target linklayer, nothing to do */
8563 			return;
8564 		}
8565 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
8566 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
8567 		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8568 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8569 			    "target lladdrlen %d != %lu",
8570 			    lladdrlen, ETHER_ND_LLADDR_LEN);
8571 			return;
8572 		}
8573 		break;
8574 	}
8575 	case ND_ROUTER_ADVERT:
8576 	case ND_ROUTER_SOLICIT: {
8577 		union nd_opts ndopts;
8578 		uint32_t type_length;
8579 		const char *description;
8580 
8581 		if (icmp6_type == ND_ROUTER_ADVERT) {
8582 			type_length = sizeof(struct nd_router_advert);
8583 			description = "RA";
8584 		} else {
8585 			type_length = sizeof(struct nd_router_solicit);
8586 			description = "RS";
8587 		}
8588 		if (icmp6len < type_length) {
8589 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8590 			    "short ND6 %s %d < %d",
8591 			    description, icmp6len, type_length);
8592 			return;
8593 		}
8594 		/* parse options */
8595 		nd6_option_init(((uint8_t *)icmp6) + type_length,
8596 		    icmp6len - type_length, &ndopts);
8597 		if (nd6_options(&ndopts) < 0) {
8598 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8599 			    "invalid ND6 %s option", description);
8600 			return;
8601 		}
8602 		if (ndopts.nd_opts_src_lladdr != NULL) {
8603 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8604 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8605 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8606 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8607 				    "source lladdrlen %d != %lu",
8608 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8609 				return;
8610 			}
8611 		}
8612 		break;
8613 	}
8614 	default:
8615 		break;
8616 	}
8617 	if (lladdr != NULL) {
8618 		mnr->mnr_ip6_lladdr_offset = (uint16_t)
8619 		    ((uintptr_t)lladdr - (uintptr_t)eh);
8620 		mnr->mnr_ip6_icmp6_len = icmp6len;
8621 		mnr->mnr_ip6_icmp6_type = icmp6_type;
8622 		mnr->mnr_ip6_header_len = off;
8623 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8624 			const char *str;
8625 
8626 			switch (mnr->mnr_ip6_icmp6_type) {
8627 			case ND_ROUTER_ADVERT:
8628 				str = "ROUTER ADVERT";
8629 				break;
8630 			case ND_ROUTER_SOLICIT:
8631 				str = "ROUTER SOLICIT";
8632 				break;
8633 			case ND_NEIGHBOR_ADVERT:
8634 				str = "NEIGHBOR ADVERT";
8635 				break;
8636 			case ND_NEIGHBOR_SOLICIT:
8637 				str = "NEIGHBOR SOLICIT";
8638 				break;
8639 			default:
8640 				str = "";
8641 				break;
8642 			}
8643 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8644 			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8645 			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8646 			    mnr->mnr_ip6_header_len,
8647 			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8648 		}
8649 	}
8650 }
8651 
8652 static struct mac_nat_entry *
8653 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8654 {
8655 	struct in6_addr         dst;
8656 	struct ether_header     *eh;
8657 	struct ip6_hdr          *ip6h;
8658 	struct mac_nat_entry    *mne = NULL;
8659 
8660 	eh = get_ether_ipv6_header(data, 0, FALSE);
8661 	if (eh == NULL) {
8662 		goto done;
8663 	}
8664 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8665 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8666 	/* XXX validate IPv6 address */
8667 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8668 		goto done;
8669 	}
8670 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8671 
8672 done:
8673 	return mne;
8674 }
8675 
8676 static boolean_t
8677 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8678     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8679 {
8680 	struct ether_header     *eh;
8681 	ether_addr_t            ether_shost;
8682 	struct ip6_hdr          *ip6h;
8683 	struct in6_addr         saddr;
8684 	boolean_t               translate;
8685 
8686 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8687 	eh = get_ether_ipv6_header(data, 0, TRUE);
8688 	if (eh == NULL) {
8689 		translate = FALSE;
8690 		goto done;
8691 	}
8692 	bcopy(eh->ether_shost, &ether_shost, sizeof(ether_shost));
8693 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8694 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8695 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8696 		bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8697 	}
8698 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8699 		goto done;
8700 	}
8701 	(void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8702 	    ether_shost.octet);
8703 
8704 done:
8705 	return translate;
8706 }
8707 
8708 /*
8709  * bridge_mac_nat_input:
8710  * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8711  * This interface is the "external" interface with respect to NAT.
8712  * The interface is only capable of receiving a single MAC address
8713  * (e.g. a Wi-Fi STA interface).
8714  *
8715  * When a packet arrives on the external interface, look up the destination
8716  * IP address in the mac_nat_entry table. If there is a match, *is_input
8717  * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8718  * is set to FALSE and translate the MAC address if necessary.
8719  *
8720  * Returns:
8721  * The internal interface to direct the packet to, or NULL if the packet
8722  * should not be redirected.
8723  *
8724  * *data may be updated to point at a different mbuf chain, or set to NULL
8725  * if the chain was deallocated during processing.
8726  */
8727 static ifnet_t
8728 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8729     boolean_t *is_input)
8730 {
8731 	ifnet_t                 dst_if = NULL;
8732 	struct ether_header     *eh;
8733 	uint16_t                ether_type;
8734 	boolean_t               is_unicast;
8735 	mbuf_t                  m = *data;
8736 	struct mac_nat_entry    *mne = NULL;
8737 
8738 	BRIDGE_LOCK_ASSERT_HELD(sc);
8739 	*is_input = FALSE;
8740 	assert(sc->sc_mac_nat_bif != NULL);
8741 	is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8742 	eh = mtod(m, struct ether_header *);
8743 	ether_type = ntohs(eh->ether_type);
8744 	switch (ether_type) {
8745 	case ETHERTYPE_ARP:
8746 		mne = bridge_mac_nat_arp_input(sc, data);
8747 		break;
8748 	case ETHERTYPE_IP:
8749 		if (is_unicast) {
8750 			mne = bridge_mac_nat_ip_input(sc, data);
8751 		}
8752 		break;
8753 	case ETHERTYPE_IPV6:
8754 		if (is_unicast) {
8755 			mne = bridge_mac_nat_ipv6_input(sc, data);
8756 		}
8757 		break;
8758 	default:
8759 		break;
8760 	}
8761 	if (mne != NULL) {
8762 		if (is_unicast) {
8763 			if (m != *data) {
8764 				/* it may have changed */
8765 				eh = mtod(*data, struct ether_header *);
8766 			}
8767 			bcopy(mne->mne_mac, eh->ether_dhost,
8768 			    sizeof(eh->ether_dhost));
8769 		}
8770 		dst_if = mne->mne_bif->bif_ifp;
8771 		*is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8772 	}
8773 	return dst_if;
8774 }
8775 
8776 /*
8777  * bridge_mac_nat_output:
8778  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8779  * from the interface 'bif'.
8780  *
8781  * Create a mac_nat_entry containing the source IP address and MAC address
8782  * from the packet. Populate a mac_nat_record with information detailing
8783  * how to translate the packet. Translation takes place later when
8784  * the bridge lock is no longer held.
8785  *
8786  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8787  * interface is generating an output packet. No translation is required in this
8788  * case, we just record the IP address used to prevent another bif from
8789  * claiming our IP address.
8790  *
8791  * Returns:
8792  * TRUE if the packet should be translated (*mnr updated as well),
8793  * FALSE otherwise.
8794  *
8795  * *data may be updated to point at a different mbuf chain or NULL if
8796  * the chain was deallocated during processing.
8797  */
8798 
8799 static boolean_t
8800 bridge_mac_nat_output(struct bridge_softc *sc,
8801     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8802 {
8803 	struct ether_header     *eh;
8804 	uint16_t                ether_type;
8805 	boolean_t               translate = FALSE;
8806 
8807 	BRIDGE_LOCK_ASSERT_HELD(sc);
8808 	assert(sc->sc_mac_nat_bif != NULL);
8809 
8810 	eh = mtod(*data, struct ether_header *);
8811 	ether_type = ntohs(eh->ether_type);
8812 	if (mnr != NULL) {
8813 		bzero(mnr, sizeof(*mnr));
8814 		mnr->mnr_ether_type = ether_type;
8815 	}
8816 	switch (ether_type) {
8817 	case ETHERTYPE_ARP:
8818 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8819 		break;
8820 	case ETHERTYPE_IP:
8821 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8822 		break;
8823 	case ETHERTYPE_IPV6:
8824 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8825 		break;
8826 	default:
8827 		break;
8828 	}
8829 	return translate;
8830 }
8831 
8832 static void
8833 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8834     const caddr_t eaddr)
8835 {
8836 	errno_t                 error;
8837 
8838 	if (mnr->mnr_arp_offset == 0) {
8839 		return;
8840 	}
8841 	/* replace the source hardware address */
8842 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8843 	    ETHER_ADDR_LEN, eaddr,
8844 	    MBUF_DONTWAIT);
8845 	if (error != 0) {
8846 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8847 		    "mbuf_copyback failed");
8848 		m_freem(*data);
8849 		*data = NULL;
8850 	}
8851 	return;
8852 }
8853 
8854 static void
8855 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8856 {
8857 	errno_t         error;
8858 	size_t          offset;
8859 
8860 	if (mnr->mnr_ip_header_len == 0) {
8861 		return;
8862 	}
8863 	/* update the UDP checksum */
8864 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8865 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8866 	    sizeof(mnr->mnr_ip_udp_csum),
8867 	    &mnr->mnr_ip_udp_csum,
8868 	    MBUF_DONTWAIT);
8869 	if (error != 0) {
8870 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8871 		    "mbuf_copyback uh_sum failed");
8872 		m_freem(*data);
8873 		*data = NULL;
8874 	}
8875 	/* update the DHCP must broadcast flag */
8876 	offset += sizeof(struct udphdr);
8877 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8878 	    sizeof(mnr->mnr_ip_dhcp_flags),
8879 	    &mnr->mnr_ip_dhcp_flags,
8880 	    MBUF_DONTWAIT);
8881 	if (error != 0) {
8882 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8883 		    "mbuf_copyback dp_flags failed");
8884 		m_freem(*data);
8885 		*data = NULL;
8886 	}
8887 }
8888 
8889 static void
8890 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8891     const caddr_t eaddr)
8892 {
8893 	uint16_t        cksum;
8894 	errno_t         error;
8895 	mbuf_t          m = *data;
8896 
8897 	if (mnr->mnr_ip6_header_len == 0) {
8898 		return;
8899 	}
8900 	switch (mnr->mnr_ip6_icmp6_type) {
8901 	case ND_ROUTER_ADVERT:
8902 	case ND_ROUTER_SOLICIT:
8903 	case ND_NEIGHBOR_SOLICIT:
8904 	case ND_NEIGHBOR_ADVERT:
8905 		if (mnr->mnr_ip6_lladdr_offset == 0) {
8906 			/* nothing to do */
8907 			return;
8908 		}
8909 		break;
8910 	default:
8911 		return;
8912 	}
8913 
8914 	/*
8915 	 * replace the lladdr
8916 	 */
8917 	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8918 	    ETHER_ADDR_LEN, eaddr,
8919 	    MBUF_DONTWAIT);
8920 	if (error != 0) {
8921 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8922 		    "mbuf_copyback lladdr failed");
8923 		m_freem(m);
8924 		*data = NULL;
8925 		return;
8926 	}
8927 
8928 	/*
8929 	 * recompute the icmp6 checksum
8930 	 */
8931 
8932 	/* skip past the ethernet header */
8933 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8934 	    mbuf_len(m) - ETHER_HDR_LEN);
8935 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8936 
8937 #define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
8938 	/* set the checksum to zero */
8939 	cksum = 0;
8940 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8941 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8942 	if (error != 0) {
8943 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8944 		    "mbuf_copyback cksum=0 failed");
8945 		m_freem(m);
8946 		*data = NULL;
8947 		return;
8948 	}
8949 	/* compute and set the new checksum */
8950 	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8951 	    mnr->mnr_ip6_icmp6_len);
8952 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8953 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8954 	if (error != 0) {
8955 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8956 		    "mbuf_copyback cksum failed");
8957 		m_freem(m);
8958 		*data = NULL;
8959 		return;
8960 	}
8961 	/* restore the ethernet header */
8962 	mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
8963 	    mbuf_len(m) + ETHER_HDR_LEN);
8964 	mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
8965 	return;
8966 }
8967 
8968 static void
8969 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8970     const caddr_t eaddr)
8971 {
8972 	struct ether_header     *eh;
8973 
8974 	/* replace the source ethernet address with the single MAC */
8975 	eh = mtod(*data, struct ether_header *);
8976 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8977 	switch (mnr->mnr_ether_type) {
8978 	case ETHERTYPE_ARP:
8979 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
8980 		break;
8981 
8982 	case ETHERTYPE_IP:
8983 		bridge_mac_nat_ip_translate(data, mnr);
8984 		break;
8985 
8986 	case ETHERTYPE_IPV6:
8987 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8988 		break;
8989 
8990 	default:
8991 		break;
8992 	}
8993 	return;
8994 }
8995 
8996 /*
8997  * bridge packet filtering
8998  */
8999 
9000 /*
9001  * Perform basic checks on header size since
9002  * pfil assumes ip_input has already processed
9003  * it for it.  Cut-and-pasted from ip_input.c.
9004  * Given how simple the IPv6 version is,
9005  * does the IPv4 version really need to be
9006  * this complicated?
9007  *
9008  * XXX Should we update ipstat here, or not?
9009  * XXX Right now we update ipstat but not
9010  * XXX csum_counter.
9011  */
9012 static int
9013 bridge_ip_checkbasic(struct mbuf **mp)
9014 {
9015 	struct mbuf *m = *mp;
9016 	struct ip *ip;
9017 	int len, hlen;
9018 	u_short sum;
9019 
9020 	if (*mp == NULL) {
9021 		return -1;
9022 	}
9023 
9024 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9025 		/* max_linkhdr is already rounded up to nearest 4-byte */
9026 		if ((m = m_copyup(m, sizeof(struct ip),
9027 		    max_linkhdr)) == NULL) {
9028 			/* XXXJRT new stat, please */
9029 			ipstat.ips_toosmall++;
9030 			goto bad;
9031 		}
9032 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
9033 		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
9034 			ipstat.ips_toosmall++;
9035 			goto bad;
9036 		}
9037 	}
9038 	ip = mtod(m, struct ip *);
9039 	if (ip == NULL) {
9040 		goto bad;
9041 	}
9042 
9043 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
9044 		ipstat.ips_badvers++;
9045 		goto bad;
9046 	}
9047 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9048 	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
9049 		ipstat.ips_badhlen++;
9050 		goto bad;
9051 	}
9052 	if (hlen > m->m_len) {
9053 		if ((m = m_pullup(m, hlen)) == 0) {
9054 			ipstat.ips_badhlen++;
9055 			goto bad;
9056 		}
9057 		ip = mtod(m, struct ip *);
9058 		if (ip == NULL) {
9059 			goto bad;
9060 		}
9061 	}
9062 
9063 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
9064 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
9065 	} else {
9066 		if (hlen == sizeof(struct ip)) {
9067 			sum = in_cksum_hdr(ip);
9068 		} else {
9069 			sum = in_cksum(m, hlen);
9070 		}
9071 	}
9072 	if (sum) {
9073 		ipstat.ips_badsum++;
9074 		goto bad;
9075 	}
9076 
9077 	/* Retrieve the packet length. */
9078 	len = ntohs(ip->ip_len);
9079 
9080 	/*
9081 	 * Check for additional length bogosity
9082 	 */
9083 	if (len < hlen) {
9084 		ipstat.ips_badlen++;
9085 		goto bad;
9086 	}
9087 
9088 	/*
9089 	 * Check that the amount of data in the buffers
9090 	 * is as at least much as the IP header would have us expect.
9091 	 * Drop packet if shorter than we expect.
9092 	 */
9093 	if (m->m_pkthdr.len < len) {
9094 		ipstat.ips_tooshort++;
9095 		goto bad;
9096 	}
9097 
9098 	/* Checks out, proceed */
9099 	*mp = m;
9100 	return 0;
9101 
9102 bad:
9103 	*mp = m;
9104 	return -1;
9105 }
9106 
9107 /*
9108  * Same as above, but for IPv6.
9109  * Cut-and-pasted from ip6_input.c.
9110  * XXX Should we update ip6stat, or not?
9111  */
9112 static int
9113 bridge_ip6_checkbasic(struct mbuf **mp)
9114 {
9115 	struct mbuf *m = *mp;
9116 	struct ip6_hdr *ip6;
9117 
9118 	/*
9119 	 * If the IPv6 header is not aligned, slurp it up into a new
9120 	 * mbuf with space for link headers, in the event we forward
9121 	 * it.  Otherwise, if it is aligned, make sure the entire base
9122 	 * IPv6 header is in the first mbuf of the chain.
9123 	 */
9124 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9125 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9126 		/* max_linkhdr is already rounded up to nearest 4-byte */
9127 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9128 		    max_linkhdr)) == NULL) {
9129 			/* XXXJRT new stat, please */
9130 			ip6stat.ip6s_toosmall++;
9131 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9132 			goto bad;
9133 		}
9134 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9135 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9136 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9137 			ip6stat.ip6s_toosmall++;
9138 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9139 			goto bad;
9140 		}
9141 	}
9142 
9143 	ip6 = mtod(m, struct ip6_hdr *);
9144 
9145 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9146 		ip6stat.ip6s_badvers++;
9147 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9148 		goto bad;
9149 	}
9150 
9151 	/* Checks out, proceed */
9152 	*mp = m;
9153 	return 0;
9154 
9155 bad:
9156 	*mp = m;
9157 	return -1;
9158 }
9159 
/*
 * bridge_pf:
 * Run a bridged ethernet frame through the PF hook (pf_af_hook).
 *
 * The PF routines expect to be called from ip_input, so we
 * need to do and undo here some of the same processing: the ethernet
 * header (and SNAP/LLC header, if present) is stripped and saved, the
 * IP header is sanity checked, ip_len/ip_off are swapped to host order for
 * IPv4, the hook is called, and finally everything is put back.
 *
 * Parameters:
 * mp               in/out, the frame; may be replaced, or set to NULL
 *                  when the chain has been freed or consumed
 * ifp              interface passed to pf_af_hook; the hook is skipped
 *                  when ifp is NULL
 * sc_filter_flags  IFBF_FILT_* flags; nothing is done unless
 *                  IFBF_FILT_MEMBER is set
 * input            passed through to pf_af_hook
 *
 * Returns:
 * 0 if the packet may proceed (ARP/REVARP are always passed, as is
 * everything when member filtering is disabled); otherwise the error
 * from the hook, or -1 for the failures detected here.
 *
 * NOTE(review): the `goto bad` paths taken after pf_af_hook has returned
 * 0 (bad header length, failed m_pullup) leave error == 0, so this
 * function can return 0 with *mp == NULL. Callers presumably have to
 * check *mp as well as the return value -- confirm.
 *
 * XXX : this is heavily inspired on bridge_pfil()
 */
static int
bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
    int input)
{
	/*
	 * XXX : mpetit : heavily inspired by bridge_pfil()
	 */

	int snap, error, i, hlen;
	struct ether_header *eh1, eh2;
	struct ip *ip;
	struct llc llc1;
	u_int16_t ether_type;

	snap = 0;
	error = -1;     /* Default error if not error == 0 */

	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
		return 0; /* filtering is disabled */
	}
	/*
	 * Make the first max_protohdr bytes (or the whole packet, if it is
	 * shorter) contiguous in the first mbuf.
	 */
	i = min((*mp)->m_pkthdr.len, max_protohdr);
	if ((*mp)->m_len < i) {
		*mp = m_pullup(*mp, i);
		if (*mp == NULL) {
			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
			return -1;
		}
	}

	eh1 = mtod(*mp, struct ether_header *);
	ether_type = ntohs(eh1->ether_type);

	/*
	 * Check for SNAP/LLC.
	 * A type/length field below ETHERMTU is treated as a length; the
	 * real ether type is then taken from the SNAP header, if there is one.
	 */
	if (ether_type < ETHERMTU) {
		struct llc *llc2 = (struct llc *)(eh1 + 1);

		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
		    llc2->llc_dsap == LLC_SNAP_LSAP &&
		    llc2->llc_ssap == LLC_SNAP_LSAP &&
		    llc2->llc_control == LLC_UI) {
			ether_type = htons(llc2->llc_un.type_snap.ether_type);
			snap = 1;
		}
	}

	/*
	 * If we're trying to filter bridge traffic, don't look at anything
	 * other than IP and ARP traffic.  If the filter doesn't understand
	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
	 * but of course we don't have an AppleTalk filter to begin with.
	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
	 * ARP traffic.)
	 */
	switch (ether_type) {
	case ETHERTYPE_ARP:
	case ETHERTYPE_REVARP:
		return 0;         /* Automatically pass */

	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		break;
	default:
		/*
		 * Check to see if the user wants to pass non-ip
		 * packets, these will not be checked by pf and
		 * passed unconditionally so the default is to drop.
		 */
		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
			goto bad;
		}
		break;
	}

	/* Strip off the Ethernet header and keep a copy. */
	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
	m_adj(*mp, ETHER_HDR_LEN);

	/* Strip off snap header, if present */
	if (snap) {
		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
		m_adj(*mp, sizeof(struct llc));
	}

	/*
	 * Check the IP header for alignment and errors
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		error = bridge_ip_checkbasic(mp);
		break;
	case ETHERTYPE_IPV6:
		error = bridge_ip6_checkbasic(mp);
		break;
	default:
		error = 0;
		break;
	}
	if (error) {
		/* error is -1 here; *mp may already be NULL */
		goto bad;
	}

	error = 0;

	/*
	 * Run the packet through pf rules
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		/*
		 * before calling the firewall, swap fields the same as
		 * IP does. here we assume the header is contiguous
		 */
		ip = mtod(*mp, struct ip *);

		ip->ip_len = ntohs(ip->ip_len);
		ip->ip_off = ntohs(ip->ip_off);

		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}

		/* Recalculate the ip checksum and restore byte ordering */
		ip = mtod(*mp, struct ip *);
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
		if (hlen < (int)sizeof(struct ip)) {
			/* NOTE(review): error is 0 on this path */
			goto bad;
		}
		if (hlen > (*mp)->m_len) {
			if ((*mp = m_pullup(*mp, hlen)) == 0) {
				/* NOTE(review): error is 0 on this path */
				goto bad;
			}
			ip = mtod(*mp, struct ip *);
			if (ip == NULL) {
				goto bad;
			}
		}
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);
		ip->ip_sum = 0;
		if (hlen == sizeof(struct ip)) {
			ip->ip_sum = in_cksum_hdr(ip);
		} else {
			ip->ip_sum = in_cksum(*mp, hlen);
		}
		break;

	case ETHERTYPE_IPV6:
		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}
		break;
	default:
		error = 0;
		break;
	}

	/* the chain is gone: nothing to restore or free */
	if (*mp == NULL) {
		return error;
	}
	/* the hook failed but left the chain to us: free it */
	if (error != 0) {
		goto bad;
	}

	/* from here on, a failed M_PREPEND is reported as -1 */
	error = -1;

	/*
	 * Finally, put everything back the way it was and return
	 */
	if (snap) {
		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
		if (*mp == NULL) {
			return error;
		}
		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
	}

	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
	if (*mp == NULL) {
		return error;
	}
	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);

	return 0;

bad:
	/* drop the packet: free the chain (if any) and report it gone */
	m_freem(*mp);
	*mp = NULL;
	return error;
}
9366 
9367 /*
9368  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9369  * All rights reserved.
9370  *
9371  * Redistribution and use in source and binary forms, with or without
9372  * modification, are permitted provided that the following conditions
9373  * are met:
9374  *   1. Redistributions of source code must retain the above copyright
9375  *      notice, this list of conditions and the following disclaimer.
9376  *   2. Redistributions in binary form must reproduce the above copyright
9377  *      notice, this list of conditions and the following disclaimer in the
9378  *      documentation and/or other materials provided with the distribution.
9379  *
9380  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9381  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9382  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9383  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9384  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9385  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9386  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9387  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9388  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9389  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9390  * SUCH DAMAGE.
9391  */
9392 
/*
 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
 *
 * Create a queue of packets/segments which fit the given mss + hdr_len.
 * m0 points to the mbuf chain to be segmented.
 * This function splits the payload (m0->m_pkthdr.len - hdr_len)
 * into segments of at most mss bytes and copies the first hdr_len bytes
 * of m0 to the front of each segment.
 * If hdr2_buf is not NULL (hdr2_len is the buffer length), it is copied
 * into each segment after the first hdr_len bytes.
 *
 * The first segment is stored in m0 itself; the following segments are
 * newly allocated and linked to it through m_nextpkt.
 *
 * Returns the queue of segments on success, NULL on failure
 * (the mbuf queue is freed in this case).
 * If the packet already fits in hdr_len + mss, m0 is returned unchanged.
 *
 * On success with segmentation, *nsegs (if nsegs is not NULL) is set to
 * the number of segments generated. It is not written when m0 is returned
 * unchanged or when NULL is returned.
 */

static struct mbuf *
m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
    char * hdr2_buf, int hdr2_len)
{
	int off = 0, n, firstlen;
	struct mbuf **mnext, *mseg;
	int total_len = m0->m_pkthdr.len;

	/*
	 * Segmentation useless
	 */
	if (total_len <= hdr_len + mss) {
		return m0;
	}

	/* normalize: no optional header means NULL buffer and zero length */
	if (hdr2_buf == NULL || hdr2_len <= 0) {
		hdr2_buf = NULL;
		hdr2_len = 0;
	}

	/* the payload of the second segment starts at this offset in m0 */
	off = hdr_len + mss;
	firstlen = mss; /* first segment stored in the original mbuf */

	mnext = &(m0->m_nextpkt); /* pointer to next packet */

	/* n counts the segments, starting with the one kept in m0 */
	for (n = 1; off < total_len; off += mss, n++) {
		struct mbuf *m;
		/*
		 * Copy the header from the original packet
		 * and create a new mbuf chain
		 */
		/* m_getcl is used when hdr_len is larger than MHLEN */
		if (MHLEN < hdr_len) {
			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		} else {
			m = m_gethdr(M_NOWAIT, MT_DATA);
		}

		if (m == NULL) {
#ifdef GSO_DEBUG
			D("MGETHDR error\n");
#endif
			goto err;
		}

		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));

		m->m_len = hdr_len;
		/*
		 * if the optional header is present, copy it
		 */
		if (hdr2_buf != NULL) {
			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
		}

		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
		/*
		 * The last segment carries whatever is left. mss is
		 * overwritten here, which is harmless because the loop
		 * ends after this iteration.
		 */
		if (off + mss >= total_len) {           /* last segment */
			mss = total_len - off;
		}
		/*
		 * Copy the payload from original packet
		 */
		mseg = m_copym(m0, off, mss, M_NOWAIT);
		if (mseg == NULL) {
			/* m is not linked into the queue yet, free it here */
			m_freem(m);
#ifdef GSO_DEBUG
			D("m_copym error\n");
#endif
			goto err;
		}
		m_cat(m, mseg);

		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		/*
		 * Copy the checksum flags and data (in_cksum() need this)
		 */
		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;

		/* append the new segment to the queue */
		*mnext = m;
		mnext = &(m->m_nextpkt);
	}

	/*
	 * Update first segment.
	 * If the optional header is present, it is necessary
	 * to insert it into the first segment.
	 */
	if (hdr2_buf == NULL) {
		/*
		 * The count passed to m_adj is negative here
		 * (total_len > hdr_len + firstlen); presumably this trims
		 * the excess bytes from the tail of m0 -- confirm against
		 * m_adj.
		 */
		m_adj(m0, hdr_len + firstlen - total_len);
		m0->m_pkthdr.len = hdr_len + firstlen;
	} else {
		/* save the first payload, cut m0 down to the header ... */
		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
		if (mseg == NULL) {
#ifdef GSO_DEBUG
			D("m_copym error\n");
#endif
			goto err;
		}
		m_adj(m0, hdr_len - total_len);
		/* ... then append the optional header and the payload */
		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
		m_cat(m0, mseg);
		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
	}

	if (nsegs != NULL) {
		*nsegs = n;
	}
	return m0;
err:
	/* free m0 and every segment already linked to it */
	while (m0 != NULL) {
		mseg = m0->m_nextpkt;
		m0->m_nextpkt = NULL;
		m_freem(m0);
		m0 = mseg;
	}
	return NULL;
}
9528 
9529 /*
9530  * Wrappers of IPv4 checksum functions
9531  */
9532 static inline void
9533 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9534 {
9535 	m->m_data += mac_hlen;
9536 	m->m_len -= mac_hlen;
9537 	m->m_pkthdr.len -= mac_hlen;
9538 #if __FreeBSD_version < 1000000
9539 	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9540 #endif
9541 
9542 	in_delayed_cksum(m);
9543 
9544 #if __FreeBSD_version < 1000000
9545 	ip->ip_len = htons(ip->ip_len);
9546 #endif
9547 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9548 	m->m_len += mac_hlen;
9549 	m->m_pkthdr.len += mac_hlen;
9550 	m->m_data -= mac_hlen;
9551 }
9552 
9553 static inline void
9554 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9555 {
9556 	m->m_data += mac_hlen;
9557 
9558 	ip->ip_sum = in_cksum(m, ip_hlen);
9559 
9560 	m->m_pkthdr.csum_flags &= ~CSUM_IP;
9561 	m->m_data -= mac_hlen;
9562 }
9563 
/*
 * Structure that contains the state during the TCP segmentation
 */
struct gso_ip_tcp_state {
	/*
	 * Re-points hdr/tcp at the headers of the given segment and
	 * recomputes pay_len (gso_ipv4_tcp_update or gso_ipv6_tcp_update).
	 */
	void    (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/*
	 * Fixes up the IP and TCP headers of the given segment
	 * (gso_ipv4_tcp_internal or gso_ipv6_tcp_internal).
	 */
	void    (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	union iphdr hdr;        /* IP header of the current segment */
	struct tcphdr *tcp;     /* TCP header of the current segment */
	int mac_hlen;           /* length of the header preceding the IP header */
	int ip_hlen;            /* IP header length */
	int tcp_hlen;           /* TCP header length (th_off << 2) */
	int hlen;               /* mac_hlen + ip_hlen + tcp_hlen */
	int pay_len;            /* m_pkthdr.len - hlen of the current segment */
	int sw_csum;            /* CSUM_* flags of the checksums computed here */
	uint32_t tcp_seq;       /* host order; advanced by pay_len per segment */
	uint16_t ip_id;         /* host order; IPv4 only, incremented per segment */
	boolean_t is_tx;        /* NOTE(review): not set by gso_ip_tcp_init_state */
};
9584 
9585 /*
9586  * Update the pointers to TCP and IPv4 headers
9587  */
9588 static inline void
9589 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9590 {
9591 	state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9592 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9593 	state->pay_len = m->m_pkthdr.len - state->hlen;
9594 }
9595 
9596 /*
9597  * Set properly the TCP and IPv4 headers
9598  */
9599 static inline void
9600 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9601 {
9602 	/*
9603 	 * Update IP header
9604 	 */
9605 	state->hdr.ip->ip_id = htons((state->ip_id)++);
9606 	state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9607 	/*
9608 	 * TCP Checksum
9609 	 */
9610 	state->tcp->th_sum = 0;
9611 	state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9612 	    state->hdr.ip->ip_dst.s_addr,
9613 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9614 	/*
9615 	 * Checksum HW not supported (TCP)
9616 	 */
9617 	if (state->sw_csum & CSUM_DELAY_DATA) {
9618 		gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9619 	}
9620 
9621 	state->tcp_seq += state->pay_len;
9622 	/*
9623 	 * IP Checksum
9624 	 */
9625 	state->hdr.ip->ip_sum = 0;
9626 	/*
9627 	 * Checksum HW not supported (IP)
9628 	 */
9629 	if (state->sw_csum & CSUM_IP) {
9630 		gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9631 	}
9632 }
9633 
9634 
9635 /*
9636  * Updates the pointers to TCP and IPv6 headers
9637  */
9638 static inline void
9639 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9640 {
9641 	state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9642 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9643 	state->pay_len = m->m_pkthdr.len - state->hlen;
9644 }
9645 
9646 /*
9647  * Sets properly the TCP and IPv6 headers
9648  */
9649 static inline void
9650 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9651 {
9652 	state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9653 	    state->mac_hlen - state->ip_hlen);
9654 	/*
9655 	 * TCP Checksum
9656 	 */
9657 	state->tcp->th_sum = 0;
9658 	state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9659 	    &state->hdr.ip6->ip6_dst,
9660 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9661 	/*
9662 	 * Checksum HW not supported (TCP)
9663 	 */
9664 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9665 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9666 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9667 	}
9668 	state->tcp_seq += state->pay_len;
9669 }
9670 
9671 /*
9672  * Init the state during the TCP segmentation
9673  */
9674 static void
9675 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9676     bool is_ipv4, int mac_hlen, int ip_hlen,
9677     void * ip_hdr, struct tcphdr * tcp_hdr)
9678 {
9679 #pragma unused(ifp)
9680 
9681 	state->hdr.ptr = ip_hdr;
9682 	state->tcp = tcp_hdr;
9683 	if (is_ipv4) {
9684 		state->ip_id = ntohs(state->hdr.ip->ip_id);
9685 		state->update = gso_ipv4_tcp_update;
9686 		state->internal = gso_ipv4_tcp_internal;
9687 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9688 	} else {
9689 		state->update = gso_ipv6_tcp_update;
9690 		state->internal = gso_ipv6_tcp_internal;
9691 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9692 	}
9693 	state->mac_hlen = mac_hlen;
9694 	state->ip_hlen = ip_hlen;
9695 	state->tcp_hlen = state->tcp->th_off << 2;
9696 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9697 	state->tcp_seq = ntohl(state->tcp->th_seq);
9698 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9699 	return;
9700 }
9701 
9702 /*
9703  * GSO on TCP/IP (v4 or v6)
9704  *
9705  * If is_tx is TRUE, segmented packets are transmitted after they are
9706  * segmented.
9707  *
9708  * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9709  */
9710 static int
9711 gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9712     boolean_t is_tx)
9713 {
9714 	struct mbuf *m, *m_tx;
9715 	int error = 0;
9716 	int mss = 0;
9717 	int nsegs = 0;
9718 	struct mbuf *m0 = *mp;
9719 #ifdef GSO_STATS
9720 	int total_len = m0->m_pkthdr.len;
9721 #endif /* GSO_STATS */
9722 
9723 #if 1
9724 	u_int reduce_mss;
9725 
9726 	reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
9727 	    : if_bridge_tso_reduce_mss_forwarding;
9728 	mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen - reduce_mss;
9729 	assert(mss > 0);
9730 #else
9731 	if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9732 		mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9733 	} else {
9734 		mss = m0->m_pkthdr.tso_segsz;
9735 	}
9736 #endif
9737 
9738 	*mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
9739 	if (m0 == NULL) {
9740 		return ENOBUFS; /* XXX ok? */
9741 	}
9742 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9743 	    "%s %s mss %d nsegs %d",
9744 	    ifp->if_xname,
9745 	    is_tx ? "TX" : "RX",
9746 	    mss, nsegs);
9747 	/*
9748 	 * XXX-ste: can this happen?
9749 	 */
9750 	if (m0->m_nextpkt == NULL) {
9751 #ifdef GSO_DEBUG
9752 		D("only 1 segment");
9753 #endif
9754 		if (is_tx) {
9755 			error = bridge_transmit(ifp, m0);
9756 		}
9757 		return error;
9758 	}
9759 #ifdef GSO_STATS
9760 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9761 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9762 	GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9763 #endif /* GSO_STATS */
9764 
9765 	/* first pkt */
9766 	m = m0;
9767 
9768 	state->update(state, m);
9769 
9770 	do {
9771 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9772 
9773 		state->internal(state, m);
9774 		m_tx = m;
9775 		m = m->m_nextpkt;
9776 		if (is_tx) {
9777 			m_tx->m_nextpkt = NULL;
9778 			if ((error = bridge_transmit(ifp, m_tx)) != 0) {
9779 				/*
9780 				 * XXX: If a segment can not be sent, discard the following
9781 				 * segments and propagate the error to the upper levels.
9782 				 * In this way the TCP retransmits all the initial packet.
9783 				 */
9784 #ifdef GSO_DEBUG
9785 				D("if_transmit error\n");
9786 #endif
9787 				goto err;
9788 			}
9789 		}
9790 		state->update(state, m);
9791 
9792 		state->tcp->th_flags &= ~TH_CWR;
9793 		state->tcp->th_seq = htonl(state->tcp_seq);
9794 	} while (m->m_nextpkt);
9795 
9796 	/* last pkt */
9797 	state->internal(state, m);
9798 
9799 	if (is_tx) {
9800 		error = bridge_transmit(ifp, m);
9801 #ifdef GSO_DEBUG
9802 		if (error) {
9803 			D("last if_transmit error\n");
9804 			D("error - type = %d \n", error);
9805 		}
9806 #endif
9807 	}
9808 #ifdef GSO_STATS
9809 	if (!error) {
9810 		GSOSTAT_INC(tcp.gsos_segmented);
9811 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9812 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9813 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9814 	}
9815 #endif /* GSO_STATS */
9816 	return error;
9817 
9818 err:
9819 #ifdef GSO_DEBUG
9820 	D("error - type = %d \n", error);
9821 #endif
9822 	while (m != NULL) {
9823 		m_tx = m->m_nextpkt;
9824 		m->m_nextpkt = NULL;
9825 		m_freem(m);
9826 		m = m_tx;
9827 	}
9828 	return error;
9829 }
9830 
9831 /*
9832  * GSO for TCP/IPv[46]
9833  */
9834 static int
9835 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9836     boolean_t is_tx)
9837 {
9838 	int error;
9839 	ip_packet_info  info;
9840 	uint32_t csum_flags;
9841 	struct gso_ip_tcp_state state;
9842 	struct bripstats stats; /* XXX ignored */
9843 	struct tcphdr *tcp;
9844 
9845 	if (!is_tx && ipforwarding == 0) {
9846 		/* no need to segment if the packet will not be forwarded */
9847 		return 0;
9848 	}
9849 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9850 	if (error != 0) {
9851 		if (*mp != NULL) {
9852 			m_freem(*mp);
9853 			*mp = NULL;
9854 		}
9855 		return error;
9856 	}
9857 	if (info.ip_proto_hdr == NULL) {
9858 		/* not a TCP packet */
9859 		return 0;
9860 	}
9861 	tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9862 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9863 	    info.ip_hlen, info.ip_hdr.ptr, tcp);
9864 	if (is_ipv4) {
9865 		csum_flags = CSUM_DELAY_DATA; /* XXX */
9866 		if (!is_tx) {
9867 			/* if RX to our local IP address, don't segment */
9868 			struct in_addr  dst_ip;
9869 
9870 			bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9871 			if (in_addr_is_ours(dst_ip)) {
9872 				return 0;
9873 			}
9874 		}
9875 	} else {
9876 		csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9877 		if (!is_tx) {
9878 			/* if RX to our local IP address, don't segment */
9879 			if (in6_addr_is_ours(&state.hdr.ip6->ip6_dst,
9880 			    ifp->if_index)) {
9881 				/* local IP address, no need to segment */
9882 				return 0;
9883 			}
9884 		}
9885 	}
9886 	(*mp)->m_pkthdr.csum_flags = csum_flags;
9887 	(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9888 	return gso_ip_tcp(ifp, mp, &state, is_tx);
9889 }
9890