xref: /xnu-12377.61.12/bsd/net/if_bridge.c (revision 4d495c6e23c53686cf65f45067f79024cf5dcee8)
1 /*
2  * Copyright (c) 2004-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
 * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
 *	  consider heterogeneous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/uipc_domain.h>
131 #include <kern/zalloc.h>
132 
133 #if NBPFILTER > 0
134 #include <net/bpf.h>
135 #endif
136 #include <net/if.h>
137 #include <net/if_dl.h>
138 #include <net/if_types.h>
139 #include <net/if_var.h>
140 #include <net/if_media.h>
141 #include <net/net_api_stats.h>
142 
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #include <netinet/if_ether.h> /* for struct arpcom */
153 #include <net/bridgestp.h>
154 #include <net/if_bridgevar.h>
155 #include <net/if_llc.h>
156 #if NVLAN > 0
157 #include <net/if_vlan_var.h>
158 #endif /* NVLAN > 0 */
159 
160 #include <net/if_ether.h>
161 #include <net/dlil.h>
162 #include <net/kpi_interfacefilter.h>
163 #include <net/pfvar.h>
164 
165 #include <net/route.h>
166 #include <net/droptap.h>
167 #include <dev/random/randomdev.h>
168 
169 #include <netinet/bootp.h>
170 #include <netinet/dhcp.h>
171 
172 #if SKYWALK
173 #include <skywalk/nexus/netif/nx_netif.h>
174 #endif /* SKYWALK */
175 
176 #include <net/sockaddr_utils.h>
177 #include <net/mblist.h>
178 
179 #include <os/log.h>
180 
/* Union of the IPv4 and IPv6 TSO checksum flag bits. */
#define _TSO_CSUM       (CSUM_TSO_IPV4 | CSUM_TSO_IPV6)

/* File-local IPv4 address initialized to INADDR_ANY. */
static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };


/*
 * mbuf flag tests
 * - __M_FLAGS_ARE_SET is true if at least one bit of 'flags' is set in
 *   (m)->m_flags; IS_BCAST_MCAST is therefore true for either flag
 */
#define __M_FLAGS_ARE_SET(m, flags)     (((m)->m_flags & (flags)) != 0)
#define IS_BCAST(m)                     __M_FLAGS_ARE_SET(m, M_BCAST)
#define IS_MCAST(m)                     __M_FLAGS_ARE_SET(m, M_MCAST)
#define IS_BCAST_MCAST(m)               __M_FLAGS_ARE_SET(m, M_BCAST | M_MCAST)

/*
 * Constants passed through htons(), presumably so they can be compared
 * directly against on-the-wire header fields.
 */
#define HTONS_ETHERTYPE_ARP             htons(ETHERTYPE_ARP)
#define HTONS_ETHERTYPE_IP              htons(ETHERTYPE_IP)
#define HTONS_ETHERTYPE_IPV6            htons(ETHERTYPE_IPV6)
#define HTONS_ARPHRD_ETHER              htons(ARPHRD_ETHER)
#define HTONS_ARPOP_REQUEST             htons(ARPOP_REQUEST)
#define HTONS_ARPOP_REPLY               htons(ARPOP_REPLY)
#define HTONS_IPPORT_BOOTPC             htons(IPPORT_BOOTPC)
#define HTONS_IPPORT_BOOTPS             htons(IPPORT_BOOTPS)
#define HTONS_DHCP_FLAGS_BROADCAST      htons(DHCP_FLAGS_BROADCAST)
200 
/*
 * if_bridge_debug, BR_DBGF_*
 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
 *   to enable additional logs for the corresponding bridge function
 * - "sysctl net.link.bridge.debug" controls the value of
 *   'if_bridge_debug'
 */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400
#define BR_DBGF_INPUT_LIST      0x0800

/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * BRIDGE_DBGF_ENABLED
 * - true if at least one of the BR_DBGF_* bits in '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole; without the
 *   parentheses '&' would bind to the first operand only and the test
 *   would always be true
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
235 
/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted when '__level' is at or below if_bridge_log_level,
 *   or when any of the BR_DBGF_* bits in '__dbgf' is enabled
 * - BRIDGE_LOG_SIMPLE does not include the function name in the log
 * - '__level' and '__dbgf' are parenthesized so that compound expressions
 *   (e.g. a conditional level, or several flags or'ed together) are
 *   evaluated as a unit
 * - '__string' must be a string literal: BRIDGE_LOG pastes it after "%s: "
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
	                       __func__, ## __VA_ARGS__);       \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                    \
	        if ((__level) <= if_bridge_log_level ||         \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
257 
/*
 * Bridge locking primitives.
 * - _BRIDGE_LOCK/_BRIDGE_UNLOCK take and release the per-bridge mutex
 *   'sc_mtx' directly
 * - BRIDGE_LOCK_ASSERT_HELD/NOTHELD assert the ownership state of 'sc_mtx'
 */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)

/*
 * BRIDGE_LOCK_DEBUG selects between function-based lock wrappers
 * (bridge_lock() etc.) and the macro implementations in the #else branch.
 * It is hard-wired to 1 here, so the function-based wrappers are the
 * ones compiled.
 */
#define BRIDGE_LOCK_DEBUG      1
#if BRIDGE_LOCK_DEBUG

/* number of entries in the lock_lr[]/unlock_lr[] arrays of bridge_softc */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF
 * - called with the lock held; always drops the lock
 * - takes a reference on the member list unless an exclusive request is
 *   pending, in which case '_err' is set to EBUSY
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt == 0) {                               \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	} else {                                                        \
	        (_err) = EBUSY;                                         \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
/*
 * BRIDGE_UNREF
 * - drops a member list reference; wakes up sleepers on 'sc_cv' (after
 *   releasing the lock) when the last reference goes away while an
 *   exclusive request is pending
 */
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt == 0) || ((_sc)->sc_iflist_ref != 0)) { \
	        _BRIDGE_UNLOCK(_sc);                                    \
	} else {                                                        \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	}                                                               \
} while (0)
/*
 * BRIDGE_XLOCK
 * - called with the lock held; registers an exclusive request and sleeps
 *   on 'sc_cv' until all member list references are gone
 */
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref != 0) {                             \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
	}                                                               \
} while (0)
/*
 * BRIDGE_XDROP
 * - called with the lock held; withdraws an exclusive request
 */
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
313 
/*
 * BRIDGE_BPF_TAP_IN, BRIDGE_BPF_TAP_OUT
 * - hand packet 'm' to bpf_tap_in()/bpf_tap_out() as DLT_EN10MB, but only
 *   when 'ifp' has a non-NULL if_bpf
 * - 'ifp' and 'm' are parenthesized so that any pointer expression can be
 *   passed safely; note that 'ifp' is evaluated twice
 */
#define BRIDGE_BPF_TAP_IN(ifp, m) \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_in((ifp), DLT_EN10MB, (m), NULL, 0);    \
	        }                                                       \
	} while(0)

#define BRIDGE_BPF_TAP_OUT(ifp, m)                                      \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_out((ifp), DLT_EN10MB, (m), NULL, 0);   \
	        }                                                       \
	} while(0)
327 
328 
/*
 * Initial size of the route hash table.  Must be a power of two.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

/*
 * Maximum size of the routing hash table
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/*
 * Bucket index mask derived from the current table size (size - 1);
 * this is only a valid mask while sc_rthash_size is a power of two.
 */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)

/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif

/*
 * Timeout (in seconds) for entries learned dynamically.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/*
 * List of capabilities to possibly mask on the member interface.
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
/*
 * List of capabilities to disable on the member interface.
 */
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
381 
/*
 * Bridge interface list entry.
 * - one instance per member interface; linked through 'bif_next'
 *   (the bridge keeps TAILQs of these, see sc_iflist and sc_spanlist)
 */
struct bridge_iflist {
	TAILQ_ENTRY(bridge_iflist) bif_next;
	struct ifnet            *bif_ifp;       /* member if */
	struct bstp_port        bif_stp;        /* STP state */
	uint32_t                bif_ifflags;    /* member if flags (IFBIF_*) */
	int                     bif_savedcaps;  /* saved capabilities */
	uint32_t                bif_addrmax;    /* max # of addresses */
	uint32_t                bif_addrcnt;    /* cur. # of addresses */
	uint32_t                bif_addrexceeded; /* # of address violations */

	interface_filter_t      bif_iff_ref;    /* interface filter reference */
	struct bridge_softc     *bif_sc;        /* presumably the owning bridge -- confirm */
	uint32_t                bif_flags;      /* presumably BIFF_* bits -- confirm */

	/* host filter */
	struct in_addr          bif_hf_ipsrc;
	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN];

	struct ifbrmstats       bif_stats;
};
405 
406 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)407 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
408 {
409 	return (bif->bif_ifflags & flags) != 0;
410 }
411 
412 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)413 bif_has_checksum_offload(struct bridge_iflist * bif)
414 {
415 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
416 }
417 
418 static inline bool
bif_has_mac_nat(struct bridge_iflist * bif)419 bif_has_mac_nat(struct bridge_iflist * bif)
420 {
421 	return bif_ifflags_are_set(bif, IFBIF_MAC_NAT);
422 }
423 
424 static inline bool
bif_uses_virtio(struct bridge_iflist * bif)425 bif_uses_virtio(struct bridge_iflist * bif)
426 {
427 	return bif_ifflags_are_set(bif, IFBIF_USES_VIRTIO);
428 }
429 
/*
 * fake errors to make the code clearer
 * - every alias expands to EJUSTRETURN, so the names only document the
 *   reason at the return site; callers cannot tell them apart
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN

/*
 * BIFF_* member state bits
 * - presumably stored in bridge_iflist.bif_flags -- confirm against users
 */
#define BIFF_PROMISC            0x01    /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
#define BIFF_HOST_FILTER        0x10    /* host filter enabled */
#define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
#define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x400   /* allmulti set */
#define BIFF_LRO_DISABLED       0x800   /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
#endif /* SKYWALK */
455 
/*
 * mac_nat_entry
 * - translates between an IP address and MAC address on a specific
 *   bridge interface member
 * - the address lives in a union: 'mne_ip' (IPv4) and 'mne_ip6' (IPv6)
 *   share storage; MNE_FLAGS_IPV6 in 'mne_flags' marks an IPv6 entry
 */
struct mac_nat_entry {
	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
	struct bridge_iflist    *mne_bif;       /* originating interface */
	unsigned long           mne_expire;     /* expiration time */
	union {
		struct in_addr  mneu_ip;        /* originating IPv4 address */
		struct in6_addr mneu_ip6;       /* originating IPv6 address */
	} mne_u;
	uint8_t                 mne_mac[ETHER_ADDR_LEN];
	uint8_t                 mne_flags;      /* MNE_FLAGS_* */
	uint8_t                 mne_reserved;
};
/* shorthand accessors for the address union */
#define mne_ip  mne_u.mneu_ip
#define mne_ip6 mne_u.mneu_ip6

#define MNE_FLAGS_IPV6          0x01    /* IPv6 address */

/* list head type for mac_nat_entry lists (see sc_mne_list, sc_mne_list_v6) */
LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
479 
/*
 * mac_nat_record
 * - filled in by bridge_mac_nat_output() to describe the translation that
 *   bridge_mac_nat_translate must perform
 * - carries enough state that the translation can be deferred until the
 *   destination interface turns out to be the MAC-NAT interface
 * - 'mnr_u' overlays the ARP, IPv4 and IPv6 variants; presumably
 *   'mnr_ether_type' tells which one is valid -- confirm against users
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		/* ARP variant */
		uint16_t        mnru_arp_offset;
		/* IPv4 variant */
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t  mnruip_header_len;
		} mnru_ip;
		/* IPv6 variant */
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessors: ARP variant */
#define mnr_arp_offset  mnr_u.mnru_arp_offset

/* shorthand accessors: IPv4 variant */
#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

/* shorthand accessors: IPv6 variant */
#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
515 
/*
 * Bridge route node.
 * - one learned or configured (MAC address, vlan) -> member mapping
 * - linked twice: into a hash bucket ('brt_hash') and into the flat
 *   list ('brt_list')
 */
struct bridge_rtnode {
	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
	struct bridge_iflist    *brt_dst;       /* destination if */
	unsigned long           brt_expire;     /* expiration time */
	uint8_t                 brt_flags;      /* address flags */
	uint8_t                 brt_addr[ETHER_ADDR_LEN];
	uint16_t                brt_vlan;       /* vlan id */
};

/*
 * Member-access shorthand: 'brt->brt_ifp' expands to
 * 'brt->brt_dst->bif_ifp', so it dereferences brt_dst.
 */
#define brt_ifp                 brt_dst->bif_ifp
530 
/*
 * Bridge delayed function call context
 * - pairs a bridge with a function to run on it at a later time via a
 *   thread call
 */
typedef void (*bridge_delayed_func_t)(struct bridge_softc *);

struct bridge_delayed_call {
	struct bridge_softc     *bdc_sc;        /* argument passed to bdc_func -- presumably; confirm */
	bridge_delayed_func_t   bdc_func; /* Function to call */
	struct timespec         bdc_ts; /* Time to call */
	u_int32_t               bdc_flags;      /* BDCF_* */
	thread_call_t           bdc_thread_call;
};

#define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
#define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
546 
547 /*
548  * Software state for each bridge.
549  */
550 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
551 
552 struct bridge_softc {
553 	struct ifnet            *sc_ifp;        /* make this an interface */
554 	uint32_t                sc_flags;
555 	LIST_ENTRY(bridge_softc) sc_list;
556 	decl_lck_mtx_data(, sc_mtx);
557 	struct _bridge_rtnode_list * __counted_by(sc_rthash_size) sc_rthash;  /* our forwarding table */
558 	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
559 	uint32_t                sc_rthash_key;  /* key for hash */
560 	uint32_t                sc_rthash_size; /* size of the hash table */
561 	struct bridge_delayed_call sc_aging_timer;
562 	struct bridge_delayed_call sc_resize_call;
563 	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
564 	struct bstp_state       sc_stp;         /* STP state */
565 	void                    *sc_cv;
566 	uint32_t                sc_brtmax;      /* max # of addresses */
567 	uint32_t                sc_brtcnt;      /* cur. # of addresses */
568 	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
569 	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
570 	uint32_t                sc_iflist_xcnt; /* refcount for sc_iflist */
571 	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
572 	uint32_t                sc_brtexceeded; /* # of cache drops */
573 	uint32_t                sc_filter_flags; /* ipf and flags */
574 	struct ifnet            *sc_ifaddr;     /* member mac copied from */
575 	u_char                  sc_defaddr[6];  /* Default MAC address */
576 	char                    sc_if_xname[IFNAMSIZ];
577 
578 	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
579 	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
580 	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
581 	uint32_t                sc_mne_max;      /* max # of entries */
582 	uint32_t                sc_mne_count;    /* cur. # of entries */
583 	uint32_t                sc_mne_allocation_failures;
584 #if BRIDGE_LOCK_DEBUG
585 	/*
586 	 * Locking and unlocking calling history
587 	 */
588 	void                    *lock_lr[BR_LCKDBG_MAX];
589 	int                     next_lock_lr;
590 	void                    *unlock_lr[BR_LCKDBG_MAX];
591 	int                     next_unlock_lr;
592 #endif /* BRIDGE_LOCK_DEBUG */
593 };
594 
595 #define SCF_DETACHING            0x01
596 #define SCF_RESIZING             0x02
597 #define SCF_MEDIA_ACTIVE         0x04
598 #define SCF_PROTO_ATTACHED       0x08
599 
/*
 * ChecksumOperation
 * - selects what, if anything, is to be done about a packet's checksum;
 *   the numeric values are fixed explicitly
 */
typedef enum {
	CHECKSUM_OPERATION_NONE          = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE      = 2,
	CHECKSUM_OPERATION_COMPUTE       = 3,
} ChecksumOperation;
606 
/*
 * ip_packet_info
 * - describes the IP header and payload of a packet
 * - 'ip_hdr' is bounds-annotated with 'ip_m0_len', so the two must be
 *   kept consistent when either is updated
 */
typedef struct {
	u_int           ip_hlen;        /* IP header length */
	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
	u_int           ip_m0_len;      /* bytes available at ip_hdr (without jumping mbufs) */
	u_int           ip_opt_len;     /* IPv6 options headers length */
	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
	bool            ip_is_ipv4;     /* presumably false means IPv6 -- confirm */
	bool            ip_is_fragmented;
	uint8_t         *__sized_by(ip_m0_len) ip_hdr;   /* pointer to IP header */
	uint8_t         *__indexable ip_proto_hdr;   /* ptr to protocol header (TCP) */
} ip_packet_info, *ip_packet_info_t;
618 
619 struct bridge_hostfilter_stats bridge_hostfilter_stats;
620 
621 typedef uint8_t ether_type_flag_t;
622 
/*
 * pkt_direction_t
 * - direction of a packet: receive (RX) or transmit (TX)
 */
typedef enum {
	pkt_direction_RX = 0,
	pkt_direction_TX = 1
} pkt_direction_t;
627 
/* lock group shared by the bridge mutexes declared in this file */
static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
/*
 * NOTE(review): with BRIDGE_LOCK_DEBUG enabled the attribute is declared
 * without LCK_ATTR_DEBUG, while the non-debug build passes LCK_ATTR_DEBUG.
 * This looks inverted -- confirm against the LCK_ATTR_DECLARE parameter
 * semantics before relying on either branch.
 */
#if BRIDGE_LOCK_DEBUG
static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
#else
static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
#endif
/* mutex named for the bridge list; presumably guards the sc_list chain -- confirm */
static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);

/* seconds between route list walks; initialized from BRIDGE_RTABLE_PRUNE_PERIOD */
static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* typed allocation pools for route nodes and MAC NAT entries */
static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
640 
641 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
642 static int      bridge_clone_destroy(struct ifnet *);
643 
644 static errno_t  bridge_ioctl(struct ifnet *, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)));
645 #if HAS_IF_CAP
646 static void     bridge_mutecaps(struct bridge_softc *);
647 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
648     int);
649 #endif
650 static errno_t bridge_set_tso(struct bridge_softc *);
651 static void     bridge_proto_attach_changed(struct ifnet *);
652 static int      bridge_init(struct ifnet *);
653 static void     bridge_ifstop(struct ifnet *, int);
654 static int      bridge_output(struct ifnet *, struct mbuf *);
655 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
656 static void     bridge_start(struct ifnet *);
657 static mblist   bridge_input_list(struct bridge_softc *, ifnet_t,
658     struct ether_header *, mblist, bool);
659 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
660     mbuf_t *, char **);
661 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
662     mbuf_t *);
663 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
664     mbuf_t *m);
665 static int      bridge_enqueue(ifnet_t, ifnet_t, ifnet_t,
666     ether_type_flag_t, mbuf_t, ChecksumOperation, pkt_direction_t);
667 static mbuf_t   bridge_checksum_offload_list(ifnet_t, struct bridge_iflist *,
668     mbuf_t, bool);
669 static mbuf_t   bridge_filter_checksum(ifnet_t, struct bridge_iflist * bif,
670     mbuf_t m, bool, bool, bool);
671 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
672 
673 static void     bridge_aging_timer(struct bridge_softc *sc);
674 
675 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
676     ether_type_flag_t, mbuf_t);
677 static void     bridge_broadcast_list(struct bridge_softc *,
678     struct bridge_iflist *, ether_type_flag_t, mbuf_t, pkt_direction_t);
679 
680 static void     bridge_span(struct bridge_softc *, ether_type_flag_t, struct mbuf *);
681 
682 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
683     uint16_t, struct bridge_iflist *, int, uint8_t);
684 static struct bridge_iflist * bridge_rtlookup_bif(struct bridge_softc *,
685     const uint8_t[ETHER_ADDR_LEN], uint16_t);
686 static void     bridge_rttrim(struct bridge_softc *);
687 static void     bridge_rtage(struct bridge_softc *);
688 static void     bridge_rtflush(struct bridge_softc *, int);
689 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
690     uint16_t);
691 
692 static int      bridge_rtable_init(struct bridge_softc *);
693 static void     bridge_rtable_fini(struct bridge_softc *);
694 
695 static void     bridge_rthash_resize(struct bridge_softc *);
696 
697 static int      bridge_rtnode_addr_cmp(const uint8_t[ETHER_ADDR_LEN], const uint8_t[ETHER_ADDR_LEN]);
698 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
699     const uint8_t[ETHER_ADDR_LEN], uint16_t);
700 static int      bridge_rtnode_hash(struct bridge_softc *,
701     struct bridge_rtnode *);
702 static int      bridge_rtnode_insert(struct bridge_softc *,
703     struct bridge_rtnode *);
704 static void     bridge_rtnode_destroy(struct bridge_softc *,
705     struct bridge_rtnode *);
706 #if BRIDGESTP
707 static void     bridge_rtable_expire(struct ifnet *, int);
708 static void     bridge_state_change(struct ifnet *, int);
709 #endif /* BRIDGESTP */
710 
711 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
712     char * __sized_by(IFNAMSIZ) name);
713 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
714     struct ifnet *ifp);
715 static void     bridge_delete_member(struct bridge_softc *,
716     struct bridge_iflist *);
717 static void     bridge_delete_span(struct bridge_softc *,
718     struct bridge_iflist *);
719 
720 static int      bridge_ioctl_add(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
721 static int      bridge_ioctl_del(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
722 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
723 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
724 static int      bridge_ioctl_scache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
725 static int      bridge_ioctl_gcache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
726 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
727 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
728 static int      bridge_ioctl_rts32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
729 static int      bridge_ioctl_rts64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
730 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
731 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
732 static int      bridge_ioctl_sto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
733 static int      bridge_ioctl_gto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
734 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
735 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
736 static int      bridge_ioctl_flush(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
737 static int      bridge_ioctl_gpri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
738 static int      bridge_ioctl_spri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
739 static int      bridge_ioctl_ght(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
740 static int      bridge_ioctl_sht(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
741 static int      bridge_ioctl_gfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
742 static int      bridge_ioctl_sfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
743 static int      bridge_ioctl_gma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
744 static int      bridge_ioctl_sma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
745 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
746 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
747 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
748 static int      bridge_ioctl_addspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
749 static int      bridge_ioctl_delspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
750 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
751 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
752 static int      bridge_ioctl_grte(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
753 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
754 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
755 static int      bridge_ioctl_sproto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
756 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
757 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len);
758 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
759 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
760 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
761 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
762 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
763 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
764 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
765 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
766 
767 static int      bridge_pf(struct mbuf **, struct ifnet *,
768     uint32_t sc_filter_flags, bool input);
769 static int bridge_ip_checkbasic(struct mbuf **);
770 static int bridge_ip6_checkbasic(struct mbuf **);
771 
772 static void bridge_detach(ifnet_t);
773 static void bridge_link_event(struct ifnet *, u_int32_t);
774 static void bridge_iflinkevent(struct ifnet *);
775 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
776 static int interface_media_active(struct ifnet *);
777 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
778 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
779 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
780 
781 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
782     struct bridge_iflist *);
783 static void bridge_mac_nat_disable(struct bridge_softc *sc);
784 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
785 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
786 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
787     struct bridge_iflist *);
788 static mbuf_t bridge_mac_nat_input(struct bridge_softc *, ifnet_t, mbuf_t,
789     ifnet_t * dst_if);
790 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
791     struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
792 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
793     const char[ETHER_ADDR_LEN]);
794 
795 static mblist bridge_mac_nat_input_list(struct bridge_softc *sc,
796     ifnet_t external_ifp, mbuf_t m, mbuf_t * forward_head);
797 static mbuf_t bridge_mac_nat_translate_list(struct bridge_softc * sc,
798     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
799 static mbuf_t bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
800     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
801 
802 static mbuf_t   bridge_pf_list_out(mbuf_t m, ifnet_t ifp,
803     uint32_t sc_filter_flags);
804 
805 static inline ifnet_t
bridge_rtlookup(struct bridge_softc * sc,const uint8_t addr[ETHER_ADDR_LEN],uint16_t vlan)806 bridge_rtlookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
807     uint16_t vlan)
808 {
809 	struct bridge_iflist *  bif;
810 	ifnet_t                 ifp = NULL;
811 
812 	bif = bridge_rtlookup_bif(sc, addr, vlan);
813 	if (bif != NULL) {
814 		ifp = bif->bif_ifp;
815 	}
816 	return ifp;
817 }
818 
819 static bool in_addr_is_ours(const struct in_addr);
820 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
821 
822 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
823 
824 static mblist
825 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx);
826 
827 static mblist
828 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
829     u_int mac_hlen, bool is_ipv4, bool is_tx);
830 
831 static inline mblist
gso_tcp_transmit(ifnet_t ifp,mbuf_t m,u_int mac_hlen,bool is_ipv4)832 gso_tcp_transmit(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4)
833 {
834 	return gso_tcp(ifp, m, mac_hlen, is_ipv4, true);
835 }
836 
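/*
 * VLAN tags are not examined here: VLANTAGOF() expands to 0 for every packet.
 * (For reference, IEEE 802.1Q-2003 Table 9-2 defines 1 as the default bridge
 * VLAN.)
 */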
838 #define VLANTAGOF(_m)   0
839 
840 #define BSTP_ETHERADDR_RANGE_FIRST      0x00
841 #define BSTP_ETHERADDR_RANGE_LAST       0x0f
842 
843 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
844 { 0x01, 0x80, 0xc2, 0x00, 0x00, BSTP_ETHERADDR_RANGE_FIRST };
845 
846 
847 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
848 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
849 
850 #if BRIDGESTP
851 static struct bstp_cb_ops bridge_ops = {
852 	.bcb_state = bridge_state_change,
853 	.bcb_rtage = bridge_rtable_expire
854 };
855 #endif /* BRIDGESTP */
856 
857 SYSCTL_DECL(_net_link);
858 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
859     "Bridge");
860 
861 static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
862 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
863     CTLFLAG_RW | CTLFLAG_LOCKED,
864     &bridge_inherit_mac, 0,
865     "Inherit MAC address from the first bridge member");
866 
867 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
868     CTLFLAG_RW | CTLFLAG_LOCKED,
869     &bridge_rtable_prune_period, 0,
870     "Interval between pruning of routing table");
871 
872 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
873 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
874     CTLFLAG_RW | CTLFLAG_LOCKED,
875     &bridge_rtable_hash_size_max, 0,
876     "Maximum size of the routing hash table");
877 
878 #if BRIDGE_DELAYED_CALLBACK_DEBUG
879 static int bridge_delayed_callback_delay = 0;
880 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
881     CTLFLAG_RW | CTLFLAG_LOCKED,
882     &bridge_delayed_callback_delay, 0,
883     "Delay before calling delayed function");
884 #endif
885 
886 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
887     hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
888     &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
889 
#if BRIDGESTP
/*
 * net.link.bridge.log_stp: log STP state changes.
 *
 * CTLFLAG_LOCKED is set so this knob is declared the same way as the
 * other integer sysctls under net.link.bridge in this file.
 */
static int log_stp   = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
895 
896 struct bridge_control {
897 	int             (*bc_func)(struct bridge_softc *, void *__sized_by(arg_len) args, size_t arg_len);
898 	unsigned int    bc_argsize;
899 	unsigned int    bc_flags;
900 };
901 
902 #define BC_F_COPYIN             0x01    /* copy arguments in */
903 #define BC_F_COPYOUT            0x02    /* copy arguments out */
904 #define BC_F_SUSER              0x04    /* do super-user check */
905 
906 static const struct bridge_control bridge_control_table32[] = {
907 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
908 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
909 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
910 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
911 
912 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
913 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
914 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
915 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
916 
917 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
918 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
919 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
920 	  .bc_flags = BC_F_COPYOUT },
921 
922 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
923 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
924 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
925 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
926 
927 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
928 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
929 
930 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
931 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
932 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
933 	  .bc_flags = BC_F_COPYOUT },
934 
935 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
936 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
937 
938 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
939 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
940 
941 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
942 	  .bc_flags = BC_F_COPYOUT },
943 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
944 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
945 
946 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
947 	  .bc_flags = BC_F_COPYOUT },
948 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
949 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
950 
951 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
952 	  .bc_flags = BC_F_COPYOUT },
953 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
954 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
955 
956 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
957 	  .bc_flags = BC_F_COPYOUT },
958 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
959 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
960 
961 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
962 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
963 
964 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
965 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
966 
967 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
968 	  .bc_flags = BC_F_COPYOUT },
969 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
970 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
971 
972 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
973 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
974 
975 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
976 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
977 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
978 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
979 
980 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
981 	  .bc_flags = BC_F_COPYOUT },
982 
983 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
984 	  .bc_flags = BC_F_COPYOUT },
985 
986 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
987 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
988 
989 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
990 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
991 
992 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
993 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
994 
995 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
996 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
997 
998 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
999 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1000 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1001 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1002 
1003 	{ .bc_func = bridge_ioctl_gmnelist32,
1004 	  .bc_argsize = sizeof(struct ifbrmnelist32),
1005 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1006 	{ .bc_func = bridge_ioctl_gifstats32,
1007 	  .bc_argsize = sizeof(struct ifbrmreq32),
1008 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1009 };
1010 
1011 static const struct bridge_control bridge_control_table64[] = {
1012 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
1013 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1014 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
1015 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1016 
1017 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
1018 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1019 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
1020 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1021 
1022 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
1023 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1024 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
1025 	  .bc_flags = BC_F_COPYOUT },
1026 
1027 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
1028 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1029 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
1030 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1031 
1032 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
1033 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1034 
1035 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
1036 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1037 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
1038 	  .bc_flags = BC_F_COPYOUT },
1039 
1040 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1041 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1042 
1043 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1044 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1045 
1046 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1047 	  .bc_flags = BC_F_COPYOUT },
1048 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1049 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1050 
1051 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1052 	  .bc_flags = BC_F_COPYOUT },
1053 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1054 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1055 
1056 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1057 	  .bc_flags = BC_F_COPYOUT },
1058 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1059 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1060 
1061 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1062 	  .bc_flags = BC_F_COPYOUT },
1063 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
1064 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1065 
1066 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1067 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1068 
1069 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1070 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1071 
1072 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1073 	  .bc_flags = BC_F_COPYOUT },
1074 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1075 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1076 
1077 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1078 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1079 
1080 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1081 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1082 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1083 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1084 
1085 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1086 	  .bc_flags = BC_F_COPYOUT },
1087 
1088 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1089 	  .bc_flags = BC_F_COPYOUT },
1090 
1091 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1092 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1093 
1094 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1095 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1096 
1097 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1098 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1099 
1100 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1101 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1102 
1103 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1104 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1105 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1106 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1107 
1108 	{ .bc_func = bridge_ioctl_gmnelist64,
1109 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1110 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1111 	{ .bc_func = bridge_ioctl_gifstats64,
1112 	  .bc_argsize = sizeof(struct ifbrmreq64),
1113 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1114 };
1115 
1116 static const unsigned int bridge_control_table_size =
1117     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1118 
1119 static LIST_HEAD(, bridge_softc) bridge_list =
1120     LIST_HEAD_INITIALIZER(bridge_list);
1121 
1122 #define BRIDGENAME      "bridge"
1123 #define BRIDGES_MAX     IF_MAXUNIT
1124 #define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)
1125 
1126 static struct if_clone bridge_cloner =
1127     IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
1128     0, BRIDGES_MAX);
1129 
1130 static int if_bridge_txstart = 0;
1131 SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
1132     &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
1133 
1134 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1135     &if_bridge_debug, 0, "Bridge debug flags");
1136 
1137 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
1138     CTLFLAG_RW | CTLFLAG_LOCKED,
1139     &if_bridge_log_level, 0, "Bridge log level");
1140 
1141 static int if_bridge_output_skip_filters = 1;
1142 SYSCTL_INT(_net_link_bridge, OID_AUTO, output_skip_filters,
1143     CTLFLAG_RW | CTLFLAG_LOCKED,
1144     &if_bridge_output_skip_filters, 0, "Bridge skip output filters");
1145 
1146 int bridge_enable_early_input = 1;   /* DLIL early input */
1147 SYSCTL_INT(_net_link_bridge, OID_AUTO, enable_early_input,
1148     CTLFLAG_RW | CTLFLAG_LOCKED,
1149     &bridge_enable_early_input, 0,
1150     "Bridge enable early input");
1151 
1152 int bridge_allow_lro_num_seg = 1;   /* allow LRO_NUM_SEG to keep LRO enabled */
1153 SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_lro_num_seg,
1154     CTLFLAG_RW | CTLFLAG_LOCKED,
1155     &bridge_allow_lro_num_seg, 0,
1156     "Bridge allow LRO_NUM_SEG to keep LRO enabled");
1157 
1158 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
1159 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
1160 #define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
1161 #define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0
1162 
1163 static u_int if_bridge_tso_reduce_mss_forwarding
1164         = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
1165 static u_int if_bridge_tso_reduce_mss_tx
1166         = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1167 
1168 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1169 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1170 {
1171 	int     changed;
1172 	int     error;
1173 	u_int   new_value;
1174 
1175 	error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1176 	    &changed);
1177 	if (error == 0 && changed != 0) {
1178 		if (new_value > val_max) {
1179 			return EINVAL;
1180 		}
1181 		*val = new_value;
1182 	}
1183 	return error;
1184 }
1185 
1186 static int
1187 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1188 {
1189 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1190     BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1191 }
1192 
1193 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1194     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1195     0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1196     "Bridge tso reduce mss when forwarding");
1197 
1198 static int
1199 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1200 {
1201 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1202     BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1203 }
1204 
1205 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1206     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1207     0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1208     "Bridge tso reduce mss on transmit");
1209 
#if DEBUG || DEVELOPMENT
/*
 * net.link.bridge.reduce_tso_mtu
 * - a non-zero value makes the bridge override the interface TSO MTU with
 *   a lower value (i.e. 16K) so that the "use GSO instead" path can be
 *   tested
 */
static int if_bridge_reduce_tso_mtu = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
    CTLFLAG_RW | CTLFLAG_LOCKED, &if_bridge_reduce_tso_mtu, 0,
    "Bridge interface reduce TSO MTU");

#endif /* DEBUG || DEVELOPMENT */
1222 
1223 static void brlog_ether_header(struct ether_header *);
1224 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1225 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1226 static void brlog_mbuf(mbuf_t, const char *, const char *);
1227 static void brlog_link(struct bridge_softc * sc);
1228 
1229 #if BRIDGE_LOCK_DEBUG
static void     bridge_lock(struct bridge_softc *sc);
static void     bridge_unlock(struct bridge_softc *sc);
static int      bridge_lock2ref(struct bridge_softc *sc);
static void     bridge_unref(struct bridge_softc *sc);
static void     bridge_xlock(struct bridge_softc *sc);
static void     bridge_xdrop(struct bridge_softc *sc);

/* declare local `v' holding the return address of the current function */
#define DECL_RETURN_ADDR(v)                                             \
	void * __single v =                                             \
	    __unsafe_forge_single(void *, __builtin_return_address(0))
1238 
1239 static void
bridge_lock(struct bridge_softc * sc)1240 bridge_lock(struct bridge_softc *sc)
1241 {
1242 	DECL_RETURN_ADDR(lr_saved);
1243 
1244 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1245 
1246 	_BRIDGE_LOCK(sc);
1247 
1248 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1249 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1250 }
1251 
1252 static void
bridge_unlock(struct bridge_softc * sc)1253 bridge_unlock(struct bridge_softc *sc)
1254 {
1255 	DECL_RETURN_ADDR(lr_saved);
1256 
1257 	BRIDGE_LOCK_ASSERT_HELD(sc);
1258 
1259 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1260 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1261 
1262 	_BRIDGE_UNLOCK(sc);
1263 }
1264 
1265 static int
bridge_lock2ref(struct bridge_softc * sc)1266 bridge_lock2ref(struct bridge_softc *sc)
1267 {
1268 	int error = 0;
1269 	DECL_RETURN_ADDR(lr_saved);
1270 
1271 	BRIDGE_LOCK_ASSERT_HELD(sc);
1272 
1273 	if (sc->sc_iflist_xcnt > 0) {
1274 		error = EBUSY;
1275 	} else {
1276 		sc->sc_iflist_ref++;
1277 	}
1278 
1279 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1280 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1281 
1282 	_BRIDGE_UNLOCK(sc);
1283 
1284 	return error;
1285 }
1286 
1287 static void
bridge_unref(struct bridge_softc * sc)1288 bridge_unref(struct bridge_softc *sc)
1289 {
1290 	DECL_RETURN_ADDR(lr_saved);
1291 
1292 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1293 
1294 	_BRIDGE_LOCK(sc);
1295 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1296 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1297 
1298 	sc->sc_iflist_ref--;
1299 
1300 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1301 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1302 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1303 		_BRIDGE_UNLOCK(sc);
1304 		wakeup(&sc->sc_cv);
1305 	} else {
1306 		_BRIDGE_UNLOCK(sc);
1307 	}
1308 }
1309 
1310 static void
bridge_xlock(struct bridge_softc * sc)1311 bridge_xlock(struct bridge_softc *sc)
1312 {
1313 	DECL_RETURN_ADDR(lr_saved);
1314 
1315 	BRIDGE_LOCK_ASSERT_HELD(sc);
1316 
1317 	sc->sc_iflist_xcnt++;
1318 	while (sc->sc_iflist_ref > 0) {
1319 		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1320 		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1321 
1322 		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1323 
1324 		sc->lock_lr[sc->next_lock_lr] = lr_saved;
1325 		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1326 	}
1327 }
1328 
1329 #undef DECL_RETURN_ADDR
1330 
1331 static void
bridge_xdrop(struct bridge_softc * sc)1332 bridge_xdrop(struct bridge_softc *sc)
1333 {
1334 	BRIDGE_LOCK_ASSERT_HELD(sc);
1335 
1336 	sc->sc_iflist_xcnt--;
1337 }
1338 
1339 #endif /* BRIDGE_LOCK_DEBUG */
1340 
1341 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1342 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1343 {
1344 	if (m) {
1345 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1346 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1347 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1348 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1349 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1350 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1351 		    suffix ? suffix : "");
1352 	} else {
1353 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1354 	}
1355 }
1356 
1357 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1358 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1359 {
1360 	if (m) {
1361 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1362 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1363 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1364 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1365 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1366 		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
1367 		    (unsigned int)mbuf_maxlen(m),
1368 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1369 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1370 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1371 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1372 			brlog_mbuf_pkthdr(m, "", suffix);
1373 		}
1374 	} else {
1375 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1376 	}
1377 }
1378 
1379 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1380 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1381 {
1382 	mbuf_t                  n;
1383 	size_t                  i, j;
1384 	size_t                  pktlen, mlen, maxlen;
1385 	unsigned char   *ptr;
1386 
1387 	pktlen = mbuf_pkthdr_len(m);
1388 
1389 	if (offset > pktlen) {
1390 		return;
1391 	}
1392 
1393 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1394 	n = m;
1395 	mlen = mbuf_len(n);
1396 	ptr = mtod(n, unsigned char *);
1397 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1398 		if (j >= mlen) {
1399 			n = mbuf_next(n);
1400 			if (n == 0) {
1401 				break;
1402 			}
1403 			ptr = mtod(n, unsigned char *);
1404 			mlen = mbuf_len(n);
1405 			j = 0;
1406 		}
1407 		if (i >= offset) {
1408 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1409 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1410 		}
1411 	}
1412 }
1413 
1414 static void
brlog_ether_header(struct ether_header * eh)1415 brlog_ether_header(struct ether_header *eh)
1416 {
1417 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1418 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1419 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1420 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1421 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1422 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1423 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1424 	    ntohs(eh->ether_type));
1425 }
1426 
1427 static char *
ether_ntop(char * __sized_by (len)buf,size_t len,const u_char ap[ETHER_ADDR_LEN])1428 ether_ntop(char * __sized_by(len) buf, size_t len, const u_char ap[ETHER_ADDR_LEN])
1429 {
1430 	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
1431 	    ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
1432 
1433 	return buf;
1434 }
1435 
1436 static void
brlog_link(struct bridge_softc * sc)1437 brlog_link(struct bridge_softc * sc)
1438 {
1439 	int i;
1440 	uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1441 	IFNAMSIZ + ETHER_ADDR_LEN)];
1442 	struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1443 	const u_char * lladdr;
1444 	char lladdr_str[48];
1445 
1446 	memset(sdl_buffer, 0, sizeof(sdl_buffer));
1447 	sdl->sdl_family = AF_LINK;
1448 	sdl->sdl_nlen = strbuflen(sc->sc_if_xname);
1449 	sdl->sdl_alen = ETHER_ADDR_LEN;
1450 	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1451 	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1452 	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1453 	lladdr_str[0] = '\0';
1454 	for (i = 0, lladdr = CONST_LLADDR(sdl);
1455 	    i < sdl->sdl_alen;
1456 	    i++, lladdr++) {
1457 		char    byte_str[4];
1458 
1459 		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1460 		    *lladdr);
1461 		strbufcat(lladdr_str, byte_str);
1462 	}
1463 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1464 	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1465 	    " slen %d addr %s", sc->sc_if_xname,
1466 	    sdl->sdl_len, sdl->sdl_index,
1467 	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1468 	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1469 }
1470 
1471 static int
_mbuf_get_tso_mss(mbuf_t m)1472 _mbuf_get_tso_mss(mbuf_t m)
1473 {
1474 	int     mss = 0;
1475 
1476 	if ((m->m_pkthdr.csum_flags & _TSO_CSUM) != 0) {
1477 		mss = m->m_pkthdr.tso_segsz;
1478 	}
1479 	return mss;
1480 }
1481 
1482 /*
1483  * bridgeattach:
1484  *
1485  *	Pseudo-device attach routine.
1486  */
1487 __private_extern__ int
bridgeattach(int n)1488 bridgeattach(int n)
1489 {
1490 #pragma unused(n)
1491 	int error;
1492 
1493 	LIST_INIT(&bridge_list);
1494 
1495 #if BRIDGESTP
1496 	bstp_sys_init();
1497 #endif /* BRIDGESTP */
1498 
1499 	error = if_clone_attach(&bridge_cloner);
1500 	if (error != 0) {
1501 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1502 	}
1503 	return error;
1504 }
1505 
1506 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1507 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1508 {
1509 	mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1510 	mbuf_pkthdr_adjustlen(m, -len);
1511 }
1512 
1513 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1514 bridge_ifnet_set_attrs(struct ifnet * ifp)
1515 {
1516 	errno_t         error;
1517 
1518 	error = ifnet_set_mtu(ifp, ETHERMTU);
1519 	if (error != 0) {
1520 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1521 		goto done;
1522 	}
1523 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1524 	if (error != 0) {
1525 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1526 		goto done;
1527 	}
1528 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1529 	if (error != 0) {
1530 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1531 		goto done;
1532 	}
1533 	error = ifnet_set_flags(ifp,
1534 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1535 	    0xffff);
1536 
1537 	if (error != 0) {
1538 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1539 		goto done;
1540 	}
1541 done:
1542 	return error;
1543 }
1544 
1545 static void
bridge_interface_proto_attach_changed(ifnet_t ifp)1546 bridge_interface_proto_attach_changed(ifnet_t ifp)
1547 {
1548 	uint32_t                        proto_count;
1549 	struct bridge_softc * __single  sc = ifp->if_softc;
1550 
1551 	proto_count = if_get_protolist(ifp, NULL, 0);
1552 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1553 	    "%s: proto count %d", ifp->if_xname, proto_count);
1554 
1555 	if (sc == NULL) {
1556 		return;
1557 	}
1558 	BRIDGE_LOCK(sc);
1559 	if ((sc->sc_flags & SCF_DETACHING) != 0) {
1560 		BRIDGE_UNLOCK(sc);
1561 		return;
1562 	}
1563 	if (proto_count >= 2) {
1564 		/* an upper layer protocol is attached */
1565 		sc->sc_flags |= SCF_PROTO_ATTACHED;
1566 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1567 		    "%s: setting SCF_PROTO_ATTACHED", ifp->if_xname);
1568 	} else {
1569 		/* an upper layer protocol was detached */
1570 		sc->sc_flags &= ~SCF_PROTO_ATTACHED;
1571 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1572 		    "%s: clearing SCF_PROTO_ATTACHED", ifp->if_xname);
1573 	}
1574 	BRIDGE_UNLOCK(sc);
1575 }
1576 
1577 static void
bridge_interface_event(struct ifnet * ifp,__unused protocol_family_t protocol,const struct kev_msg * event)1578 bridge_interface_event(struct ifnet * ifp,
1579     __unused protocol_family_t protocol, const struct kev_msg * event)
1580 {
1581 	int         event_code;
1582 
1583 	if (event->vendor_code != KEV_VENDOR_APPLE
1584 	    || event->kev_class != KEV_NETWORK_CLASS
1585 	    || event->kev_subclass != KEV_DL_SUBCLASS) {
1586 		return;
1587 	}
1588 	event_code = event->event_code;
1589 	switch (event_code) {
1590 	case KEV_DL_PROTO_DETACHED:
1591 	case KEV_DL_PROTO_ATTACHED:
1592 		bridge_interface_proto_attach_changed(ifp);
1593 		break;
1594 	default:
1595 		break;
1596 	}
1597 	return;
1598 }
1599 
1600 /*
1601  * Function: bridge_interface_attach_protocol
1602  * Purpose:
1603  *   Attach a protocol to the bridge to get events on the interface,
1604  *   in particular, whether protocols are attached/detached.
1605  */
1606 static int
bridge_interface_attach_protocol(ifnet_t ifp)1607 bridge_interface_attach_protocol(ifnet_t ifp)
1608 {
1609 	int                                 error;
1610 	struct ifnet_attach_proto_param_v2  reg;
1611 
1612 	bzero(&reg, sizeof(reg));
1613 	reg.event = bridge_interface_event;
1614 
1615 	error = ifnet_attach_protocol_v2(ifp, PF_BRIDGE, &reg);
1616 	if (error != 0) {
1617 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1618 		    "%s: ifnet_attach_protocol failed, %d",
1619 		    ifp->if_xname, error);
1620 	}
1621 	return error;
1622 }
1623 
1624 static void
bridge_interface_detach_protocol(ifnet_t ifp)1625 bridge_interface_detach_protocol(ifnet_t ifp)
1626 {
1627 	(void)ifnet_detach_protocol(ifp, PF_BRIDGE);
1628 }
1629 
1630 /*
1631  * bridge_clone_create:
1632  *
1633  *	Create a new bridge instance.
1634  */
1635 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1636 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1637 {
1638 #pragma unused(params)
1639 	ifnet_ref_t ifp = NULL;
1640 	struct bridge_softc *sc = NULL;
1641 	struct bridge_softc *sc2 = NULL;
1642 	struct ifnet_init_eparams init_params;
1643 	errno_t error = 0;
1644 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1645 	int fb, retry, has_hostid;
1646 
1647 	sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1648 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1649 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1650 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1651 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1652 	sc->sc_filter_flags = 0;
1653 
1654 	TAILQ_INIT(&sc->sc_iflist);
1655 
1656 	/* use the interface name as the unique id for ifp recycle */
1657 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1658 	    ifc->ifc_name, unit);
1659 	bzero(&init_params, sizeof(init_params));
1660 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1661 	init_params.len                 = sizeof(init_params);
1662 	/* Initialize our routing table. */
1663 	error = bridge_rtable_init(sc);
1664 	if (error != 0) {
1665 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1666 		goto done;
1667 	}
1668 	TAILQ_INIT(&sc->sc_spanlist);
1669 	if (if_bridge_txstart) {
1670 		init_params.start = bridge_start;
1671 	} else {
1672 		init_params.flags = IFNET_INIT_LEGACY;
1673 		init_params.output = bridge_output;
1674 	}
1675 	init_params.uniqueid_len        = strbuflen(sc->sc_if_xname);
1676 	init_params.uniqueid            = sc->sc_if_xname;
1677 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1678 	init_params.name                = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1679 	init_params.unit                = unit;
1680 	init_params.family              = IFNET_FAMILY_ETHERNET;
1681 	init_params.type                = IFT_BRIDGE;
1682 	init_params.demux               = ether_demux;
1683 	init_params.add_proto           = ether_add_proto;
1684 	init_params.del_proto           = ether_del_proto;
1685 	init_params.check_multi         = ether_check_multi;
1686 	init_params.framer_extended     = ether_frameout_extended;
1687 	init_params.softc               = sc;
1688 	init_params.ioctl               = bridge_ioctl;
1689 	init_params.detach              = bridge_detach;
1690 	init_params.broadcast_addr      = etherbroadcastaddr;
1691 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1692 
1693 	error = ifnet_allocate_extended(&init_params, &ifp);
1694 	if (error != 0) {
1695 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1696 		goto done;
1697 	}
1698 	LIST_INIT(&sc->sc_mne_list);
1699 	LIST_INIT(&sc->sc_mne_list_v6);
1700 	sc->sc_ifp = ifp;
1701 	error = bridge_ifnet_set_attrs(ifp);
1702 	if (error != 0) {
1703 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1704 		    error);
1705 		goto done;
1706 	}
1707 	/*
1708 	 * Generate an ethernet address with a locally administered address.
1709 	 *
1710 	 * Since we are using random ethernet addresses for the bridge, it is
1711 	 * possible that we might have address collisions, so make sure that
1712 	 * this hardware address isn't already in use on another bridge.
1713 	 * The first try uses the "hostid" and falls back to read_frandom();
1714 	 * for "hostid", we use the MAC address of the first-encountered
1715 	 * Ethernet-type interface that is currently configured.
1716 	 */
1717 	fb = 0;
1718 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1719 	for (retry = 1; retry != 0;) {
1720 		if (fb || has_hostid == 0) {
1721 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1722 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1723 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1724 		} else {
1725 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1726 			    ETHER_ADDR_LEN);
1727 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1728 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1729 			sc->sc_defaddr[3] =     /* stir it up a bit */
1730 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1731 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1732 			/*
1733 			 * Mix in the LSB as it's actually pretty significant,
1734 			 * see rdar://14076061
1735 			 */
1736 			sc->sc_defaddr[4] =
1737 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1738 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1739 			    sc->sc_defaddr[5];
1740 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1741 		}
1742 
1743 		fb = 1;
1744 		retry = 0;
1745 		lck_mtx_lock(&bridge_list_mtx);
1746 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1747 			if (_ether_cmp(sc->sc_defaddr,
1748 			    IF_LLADDR(sc2->sc_ifp)) == 0) {
1749 				retry = 1;
1750 			}
1751 		}
1752 		lck_mtx_unlock(&bridge_list_mtx);
1753 	}
1754 
1755 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1756 
1757 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1758 		brlog_link(sc);
1759 	}
1760 	error = ifnet_attach(ifp, NULL);
1761 	if (error != 0) {
1762 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1763 		goto done;
1764 	}
1765 	(void)bridge_interface_attach_protocol(ifp);
1766 
1767 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1768 	    IFT_ETHER);
1769 	if (error != 0) {
1770 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1771 		    error);
1772 		goto done;
1773 	}
1774 
1775 	ifnet_set_offload(ifp,
1776 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1777 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1778 	error = bridge_set_tso(sc);
1779 	if (error != 0) {
1780 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1781 		goto done;
1782 	}
1783 #if BRIDGESTP
1784 	bstp_attach(&sc->sc_stp, &bridge_ops);
1785 #endif /* BRIDGESTP */
1786 
1787 	lck_mtx_lock(&bridge_list_mtx);
1788 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1789 	lck_mtx_unlock(&bridge_list_mtx);
1790 
1791 	/* attach as ethernet */
1792 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1793 	    NULL, NULL);
1794 
1795 done:
1796 	if (error != 0) {
1797 		if (ifp != NULL) {
1798 			bridge_interface_detach_protocol(ifp);
1799 		}
1800 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1801 		/* TBD: Clean up: sc, sc_rthash etc */
1802 	}
1803 
1804 	return error;
1805 }
1806 
1807 /*
1808  * bridge_clone_destroy:
1809  *
1810  *	Destroy a bridge instance.
1811  */
1812 static int
bridge_clone_destroy(struct ifnet * ifp)1813 bridge_clone_destroy(struct ifnet *ifp)
1814 {
1815 	struct bridge_softc * __single sc = ifp->if_softc;
1816 	struct bridge_iflist *bif;
1817 	errno_t error;
1818 
1819 	bridge_interface_detach_protocol(ifp);
1820 
1821 	BRIDGE_LOCK(sc);
1822 	if ((sc->sc_flags & SCF_DETACHING)) {
1823 		BRIDGE_UNLOCK(sc);
1824 		return 0;
1825 	}
1826 	sc->sc_flags |= SCF_DETACHING;
1827 
1828 	bridge_ifstop(ifp, 1);
1829 
1830 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1831 
1832 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1833 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1834 
1835 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1836 	if (error != 0) {
1837 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1838 	}
1839 
1840 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1841 		bridge_delete_member(sc, bif);
1842 	}
1843 
1844 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1845 		bridge_delete_span(sc, bif);
1846 	}
1847 	BRIDGE_UNLOCK(sc);
1848 
1849 	error = ifnet_detach(ifp);
1850 	if (error != 0) {
1851 		panic("%s (%d): ifnet_detach(%p) failed %d",
1852 		    __func__, __LINE__, ifp, error);
1853 	}
1854 	return 0;
1855 }
1856 
/*
 * DRVSPEC:
 *
 *	Common body of the SIOC[SG]DRVSPEC32/64 cases of bridge_ioctl().
 *	The expansion site must provide `cmd', `sc', `error', `ifd', `bc',
 *	`args' and `bridge_control_table'; `ifd' and `args' have different
 *	types in the 32-bit and 64-bit cases.
 *
 *	The request is rejected with EINVAL when the command index is out
 *	of range, when the get/set direction does not match the command's
 *	BC_F_COPYOUT flag, or when the supplied length differs from the
 *	command's argument size or exceeds `args'.  Commands flagged
 *	BC_F_SUSER require the superuser check to pass.  Arguments are
 *	copied in for BC_F_COPYIN commands, the handler runs with the
 *	bridge lock held, and results are copied out for BC_F_COPYOUT
 *	commands.  On any failure `error' is set and the rest is skipped.
 */
#define DRVSPEC do { \
	/* look up the handler for this sub-command */                  \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	/* "get" needs a copy-out command, "set" must not be one */     \
	if ((cmd == SIOCGDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) ||                      \
	    (cmd == SIOCSDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) {                      \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if ((bc->bc_flags & BC_F_SUSER) != 0) {                         \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	/* the length must match exactly and fit the local buffer */    \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) {                        \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args, sizeof(args));                \
	BRIDGE_UNLOCK(sc);                                              \
	if (error != 0) {                                               \
	        break;                                                  \
	}                                                               \
                                                                        \
	if ((bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
	}                                                               \
} while (0)
1903 
1904 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1905 interface_needs_input_broadcast(struct ifnet * ifp)
1906 {
1907 	/*
1908 	 * Selectively enable input broadcast only when necessary.
1909 	 * The bridge interface itself attaches a fake protocol
1910 	 * so checking for at least two protocols means that the
1911 	 * interface is being used for something besides bridging
1912 	 * and needs to see broadcast packets from other members.
1913 	 */
1914 	return if_get_protolist(ifp, NULL, 0) >= 2;
1915 }
1916 
1917 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1918 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1919 {
1920 	boolean_t       old_input_broadcast;
1921 
1922 	old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1923 	if (input_broadcast) {
1924 		bif->bif_flags |= BIFF_INPUT_BROADCAST;
1925 	} else {
1926 		bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1927 	}
1928 	return old_input_broadcast != input_broadcast;
1929 }
1930 
1931 /*
1932  * bridge_ioctl:
1933  *
1934  *	Handle a control request from the operator.
1935  */
1936 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * __sized_by (IOCPARM_LEN (cmd))data)1937 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)) data)
1938 {
1939 	struct bridge_softc * __single sc = ifp->if_softc;
1940 	struct ifreq *ifr = (struct ifreq *)data;
1941 	struct bridge_iflist *bif;
1942 	int error = 0;
1943 
1944 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1945 
1946 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1947 	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1948 	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1949 	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1950 	    (char)IOCGROUP(cmd), cmd & 0xff);
1951 
1952 	switch (cmd) {
1953 	case SIOCSIFADDR:
1954 	case SIOCAIFADDR:
1955 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1956 		break;
1957 
1958 	case SIOCGIFMEDIA32:
1959 	case SIOCGIFMEDIA64: {
1960 		// cast to 32bit version to work within bounds with 32bit userspace
1961 		struct ifmediareq32 *ifmr = (struct ifmediareq32 *)data;
1962 		user_addr_t user_addr;
1963 
1964 		user_addr = (cmd == SIOCGIFMEDIA64) ?
1965 		    ((struct ifmediareq64 *)data)->ifmu_ulist :
1966 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
1967 
1968 		ifmr->ifm_status = IFM_AVALID;
1969 		ifmr->ifm_mask = 0;
1970 		ifmr->ifm_count = 1;
1971 
1972 		BRIDGE_LOCK(sc);
1973 		if (!(sc->sc_flags & SCF_DETACHING) &&
1974 		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1975 			ifmr->ifm_status |= IFM_ACTIVE;
1976 			ifmr->ifm_active = ifmr->ifm_current =
1977 			    IFM_ETHER | IFM_AUTO;
1978 		} else {
1979 			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1980 		}
1981 		BRIDGE_UNLOCK(sc);
1982 
1983 		if (user_addr != USER_ADDR_NULL) {
1984 			error = copyout(&ifmr->ifm_current, user_addr,
1985 			    sizeof(int));
1986 		}
1987 		break;
1988 	}
1989 
1990 	case SIOCADDMULTI:
1991 	case SIOCDELMULTI:
1992 		break;
1993 
1994 	case SIOCSDRVSPEC32:
1995 	case SIOCGDRVSPEC32: {
1996 		union {
1997 			struct ifbreq ifbreq;
1998 			struct ifbifconf32 ifbifconf;
1999 			struct ifbareq32 ifbareq;
2000 			struct ifbaconf32 ifbaconf;
2001 			struct ifbrparam ifbrparam;
2002 			struct ifbropreq32 ifbropreq;
2003 		} args;
2004 		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
2005 		const struct bridge_control *bridge_control_table =
2006 		    bridge_control_table32, *bc;
2007 
2008 		DRVSPEC;
2009 
2010 		break;
2011 	}
2012 	case SIOCSDRVSPEC64:
2013 	case SIOCGDRVSPEC64: {
2014 		union {
2015 			struct ifbreq ifbreq;
2016 			struct ifbifconf64 ifbifconf;
2017 			struct ifbareq64 ifbareq;
2018 			struct ifbaconf64 ifbaconf;
2019 			struct ifbrparam ifbrparam;
2020 			struct ifbropreq64 ifbropreq;
2021 		} args;
2022 		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
2023 		const struct bridge_control *bridge_control_table =
2024 		    bridge_control_table64, *bc;
2025 
2026 		DRVSPEC;
2027 
2028 		break;
2029 	}
2030 
2031 	case SIOCSIFFLAGS:
2032 		if (!(ifp->if_flags & IFF_UP) &&
2033 		    (ifp->if_flags & IFF_RUNNING)) {
2034 			/*
2035 			 * If interface is marked down and it is running,
2036 			 * then stop and disable it.
2037 			 */
2038 			BRIDGE_LOCK(sc);
2039 			bridge_ifstop(ifp, 1);
2040 			BRIDGE_UNLOCK(sc);
2041 		} else if ((ifp->if_flags & IFF_UP) &&
2042 		    !(ifp->if_flags & IFF_RUNNING)) {
2043 			/*
2044 			 * If interface is marked up and it is stopped, then
2045 			 * start it.
2046 			 */
2047 			BRIDGE_LOCK(sc);
2048 			error = bridge_init(ifp);
2049 			BRIDGE_UNLOCK(sc);
2050 		}
2051 		break;
2052 
2053 	case SIOCSIFLLADDR:
2054 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
2055 		    ifr->ifr_addr.sa_len);
2056 		if (error != 0) {
2057 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2058 			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
2059 			    error);
2060 		}
2061 		break;
2062 
2063 	case SIOCSIFMTU:
2064 		if (ifr->ifr_mtu < 576) {
2065 			error = EINVAL;
2066 			break;
2067 		}
2068 		BRIDGE_LOCK(sc);
2069 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2070 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2071 			BRIDGE_UNLOCK(sc);
2072 			break;
2073 		}
2074 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2075 			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
2076 				BRIDGE_LOG(LOG_NOTICE, 0,
2077 				    "%s invalid MTU: %u(%s) != %d",
2078 				    sc->sc_ifp->if_xname,
2079 				    bif->bif_ifp->if_mtu,
2080 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
2081 				error = EINVAL;
2082 				break;
2083 			}
2084 		}
2085 		if (!error) {
2086 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2087 		}
2088 		BRIDGE_UNLOCK(sc);
2089 		break;
2090 
2091 	default:
2092 		error = ether_ioctl(ifp, cmd, data);
2093 		if (error != 0 && error != EOPNOTSUPP) {
2094 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2095 			    "ifp %s cmd 0x%08lx "
2096 			    "(%c%c [%lu] %c %lu) failed error: %d",
2097 			    ifp->if_xname, cmd,
2098 			    (cmd & IOC_IN) ? 'I' : ' ',
2099 			    (cmd & IOC_OUT) ? 'O' : ' ',
2100 			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
2101 			    cmd & 0xff, error);
2102 		}
2103 		break;
2104 	}
2105 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2106 
2107 	return error;
2108 }
2109 
2110 #if HAS_IF_CAP
2111 /*
2112  * bridge_mutecaps:
2113  *
2114  *	Clear or restore unwanted capabilities on the member interface
2115  */
2116 static void
bridge_mutecaps(struct bridge_softc * sc)2117 bridge_mutecaps(struct bridge_softc *sc)
2118 {
2119 	struct bridge_iflist *bif;
2120 	int enabled, mask;
2121 
2122 	/* Initial bitmask of capabilities to test */
2123 	mask = BRIDGE_IFCAPS_MASK;
2124 
2125 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2126 		/* Every member must support it or its disabled */
2127 		mask &= bif->bif_savedcaps;
2128 	}
2129 
2130 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2131 		enabled = bif->bif_ifp->if_capenable;
2132 		enabled &= ~BRIDGE_IFCAPS_STRIP;
2133 		/* strip off mask bits and enable them again if allowed */
2134 		enabled &= ~BRIDGE_IFCAPS_MASK;
2135 		enabled |= mask;
2136 
2137 		bridge_set_ifcap(sc, bif, enabled);
2138 	}
2139 }
2140 
2141 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2142 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2143 {
2144 	struct ifnet *ifp = bif->bif_ifp;
2145 	struct ifreq ifr;
2146 	int error;
2147 
2148 	bzero(&ifr, sizeof(ifr));
2149 	ifr.ifr_reqcap = set;
2150 
2151 	if (ifp->if_capenable != set) {
2152 		IFF_LOCKGIANT(ifp);
2153 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2154 		IFF_UNLOCKGIANT(ifp);
2155 		if (error) {
2156 			BRIDGE_LOG(LOG_NOTICE, 0,
2157 			    "%s error setting interface capabilities on %s",
2158 			    sc->sc_ifp->if_xname, ifp->if_xname);
2159 		}
2160 	}
2161 }
2162 #endif /* HAS_IF_CAP */
2163 
2164 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2165 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2166 {
2167 	struct ifreq    ifr;
2168 
2169 	bzero(&ifr, sizeof(ifr));
2170 	ifr.ifr_reqcap = cap_enable;
2171 	return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2172 }
2173 
2174 static const char *
enable_disable_str(boolean_t enable)2175 enable_disable_str(boolean_t enable)
2176 {
2177 	return (const char * __null_terminated)(enable ? "enable" : "disable");
2178 }
2179 
2180 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2181 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2182 {
2183 	uint32_t        cap_enable;
2184 	uint32_t        cap_supported;
2185 	boolean_t       changed = FALSE;
2186 	boolean_t       lro_enabled;
2187 
2188 	cap_supported = ifnet_capabilities_supported(ifp);
2189 	if ((cap_supported & IFCAP_LRO) == 0) {
2190 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2191 		    "%s doesn't support LRO",
2192 		    ifp->if_xname);
2193 		goto done;
2194 	}
2195 	if (bridge_allow_lro_num_seg != 0 &&
2196 	    (cap_supported & IFCAP_LRO_NUM_SEG) != 0) {
2197 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2198 		    "%s supports LRO_NUM_SEG, leaving LRO enabled",
2199 		    ifp->if_xname);
2200 		goto done;
2201 	}
2202 	cap_enable = ifnet_capabilities_enabled(ifp);
2203 	lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2204 	if (lro_enabled != enable) {
2205 		errno_t         error;
2206 
2207 		if (enable) {
2208 			cap_enable |= IFCAP_LRO;
2209 		} else {
2210 			cap_enable &= ~IFCAP_LRO;
2211 		}
2212 		error = siocsifcap(ifp, cap_enable);
2213 		if (error != 0) {
2214 			BRIDGE_LOG(LOG_NOTICE, 0,
2215 			    "%s %s failed (cap 0x%x) %d",
2216 			    ifp->if_xname,
2217 			    enable_disable_str(enable),
2218 			    cap_enable,
2219 			    error);
2220 		} else {
2221 			changed = TRUE;
2222 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2223 			    "%s %s success (cap 0x%x)",
2224 			    ifp->if_xname,
2225 			    enable_disable_str(enable),
2226 			    cap_enable);
2227 		}
2228 	}
2229 done:
2230 	return changed;
2231 }
2232 
2233 static errno_t
bridge_set_tso(struct bridge_softc * sc)2234 bridge_set_tso(struct bridge_softc *sc)
2235 {
2236 	struct bridge_iflist *bif;
2237 	u_int32_t tso_v4_mtu;
2238 	u_int32_t tso_v6_mtu;
2239 	ifnet_offload_t offload;
2240 	errno_t error = 0;
2241 
2242 	/* By default, support TSO */
2243 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2244 	tso_v4_mtu = IP_MAXPACKET;
2245 	tso_v6_mtu = IP_MAXPACKET;
2246 
2247 	/* Use the lowest common denominator of the members */
2248 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2249 		ifnet_t ifp = bif->bif_ifp;
2250 
2251 		if (ifp == NULL) {
2252 			continue;
2253 		}
2254 
2255 		if (offload & IFNET_TSO_IPV4) {
2256 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2257 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2258 					tso_v4_mtu = ifp->if_tso_v4_mtu;
2259 				}
2260 			} else {
2261 				offload &= ~IFNET_TSO_IPV4;
2262 				tso_v4_mtu = 0;
2263 			}
2264 		}
2265 		if (offload & IFNET_TSO_IPV6) {
2266 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2267 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2268 					tso_v6_mtu = ifp->if_tso_v6_mtu;
2269 				}
2270 			} else {
2271 				offload &= ~IFNET_TSO_IPV6;
2272 				tso_v6_mtu = 0;
2273 			}
2274 		}
2275 	}
2276 
2277 	if (offload != sc->sc_ifp->if_hwassist) {
2278 		error = ifnet_set_offload(sc->sc_ifp, offload);
2279 		if (error != 0) {
2280 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2281 			    "ifnet_set_offload(%s, 0x%x) failed %d",
2282 			    sc->sc_ifp->if_xname, offload, error);
2283 			goto done;
2284 		}
2285 		/*
2286 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2287 		 * as large as the interface MTU
2288 		 */
2289 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2290 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2291 				tso_v4_mtu = sc->sc_ifp->if_mtu;
2292 			}
2293 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2294 			    tso_v4_mtu);
2295 			if (error != 0) {
2296 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2297 				    "ifnet_set_tso_mtu(%s, "
2298 				    "AF_INET, %u) failed %d",
2299 				    sc->sc_ifp->if_xname,
2300 				    tso_v4_mtu, error);
2301 				goto done;
2302 			}
2303 		}
2304 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2305 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2306 				tso_v6_mtu = sc->sc_ifp->if_mtu;
2307 			}
2308 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2309 			    tso_v6_mtu);
2310 			if (error != 0) {
2311 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2312 				    "ifnet_set_tso_mtu(%s, "
2313 				    "AF_INET6, %u) failed %d",
2314 				    sc->sc_ifp->if_xname,
2315 				    tso_v6_mtu, error);
2316 				goto done;
2317 			}
2318 		}
2319 	}
2320 done:
2321 	return error;
2322 }
2323 
2324 static const char *
sanitize_ifname(char * __sized_by (IFNAMSIZ)ifname)2325 sanitize_ifname(char * __sized_by(IFNAMSIZ) ifname)
2326 {
2327 	ifname[IFNAMSIZ - 1] = '\0';
2328 	return __unsafe_null_terminated_from_indexable(ifname, &ifname[IFNAMSIZ - 1]);
2329 }
2330 
2331 /*
2332  * bridge_lookup_member:
2333  *
2334  *	Lookup a bridge member interface.
2335  */
2336 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,char * __sized_by (IFNAMSIZ)name_unsanitized)2337 bridge_lookup_member(struct bridge_softc *sc, char * __sized_by(IFNAMSIZ) name_unsanitized)
2338 {
2339 	struct bridge_iflist *bif;
2340 	struct ifnet *ifp;
2341 	const char * __null_terminated name = sanitize_ifname(name_unsanitized);
2342 
2343 	BRIDGE_LOCK_ASSERT_HELD(sc);
2344 
2345 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2346 		ifp = bif->bif_ifp;
2347 		if (strcmp(ifp->if_xname, name) == 0) {
2348 			return bif;
2349 		}
2350 	}
2351 
2352 	return NULL;
2353 }
2354 
2355 /*
2356  * bridge_lookup_member_if:
2357  *
2358  *	Lookup a bridge member interface by ifnet*.
2359  */
2360 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2361 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2362 {
2363 	struct bridge_iflist *bif;
2364 
2365 	BRIDGE_LOCK_ASSERT_HELD(sc);
2366 
2367 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2368 		if (bif->bif_ifp == member_ifp) {
2369 			return bif;
2370 		}
2371 	}
2372 
2373 	return NULL;
2374 }
2375 
2376 static inline bool
get_and_clear_promisc(mbuf_t m)2377 get_and_clear_promisc(mbuf_t m)
2378 {
2379 	bool    is_promisc;
2380 
2381 	/*
2382 	 * Need to clear the promiscuous flag otherwise the packet will be
2383 	 * dropped by DLIL after processing filters
2384 	 */
2385 	is_promisc = (mbuf_flags(m) & MBUF_PROMISC) != 0;
2386 	if (is_promisc) {
2387 		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
2388 	}
2389 	return is_promisc;
2390 }
2391 
2392 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2393 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2394     mbuf_t *data, char **frame_ptr)
2395 {
2396 #pragma unused(protocol)
2397 	errno_t error = 0;
2398 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2399 	struct bridge_softc *sc = bif->bif_sc;
2400 	int included = 0;
2401 	struct ether_header * eh_p;
2402 	size_t frmlen = 0;
2403 	bool is_promisc;
2404 	mblist list;
2405 	mbuf_t m = *data;
2406 	uint32_t sc_filter_flags;
2407 
2408 	if ((m->m_flags & M_PROTO1)) {
2409 		goto out;
2410 	}
2411 
2412 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2413 	    *frame_ptr <= mtod(m, char *)) {
2414 		included = 1;
2415 		frmlen = mtod(m, char *) - *frame_ptr;
2416 	}
2417 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2418 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2419 	    "frmlen %lu", sc->sc_ifp->if_xname,
2420 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2421 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
2422 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2423 	    included ? "inside" : "outside", frmlen);
2424 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2425 		brlog_mbuf(m, "bridge_iff_input[", "");
2426 		brlog_ether_header((struct ether_header *)
2427 		    (void *)*frame_ptr);
2428 		brlog_mbuf_data(m, 0, 20);
2429 	}
2430 	if (included == 0) {
2431 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2432 		goto out;
2433 	}
2434 
2435 	/* Move data pointer to start of frame to the link layer header */
2436 	_mbuf_adjust_pkthdr_and_data(m, -frmlen);
2437 
2438 	/* make sure we can access the ethernet header */
2439 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2440 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2441 		    "short frame %lu < %lu",
2442 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2443 		goto out;
2444 	}
2445 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2446 		error = mbuf_pullup(data, sizeof(struct ether_header));
2447 		if (error != 0) {
2448 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2449 			    "mbuf_pullup(%lu) failed %d",
2450 			    sizeof(struct ether_header),
2451 			    error);
2452 			error = EJUSTRETURN;
2453 			goto out;
2454 		}
2455 		if (m != *data) {
2456 			m = *data;
2457 			*frame_ptr = mtod(m, char *);
2458 		}
2459 	}
2460 	sc_filter_flags = sc->sc_filter_flags;
2461 	if ((sc_filter_flags & IFBF_FILT_MEMBER) != 0 && PF_IS_ENABLED) {
2462 		error = bridge_pf(data, ifp, sc_filter_flags, true);
2463 		m = *data;
2464 		if (error != 0 || m == NULL) {
2465 			return EJUSTRETURN;
2466 		}
2467 	}
2468 	mblist_init(&list);
2469 	mblist_append(&list, m);
2470 	is_promisc = get_and_clear_promisc(m);
2471 	eh_p = __unsafe_forge_single(struct ether_header *, *frame_ptr);
2472 	list = bridge_input_list(sc, ifp, eh_p, list, is_promisc);
2473 	m = *data = list.head;
2474 	if (m == NULL) {
2475 		error = EJUSTRETURN;
2476 	}
2477 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2478 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2479 		brlog_mbuf(m, "bridge_iff_input]", "");
2480 	}
2481 
2482 out:
2483 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2484 
2485 	return error;
2486 }
2487 
2488 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2489 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2490     mbuf_t *data)
2491 {
2492 #pragma unused(protocol)
2493 	errno_t error = 0;
2494 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2495 	struct bridge_softc *sc = bif->bif_sc;
2496 	mbuf_t m = *data;
2497 
2498 	if ((m->m_flags & M_PROTO1)) {
2499 		goto out;
2500 	}
2501 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2502 	    "%s from %s m 0x%llx data 0x%llx",
2503 	    sc->sc_ifp->if_xname, ifp->if_xname,
2504 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2505 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)));
2506 
2507 	error = bridge_member_output(sc, ifp, data);
2508 	if (error != 0 && error != EJUSTRETURN) {
2509 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2510 		    "bridge_member_output failed error %d",
2511 		    error);
2512 	}
2513 out:
2514 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2515 
2516 	return error;
2517 }
2518 
2519 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2520 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2521     const struct kev_msg *event_msg)
2522 {
2523 #pragma unused(protocol)
2524 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2525 	struct bridge_softc *sc = bif->bif_sc;
2526 
2527 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2528 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2529 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2530 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2531 		    "%s event_code %u - %s",
2532 		    ifp->if_xname, event_msg->event_code,
2533 		    dlil_kev_dl_code_str(event_msg->event_code));
2534 
2535 		switch (event_msg->event_code) {
2536 		case KEV_DL_LINK_OFF:
2537 		case KEV_DL_LINK_ON: {
2538 			bridge_iflinkevent(ifp);
2539 #if BRIDGESTP
2540 			bstp_linkstate(ifp, event_msg->event_code);
2541 #endif /* BRIDGESTP */
2542 			break;
2543 		}
2544 		case KEV_DL_SIFFLAGS: {
2545 			if ((ifp->if_flags & IFF_UP) == 0) {
2546 				break;
2547 			}
2548 			if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2549 				errno_t error;
2550 
2551 				error = ifnet_set_promiscuous(ifp, 1);
2552 				if (error != 0) {
2553 					BRIDGE_LOG(LOG_NOTICE, 0,
2554 					    "ifnet_set_promiscuous (%s)"
2555 					    " failed %d", ifp->if_xname,
2556 					    error);
2557 				} else {
2558 					bif->bif_flags |= BIFF_PROMISC;
2559 				}
2560 			}
2561 			if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2562 			    (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2563 				errno_t error;
2564 
2565 				error = if_allmulti(ifp, 1);
2566 				if (error != 0) {
2567 					BRIDGE_LOG(LOG_NOTICE, 0,
2568 					    "if_allmulti (%s)"
2569 					    " failed %d", ifp->if_xname,
2570 					    error);
2571 				} else {
2572 					bif->bif_flags |= BIFF_ALL_MULTI;
2573 #ifdef XNU_PLATFORM_AppleTVOS
2574 					ip6_forwarding = 1;
2575 #endif /* XNU_PLATFORM_AppleTVOS */
2576 				}
2577 			}
2578 			break;
2579 		}
2580 		case KEV_DL_IFCAP_CHANGED: {
2581 			BRIDGE_LOCK(sc);
2582 			bridge_set_tso(sc);
2583 			BRIDGE_UNLOCK(sc);
2584 			break;
2585 		}
2586 		case KEV_DL_PROTO_DETACHED:
2587 		case KEV_DL_PROTO_ATTACHED: {
2588 			bridge_proto_attach_changed(ifp);
2589 			break;
2590 		}
2591 		default:
2592 			break;
2593 		}
2594 	}
2595 }
2596 
2597 /*
2598  * bridge_iff_detached:
2599  *
2600  *      Called when our interface filter has been detached from a
2601  *      member interface.
2602  */
2603 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2604 bridge_iff_detached(void *cookie, ifnet_t ifp)
2605 {
2606 #pragma unused(cookie)
2607 	struct bridge_iflist *bif;
2608 	struct bridge_softc * __single sc = ifp->if_bridge;
2609 
2610 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2611 
2612 	/* Check if the interface is a bridge member */
2613 	if (sc != NULL) {
2614 		BRIDGE_LOCK(sc);
2615 		bif = bridge_lookup_member_if(sc, ifp);
2616 		if (bif != NULL) {
2617 			bridge_delete_member(sc, bif);
2618 		}
2619 		BRIDGE_UNLOCK(sc);
2620 		return;
2621 	}
2622 	/* Check if the interface is a span port */
2623 	lck_mtx_lock(&bridge_list_mtx);
2624 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2625 		BRIDGE_LOCK(sc);
2626 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2627 		if (ifp == bif->bif_ifp) {
2628 			bridge_delete_span(sc, bif);
2629 			break;
2630 		}
2631 		BRIDGE_UNLOCK(sc);
2632 	}
2633 	lck_mtx_unlock(&bridge_list_mtx);
2634 }
2635 
2636 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2637 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2638     char *header)
2639 {
2640 #pragma unused(protocol, packet, header)
2641 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2642 	    ifp->if_xname);
2643 	return 0;
2644 }
2645 
2646 static int
bridge_attach_protocol(struct ifnet * ifp)2647 bridge_attach_protocol(struct ifnet *ifp)
2648 {
2649 	int     error;
2650 	struct ifnet_attach_proto_param reg;
2651 
2652 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2653 	bzero(&reg, sizeof(reg));
2654 	reg.input = bridge_proto_input;
2655 
2656 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2657 	if (error) {
2658 		BRIDGE_LOG(LOG_NOTICE, 0,
2659 		    "ifnet_attach_protocol(%s) failed, %d",
2660 		    ifp->if_xname, error);
2661 	}
2662 
2663 	return error;
2664 }
2665 
2666 static int
bridge_detach_protocol(struct ifnet * ifp)2667 bridge_detach_protocol(struct ifnet *ifp)
2668 {
2669 	int     error;
2670 
2671 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2672 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2673 	if (error) {
2674 		BRIDGE_LOG(LOG_NOTICE, 0,
2675 		    "ifnet_detach_protocol(%s) failed, %d",
2676 		    ifp->if_xname, error);
2677 	}
2678 
2679 	return error;
2680 }
2681 
2682 /*
2683  * bridge_delete_member:
2684  *
2685  *	Delete the specified member interface.
2686  */
2687 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2688 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2689 {
2690 #if SKYWALK
2691 	boolean_t add_netagent = FALSE;
2692 #endif /* SKYWALK */
2693 	uint32_t    bif_flags;
2694 	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2695 	int lladdr_changed = 0, error;
2696 	uint8_t eaddr[ETHER_ADDR_LEN];
2697 	u_int32_t event_code = 0;
2698 
2699 	BRIDGE_LOCK_ASSERT_HELD(sc);
2700 	VERIFY(ifs != NULL);
2701 
2702 	/*
2703 	 * Remove the member from the list first so it cannot be found anymore
2704 	 * when we release the bridge lock below
2705 	 */
2706 	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2707 		bif->bif_flags &= ~BIFF_IN_MEMBER_LIST;
2708 		BRIDGE_XLOCK(sc);
2709 		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2710 		BRIDGE_XDROP(sc);
2711 	}
2712 	if (sc->sc_mac_nat_bif != NULL) {
2713 		if (bif == sc->sc_mac_nat_bif) {
2714 			bridge_mac_nat_disable(sc);
2715 		} else {
2716 			bridge_mac_nat_flush_entries(sc, bif);
2717 		}
2718 	}
2719 #if BRIDGESTP
2720 	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2721 		bstp_disable(&bif->bif_stp);
2722 	}
2723 #endif /* BRIDGESTP */
2724 
2725 	/*
2726 	 * If removing the interface that gave the bridge its mac address, set
2727 	 * the mac address of the bridge to the address of the next member, or
2728 	 * to its default address if no members are left.
2729 	 */
2730 	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2731 		ifnet_release(sc->sc_ifaddr);
2732 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2733 			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2734 			sc->sc_ifaddr = NULL;
2735 		} else {
2736 			struct ifnet *fif =
2737 			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2738 			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2739 			sc->sc_ifaddr = fif;
2740 			ifnet_reference(fif);   /* for sc_ifaddr */
2741 		}
2742 		lladdr_changed = 1;
2743 	}
2744 
2745 #if HAS_IF_CAP
2746 	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
2747 #endif /* HAS_IF_CAP */
2748 
2749 	error = bridge_set_tso(sc);
2750 	if (error != 0) {
2751 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2752 	}
2753 
2754 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2755 
2756 	KASSERT(bif->bif_addrcnt == 0,
2757 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2758 
2759 	/*
2760 	 * Update link status of the bridge based on its remaining members
2761 	 */
2762 	event_code = bridge_updatelinkstatus(sc);
2763 	bif_flags = bif->bif_flags;
2764 	BRIDGE_UNLOCK(sc);
2765 
2766 	/* only perform these steps if the interface is still attached */
2767 	if (ifnet_get_ioref(ifs)) {
2768 #if SKYWALK
2769 		add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2770 
2771 		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2772 			ifnet_detach_flowswitch_nexus(ifs);
2773 		}
2774 #endif /* SKYWALK */
2775 		/* disable promiscuous mode */
2776 		if ((bif_flags & BIFF_PROMISC) != 0) {
2777 			(void) ifnet_set_promiscuous(ifs, 0);
2778 		}
2779 		/* disable all multi */
2780 		if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2781 			(void)if_allmulti(ifs, 0);
2782 		}
2783 #if HAS_IF_CAP
2784 		/* re-enable any interface capabilities */
2785 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2786 #endif
2787 		/* detach bridge "protocol" */
2788 		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2789 			(void)bridge_detach_protocol(ifs);
2790 		}
2791 		/* detach interface filter */
2792 		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2793 			iflt_detach(bif->bif_iff_ref);
2794 		}
2795 		/* re-enable LRO */
2796 		if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2797 			(void)bridge_set_lro(ifs, TRUE);
2798 		}
2799 		ifnet_decr_iorefcnt(ifs);
2800 	}
2801 
2802 	if (lladdr_changed &&
2803 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2804 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2805 	}
2806 
2807 	if (event_code != 0) {
2808 		bridge_link_event(bifp, event_code);
2809 	}
2810 
2811 #if BRIDGESTP
2812 	bstp_destroy(&bif->bif_stp);    /* prepare to free */
2813 #endif /* BRIDGESTP */
2814 
2815 	kfree_type(struct bridge_iflist, bif);
2816 	ifs->if_bridge = NULL;
2817 #if SKYWALK
2818 	if (add_netagent && ifnet_get_ioref(ifs)) {
2819 		(void)ifnet_add_netagent(ifs);
2820 		ifnet_decr_iorefcnt(ifs);
2821 	}
2822 #endif /* SKYWALK */
2823 
2824 	ifnet_release(ifs);
2825 
2826 	BRIDGE_LOCK(sc);
2827 }
2828 
2829 /*
2830  * bridge_delete_span:
2831  *
2832  *	Delete the specified span interface.
2833  */
2834 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2835 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2836 {
2837 	BRIDGE_LOCK_ASSERT_HELD(sc);
2838 
2839 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2840 	    ("%s: not a span interface", __func__));
2841 
2842 	ifnet_release(bif->bif_ifp);
2843 
2844 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2845 	kfree_type(struct bridge_iflist, bif);
2846 }
2847 
2848 static int
bridge_ioctl_add(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)2849 bridge_ioctl_add(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
2850 {
2851 	struct ifbreq * __single req = arg;
2852 	struct bridge_iflist *bif = NULL;
2853 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2854 	int error = 0, lladdr_changed = 0;
2855 	uint8_t eaddr[ETHER_ADDR_LEN];
2856 	struct iff_filter iff;
2857 	u_int32_t event_code = 0;
2858 	boolean_t input_broadcast;
2859 	int media_active;
2860 	boolean_t wifi_infra = FALSE;
2861 
2862 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
2863 	if (ifs == NULL) {
2864 		return ENOENT;
2865 	}
2866 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2867 		return EINVAL;
2868 	}
2869 
2870 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2871 		return EINVAL;
2872 	}
2873 
2874 	/* If it's in the span list, it can't be a member. */
2875 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2876 		if (ifs == bif->bif_ifp) {
2877 			return EBUSY;
2878 		}
2879 	}
2880 
2881 	if (ifs->if_bridge == sc) {
2882 		return EEXIST;
2883 	}
2884 
2885 	if (ifs->if_bridge != NULL) {
2886 		return EBUSY;
2887 	}
2888 
2889 	switch (ifs->if_type) {
2890 	case IFT_ETHER:
2891 		if (strcmp(ifs->if_name, "en") == 0 &&
2892 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2893 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2894 			/* XXX is there a better way to identify Wi-Fi STA? */
2895 			wifi_infra = TRUE;
2896 		}
2897 		break;
2898 	case IFT_L2VLAN:
2899 	case IFT_IEEE8023ADLAG:
2900 		break;
2901 	default:
2902 		return EINVAL;
2903 	}
2904 
2905 	/* fail to add the interface if the MTU doesn't match */
2906 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2907 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2908 		    sc->sc_ifp->if_xname,
2909 		    ifs->if_xname);
2910 		return EINVAL;
2911 	}
2912 
2913 	if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2914 		/* there's already an interface that's doing MAC NAT */
2915 		return EBUSY;
2916 	}
2917 
2918 	/* prevent the interface from detaching while we add the member */
2919 	if (!ifnet_get_ioref(ifs)) {
2920 		return ENXIO;
2921 	}
2922 
2923 	/* allocate a new member */
2924 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2925 	bif->bif_ifp = ifs;
2926 	ifnet_reference(ifs);
2927 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2928 #if HAS_IF_CAP
2929 	bif->bif_savedcaps = ifs->if_capenable;
2930 #endif /* HAS_IF_CAP */
2931 	bif->bif_sc = sc;
2932 	if (wifi_infra) {
2933 		(void)bridge_mac_nat_enable(sc, bif);
2934 	}
2935 
2936 	/* Allow the first Ethernet member to define the MTU */
2937 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2938 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2939 	}
2940 
2941 	/*
2942 	 * Assign the interface's MAC address to the bridge if it's the first
2943 	 * member and the MAC address of the bridge has not been changed from
2944 	 * the default (randomly) generated one.
2945 	 */
2946 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2947 	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2948 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2949 		sc->sc_ifaddr = ifs;
2950 		ifnet_reference(ifs);   /* for sc_ifaddr */
2951 		lladdr_changed = 1;
2952 	}
2953 
2954 	ifs->if_bridge = sc;
2955 #if BRIDGESTP
2956 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2957 #endif /* BRIDGESTP */
2958 
2959 #if HAS_IF_CAP
2960 	/* Set interface capabilities to the intersection set of all members */
2961 	bridge_mutecaps(sc);
2962 #endif /* HAS_IF_CAP */
2963 
2964 	/*
2965 	 * Respect lock ordering with DLIL lock for the following operations
2966 	 */
2967 	BRIDGE_UNLOCK(sc);
2968 
2969 	/* enable promiscuous mode */
2970 	error = ifnet_set_promiscuous(ifs, 1);
2971 	switch (error) {
2972 	case 0:
2973 		bif->bif_flags |= BIFF_PROMISC;
2974 		break;
2975 	case ENETDOWN:
2976 	case EPWROFF:
2977 		BRIDGE_LOG(LOG_NOTICE, 0,
2978 		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2979 		    ifs->if_xname, error);
2980 		/* Ignore error when device is not up */
2981 		error = 0;
2982 		break;
2983 	default:
2984 		BRIDGE_LOG(LOG_NOTICE, 0,
2985 		    "ifnet_set_promiscuous(%s) failed %d",
2986 		    ifs->if_xname, error);
2987 		BRIDGE_LOCK(sc);
2988 		goto out;
2989 	}
2990 	if (wifi_infra) {
2991 		int this_error;
2992 
2993 		/* Wi-Fi doesn't really support promiscuous, set allmulti */
2994 		bif->bif_flags |= BIFF_WIFI_INFRA;
2995 		this_error = if_allmulti(ifs, 1);
2996 		if (this_error == 0) {
2997 			bif->bif_flags |= BIFF_ALL_MULTI;
2998 #ifdef XNU_PLATFORM_AppleTVOS
2999 			ip6_forwarding = 1;
3000 #endif /* XNU_PLATFORM_AppleTVOS */
3001 		} else {
3002 			BRIDGE_LOG(LOG_NOTICE, 0,
3003 			    "if_allmulti(%s) failed %d, ignoring",
3004 			    ifs->if_xname, this_error);
3005 		}
3006 	}
3007 #if SKYWALK
3008 	/* ensure that the flowswitch is present for native interface */
3009 	if (SKYWALK_NATIVE(ifs)) {
3010 		if (ifnet_attach_flowswitch_nexus(ifs)) {
3011 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
3012 		}
3013 	}
3014 	/* remove the netagent on the flowswitch (rdar://75050182) */
3015 	if (if_is_fsw_netagent_enabled()) {
3016 		(void)ifnet_remove_netagent(ifs);
3017 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
3018 	}
3019 #endif /* SKYWALK */
3020 
3021 	/*
3022 	 * install an interface filter
3023 	 */
3024 	memset(&iff, 0, sizeof(struct iff_filter));
3025 	iff.iff_cookie = bif;
3026 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
3027 	iff.iff_input = bridge_iff_input;
3028 	iff.iff_output = bridge_iff_output;
3029 	iff.iff_event = bridge_iff_event;
3030 	iff.iff_detached = bridge_iff_detached;
3031 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
3032 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL | DLIL_IFF_BRIDGE);
3033 	if (error != 0) {
3034 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
3035 		BRIDGE_LOCK(sc);
3036 		goto out;
3037 	}
3038 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
3039 
3040 	/*
3041 	 * install a dummy "bridge" protocol
3042 	 */
3043 	if ((error = bridge_attach_protocol(ifs)) != 0) {
3044 		if (error != 0) {
3045 			BRIDGE_LOG(LOG_NOTICE, 0,
3046 			    "bridge_attach_protocol failed %d", error);
3047 			BRIDGE_LOCK(sc);
3048 			goto out;
3049 		}
3050 	}
3051 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
3052 
3053 	if (lladdr_changed &&
3054 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
3055 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
3056 	}
3057 
3058 	media_active = interface_media_active(ifs);
3059 
3060 	/* disable LRO if needed */
3061 	if (bridge_set_lro(ifs, FALSE)) {
3062 		bif->bif_flags |= BIFF_LRO_DISABLED;
3063 	}
3064 
3065 	/*
3066 	 * No failures past this point. Add the member to the list.
3067 	 */
3068 	BRIDGE_LOCK(sc);
3069 	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
3070 	BRIDGE_XLOCK(sc);
3071 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
3072 	BRIDGE_XDROP(sc);
3073 
3074 	/* cache the member link status */
3075 	if (media_active != 0) {
3076 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3077 	} else {
3078 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3079 	}
3080 
3081 	/* the new member may change the link status of the bridge interface */
3082 	event_code = bridge_updatelinkstatus(sc);
3083 
3084 	/* check whether we need input broadcast or not */
3085 	input_broadcast = interface_needs_input_broadcast(ifs);
3086 	bif_set_input_broadcast(bif, input_broadcast);
3087 	BRIDGE_UNLOCK(sc);
3088 
3089 	if (event_code != 0) {
3090 		bridge_link_event(bifp, event_code);
3091 	}
3092 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3093 	    "%s input broadcast %s", ifs->if_xname,
3094 	    input_broadcast ? "ENABLED" : "DISABLED");
3095 
3096 	BRIDGE_LOCK(sc);
3097 	bridge_set_tso(sc);
3098 
3099 out:
3100 	/* allow the interface to detach */
3101 	ifnet_decr_iorefcnt(ifs);
3102 
3103 	if (error != 0) {
3104 		if (bif != NULL) {
3105 			bridge_delete_member(sc, bif);
3106 		}
3107 	} else if (IFNET_IS_VMNET(ifs)) {
3108 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
3109 	}
3110 
3111 	return error;
3112 }
3113 
3114 static int
bridge_ioctl_del(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3115 bridge_ioctl_del(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3116 {
3117 	struct ifbreq * __single req = arg;
3118 	struct bridge_iflist *bif;
3119 
3120 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3121 	if (bif == NULL) {
3122 		return ENOENT;
3123 	}
3124 
3125 	bridge_delete_member(sc, bif);
3126 
3127 	return 0;
3128 }
3129 
3130 static int
bridge_ioctl_purge(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3131 bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3132 {
3133 #pragma unused(sc, arg, arg_len)
3134 	return 0;
3135 }
3136 
3137 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3138 bridge_ioctl_gifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3139 {
3140 	struct ifbreq * __single req = arg;
3141 	struct bridge_iflist *bif;
3142 
3143 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3144 	if (bif == NULL) {
3145 		return ENOENT;
3146 	}
3147 
3148 	struct bstp_port *bp;
3149 
3150 	bp = &bif->bif_stp;
3151 	req->ifbr_state = bp->bp_state;
3152 	req->ifbr_priority = bp->bp_priority;
3153 	req->ifbr_path_cost = bp->bp_path_cost;
3154 	req->ifbr_proto = bp->bp_protover;
3155 	req->ifbr_role = bp->bp_role;
3156 	req->ifbr_stpflags = bp->bp_flags;
3157 	req->ifbr_ifsflags = bif->bif_ifflags;
3158 
3159 	/* Copy STP state options as flags */
3160 	if (bp->bp_operedge) {
3161 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
3162 	}
3163 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3164 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3165 	}
3166 	if (bp->bp_ptp_link) {
3167 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3168 	}
3169 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3170 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3171 	}
3172 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3173 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3174 	}
3175 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3176 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3177 	}
3178 
3179 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3180 	req->ifbr_addrcnt = bif->bif_addrcnt;
3181 	req->ifbr_addrmax = bif->bif_addrmax;
3182 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
3183 
3184 	return 0;
3185 }
3186 
3187 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3188 bridge_ioctl_sifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3189 {
3190 	struct ifbreq * __single req = arg;
3191 	struct bridge_iflist *bif;
3192 #if BRIDGESTP
3193 	struct bstp_port *bp;
3194 #endif /* BRIDGESTP */
3195 	errno_t error;
3196 	uint32_t ifsflags;
3197 
3198 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3199 	if (bif == NULL) {
3200 		return ENOENT;
3201 	}
3202 
3203 	ifsflags = req->ifbr_ifsflags;
3204 	if (ifsflags & IFBIF_SPAN) {
3205 		/* SPAN is readonly */
3206 		return EINVAL;
3207 	}
3208 #define CHECKSUM_VIRTIO (IFBIF_CHECKSUM_OFFLOAD | IFBIF_USES_VIRTIO)
3209 	if ((ifsflags & CHECKSUM_VIRTIO) == CHECKSUM_VIRTIO) {
3210 		/* can't specify checksum and virtio */
3211 		return EINVAL;
3212 	}
3213 	if ((ifsflags & IFBIF_MAC_NAT) != 0 &&
3214 	    ((ifsflags & CHECKSUM_VIRTIO) != 0 ||
3215 	    (bif->bif_flags & BIFF_HOST_FILTER) != 0)) {
3216 		/* MAC-NAT can't be used with checksum, host filter, or virtio */
3217 		return EINVAL;
3218 	}
3219 	if ((ifsflags & IFBIF_MAC_NAT) != 0) {
3220 		error = bridge_mac_nat_enable(sc, bif);
3221 		if (error != 0) {
3222 			return error;
3223 		}
3224 	} else if (sc->sc_mac_nat_bif == bif) {
3225 		bridge_mac_nat_disable(sc);
3226 	}
3227 
3228 #if BRIDGESTP
3229 	if (ifsflags & IFBIF_STP) {
3230 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3231 			error = bstp_enable(&bif->bif_stp);
3232 			if (error) {
3233 				return error;
3234 			}
3235 		}
3236 	} else {
3237 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3238 			bstp_disable(&bif->bif_stp);
3239 		}
3240 	}
3241 
3242 	/* Pass on STP flags */
3243 	bp = &bif->bif_stp;
3244 	bstp_set_edge(bp, ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3245 	bstp_set_autoedge(bp, ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3246 	bstp_set_ptp(bp, ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3247 	bstp_set_autoptp(bp, ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3248 #else /* !BRIDGESTP */
3249 	if (ifsflags & IFBIF_STP) {
3250 		return EOPNOTSUPP;
3251 	}
3252 #endif /* !BRIDGESTP */
3253 
3254 	/* Save the bits relating to the bridge */
3255 	bif->bif_ifflags = ifsflags & IFBIFMASK;
3256 
3257 	return 0;
3258 }
3259 
3260 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3261 bridge_ioctl_scache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3262 {
3263 	struct ifbrparam * __single param = arg;
3264 
3265 	sc->sc_brtmax = param->ifbrp_csize;
3266 	bridge_rttrim(sc);
3267 	return 0;
3268 }
3269 
3270 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3271 bridge_ioctl_gcache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3272 {
3273 	struct ifbrparam * __single param = arg;
3274 
3275 	param->ifbrp_csize = sc->sc_brtmax;
3276 
3277 	return 0;
3278 }
3279 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32() and bridge_ioctl_gifs64().
 *	The expanding function must define `sc', `bifc' and `error'; the
 *	macro may return from that function.
 *
 *	When bifc->ifbic_len is 0, only the buffer size needed for all
 *	member and span entries is reported in ifbic_len.  Otherwise one
 *	ifbreq per member, then one per span port, is written to a
 *	temporary buffer (as many as fit in ifbic_len) and copied out to
 *	bifc->ifbic_req, and ifbic_len is set to the number of bytes used.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout, and is held again whenever the macro finishes or returns.
 *	The lists may therefore change between sizing and filling; `len'
 *	is capped at the allocated size so the fill can never overrun the
 *	buffer.
 *
 *	Returns ENOMEM from the expanding function if the temporary buffer
 *	cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        count++;                                                \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        count++;                                                \
	}                                                               \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                \
	BRIDGE_LOCK(sc);                                                \
	/* a non-empty buffer that could not be allocated is fatal */  \
	if (buflen != 0 && outbuf == NULL) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	        /* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq, sizeof(breq)); \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	if (bifc->ifbic_len > 0) {                                      \
	        error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);\
	}                                                               \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
3344 
3345 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3346 bridge_ioctl_gifs64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3347 {
3348 	struct ifbifconf64 * __single bifc = arg;
3349 	int error = 0;
3350 
3351 	BRIDGE_IOCTL_GIFS;
3352 
3353 	return error;
3354 }
3355 
3356 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3357 bridge_ioctl_gifs32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3358 {
3359 	struct ifbifconf32 * __single bifc = arg;
3360 	int error = 0;
3361 
3362 	BRIDGE_IOCTL_GIFS;
3363 
3364 	return error;
3365 }
3366 
/*
 * BRIDGE_IOCTL_RTS
 *   Shared body of bridge_ioctl_rts32()/bridge_ioctl_rts64().
 *
 *   The expanding function must provide `sc` (bridge softc), `bac`
 *   (the ifbaconf32/64 request), `bareq` (a scratch ifbareq32/64) and
 *   `error` (int, initialised to 0). The macro returns from the
 *   expanding function on every path.
 *
 *   One `bareq` record is produced per entry of sc->sc_rtlist, up to
 *   the smaller of the caller-supplied length and the size computed
 *   from the list. The bridge lock is dropped around the allocation
 *   and around copyout()/free. On return bac->ifbac_len holds the
 *   number of bytes produced.
 *
 *   ENOMEM is returned when the staging buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                    \
	BRIDGE_LOCK(sc);                                                    \
	/* a failed allocation must not reach the memcpy() below */         \
	if (outbuf == NULL && buflen != 0)                                  \
	        return (ENOMEM);                                            \
                                                                            \
	count = 0;                                                          \
	buf = outbuf;                                                       \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	        /*                                                          \
	         * `bareq` is reused across iterations: always reset the    \
	         * expiry so an already-expired dynamic entry does not      \
	         * inherit the value written for the previous entry.        \
	         */                                                         \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire)                          \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        if (bac->ifbac_len > 0) {                                   \
	                error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);\
	        }                                                           \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3423 
3424 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3425 bridge_ioctl_rts64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3426 {
3427 	struct ifbaconf64 * __single bac = arg;
3428 	struct ifbareq64 bareq;
3429 	int error = 0;
3430 
3431 	BRIDGE_IOCTL_RTS;
3432 	return error;
3433 }
3434 
3435 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3436 bridge_ioctl_rts32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3437 {
3438 	struct ifbaconf32 * __single bac = arg;
3439 	struct ifbareq32 bareq;
3440 	int error = 0;
3441 
3442 	BRIDGE_IOCTL_RTS;
3443 	return error;
3444 }
3445 
3446 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3447 bridge_ioctl_saddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3448 {
3449 	struct ifbareq32 * __single req = arg;
3450 	struct bridge_iflist *bif;
3451 	int error;
3452 
3453 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3454 	if (bif == NULL) {
3455 		return ENOENT;
3456 	}
3457 
3458 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3459 	    req->ifba_flags);
3460 
3461 	return error;
3462 }
3463 
3464 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3465 bridge_ioctl_saddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3466 {
3467 	struct ifbareq64 * __single req = arg;
3468 	struct bridge_iflist *bif;
3469 	int error;
3470 
3471 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3472 	if (bif == NULL) {
3473 		return ENOENT;
3474 	}
3475 
3476 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3477 	    req->ifba_flags);
3478 
3479 	return error;
3480 }
3481 
3482 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3483 bridge_ioctl_sto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3484 {
3485 	struct ifbrparam * __single param = arg;
3486 
3487 	sc->sc_brttimeout = param->ifbrp_ctime;
3488 	return 0;
3489 }
3490 
3491 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3492 bridge_ioctl_gto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3493 {
3494 	struct ifbrparam * __single param = arg;
3495 
3496 	param->ifbrp_ctime = sc->sc_brttimeout;
3497 	return 0;
3498 }
3499 
3500 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3501 bridge_ioctl_daddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3502 {
3503 	struct ifbareq32 * __single req = arg;
3504 
3505 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3506 }
3507 
3508 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3509 bridge_ioctl_daddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3510 {
3511 	struct ifbareq64 * __single req = arg;
3512 
3513 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3514 }
3515 
3516 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3517 bridge_ioctl_flush(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3518 {
3519 	struct ifbreq * __single req = arg;
3520 
3521 	bridge_rtflush(sc, req->ifbr_ifsflags);
3522 	return 0;
3523 }
3524 
3525 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3526 bridge_ioctl_gpri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3527 {
3528 	struct ifbrparam * __single param = arg;
3529 	struct bstp_state *bs = &sc->sc_stp;
3530 
3531 	param->ifbrp_prio = bs->bs_bridge_priority;
3532 	return 0;
3533 }
3534 
3535 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3536 bridge_ioctl_spri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3537 {
3538 #if BRIDGESTP
3539 	struct ifbrparam *param = arg;
3540 
3541 	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3542 #else /* !BRIDGESTP */
3543 #pragma unused(sc, arg)
3544 	return EOPNOTSUPP;
3545 #endif /* !BRIDGESTP */
3546 }
3547 
3548 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3549 bridge_ioctl_ght(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3550 {
3551 	struct ifbrparam * __single param = arg;
3552 	struct bstp_state *bs = &sc->sc_stp;
3553 
3554 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3555 	return 0;
3556 }
3557 
3558 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3559 bridge_ioctl_sht(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3560 {
3561 #if BRIDGESTP
3562 	struct ifbrparam *param = arg;
3563 
3564 	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3565 #else /* !BRIDGESTP */
3566 #pragma unused(sc, arg)
3567 	return EOPNOTSUPP;
3568 #endif /* !BRIDGESTP */
3569 }
3570 
3571 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3572 bridge_ioctl_gfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3573 {
3574 	struct ifbrparam * __single param;
3575 	struct bstp_state *bs;
3576 
3577 	param = arg;
3578 	bs = &sc->sc_stp;
3579 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3580 	return 0;
3581 }
3582 
3583 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3584 bridge_ioctl_sfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3585 {
3586 #if BRIDGESTP
3587 	struct ifbrparam *param = arg;
3588 
3589 	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3590 #else /* !BRIDGESTP */
3591 #pragma unused(sc, arg)
3592 	return EOPNOTSUPP;
3593 #endif /* !BRIDGESTP */
3594 }
3595 
3596 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3597 bridge_ioctl_gma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3598 {
3599 	struct ifbrparam * __single param;
3600 	struct bstp_state *bs;
3601 
3602 	param = arg;
3603 	bs = &sc->sc_stp;
3604 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3605 	return 0;
3606 }
3607 
3608 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3609 bridge_ioctl_sma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3610 {
3611 #if BRIDGESTP
3612 	struct ifbrparam *param = arg;
3613 
3614 	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3615 #else /* !BRIDGESTP */
3616 #pragma unused(sc, arg)
3617 	return EOPNOTSUPP;
3618 #endif /* !BRIDGESTP */
3619 }
3620 
3621 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3622 bridge_ioctl_sifprio(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3623 {
3624 #if BRIDGESTP
3625 	struct ifbreq *req = arg;
3626 	struct bridge_iflist *bif;
3627 
3628 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3629 	if (bif == NULL) {
3630 		return ENOENT;
3631 	}
3632 
3633 	return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3634 #else /* !BRIDGESTP */
3635 #pragma unused(sc, arg)
3636 	return EOPNOTSUPP;
3637 #endif /* !BRIDGESTP */
3638 }
3639 
3640 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3641 bridge_ioctl_sifcost(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3642 {
3643 #if BRIDGESTP
3644 	struct ifbreq *req = arg;
3645 	struct bridge_iflist *bif;
3646 
3647 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3648 	if (bif == NULL) {
3649 		return ENOENT;
3650 	}
3651 
3652 	return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3653 #else /* !BRIDGESTP */
3654 #pragma unused(sc, arg)
3655 	return EOPNOTSUPP;
3656 #endif /* !BRIDGESTP */
3657 }
3658 
3659 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3660 bridge_ioctl_gfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3661 {
3662 	struct ifbrparam * __single param = arg;
3663 
3664 	param->ifbrp_filter = sc->sc_filter_flags;
3665 
3666 	return 0;
3667 }
3668 
3669 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3670 bridge_ioctl_sfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3671 {
3672 	struct ifbrparam * __single param = arg;
3673 
3674 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3675 		return EINVAL;
3676 	}
3677 
3678 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3679 		return EINVAL;
3680 	}
3681 
3682 	sc->sc_filter_flags = param->ifbrp_filter;
3683 
3684 	return 0;
3685 }
3686 
3687 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3688 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3689 {
3690 	struct ifbreq * __single req = arg;
3691 	struct bridge_iflist *bif;
3692 
3693 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3694 	if (bif == NULL) {
3695 		return ENOENT;
3696 	}
3697 
3698 	bif->bif_addrmax = req->ifbr_addrmax;
3699 	return 0;
3700 }
3701 
3702 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3703 bridge_ioctl_addspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3704 {
3705 	struct ifbreq * __single req = arg;
3706 	struct bridge_iflist *bif = NULL;
3707 	struct ifnet *ifs;
3708 
3709 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3710 	if (ifs == NULL) {
3711 		return ENOENT;
3712 	}
3713 
3714 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3715 		return EINVAL;
3716 	}
3717 
3718 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3719 	if (ifs == bif->bif_ifp) {
3720 		return EBUSY;
3721 	}
3722 
3723 	if (ifs->if_bridge != NULL) {
3724 		return EBUSY;
3725 	}
3726 
3727 	switch (ifs->if_type) {
3728 	case IFT_ETHER:
3729 	case IFT_L2VLAN:
3730 	case IFT_IEEE8023ADLAG:
3731 		break;
3732 	default:
3733 		return EINVAL;
3734 	}
3735 
3736 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3737 
3738 	bif->bif_ifp = ifs;
3739 	bif->bif_ifflags = IFBIF_SPAN;
3740 
3741 	ifnet_reference(bif->bif_ifp);
3742 
3743 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3744 
3745 	return 0;
3746 }
3747 
3748 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3749 bridge_ioctl_delspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3750 {
3751 	struct ifbreq * __single req = arg;
3752 	struct bridge_iflist *bif;
3753 	struct ifnet *ifs;
3754 
3755 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3756 	if (ifs == NULL) {
3757 		return ENOENT;
3758 	}
3759 
3760 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3761 	if (ifs == bif->bif_ifp) {
3762 		break;
3763 	}
3764 
3765 	if (bif == NULL) {
3766 		return ENOENT;
3767 	}
3768 
3769 	bridge_delete_span(sc, bif);
3770 
3771 	return 0;
3772 }
3773 
/*
 * BRIDGE_IOCTL_GBPARAM
 *   Shared body of bridge_ioctl_gbparam32()/bridge_ioctl_gbparam64().
 *   Fills the `req` (ifbropreq32/64) of the expanding function from
 *   sc->sc_stp. Expects `sc` and `req` to be in scope.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *stp = &sc->sc_stp;                           \
	struct bstp_port *rp = stp->bs_root_port;                       \
                                                                        \
	/* these three values are reported shifted right by 8 bits */   \
	req->ifbop_maxage = stp->bs_bridge_max_age >> 8;                \
	req->ifbop_hellotime = stp->bs_bridge_htime >> 8;               \
	req->ifbop_fwddelay = stp->bs_bridge_fdelay >> 8;               \
                                                                        \
	/* root port is reported as 0 when there is none */             \
	if (rp != NULL) {                                               \
	        req->ifbop_root_port = rp->bp_ifp->if_index;            \
	} else {                                                        \
	        req->ifbop_root_port = 0;                               \
	}                                                               \
                                                                        \
	req->ifbop_holdcount = stp->bs_txholdcount;                     \
	req->ifbop_priority = stp->bs_bridge_priority;                  \
	req->ifbop_protocol = stp->bs_protover;                         \
	req->ifbop_root_path_cost = stp->bs_root_pv.pv_cost;            \
	req->ifbop_bridgeid = stp->bs_bridge_pv.pv_dbridge_id;          \
	req->ifbop_designated_root = stp->bs_root_pv.pv_root_id;        \
	req->ifbop_designated_bridge = stp->bs_root_pv.pv_dbridge_id;   \
	req->ifbop_last_tc_time.tv_sec = stp->bs_last_tc_time.tv_sec;   \
	req->ifbop_last_tc_time.tv_usec = stp->bs_last_tc_time.tv_usec; \
} while (0)
3798 
3799 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3800 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3801 {
3802 	struct ifbropreq32 * __single req = arg;
3803 
3804 	BRIDGE_IOCTL_GBPARAM;
3805 	return 0;
3806 }
3807 
3808 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3809 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3810 {
3811 	struct ifbropreq64 * __single req = arg;
3812 
3813 	BRIDGE_IOCTL_GBPARAM;
3814 	return 0;
3815 }
3816 
3817 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3818 bridge_ioctl_grte(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3819 {
3820 	struct ifbrparam * __single param = arg;
3821 
3822 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3823 	return 0;
3824 }
3825 
/*
 * BRIDGE_IOCTL_GIFSSTP
 *   Shared body of bridge_ioctl_gifsstp32()/bridge_ioctl_gifsstp64().
 *
 *   The expanding function must provide `sc` (bridge softc), `bifstp`
 *   (the ifbpstpconf32/64 request) and `error` (int, initialised to 0).
 *   The macro returns from the expanding function on every path.
 *
 *   When bifstp->ifbpstp_len is 0, only the size needed for all members
 *   with IFBIF_STP set is reported. Otherwise one struct ifbpstpreq per
 *   such member is staged in a temporary buffer and copied out to
 *   bifstp->ifbpstp_req, with the bridge lock dropped around the
 *   allocation and the copyout().
 *
 *   ENOMEM is returned when the staging buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0)                \
	                count++;                                        \
	}                                                               \
                                                                        \
	buflen = sizeof (bpreq) * count;                                \
	if (bifstp->ifbpstp_len == 0) {                                 \
	        bifstp->ifbpstp_len = buflen;                           \
	        return (0);                                             \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                \
	BRIDGE_LOCK(sc);                                                \
	/* a failed allocation must not reach the memcpy() below */     \
	if (outbuf == NULL && buflen != 0) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (bpreq))                               \
	                break;                                          \
                                                                        \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0)                \
	                continue;                                       \
                                                                        \
	        bp = &bif->bif_stp;                                     \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
                                                                        \
	        memcpy(buf, &bpreq, sizeof (bpreq));                    \
	        count++;                                                \
	        buf += sizeof (bpreq);                                  \
	        len -= sizeof (bpreq);                                  \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	if (bifstp->ifbpstp_len > 0) {                                  \
	        error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);\
	}                                                               \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3883 
3884 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3885 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3886 {
3887 	struct ifbpstpconf32 * __single bifstp = arg;
3888 	int error = 0;
3889 
3890 	BRIDGE_IOCTL_GIFSSTP;
3891 	return error;
3892 }
3893 
3894 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3895 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3896 {
3897 	struct ifbpstpconf64 * __single bifstp = arg;
3898 	int error = 0;
3899 
3900 	BRIDGE_IOCTL_GIFSSTP;
3901 	return error;
3902 }
3903 
3904 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3905 bridge_ioctl_sproto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3906 {
3907 #if BRIDGESTP
3908 	struct ifbrparam *param = arg;
3909 
3910 	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3911 #else /* !BRIDGESTP */
3912 #pragma unused(sc, arg)
3913 	return EOPNOTSUPP;
3914 #endif /* !BRIDGESTP */
3915 }
3916 
3917 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3918 bridge_ioctl_stxhc(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3919 {
3920 #if BRIDGESTP
3921 	struct ifbrparam *param = arg;
3922 
3923 	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3924 #else /* !BRIDGESTP */
3925 #pragma unused(sc, arg)
3926 	return EOPNOTSUPP;
3927 #endif /* !BRIDGESTP */
3928 }
3929 
3930 
3931 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3932 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3933 {
3934 	struct ifbrhostfilter * __single req = arg;
3935 	struct bridge_iflist *bif;
3936 
3937 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3938 	if (bif == NULL) {
3939 		return ENOENT;
3940 	}
3941 
3942 	bzero(req, sizeof(struct ifbrhostfilter));
3943 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3944 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3945 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3946 		    ETHER_ADDR_LEN);
3947 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3948 	}
3949 	return 0;
3950 }
3951 
3952 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3953 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3954 {
3955 	struct ifbrhostfilter * __single req = arg;
3956 	struct bridge_iflist *bif;
3957 
3958 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3959 	if (bif == NULL) {
3960 		return ENOENT;
3961 	}
3962 	if (bif_has_mac_nat(bif)) {
3963 		/* no host filter with MAC-NAT */
3964 		return EINVAL;
3965 	}
3966 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3967 		bif->bif_flags |= BIFF_HOST_FILTER;
3968 
3969 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3970 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3971 			    ETHER_ADDR_LEN);
3972 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3973 			    ETHER_ADDR_LEN) != 0) {
3974 				bif->bif_flags |= BIFF_HF_HWSRC;
3975 			} else {
3976 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3977 			}
3978 		}
3979 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3980 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3981 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3982 				bif->bif_flags |= BIFF_HF_IPSRC;
3983 			} else {
3984 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3985 			}
3986 		}
3987 	} else {
3988 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3989 		    BIFF_HF_IPSRC);
3990 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3991 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3992 	}
3993 
3994 	return 0;
3995 }
3996 
3997 static char *__indexable
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * __indexable buf,unsigned int * len_p)3998 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3999     unsigned int * count_p, char *__indexable buf,
4000     unsigned int * len_p)
4001 {
4002 	unsigned int            count = *count_p;
4003 	struct ifbrmne          ifbmne;
4004 	unsigned int            len = *len_p;
4005 	struct mac_nat_entry    *mne;
4006 	unsigned long           now;
4007 
4008 	bzero(&ifbmne, sizeof(ifbmne));
4009 	LIST_FOREACH(mne, list, mne_list) {
4010 		if (len < sizeof(ifbmne)) {
4011 			break;
4012 		}
4013 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
4014 		    "%s", mne->mne_bif->bif_ifp->if_xname);
4015 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
4016 		    sizeof(ifbmne.ifbmne_mac));
4017 		now = (unsigned long) net_uptime();
4018 		if (now < mne->mne_expire) {
4019 			ifbmne.ifbmne_expire = mne->mne_expire - now;
4020 		} else {
4021 			ifbmne.ifbmne_expire = 0;
4022 		}
4023 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
4024 			ifbmne.ifbmne_af = AF_INET6;
4025 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
4026 		} else {
4027 			ifbmne.ifbmne_af = AF_INET;
4028 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
4029 		}
4030 		memcpy(buf, &ifbmne, sizeof(ifbmne));
4031 		count++;
4032 		buf += sizeof(ifbmne);
4033 		len -= sizeof(ifbmne);
4034 	}
4035 	*count_p = count;
4036 	*len_p = len;
4037 	return buf;
4038 }
4039 
4040 /*
4041  * bridge_ioctl_gmnelist()
4042  *   Perform the get mac_nat_entry list ioctl.
4043  *
4044  * Note:
4045  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
4046  *   field size/layout except for the last field ifbml_buf, the user-supplied
4047  *   buffer pointer. That is passed in separately via the 'user_addr'
4048  *   parameter from the respective 32-bit or 64-bit ioctl routine.
4049  */
4050 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)4051 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
4052     user_addr_t user_addr)
4053 {
4054 	unsigned int            count;
4055 	char                    *buf;
4056 	int                     error = 0;
4057 	char                    *outbuf = NULL;
4058 	struct mac_nat_entry    *mne;
4059 	unsigned int            buflen;
4060 	unsigned int            len;
4061 
4062 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
4063 	count = 0;
4064 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
4065 		count++;
4066 	}
4067 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
4068 		count++;
4069 	}
4070 	buflen = sizeof(struct ifbrmne) * count;
4071 	if (buflen == 0 || mnl->ifbml_len == 0) {
4072 		mnl->ifbml_len = buflen;
4073 		return error;
4074 	}
4075 	BRIDGE_UNLOCK(sc);
4076 	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);
4077 	BRIDGE_LOCK(sc);
4078 	count = 0;
4079 	buf = outbuf;
4080 	len = min(mnl->ifbml_len, buflen);
4081 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
4082 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
4083 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
4084 	BRIDGE_UNLOCK(sc);
4085 	if (mnl->ifbml_len > 0) {
4086 		error = copyout(outbuf, user_addr, mnl->ifbml_len);
4087 	}
4088 	kfree_data(outbuf, buflen);
4089 	BRIDGE_LOCK(sc);
4090 	return error;
4091 }
4092 
4093 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4094 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4095 {
4096 	struct ifbrmnelist64 * __single mnl = arg;
4097 
4098 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
4099 }
4100 
4101 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4102 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4103 {
4104 	struct ifbrmnelist32 * __single mnl = arg;
4105 
4106 	return bridge_ioctl_gmnelist(sc, arg,
4107 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
4108 }
4109 
4110 /*
4111  * bridge_ioctl_gifstats()
4112  *   Return per-member stats.
4113  *
4114  * Note:
4115  *   The ifbrmreq32 and ifbrmreq64 structures have the same
4116  *   field size/layout except for the last field brmr_buf, the user-supplied
4117  *   buffer pointer. That is passed in separately via the 'user_addr'
4118  *   parameter from the respective 32-bit or 64-bit ioctl routine.
4119  */
4120 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)4121 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
4122     user_addr_t user_addr)
4123 {
4124 	struct bridge_iflist    *bif;
4125 	int                     error = 0;
4126 	unsigned int            buflen;
4127 
4128 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
4129 	if (bif == NULL) {
4130 		error = ENOENT;
4131 		goto done;
4132 	}
4133 
4134 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
4135 	if (buflen == 0 || mreq->brmr_len == 0) {
4136 		mreq->brmr_len = buflen;
4137 		goto done;
4138 	}
4139 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
4140 		error = ENOBUFS;
4141 		goto done;
4142 	}
4143 	mreq->brmr_len = buflen;
4144 	error = copyout(&bif->bif_stats, user_addr, buflen);
4145 done:
4146 	return error;
4147 }
4148 
4149 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4150 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4151 {
4152 	struct ifbrmreq32 * __single mreq = arg;
4153 
4154 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4155 }
4156 
4157 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4158 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4159 {
4160 	struct ifbrmreq64 * __single mreq = arg;
4161 
4162 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4163 }
4164 
4165 /*
4166  * bridge_proto_attach_changed
4167  *
4168  *	Called when protocol attachment on the interface changes.
4169  */
4170 static void
bridge_proto_attach_changed(struct ifnet * ifp)4171 bridge_proto_attach_changed(struct ifnet *ifp)
4172 {
4173 	boolean_t changed = FALSE;
4174 	struct bridge_iflist *bif;
4175 	boolean_t input_broadcast;
4176 	struct bridge_softc * __single sc = ifp->if_bridge;
4177 
4178 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4179 	if (sc == NULL) {
4180 		return;
4181 	}
4182 	input_broadcast = interface_needs_input_broadcast(ifp);
4183 	BRIDGE_LOCK(sc);
4184 	bif = bridge_lookup_member_if(sc, ifp);
4185 	if (bif != NULL) {
4186 		changed = bif_set_input_broadcast(bif, input_broadcast);
4187 	}
4188 	BRIDGE_UNLOCK(sc);
4189 	if (changed) {
4190 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4191 		    "%s input broadcast %s", ifp->if_xname,
4192 		    input_broadcast ? "ENABLED" : "DISABLED");
4193 	}
4194 	return;
4195 }
4196 
4197 /*
4198  * interface_media_active:
4199  *
4200  *	Tells if an interface media is active.
4201  */
4202 static int
interface_media_active(struct ifnet * ifp)4203 interface_media_active(struct ifnet *ifp)
4204 {
4205 	struct ifmediareq   ifmr;
4206 	int status = 0;
4207 
4208 	bzero(&ifmr, sizeof(ifmr));
4209 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4210 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4211 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4212 		}
4213 	}
4214 
4215 	return status;
4216 }
4217 
4218 /*
4219  * bridge_updatelinkstatus:
4220  *
4221  *      Update the media active status of the bridge based on the
4222  *	media active status of its member.
4223  *	If changed, return the corresponding onf/off link event.
4224  */
4225 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4226 bridge_updatelinkstatus(struct bridge_softc *sc)
4227 {
4228 	struct bridge_iflist *bif;
4229 	int active_member = 0;
4230 	u_int32_t event_code = 0;
4231 
4232 	BRIDGE_LOCK_ASSERT_HELD(sc);
4233 
4234 	/*
4235 	 * Find out if we have an active interface
4236 	 */
4237 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4238 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4239 			active_member = 1;
4240 			break;
4241 		}
4242 	}
4243 
4244 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4245 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
4246 		event_code = KEV_DL_LINK_ON;
4247 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4248 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4249 		event_code = KEV_DL_LINK_OFF;
4250 	}
4251 
4252 	return event_code;
4253 }
4254 
4255 /*
4256  * bridge_iflinkevent:
4257  */
4258 static void
bridge_iflinkevent(struct ifnet * ifp)4259 bridge_iflinkevent(struct ifnet *ifp)
4260 {
4261 	struct bridge_softc * __single sc = ifp->if_bridge;
4262 	struct bridge_iflist *bif;
4263 	u_int32_t event_code = 0;
4264 	int media_active;
4265 
4266 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4267 
4268 	/* Check if the interface is a bridge member */
4269 	if (sc == NULL) {
4270 		return;
4271 	}
4272 
4273 	media_active = interface_media_active(ifp);
4274 	BRIDGE_LOCK(sc);
4275 	bif = bridge_lookup_member_if(sc, ifp);
4276 	if (bif != NULL) {
4277 		if (media_active) {
4278 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4279 		} else {
4280 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4281 		}
4282 		if (sc->sc_mac_nat_bif != NULL) {
4283 			bridge_mac_nat_flush_entries(sc, bif);
4284 		}
4285 
4286 		event_code = bridge_updatelinkstatus(sc);
4287 	}
4288 	BRIDGE_UNLOCK(sc);
4289 
4290 	if (event_code != 0) {
4291 		bridge_link_event(sc->sc_ifp, event_code);
4292 	}
4293 }
4294 
4295 /*
4296  * bridge_delayed_callback:
4297  *
4298  *	Makes a delayed call
4299  */
4300 static void
bridge_delayed_callback(void * param,__unused void * param2)4301 bridge_delayed_callback(void *param, __unused void *param2)
4302 {
4303 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4304 	struct bridge_softc *sc = call->bdc_sc;
4305 
4306 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4307 	if (bridge_delayed_callback_delay > 0) {
4308 		struct timespec ts;
4309 
4310 		ts.tv_sec = bridge_delayed_callback_delay;
4311 		ts.tv_nsec = 0;
4312 
4313 		BRIDGE_LOG(LOG_NOTICE, 0,
4314 		    "sleeping for %d seconds",
4315 		    bridge_delayed_callback_delay);
4316 
4317 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4318 		    __func__, &ts);
4319 
4320 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4321 	}
4322 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4323 
4324 	BRIDGE_LOCK(sc);
4325 
4326 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4327 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4328 	    "%s call 0x%llx flags 0x%x",
4329 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4330 	    call->bdc_flags);
4331 }
4332 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4333 
4334 	if (call->bdc_flags & BDCF_CANCELLING) {
4335 		wakeup(call);
4336 	} else {
4337 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
4338 			(*call->bdc_func)(sc);
4339 		}
4340 	}
4341 	call->bdc_flags &= ~BDCF_OUTSTANDING;
4342 	BRIDGE_UNLOCK(sc);
4343 }
4344 
4345 /*
4346  * bridge_schedule_delayed_call:
4347  *
4348  *	Schedule a function to be called on a separate thread
4349  *      The actual call may be scheduled to run at a given time or ASAP.
4350  */
4351 static void
4352 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4353 {
4354 	uint64_t deadline = 0;
4355 	struct bridge_softc *sc = call->bdc_sc;
4356 
4357 	BRIDGE_LOCK_ASSERT_HELD(sc);
4358 
4359 	if ((sc->sc_flags & SCF_DETACHING) ||
4360 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4361 		return;
4362 	}
4363 
4364 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4365 		nanoseconds_to_absolutetime(
4366 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4367 			call->bdc_ts.tv_nsec, &deadline);
4368 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
4369 	}
4370 
4371 	call->bdc_flags = BDCF_OUTSTANDING;
4372 
4373 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4374 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4375 	    "%s call 0x%llx flags 0x%x",
4376 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4377 	    call->bdc_flags);
4378 }
4379 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4380 
4381 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4382 		thread_call_func_delayed(
4383 			(thread_call_func_t)bridge_delayed_callback,
4384 			call, deadline);
4385 	} else {
4386 		if (call->bdc_thread_call == NULL) {
4387 			call->bdc_thread_call = thread_call_allocate(
4388 				(thread_call_func_t)bridge_delayed_callback,
4389 				call);
4390 		}
4391 		thread_call_enter(call->bdc_thread_call);
4392 	}
4393 }
4394 
4395 /*
4396  * bridge_cancel_delayed_call:
4397  *
4398  *	Cancel a queued or running delayed call.
4399  *	If call is running, does not return until the call is done to
4400  *	prevent race condition with the brigde interface getting destroyed
4401  */
4402 static void
4403 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4404 {
4405 	boolean_t result;
4406 	struct bridge_softc *sc = call->bdc_sc;
4407 
4408 	/*
4409 	 * The call was never scheduled
4410 	 */
4411 	if (sc == NULL) {
4412 		return;
4413 	}
4414 
4415 	BRIDGE_LOCK_ASSERT_HELD(sc);
4416 
4417 	call->bdc_flags |= BDCF_CANCELLING;
4418 
4419 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4420 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4421 		    "%s call 0x%llx flags 0x%x",
4422 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4423 		    call->bdc_flags);
4424 		result = thread_call_func_cancel(
4425 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4426 
4427 		if (result) {
4428 			/*
4429 			 * We managed to dequeue the delayed call
4430 			 */
4431 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4432 		} else {
4433 			/*
4434 			 * Wait for delayed call do be done running
4435 			 */
4436 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4437 		}
4438 	}
4439 	call->bdc_flags &= ~BDCF_CANCELLING;
4440 }
4441 
4442 /*
4443  * bridge_cleanup_delayed_call:
4444  *
4445  *	Dispose resource allocated for a delayed call
4446  *	Assume the delayed call is not queued or running .
4447  */
4448 static void
4449 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4450 {
4451 	boolean_t result;
4452 	struct bridge_softc *sc = call->bdc_sc;
4453 
4454 	/*
4455 	 * The call was never scheduled
4456 	 */
4457 	if (sc == NULL) {
4458 		return;
4459 	}
4460 
4461 	BRIDGE_LOCK_ASSERT_HELD(sc);
4462 
4463 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4464 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4465 
4466 	if (call->bdc_thread_call != NULL) {
4467 		result = thread_call_free(call->bdc_thread_call);
4468 		if (result == FALSE) {
4469 			panic("%s thread_call_free() failed for call %p",
4470 			    __func__, call);
4471 		}
4472 		call->bdc_thread_call = NULL;
4473 	}
4474 }
4475 
4476 /*
4477  * bridge_init:
4478  *
4479  *	Initialize a bridge interface.
4480  */
4481 static int
4482 bridge_init(struct ifnet *ifp)
4483 {
4484 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4485 	errno_t error;
4486 
4487 	BRIDGE_LOCK_ASSERT_HELD(sc);
4488 
4489 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4490 		return 0;
4491 	}
4492 
4493 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4494 
4495 	/*
4496 	 * Calling bridge_aging_timer() is OK as there are no entries to
4497 	 * age so we're just going to arm the timer
4498 	 */
4499 	bridge_aging_timer(sc);
4500 #if BRIDGESTP
4501 	if (error == 0) {
4502 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4503 	}
4504 #endif /* BRIDGESTP */
4505 	return error;
4506 }
4507 
4508 /*
4509  * bridge_ifstop:
4510  *
4511  *	Stop the bridge interface.
4512  */
4513 static void
4514 bridge_ifstop(struct ifnet *ifp, int disable)
4515 {
4516 #pragma unused(disable)
4517 	struct bridge_softc * __single sc = ifp->if_softc;
4518 
4519 	BRIDGE_LOCK_ASSERT_HELD(sc);
4520 
4521 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4522 		return;
4523 	}
4524 
4525 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4526 
4527 #if BRIDGESTP
4528 	bstp_stop(&sc->sc_stp);
4529 #endif /* BRIDGESTP */
4530 
4531 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4532 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4533 }
4534 
4535 static const uint32_t checksum_request_flags = (MBUF_CSUM_REQ_TCP |
4536     MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6);
4537 
4538 static const mbuf_csum_performed_flags_t checksum_performed_all_good =
4539     (MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD
4540     | MBUF_CSUM_DID_DATA | MBUF_CSUM_PSEUDO_HDR);
4541 
4542 /*
4543  * bridge_compute_cksum:
4544  *
4545  *	If the packet has checksum flags, compare the hardware checksum
4546  *	capabilities of the source and destination interfaces. If they
4547  *	are the same, there's nothing to do. If they are different,
4548  *	finalize the checksum so that it can be sent on the destination
4549  *	interface.
4550  */
4551 static void
4552 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4553 {
4554 	uint32_t csum_flags;
4555 	uint16_t dst_hw_csum;
4556 	uint32_t did_sw = 0;
4557 	struct ether_header *eh;
4558 	uint16_t src_hw_csum;
4559 
4560 	if (src_if == dst_if) {
4561 		return;
4562 	}
4563 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4564 	if (csum_flags == 0) {
4565 		/* no checksum offload */
4566 		return;
4567 	}
4568 
4569 	/*
4570 	 * if destination/source differ in checksum offload
4571 	 * capabilities, finalize/compute the checksum
4572 	 */
4573 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4574 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4575 	if (dst_hw_csum == src_hw_csum) {
4576 		return;
4577 	}
4578 	eh = mtod(m, struct ether_header *);
4579 	switch (eh->ether_type) {
4580 	case HTONS_ETHERTYPE_IP:
4581 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4582 		break;
4583 	case HTONS_ETHERTYPE_IPV6:
4584 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4585 		break;
4586 	}
4587 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4588 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4589 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4590 	    m->m_pkthdr.csum_flags);
4591 }
4592 
4593 static inline errno_t
4594 bridge_transmit(ifnet_t ifp, mbuf_t m)
4595 {
4596 	struct flowadv  adv = { .code = FADV_SUCCESS };
4597 	errno_t         error;
4598 	int             flags = DLIL_OUTPUT_FLAGS_RAW;
4599 
4600 	flags = (if_bridge_output_skip_filters != 0)
4601 	    ? (DLIL_OUTPUT_FLAGS_RAW | DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS)
4602 	    : DLIL_OUTPUT_FLAGS_RAW;
4603 	error = dlil_output(ifp, 0, m, NULL, NULL, flags, &adv);
4604 	if (error == 0) {
4605 		if (adv.code == FADV_FLOW_CONTROLLED) {
4606 			error = EQFULL;
4607 		} else if (adv.code == FADV_SUSPENDED) {
4608 			error = EQSUSPENDED;
4609 		}
4610 	}
4611 	return error;
4612 }
4613 
4614 static int
4615 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4616     bool *is_fragmented)
4617 {
4618 	int newoff;
4619 
4620 	*is_fragmented = false;
4621 	while (1) {
4622 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4623 		if (newoff < 0) {
4624 			return off;
4625 		} else if (newoff < off) {
4626 			return -1;    /* invalid */
4627 		} else if (newoff == off) {
4628 			return newoff;
4629 		}
4630 		off = newoff;
4631 		proto = *nxtp;
4632 		if (proto == IPPROTO_FRAGMENT) {
4633 			*is_fragmented = true;
4634 		}
4635 	}
4636 }
4637 
/*
 * Atomically increment the counter 's' with relaxed ordering.
 * The argument is parenthesized so that any lvalue expression can be
 * passed without operator-precedence surprises.
 */
#define __ATOMIC_INC(s) os_atomic_inc(&(s), relaxed)
4639 
4640 static int
4641 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4642     ip_packet_info_t info_p, struct bripstats * stats_p)
4643 {
4644 	int             error = 0;
4645 	u_int           hlen;
4646 	u_int           ip_hlen;
4647 	u_int           ip_pay_len;
4648 	struct mbuf *   m0 = *mp;
4649 	int             off;
4650 	int             opt_len = 0;
4651 	int             proto = 0;
4652 
4653 	bzero(info_p, sizeof(*info_p));
4654 	if (is_ipv4) {
4655 		struct ip *     ip;
4656 		u_int           ip_total_len;
4657 
4658 		/* IPv4 */
4659 		hlen = mac_hlen + sizeof(struct ip);
4660 		if (m0->m_pkthdr.len < hlen) {
4661 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4662 			    "Short IP packet %d < %d",
4663 			    m0->m_pkthdr.len, hlen);
4664 			error = _EBADIP;
4665 			__ATOMIC_INC(stats_p->bips_bad_ip);
4666 			goto done;
4667 		}
4668 		if (m0->m_len < hlen) {
4669 			*mp = m0 = m_pullup(m0, hlen);
4670 			if (m0 == NULL) {
4671 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4672 				    "m_pullup failed hlen %d",
4673 				    hlen);
4674 				error = ENOBUFS;
4675 				__ATOMIC_INC(stats_p->bips_bad_ip);
4676 				goto done;
4677 			}
4678 		}
4679 		ip = (struct ip *)mtodo(m0, mac_hlen);
4680 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4681 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4682 			    "bad IP version");
4683 			error = _EBADIP;
4684 			__ATOMIC_INC(stats_p->bips_bad_ip);
4685 			goto done;
4686 		}
4687 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4688 		if (ip_hlen < sizeof(struct ip)) {
4689 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4690 			    "bad IP header length %d < %d",
4691 			    ip_hlen,
4692 			    (int)sizeof(struct ip));
4693 			error = _EBADIP;
4694 			__ATOMIC_INC(stats_p->bips_bad_ip);
4695 			goto done;
4696 		}
4697 		hlen = mac_hlen + ip_hlen;
4698 		if (m0->m_len < hlen) {
4699 			*mp = m0 = m_pullup(m0, hlen);
4700 			if (m0 == NULL) {
4701 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4702 				    "m_pullup failed hlen %d",
4703 				    hlen);
4704 				error = ENOBUFS;
4705 				__ATOMIC_INC(stats_p->bips_bad_ip);
4706 				goto done;
4707 			}
4708 			ip = (struct ip *)mtodo(m0, mac_hlen);
4709 		}
4710 
4711 		ip_total_len = ntohs(ip->ip_len);
4712 		if (ip_total_len < ip_hlen) {
4713 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4714 			    "IP total len %d < header len %d",
4715 			    ip_total_len, ip_hlen);
4716 			error = _EBADIP;
4717 			__ATOMIC_INC(stats_p->bips_bad_ip);
4718 			goto done;
4719 		}
4720 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4721 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4722 			    "invalid IP payload length %d > %d",
4723 			    ip_total_len,
4724 			    (m0->m_pkthdr.len - mac_hlen));
4725 			error = _EBADIP;
4726 			__ATOMIC_INC(stats_p->bips_bad_ip);
4727 			goto done;
4728 		}
4729 		ip_pay_len = ip_total_len - ip_hlen;
4730 		info_p->ip_proto = ip->ip_p;
4731 		info_p->ip_hdr = mtodo(m0, mac_hlen);
4732 		info_p->ip_m0_len = m0->m_len - mac_hlen;
4733 		info_p->ip_hlen = ip_hlen;
4734 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4735 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4736 			info_p->ip_is_fragmented = true;
4737 		}
4738 		__ATOMIC_INC(stats_p->bips_ip);
4739 	} else {
4740 		struct ip6_hdr *ip6;
4741 
4742 		/* IPv6 */
4743 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4744 		if (m0->m_pkthdr.len < hlen) {
4745 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4746 			    "short IPv6 packet %d < %d",
4747 			    m0->m_pkthdr.len, hlen);
4748 			error = _EBADIPV6;
4749 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4750 			goto done;
4751 		}
4752 		if (m0->m_len < hlen) {
4753 			*mp = m0 = m_pullup(m0, hlen);
4754 			if (m0 == NULL) {
4755 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4756 				    "m_pullup failed hlen %d",
4757 				    hlen);
4758 				error = ENOBUFS;
4759 				__ATOMIC_INC(stats_p->bips_bad_ip6);
4760 				goto done;
4761 			}
4762 		}
4763 		ip6 = (struct ip6_hdr *)(mtodo(m0, mac_hlen));
4764 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4765 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4766 			    "bad IPv6 version");
4767 			error = _EBADIPV6;
4768 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4769 			goto done;
4770 		}
4771 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4772 		    &info_p->ip_is_fragmented);
4773 		if (off < 0 || m0->m_pkthdr.len < off) {
4774 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4775 			    "ip6_lasthdr() returned %d",
4776 			    off);
4777 			error = _EBADIPV6;
4778 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4779 			goto done;
4780 		}
4781 		ip_hlen = sizeof(*ip6);
4782 		opt_len = off - mac_hlen - ip_hlen;
4783 		if (opt_len < 0) {
4784 			error = _EBADIPV6;
4785 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4786 			goto done;
4787 		}
4788 		ip_pay_len = ntohs(ip6->ip6_plen);
4789 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4790 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4791 			    "invalid IPv6 payload length %d > %d",
4792 			    ip_pay_len,
4793 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4794 			error = _EBADIPV6;
4795 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4796 			goto done;
4797 		}
4798 		info_p->ip_proto = proto;
4799 		info_p->ip_hdr = mtodo(m0, mac_hlen);
4800 		info_p->ip_m0_len = m0->m_len - mac_hlen;
4801 		info_p->ip_hlen = ip_hlen;
4802 		__ATOMIC_INC(stats_p->bips_ip6);
4803 	}
4804 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4805 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4806 	    is_ipv4 ? '4' : '6',
4807 	    proto, ip_hlen, ip_pay_len, opt_len,
4808 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4809 	info_p->ip_pay_len = ip_pay_len;
4810 	info_p->ip_opt_len = opt_len;
4811 	info_p->ip_is_ipv4 = is_ipv4;
4812 done:
4813 	return error;
4814 }
4815 
4816 static int
4817 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4818     ip_packet_info_t info_p, struct bripstats * stats_p)
4819 {
4820 	int             error;
4821 	u_int           hlen;
4822 
4823 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4824 	if (error != 0) {
4825 		goto done;
4826 	}
4827 	if (info_p->ip_proto != IPPROTO_TCP) {
4828 		/* not a TCP frame, not an error, just a bad guess */
4829 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4830 		    "non-TCP (%d) IPv%c frame %d bytes",
4831 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4832 		    (*mp)->m_pkthdr.len);
4833 		goto done;
4834 	}
4835 	if (info_p->ip_is_fragmented) {
4836 		/* both TSO and IP fragmentation don't make sense */
4837 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4838 		    "fragmented TSO packet?");
4839 		__ATOMIC_INC(stats_p->bips_bad_tcp);
4840 		error = _EBADTCP;
4841 		goto done;
4842 	}
4843 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4844 	    info_p->ip_opt_len;
4845 	if ((*mp)->m_len < hlen) {
4846 		*mp = m_pullup(*mp, hlen);
4847 		if (*mp == NULL) {
4848 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4849 			    "m_pullup %d failed",
4850 			    hlen);
4851 			__ATOMIC_INC(stats_p->bips_bad_tcp);
4852 			error = _EBADTCP;
4853 			goto done;
4854 		}
4855 	}
4856 	info_p->ip_proto_hdr = info_p->ip_hdr + info_p->ip_hlen +
4857 	    info_p->ip_opt_len;
4858 done:
4859 	return error;
4860 }
4861 
4862 static inline void
4863 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4864 {
4865 	if (proto == IPPROTO_TCP) {
4866 		__ATOMIC_INC(stats_p->brcs_tcp_checksum);
4867 	} else {
4868 		__ATOMIC_INC(stats_p->brcs_udp_checksum);
4869 	}
4870 	return;
4871 }
4872 
/*
 * Bit flags classifying an ethernet type (see ether_type_flag_get()).
 */
#define ETHER_TYPE_FLAG_NONE    0x00    /* none of the types below */
#define ETHER_TYPE_FLAG_IPV4    0x01    /* IPv4 */
#define ETHER_TYPE_FLAG_IPV6    0x02    /* IPv6 */
#define ETHER_TYPE_FLAG_ARP     0x04    /* ARP */

/* combinations of the flags above */
#define ETHER_TYPE_FLAG_IP      (ETHER_TYPE_FLAG_IPV4 | ETHER_TYPE_FLAG_IPV6)
#define ETHER_TYPE_FLAG_IP_ARP  (ETHER_TYPE_FLAG_IP | ETHER_TYPE_FLAG_ARP)
4879 
4880 static inline bool
4881 ether_type_flag_is_ip(ether_type_flag_t flag)
4882 {
4883 	return (flag & ETHER_TYPE_FLAG_IP) != 0;
4884 }
4885 
4886 static inline ether_type_flag_t
4887 ether_type_flag_get(uint16_t ether_type)
4888 {
4889 	ether_type_flag_t flag = ETHER_TYPE_FLAG_NONE;
4890 
4891 	switch (ether_type) {
4892 	case HTONS_ETHERTYPE_IP:
4893 		flag = ETHER_TYPE_FLAG_IPV4;
4894 		break;
4895 	case HTONS_ETHERTYPE_IPV6:
4896 		flag = ETHER_TYPE_FLAG_IPV6;
4897 		break;
4898 	case HTONS_ETHERTYPE_ARP:
4899 		flag = ETHER_TYPE_FLAG_ARP;
4900 		break;
4901 	default:
4902 		break;
4903 	}
4904 	return flag;
4905 }
4906 
4907 static bool
4908 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4909 {
4910 	uint16_t        ether_type;
4911 	bool            is_ip = TRUE;
4912 
4913 	ether_type = ntohs(eh->ether_type);
4914 	switch (ether_type) {
4915 	case ETHERTYPE_IP:
4916 		*is_ipv4 = TRUE;
4917 		break;
4918 	case ETHERTYPE_IPV6:
4919 		*is_ipv4 = FALSE;
4920 		break;
4921 	default:
4922 		is_ip = FALSE;
4923 		break;
4924 	}
4925 	return is_ip;
4926 }
4927 
4928 static errno_t
4929 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4930 {
4931 	struct brcsumstats *csum_stats_p;
4932 	struct ether_header     *eh;
4933 	errno_t         error = 0;
4934 	ip_packet_info  info;
4935 	bool            is_ipv4;
4936 	struct mbuf *   m;
4937 	u_int           mac_hlen = sizeof(struct ether_header);
4938 	uint16_t        sum;
4939 	bool            valid;
4940 
4941 	eh = mtod(*mp, struct ether_header *);
4942 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4943 		goto done;
4944 	}
4945 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4946 	    &stats_p->brms_out_ip);
4947 	m = *mp;
4948 	if (error != 0) {
4949 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4950 		    "bridge_get_ip_proto failed %d",
4951 		    error);
4952 		goto done;
4953 	}
4954 	if (is_ipv4) {
4955 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4956 			/* hardware offloaded IP header checksum */
4957 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4958 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4959 			    "IP checksum HW %svalid",
4960 			    valid ? "" : "in");
4961 			if (!valid) {
4962 				__ATOMIC_INC(stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum);
4963 				error = _EBADIPCHECKSUM;
4964 				goto done;
4965 			}
4966 			__ATOMIC_INC(stats_p->brms_out_cksum_good_hw.brcs_ip_checksum);
4967 		} else {
4968 			/* verify */
4969 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4970 			valid = (sum == 0);
4971 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4972 			    "IP checksum SW %svalid",
4973 			    valid ? "" : "in");
4974 			if (!valid) {
4975 				__ATOMIC_INC(stats_p->brms_out_cksum_bad.brcs_ip_checksum);
4976 				error = _EBADIPCHECKSUM;
4977 				goto done;
4978 			}
4979 			__ATOMIC_INC(stats_p->brms_out_cksum_good.brcs_ip_checksum);
4980 		}
4981 	}
4982 	if (info.ip_is_fragmented) {
4983 		/* can't verify checksum on fragmented packets */
4984 		goto done;
4985 	}
4986 	switch (info.ip_proto) {
4987 	case IPPROTO_TCP:
4988 		__ATOMIC_INC(stats_p->brms_out_ip.bips_tcp);
4989 		break;
4990 	case IPPROTO_UDP:
4991 		__ATOMIC_INC(stats_p->brms_out_ip.bips_udp);
4992 		break;
4993 	default:
4994 		goto done;
4995 	}
4996 	/* check for hardware offloaded UDP/TCP checksum */
4997 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4998 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4999 		/* checksum verified by hardware */
5000 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
5001 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5002 		    "IPv%c %s checksum HW 0x%x %svalid",
5003 		    is_ipv4 ? '4' : '6',
5004 		    (info.ip_proto == IPPROTO_TCP)
5005 		    ? "TCP" : "UDP",
5006 		    m->m_pkthdr.csum_data,
5007 		    valid ? "" : "in" );
5008 		if (!valid) {
5009 			/* bad checksum */
5010 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
5011 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
5012 			    : _EBADTCPCHECKSUM;
5013 		} else {
5014 			/* good checksum */
5015 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
5016 		}
5017 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5018 		goto done;
5019 	}
5020 	/* adjust frame to skip mac-layer header */
5021 	_mbuf_adjust_pkthdr_and_data(m, mac_hlen);
5022 	if (is_ipv4) {
5023 		sum = inet_cksum(m, info.ip_proto,
5024 		    info.ip_hlen,
5025 		    info.ip_pay_len);
5026 	} else {
5027 		sum = inet6_cksum(m, info.ip_proto,
5028 		    info.ip_hlen + info.ip_opt_len,
5029 		    info.ip_pay_len - info.ip_opt_len);
5030 	}
5031 	valid = (sum == 0);
5032 	if (valid) {
5033 		csum_stats_p = &stats_p->brms_out_cksum_good;
5034 	} else {
5035 		csum_stats_p = &stats_p->brms_out_cksum_bad;
5036 		error = (info.ip_proto == IPPROTO_TCP)
5037 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
5038 	}
5039 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5040 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5041 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
5042 	    is_ipv4 ? '4' : '6',
5043 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5044 	    valid ? "" : "in",
5045 	    sum, info.ip_hlen, info.ip_pay_len);
5046 	/* adjust frame back to start of mac-layer header */
5047 	_mbuf_adjust_pkthdr_and_data(m, -mac_hlen);
5048 
5049 done:
5050 	return error;
5051 }
5052 
5053 static mbuf_t
5054 bridge_verify_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * dbif,
5055     mbuf_t in_list, bool is_ipv4)
5056 {
5057 	mbuf_t          next_packet;
5058 	mblist          ret;
5059 
5060 	mblist_init(&ret);
5061 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5062 		errno_t         error;
5063 
5064 		/* take packet out of the list */
5065 		next_packet = scan->m_nextpkt;
5066 		scan->m_nextpkt = NULL;
5067 
5068 		if (scan->m_pkthdr.rx_seg_cnt > 1) {
5069 			/* LRO packet, compute checksum on large packet */
5070 			scan = bridge_filter_checksum(bridge_ifp, dbif, scan,
5071 			    is_ipv4, false, true);
5072 		} else {
5073 			/* verify checksum */
5074 			error = bridge_verify_checksum(&scan, &dbif->bif_stats);
5075 			if (error != 0) {
5076 				if (scan != NULL) {
5077 					m_drop(scan, DROPTAP_FLAG_DIR_IN,
5078 					    DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
5079 					scan = NULL;
5080 				}
5081 			}
5082 		}
5083 
5084 		/* add it back to the list */
5085 		if (scan != NULL) {
5086 			mblist_append(&ret, scan);
5087 		}
5088 	}
5089 	return ret.head;
5090 }
5091 
5092 
5093 static errno_t
5094 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
5095     struct ifbrmstats * stats_p)
5096 {
5097 	uint16_t *      csum_p;
5098 	errno_t         error = 0;
5099 	u_int           hlen;
5100 	struct mbuf *   m0 = *mp;
5101 	u_int           mac_hlen = sizeof(struct ether_header);
5102 	u_int           pkt_hdr_len;
5103 	struct tcphdr * tcp;
5104 	u_int           tcp_hlen;
5105 	struct udphdr * udp;
5106 
5107 	if (info_p->ip_is_ipv4) {
5108 		/* compute IP header checksum */
5109 		struct ip *ip = (struct ip *)info_p->ip_hdr;
5110 		ip->ip_sum = 0;
5111 		ip->ip_sum = inet_cksum(m0, 0, mac_hlen, info_p->ip_hlen);
5112 		__ATOMIC_INC(stats_p->brms_in_computed_cksum.brcs_ip_checksum);
5113 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5114 		    "IPv4 checksum 0x%x",
5115 		    ntohs(ip->ip_sum));
5116 	}
5117 	if (info_p->ip_is_fragmented) {
5118 		/* can't compute checksum on fragmented packets */
5119 		goto done;
5120 	}
5121 	pkt_hdr_len = m0->m_pkthdr.len;
5122 	switch (info_p->ip_proto) {
5123 	case IPPROTO_TCP:
5124 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
5125 		    + sizeof(struct tcphdr);
5126 		if (m0->m_len < hlen) {
5127 			*mp = m0 = m_pullup(m0, hlen);
5128 			if (m0 == NULL) {
5129 				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5130 				error = _EBADTCP;
5131 				goto done;
5132 			}
5133 		}
5134 		tcp = (struct tcphdr *)(info_p->ip_hdr + info_p->ip_hlen
5135 		    + info_p->ip_opt_len);
5136 		tcp_hlen = tcp->th_off << 2;
5137 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
5138 		if (hlen > pkt_hdr_len) {
5139 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5140 			    "bad tcp header length %u",
5141 			    tcp_hlen);
5142 			__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5143 			error = _EBADTCP;
5144 			goto done;
5145 		}
5146 		csum_p = &tcp->th_sum;
5147 		__ATOMIC_INC(stats_p->brms_in_ip.bips_tcp);
5148 		break;
5149 	case IPPROTO_UDP:
5150 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
5151 		if (m0->m_len < hlen) {
5152 			*mp = m0 = m_pullup(m0, hlen);
5153 			if (m0 == NULL) {
5154 				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_udp);
5155 				error = ENOBUFS;
5156 				goto done;
5157 			}
5158 		}
5159 		udp = (struct udphdr *)(info_p->ip_hdr + info_p->ip_hlen
5160 		    + info_p->ip_opt_len);
5161 		csum_p = &udp->uh_sum;
5162 		__ATOMIC_INC(stats_p->brms_in_ip.bips_udp);
5163 		break;
5164 	default:
5165 		/* not TCP or UDP */
5166 		goto done;
5167 	}
5168 	*csum_p = 0;
5169 	/* adjust frame to skip mac-layer header */
5170 	_mbuf_adjust_pkthdr_and_data(m0, mac_hlen);
5171 	if (info_p->ip_is_ipv4) {
5172 		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
5173 		    info_p->ip_pay_len);
5174 	} else {
5175 		*csum_p = inet6_cksum(m0, info_p->ip_proto,
5176 		    info_p->ip_hlen + info_p->ip_opt_len,
5177 		    info_p->ip_pay_len - info_p->ip_opt_len);
5178 	}
5179 	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
5180 		/* RFC 1122 4.1.3.4 */
5181 		*csum_p = 0xffff;
5182 	}
5183 	/* adjust frame back to start of mac-layer header */
5184 	_mbuf_adjust_pkthdr_and_data(m0, -mac_hlen);
5185 	proto_csum_stats_increment(info_p->ip_proto,
5186 	    &stats_p->brms_in_computed_cksum);
5187 
5188 	/* indicate that the checksum is good */
5189 	mbuf_set_csum_performed(m0, checksum_performed_all_good, 0xffff);
5190 
5191 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5192 	    "IPv%c %s set checksum 0x%x",
5193 	    info_p->ip_is_ipv4 ? '4' : '6',
5194 	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5195 	    ntohs(*csum_p));
5196 done:
5197 	return error;
5198 }
5199 
5200 static inline void
5201 bridge_handle_checksum_op(ifnet_t src_ifp, ifnet_t dst_ifp,
5202     mbuf_t m, ChecksumOperation cksum_op)
5203 {
5204 	switch (cksum_op) {
5205 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
5206 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5207 		break;
5208 	case CHECKSUM_OPERATION_FINALIZE:
5209 		/* the checksum might not be correct, finalize now */
5210 		VERIFY(dst_ifp != NULL);
5211 		bridge_finalize_cksum(dst_ifp, m);
5212 		break;
5213 	case CHECKSUM_OPERATION_COMPUTE:
5214 		VERIFY(dst_ifp != NULL && src_ifp != NULL);
5215 		bridge_compute_cksum(src_ifp, dst_ifp, m);
5216 		break;
5217 	default:
5218 		break;
5219 	}
5220 	return;
5221 }
5222 
5223 static uint32_t
5224 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5225 {
5226 	uint32_t tso_mtu;
5227 
5228 	tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5229 	if (tso_mtu == 0) {
5230 		tso_mtu = IP_MAXPACKET;
5231 	}
5232 
5233 #if DEBUG || DEVELOPMENT
5234 #define REDUCED_TSO_MTU         (16 * 1024)
5235 	if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5236 		tso_mtu = REDUCED_TSO_MTU;
5237 	}
5238 #endif /* DEBUG || DEVELOPMENT */
5239 	return tso_mtu;
5240 }
5241 
5242 /*
5243  * tso_hwassist:
5244  * - determine whether the destination interface supports TSO offload
5245  * - if the packet is already marked for offload and the hardware supports
5246  *   it, just allow the packet to continue on
5247  * - if not, parse the packet headers to verify that this is a large TCP
5248  *   packet requiring segmentation; if the hardware doesn't support it
5249  *   set need_sw_tso; otherwise, mark the packet for TSO offload
5250  */
5251 static int
5252 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5253     int * mss_p, bool * need_gso, bool * is_large_tcp)
5254 {
5255 	uint32_t                csum_flags;
5256 	int                     error = 0;
5257 	ip_packet_info          info;
5258 	u_int32_t               if_csum;
5259 	u_int32_t               if_tso;
5260 	u_int32_t               mbuf_tso;
5261 	int                     mss = *mss_p;
5262 	uint8_t                 seg_cnt = 0;
5263 	bool                    supports_cksum = false;
5264 	uint32_t                pkt_mtu;
5265 	struct bripstats        stats;
5266 
5267 	*need_gso = false;
5268 	*is_large_tcp = false;
5269 	if (is_ipv4) {
5270 		/*
5271 		 * Enable both TCP and IP offload if the hardware supports it.
5272 		 * If the hardware doesn't support TCP offload, supports_cksum
5273 		 * will be false so we won't set either offload.
5274 		 */
5275 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5276 		supports_cksum = (if_csum & CSUM_TCP) != 0;
5277 		if_tso = IFNET_TSO_IPV4;
5278 		mbuf_tso = CSUM_TSO_IPV4;
5279 	} else {
5280 		if_csum = (ifp->if_hwassist & CSUM_TCPIPV6);
5281 		supports_cksum = (if_csum & CSUM_TCPIPV6) != 0;
5282 		if_tso = IFNET_TSO_IPV6;
5283 		mbuf_tso = CSUM_TSO_IPV6;
5284 	}
5285 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5286 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
5287 	    ifp->if_xname, supports_cksum ? "" : " not",
5288 	    ifp->if_hwassist, if_csum);
5289 
5290 	/* verify that this is a large TCP frame */
5291 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5292 	    &info, &stats);
5293 	if (error != 0) {
5294 		/* bad packet */
5295 		goto done;
5296 	}
5297 	if (info.ip_proto_hdr == NULL) {
5298 		/* not a TCP packet */
5299 		goto done;
5300 	}
5301 	pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
5302 	if (mss == 0) {
5303 		/* check for LRO */
5304 		seg_cnt = (*mp)->m_pkthdr.rx_seg_cnt;
5305 		if (seg_cnt == 1 || (seg_cnt == 0 && pkt_mtu <= ifp->if_mtu)) {
5306 			/* not actually a large packet */
5307 			goto done;
5308 		}
5309 	}
5310 	if (mss == 0) {
5311 		uint32_t            hdr_len;
5312 		struct tcphdr *     tcp;
5313 
5314 		tcp = (struct tcphdr *)info.ip_proto_hdr;
5315 		hdr_len = info.ip_hlen + info.ip_opt_len + (tcp->th_off << 2);
5316 
5317 		/* packet isn't marked, mark it now */
5318 		if (seg_cnt != 0) {
5319 			uint32_t    len;
5320 
5321 			/* approximate the MSS using the LRO seg cnt */
5322 			len = mbuf_pkthdr_len(*mp) - hdr_len - ETHER_HDR_LEN;
5323 			mss = len / seg_cnt;
5324 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5325 			    "%s: mss %d = len %d / seg cnt %d",
5326 			    ifp->if_xname, mss, len, seg_cnt);
5327 			if (mss <= 0) {
5328 				/* unexpected value */
5329 				mss = 0;
5330 				goto done;
5331 			}
5332 		} else {
5333 			mss = ifp->if_mtu - hdr_len
5334 			    - if_bridge_tso_reduce_mss_tx;
5335 			assert(mss > 0);
5336 		}
5337 		csum_flags = mbuf_tso;
5338 		if (supports_cksum) {
5339 			csum_flags |= if_csum;
5340 		}
5341 		(*mp)->m_pkthdr.tso_segsz = mss;
5342 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
5343 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5344 	}
5345 	*is_large_tcp = true;
5346 	(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5347 	if ((ifp->if_hwassist & if_tso) == 0) {
5348 		/* need gso if no hardware support */
5349 		*need_gso = true;
5350 	} else {
5351 		uint32_t                tso_mtu = 0;
5352 
5353 		tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
5354 		if (pkt_mtu > tso_mtu) {
5355 			/* need gso if tso_mtu too small */
5356 			*need_gso = true;
5357 		}
5358 	}
5359 done:
5360 	*mss_p = mss;
5361 	return error;
5362 }
5363 
5364 /*
5365  * bridge_enqueue:
5366  *
5367  *	Enqueue a packet list on a bridge member interface.
5368  *
5369  */
5370 static int
5371 bridge_enqueue(ifnet_t bridge_ifp, ifnet_t src_if, ifnet_t dst_if,
5372     ether_type_flag_t etypef, mbuf_t in_list, ChecksumOperation orig_cksum_op,
5373     pkt_direction_t direction)
5374 {
5375 	int             enqueue_error = 0;
5376 	mbuf_t          next_packet;
5377 	uint32_t        out_errors = 0;
5378 	mblist          out_list;
5379 
5380 	VERIFY(dst_if != NULL);
5381 
5382 	mblist_init(&out_list);
5383 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5384 		bool            check_gso = false;
5385 		ChecksumOperation cksum_op = orig_cksum_op;
5386 		errno_t         error = 0;
5387 		bool            is_ipv4 = false;
5388 		int             len;
5389 		int             mss = 0;
5390 		bool            need_gso = false;
5391 
5392 		scan->m_flags |= M_PROTO1; /* set to avoid loops */
5393 		next_packet = scan->m_nextpkt;
5394 		scan->m_nextpkt = NULL;
5395 		len = mbuf_pkthdr_len(scan);
5396 		is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
5397 		mss = _mbuf_get_tso_mss(scan);
5398 		if (mss != 0) {
5399 			/* packet is marked for segmentation */
5400 			check_gso = true;
5401 		} else if (direction == pkt_direction_RX &&
5402 		    scan->m_pkthdr.rx_seg_cnt != 0) {
5403 			/* LRO packet */
5404 			check_gso = true;
5405 		} else if (ether_type_flag_is_ip(etypef) &&
5406 		    len > (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5407 			/*
5408 			 * Need to segment the packet if it is a large frame
5409 			 * and the destination interface does not support TSO.
5410 			 *
5411 			 * Note that with trailers, it's possible for a packet to
5412 			 * be large but not actually require segmentation.
5413 			 */
5414 			check_gso = true;
5415 		}
5416 		if (check_gso) {
5417 			bool    is_large_tcp = false;
5418 
5419 			error = tso_hwassist(&scan, is_ipv4,
5420 			    dst_if, sizeof(struct ether_header), &mss,
5421 			    &need_gso, &is_large_tcp);
5422 			if (is_large_tcp &&
5423 			    cksum_op == CHECKSUM_OPERATION_CLEAR_OFFLOAD) {
5424 				cksum_op = CHECKSUM_OPERATION_NONE;
5425 			}
5426 		}
5427 		if (error != 0) {
5428 			if (scan != NULL) {
5429 				m_drop(scan,
5430 				    direction == pkt_direction_RX ? DROPTAP_FLAG_DIR_IN : DROPTAP_FLAG_DIR_OUT,
5431 				    DROP_REASON_BRIDGE_HWASSIST, NULL, 0);
5432 				scan = NULL;
5433 			}
5434 			out_errors++;
5435 		} else if (need_gso) {
5436 			int             mac_hlen = sizeof(struct ether_header);
5437 			mblist          segs;
5438 
5439 			/* segment packets, add to list */
5440 			segs = gso_tcp_transmit(dst_if, scan, mac_hlen,
5441 			    is_ipv4);
5442 			if (segs.head != NULL) {
5443 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5444 				    "%s (%s) append gso #segs %u bytes %u",
5445 				    bridge_ifp->if_xname,
5446 				    dst_if->if_xname,
5447 				    segs.count, segs.bytes);
5448 				mblist_append_list(&out_list, segs);
5449 			} else {
5450 				out_errors++;
5451 			}
5452 		} else {
5453 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5454 			    "%s (%s) append %d bytes mss %d op %d",
5455 			    bridge_ifp->if_xname,
5456 			    dst_if->if_xname,
5457 			    len, mss, cksum_op);
5458 			bridge_handle_checksum_op(src_if, dst_if,
5459 			    scan, cksum_op);
5460 			mblist_append(&out_list, scan);
5461 		}
5462 	}
5463 	if (out_list.head != NULL) {
5464 		enqueue_error = bridge_transmit(dst_if, out_list.head);
5465 		if (enqueue_error != 0) {
5466 			out_errors++;
5467 		}
5468 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5469 		    "%s (%s) bridge_transmit packets %u bytes %u error %d",
5470 		    bridge_ifp->if_xname,
5471 		    dst_if->if_xname,
5472 		    out_list.count, out_list.bytes, enqueue_error);
5473 	}
5474 	if (out_list.count != 0 || out_errors != 0) {
5475 		ifnet_stat_increment_out(bridge_ifp, out_list.count,
5476 		    out_list.bytes, out_errors);
5477 	}
5478 	return enqueue_error;
5479 }
5480 
5481 /*
5482  * bridge_member_output:
5483  *
5484  *	Send output from a bridge member interface.  This
5485  *	performs the bridging function for locally originated
5486  *	packets.
5487  *
5488  *	The mbuf has the Ethernet header already attached.
5489  */
5490 static errno_t
5491 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5492 {
5493 	struct bridge_iflist * bif = NULL;
5494 	ifnet_t bridge_ifp;
5495 	struct ether_header *eh;
5496 	ether_type_flag_t etypef;
5497 	struct ifnet *dst_if = NULL;
5498 	uint16_t vlan;
5499 	struct bridge_iflist *mac_nat_bif;
5500 	ifnet_t mac_nat_ifp;
5501 	mbuf_t m = *data;
5502 
5503 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5504 	    "ifp %s", ifp->if_xname);
5505 	if (m->m_len < ETHER_HDR_LEN) {
5506 		m = m_pullup(m, ETHER_HDR_LEN);
5507 		if (m == NULL) {
5508 			*data = NULL;
5509 			return EJUSTRETURN;
5510 		}
5511 	}
5512 
5513 	BRIDGE_LOCK(sc);
5514 	mac_nat_bif = sc->sc_mac_nat_bif;
5515 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5516 	if (mac_nat_ifp == ifp) {
5517 		/* record the IP address used by the MAC NAT interface */
5518 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5519 		m = *data;
5520 		if (m == NULL) {
5521 			/* packet was deallocated */
5522 			BRIDGE_UNLOCK(sc);
5523 			return EJUSTRETURN;
5524 		}
5525 	}
5526 	bridge_ifp = sc->sc_ifp;
5527 	eh = mtod(m, struct ether_header *);
5528 	vlan = VLANTAGOF(m);
5529 	etypef = ether_type_flag_get(eh->ether_type);
5530 
5531 	/*
5532 	 * APPLE MODIFICATION
5533 	 * If the packet is an 802.1X ethertype, then only send on the
5534 	 * original output interface.
5535 	 */
5536 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5537 		dst_if = ifp;
5538 		goto sendunicast;
5539 	}
5540 
5541 	/*
5542 	 * If bridge is down, but the original output interface is up,
5543 	 * go ahead and send out that interface.  Otherwise, the packet
5544 	 * is dropped below.
5545 	 */
5546 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5547 		dst_if = ifp;
5548 		goto sendunicast;
5549 	}
5550 
5551 	/*
5552 	 * If the packet is a multicast, or we don't know a better way to
5553 	 * get there, send to all interfaces.
5554 	 */
5555 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5556 		dst_if = NULL;
5557 	} else {
5558 		bif = bridge_rtlookup_bif(sc, eh->ether_dhost, vlan);
5559 		if (bif != NULL) {
5560 			dst_if = bif->bif_ifp;
5561 		}
5562 	}
5563 	if (dst_if == NULL) {
5564 		struct mbuf *mc;
5565 		errno_t error;
5566 
5567 
5568 		bridge_span(sc, etypef, m);
5569 
5570 		BRIDGE_LOCK2REF(sc, error);
5571 		if (error != 0) {
5572 			m_drop(m, DROPTAP_FLAG_DIR_OUT,
5573 			    DROP_REASON_BRIDGE_NOREF, NULL, 0);
5574 			return EJUSTRETURN;
5575 		}
5576 
5577 		/*
5578 		 * Duplicate and send the packet across all member interfaces
5579 		 * except the originating interface.
5580 		 */
5581 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5582 			dst_if = bif->bif_ifp;
5583 			if (dst_if == ifp) {
5584 				/* skip the originating interface */
5585 				continue;
5586 			}
5587 			/* skip interface with inactive link status */
5588 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5589 				continue;
5590 			}
5591 
5592 			/* skip interface that isn't running */
5593 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5594 				continue;
5595 			}
5596 			/*
5597 			 * If the interface is participating in spanning
5598 			 * tree, make sure the port is in a state that
5599 			 * allows forwarding.
5600 			 */
5601 			if ((bif->bif_ifflags & IFBIF_STP) &&
5602 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5603 				continue;
5604 			}
5605 			/*
5606 			 * If the destination is the MAC NAT interface,
5607 			 * skip sending the packet. The packet can't be sent
5608 			 * if the source MAC is incorrect.
5609 			 */
5610 			if (dst_if == mac_nat_ifp) {
5611 				continue;
5612 			}
5613 
5614 			/* make a deep copy to send on this member interface */
5615 			mc = m_dup(m, M_DONTWAIT);
5616 			if (mc == NULL) {
5617 				(void)ifnet_stat_increment_out(bridge_ifp,
5618 				    0, 0, 1);
5619 				continue;
5620 			}
5621 			(void)bridge_enqueue(bridge_ifp, ifp, dst_if, etypef,
5622 			    mc, CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5623 		}
5624 		BRIDGE_UNREF(sc);
5625 
5626 		if ((ifp->if_flags & IFF_RUNNING) == 0) {
5627 			m_drop(m, DROPTAP_FLAG_DIR_OUT,
5628 			    DROP_REASON_BRIDGE_NOT_RUNNING, NULL, 0);
5629 			return EJUSTRETURN;
5630 		}
5631 		/* allow packet to continue on the originating interface */
5632 		return 0;
5633 	}
5634 
5635 sendunicast:
5636 	/*
5637 	 * XXX Spanning tree consideration here?
5638 	 */
5639 
5640 	bridge_span(sc, etypef, m);
5641 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5642 		m_drop(m, DROPTAP_FLAG_DIR_OUT,
5643 		    DROP_REASON_BRIDGE_NOT_RUNNING, NULL, 0);
5644 		BRIDGE_UNLOCK(sc);
5645 		return EJUSTRETURN;
5646 	}
5647 
5648 	BRIDGE_UNLOCK(sc);
5649 	if (dst_if == ifp) {
5650 		/* allow packet to continue on the originating interface */
5651 		return 0;
5652 	}
5653 	if (dst_if != mac_nat_ifp) {
5654 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, etypef, m,
5655 		    CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5656 	} else {
5657 		/*
5658 		 * This is not the original output interface
5659 		 * and the destination is the MAC NAT interface.
5660 		 * Drop the packet because the packet can't be sent
5661 		 * if the source MAC is incorrect.
5662 		 */
5663 		m_drop(m, DROPTAP_FLAG_DIR_OUT,
5664 		    DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
5665 	}
5666 	return EJUSTRETURN;
5667 }
5668 
5669 /*
5670  * Output callback.
5671  *
5672  * This routine is called externally from above only when if_bridge_txstart
5673  * is disabled; otherwise it is called internally by bridge_start().
5674  */
5675 static int
5676 bridge_output(struct ifnet *ifp, struct mbuf *m)
5677 {
5678 	struct bridge_iflist *bif;
5679 	struct bridge_softc * __single sc = ifnet_softc(ifp);
5680 	struct ether_header *eh;
5681 	ether_type_flag_t etypef;
5682 	struct ifnet *dst_if = NULL;
5683 	int error = 0;
5684 
5685 	eh = mtod(m, struct ether_header *);
5686 	etypef = ether_type_flag_get(eh->ether_type);
5687 	BRIDGE_LOCK(sc);
5688 
5689 	if (!IS_BCAST_MCAST(m)) {
5690 		bif = bridge_rtlookup_bif(sc, eh->ether_dhost, 0);
5691 		if (bif != NULL) {
5692 			dst_if = bif->bif_ifp;
5693 		}
5694 	}
5695 
5696 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5697 
5698 	BRIDGE_BPF_TAP_OUT(ifp, m);
5699 
5700 	if (dst_if == NULL) {
5701 		/* callee will unlock */
5702 		bridge_broadcast(sc, NULL, etypef, m);
5703 	} else {
5704 		ifnet_t bridge_ifp;
5705 
5706 		bridge_ifp = sc->sc_ifp;
5707 		BRIDGE_UNLOCK(sc);
5708 
5709 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, etypef, m,
5710 		    CHECKSUM_OPERATION_FINALIZE, pkt_direction_TX);
5711 	}
5712 
5713 	return error;
5714 }
5715 
5716 static void
5717 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5718 {
5719 	struct ether_header *eh;
5720 	bool is_ipv4;
5721 	uint32_t sw_csum, hwcap;
5722 	uint32_t did_sw;
5723 	uint32_t csum_flags;
5724 
5725 	eh = mtod(m, struct ether_header *);
5726 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5727 		return;
5728 	}
5729 
5730 	/* do in software what the hardware cannot */
5731 	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5732 	csum_flags = m->m_pkthdr.csum_flags;
5733 	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5734 	sw_csum &= IF_HWASSIST_CSUM_MASK;
5735 
5736 	if (is_ipv4) {
5737 		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5738 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5739 			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5740 				uint16_t start =
5741 				    sizeof(*eh) + sizeof(struct ip);
5742 				uint16_t ulpoff =
5743 				    m->m_pkthdr.csum_data & 0xffff;
5744 				m->m_pkthdr.csum_flags |=
5745 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5746 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5747 				m->m_pkthdr.csum_tx_start = start;
5748 			} else {
5749 				sw_csum |= (CSUM_DELAY_DATA &
5750 				    m->m_pkthdr.csum_flags);
5751 			}
5752 		}
5753 		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5754 	} else {
5755 		if ((hwcap & CSUM_PARTIAL) &&
5756 		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5757 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5758 			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5759 				uint16_t start =
5760 				    sizeof(*eh) + sizeof(struct ip6_hdr);
5761 				uint16_t ulpoff =
5762 				    m->m_pkthdr.csum_data & 0xffff;
5763 				m->m_pkthdr.csum_flags |=
5764 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5765 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5766 				m->m_pkthdr.csum_tx_start = start;
5767 			} else {
5768 				sw_csum |= (CSUM_DELAY_IPV6_DATA &
5769 				    m->m_pkthdr.csum_flags);
5770 			}
5771 		}
5772 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5773 	}
5774 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5775 	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5776 	    ifp->if_xname, csum_flags, hwcap, sw_csum,
5777 	    did_sw, m->m_pkthdr.csum_flags);
5778 }
5779 
5780 /*
5781  * bridge_start:
5782  *
5783  *	Start output on a bridge.
5784  *
5785  * This routine is invoked by the start worker thread; because we never call
5786  * it directly, there is no need do deploy any serialization mechanism other
5787  * than what's already used by the worker thread, i.e. this is already single
5788  * threaded.
5789  *
5790  * This routine is called only when if_bridge_txstart is enabled.
5791  */
5792 static void
5793 bridge_start(struct ifnet *ifp)
5794 {
5795 	mbuf_ref_t m;
5796 
5797 	for (;;) {
5798 		if (ifnet_dequeue(ifp, &m) != 0) {
5799 			break;
5800 		}
5801 
5802 		(void) bridge_output(ifp, m);
5803 	}
5804 }
5805 
5806 static void
5807 prepare_input_packet(ifnet_t ifp, mbuf_t m)
5808 {
5809 	mbuf_pkthdr_setrcvif(m, ifp);
5810 	mbuf_pkthdr_setheader(m, mtod(m, void *));
5811 	/* adjust frame to skip mac-layer header */
5812 	_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
5813 }
5814 
5815 static void
5816 mark_tso_checksum_ok(mbuf_t m)
5817 {
5818 	if (_mbuf_get_tso_mss(m) != 0 ||
5819 	    (m->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5820 		mbuf_set_csum_performed(m, checksum_performed_all_good, 0xffff);
5821 	}
5822 }
5823 
5824 static void
5825 inject_input_packet_list(ifnet_t ifp, mbuf_t in_list, bool m_proto1)
5826 {
5827 	for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5828 		/* mark the packets as arriving on the interface */
5829 		BRIDGE_BPF_TAP_IN(ifp, scan);
5830 		if (m_proto1) {
5831 			scan->m_flags |= M_PROTO1; /* set to avoid loops */
5832 		}
5833 		prepare_input_packet(ifp, scan);
5834 		mark_tso_checksum_ok(scan);
5835 	}
5836 	dlil_input_packet_list(ifp, in_list);
5837 	return;
5838 }
5839 
5840 static void
5841 adjust_input_packet_list(mbuf_t in_list)
5842 {
5843 	for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5844 		mbuf_pkthdr_setheader(scan, mtod(scan, void *));
5845 		_mbuf_adjust_pkthdr_and_data(scan, ETHER_HDR_LEN);
5846 	}
5847 }
5848 
5849 static bool
5850 in_addr_is_ours(struct in_addr ip)
5851 {
5852 	struct in_ifaddr *ia;
5853 	bool             ours = false;
5854 
5855 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5856 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5857 		if (ia->ia_addr.sin_addr.s_addr == ip.s_addr) {
5858 			ours = true;
5859 			break;
5860 		}
5861 	}
5862 	lck_rw_done(&in_ifaddr_rwlock);
5863 	return ours;
5864 }
5865 
5866 static bool
5867 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5868 {
5869 	struct in6_addr         dst_ip;
5870 	struct in6_ifaddr       *ia6;
5871 	bool                    ours = false;
5872 
5873 	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5874 		/* need to embed scope ID for comparison */
5875 		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5876 		dst_ip.s6_addr16[1] = htons(ifscope);
5877 		ip6_p = &dst_ip;
5878 	}
5879 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5880 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5881 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5882 		    ia6->ia_addr.sin6_scope_id, ifscope)) {
5883 			ours = true;
5884 			break;
5885 		}
5886 	}
5887 	lck_rw_done(&in6_ifaddr_rwlock);
5888 	return ours;
5889 }
5890 
5891 static bool
5892 ip_packet_info_dst_is_our_ip(ip_packet_info_t info_p, int index)
5893 {
5894 	/* if the destination is our IP address, don't segment */
5895 	bool    our_ip = false;
5896 
5897 	if (info_p->ip_is_ipv4) {
5898 		struct ip *     hdr;
5899 		struct in_addr  dst_ip;
5900 
5901 		hdr = (struct ip *)(info_p->ip_hdr);
5902 		bcopy(&hdr->ip_dst, &dst_ip, sizeof(dst_ip));
5903 		our_ip = in_addr_is_ours(dst_ip);
5904 	} else {
5905 		struct ip6_hdr *        hdr;
5906 
5907 		hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5908 		our_ip = in6_addr_is_ours(&hdr->ip6_dst, index);
5909 	}
5910 	return our_ip;
5911 }
5912 
/*
 * ip_addr:
 *	Storage for either an IPv4 or an IPv6 address.  The union does not
 *	record which member is valid; the helpers that take an ip_addr_t
 *	are told through a separate is_ipv4 argument.
 */
typedef union {
	struct in_addr  ip;     /* IPv4 address */
	struct in6_addr ip6;    /* IPv6 address */
} ip_addr, *ip_addr_t;
5917 
5918 static void
5919 ip_packet_info_copy_dst_ip_addr(ip_packet_info_t info_p, ip_addr_t ipaddr)
5920 {
5921 	if (info_p->ip_is_ipv4) {
5922 		struct ip *     hdr;
5923 
5924 		hdr = (struct ip *)(info_p->ip_hdr);
5925 		bcopy(&hdr->ip_dst, &ipaddr->ip, sizeof(ipaddr->ip));
5926 	} else {
5927 		struct ip6_hdr *        hdr;
5928 
5929 		hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5930 		bcopy(&hdr->ip6_dst, &ipaddr->ip6, sizeof(ipaddr->ip6));
5931 	}
5932 }
5933 
5934 static bool
5935 ip_addr_are_equal(ip_addr_t addr1, ip_addr_t addr2, bool is_ipv4)
5936 {
5937 	bool    equal;
5938 
5939 	if (is_ipv4) {
5940 		equal = addr1->ip.s_addr == addr2->ip.s_addr;
5941 	} else {
5942 		equal = IN6_ARE_ADDR_EQUAL(&addr1->ip6, &addr2->ip6);
5943 	}
5944 	return equal;
5945 }
5946 
5947 static bool
5948 ip_addr_is_ours(ip_addr_t ipaddr, int index, bool is_ipv4)
5949 {
5950 	bool    our_ip;
5951 
5952 	if (is_ipv4) {
5953 		our_ip = in_addr_is_ours(ipaddr->ip);
5954 	} else {
5955 		our_ip = in6_addr_is_ours(&ipaddr->ip6, index);
5956 	}
5957 	return our_ip;
5958 }
5959 
5960 static void
5961 bridge_interface_input_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
5962     mblist list, bool bif_uses_virtio)
5963 {
5964 	uint32_t        in_errors = 0;
5965 	bool            is_ipv4;
5966 	mblist          in_list;
5967 	ip_addr         last_ip;
5968 	bool            last_ip_ours = false;
5969 	bool            last_ip_valid = false;
5970 	u_int           mac_hlen;
5971 	bool            may_forward = false;
5972 	mbuf_t          next_packet;
5973 
5974 	switch (etypef) {
5975 	case ETHER_TYPE_FLAG_IPV4:
5976 		is_ipv4 = true;
5977 		may_forward = (ipforwarding != 0);
5978 		break;
5979 	case ETHER_TYPE_FLAG_IPV6:
5980 		is_ipv4 = false;
5981 		may_forward = (ip6_forwarding != 0);
5982 		break;
5983 	}
5984 	if (!may_forward) {
5985 		in_list = list;
5986 		goto done;
5987 	}
5988 
5989 	mblist_init(&in_list);
5990 	mac_hlen = sizeof(struct ether_header);
5991 	bzero(&last_ip, sizeof(last_ip));
5992 	for (mbuf_ref_t scan = list.head; scan != NULL; scan = next_packet) {
5993 		int             error;
5994 		ip_packet_info  info;
5995 		bool            ip_ours;
5996 		struct ifbrmstats stats; /* XXX should really be accounted */
5997 		ip_addr         this_ip;
5998 
5999 		/* take it out of the list */
6000 		next_packet = scan->m_nextpkt;
6001 		scan->m_nextpkt = NULL;
6002 
6003 		/* check for TCP packet and get IP header */
6004 		error = bridge_get_tcp_header(&scan, mac_hlen, is_ipv4,
6005 		    &info, &stats.brms_in_ip);
6006 		if (error != 0) {
6007 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
6008 			    "%s bridge_get_tcp_header failed %d",
6009 			    bridge_ifp->if_xname, error);
6010 			if (scan != NULL) {
6011 				m_freem(scan);
6012 				scan = NULL;
6013 			}
6014 			in_errors++;
6015 			continue;
6016 		}
6017 		ip_packet_info_copy_dst_ip_addr(&info, &this_ip);
6018 		if (last_ip_valid &&
6019 		    ip_addr_are_equal(&last_ip, &this_ip, is_ipv4)) {
6020 			/* use cached result */
6021 			ip_ours = last_ip_ours;
6022 		} else {
6023 			ip_ours = ip_addr_is_ours(&this_ip,
6024 			    bridge_ifp->if_index,
6025 			    is_ipv4);
6026 			/* cache the result */
6027 			last_ip_valid = true;
6028 			last_ip_ours = ip_ours;
6029 			last_ip = this_ip;
6030 		}
6031 
6032 		/* if the packet is destined to us, just send it up */
6033 		if (ip_ours) {
6034 			mblist_append(&in_list, scan);
6035 			continue;
6036 		}
6037 		/*
6038 		 * If this is a TCP packet that's marked for TSO or LRO, or
6039 		 * we think it's a large packet, segment it.
6040 		 */
6041 		if (info.ip_proto_hdr != NULL &&
6042 		    ((bif_uses_virtio && _mbuf_get_tso_mss(scan) != 0) ||
6043 		    (!bif_uses_virtio &&
6044 		    (scan->m_pkthdr.rx_seg_cnt > 1 ||
6045 		    (mbuf_pkthdr_len(scan) >
6046 		    (bridge_ifp->if_mtu + ETHER_HDR_LEN)))))) {
6047 			mblist          seg;
6048 
6049 			seg = gso_tcp_with_info(bridge_ifp, scan, &info,
6050 			    mac_hlen, is_ipv4, false);
6051 			if (seg.head == NULL) {
6052 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
6053 				    "gso_tcp returned no packets");
6054 				in_errors++;
6055 				continue;
6056 			}
6057 			if (seg.count > 1) {
6058 				/* packet was segmented+checksummed */
6059 				mblist_append_list(&in_list, seg);
6060 				continue;
6061 			}
6062 			/* there's just one packet, no segmentation */
6063 			scan = seg.head;
6064 		}
6065 		/* need checksum if it's marked for checksum offload */
6066 		if (bif_uses_virtio &&
6067 		    (scan->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
6068 			error = bridge_offload_checksum(&scan, &info, &stats);
6069 			if (error != 0) {
6070 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6071 				    "%s bridge_offload_checksum failed %d",
6072 				    bridge_ifp->if_xname, error);
6073 				if (scan != NULL) {
6074 					m_freem(scan);
6075 					scan = NULL;
6076 				}
6077 				in_errors++;
6078 				continue;
6079 			}
6080 		}
6081 		mblist_append(&in_list, scan);
6082 	}
6083 
6084 done:
6085 	if (in_list.head != NULL) {
6086 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6087 		    "%s packets %d bytes %d",
6088 		    bridge_ifp->if_xname,
6089 		    in_list.count, in_list.bytes);
6090 		/* Mark the packets as arriving on the bridge interface */
6091 		inject_input_packet_list(bridge_ifp, in_list.head, false);
6092 		ifnet_stat_increment_in(bridge_ifp, in_list.count,
6093 		    in_list.bytes, in_errors);
6094 	} else if (in_errors != 0) {
6095 		ifnet_stat_increment_in(bridge_ifp, 0, 0, in_errors);
6096 	}
6097 	return;
6098 }
6099 
6100 /*
6101  * bridge_broadcast:
6102  *
6103  *	Send a frame to all interfaces that are members of
6104  *	the bridge, except for the one on which the packet
6105  *	arrived.
6106  *
6107  *	NOTE: Releases the lock on return.
6108  */
6109 static void
6110 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6111     ether_type_flag_t etypef, mbuf_t m)
6112 {
6113 	ifnet_t bridge_ifp;
6114 	struct bridge_iflist *dbif;
6115 	struct ifnet * src_if;
6116 	mbuf_ref_t mc;
6117 	struct mbuf *mc_in;
6118 	int error = 0, used = 0;
6119 	ChecksumOperation cksum_op;
6120 	struct mac_nat_record mnr;
6121 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6122 	boolean_t translate_mac = FALSE;
6123 	uint32_t sc_filter_flags;
6124 	bool is_bcast_mcast;
6125 
6126 	bridge_ifp = sc->sc_ifp;
6127 	if (sbif != NULL) {
6128 		src_if = sbif->bif_ifp;
6129 		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6130 		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6131 			/* get the translation record */
6132 			translate_mac
6133 			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6134 			if (m == NULL) {
6135 				/* packet was deallocated */
6136 				BRIDGE_UNLOCK(sc);
6137 				return;
6138 			}
6139 		}
6140 	} else {
6141 		/*
6142 		 * sbif is NULL when the bridge interface calls
6143 		 * bridge_broadcast().
6144 		 */
6145 		cksum_op = CHECKSUM_OPERATION_FINALIZE;
6146 		src_if = NULL;
6147 	}
6148 
6149 	BRIDGE_LOCK2REF(sc, error);
6150 	if (error) {
6151 		m_freem(m);
6152 		return;
6153 	}
6154 	is_bcast_mcast = IS_BCAST_MCAST(m);
6155 	sc_filter_flags = sc->sc_filter_flags;
6156 	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6157 		ifnet_t         dst_if;
6158 
6159 		dst_if = dbif->bif_ifp;
6160 		if (dst_if == src_if) {
6161 			/* skip the interface that the packet came in on */
6162 			continue;
6163 		}
6164 
6165 		/* Private segments can not talk to each other */
6166 		if (sbif != NULL &&
6167 		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6168 			continue;
6169 		}
6170 
6171 		if ((dbif->bif_ifflags & IFBIF_STP) &&
6172 		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6173 			continue;
6174 		}
6175 
6176 		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6177 		    !is_bcast_mcast) {
6178 			continue;
6179 		}
6180 
6181 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6182 			continue;
6183 		}
6184 
6185 		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
6186 			continue;
6187 		}
6188 
6189 		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6190 			mc = m;
6191 			used = 1;
6192 		} else {
6193 			mc = m_dup(m, M_DONTWAIT);
6194 			if (mc == NULL) {
6195 				(void) ifnet_stat_increment_out(bridge_ifp,
6196 				    0, 0, 1);
6197 				continue;
6198 			}
6199 		}
6200 
6201 		/*
6202 		 * If broadcast input is enabled, do so only if this
6203 		 * is an input packet.
6204 		 */
6205 		if (sbif != NULL && is_bcast_mcast &&
6206 		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6207 			mc_in = m_dup(mc, M_DONTWAIT);
6208 			/* this could fail, but we continue anyways */
6209 		} else {
6210 			mc_in = NULL;
6211 		}
6212 
6213 		/* out */
6214 		if (translate_mac && mac_nat_bif == dbif) {
6215 			/* translate the packet */
6216 			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6217 		}
6218 
6219 		if (mc != NULL && sbif != NULL &&
6220 		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6221 			if (used == 0) {
6222 				/* Keep the layer3 header aligned */
6223 				int i = min(mc->m_pkthdr.len, max_protohdr);
6224 				mc = m_copyup(mc, i, ETHER_ALIGN);
6225 				if (mc == NULL) {
6226 					(void) ifnet_stat_increment_out(
6227 						sc->sc_ifp, 0, 0, 1);
6228 					if (mc_in != NULL) {
6229 						m_freem(mc_in);
6230 						mc_in = NULL;
6231 					}
6232 					continue;
6233 				}
6234 			}
6235 			if (bridge_pf(&mc, dst_if, sc_filter_flags, false) != 0) {
6236 				if (mc_in != NULL) {
6237 					m_freem(mc_in);
6238 					mc_in = NULL;
6239 				}
6240 				continue;
6241 			}
6242 			if (mc == NULL) {
6243 				if (mc_in != NULL) {
6244 					m_freem(mc_in);
6245 					mc_in = NULL;
6246 				}
6247 				continue;
6248 			}
6249 		}
6250 
6251 		if (mc != NULL) {
6252 			/* verify checksum if necessary */
6253 			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6254 			    !bif_has_checksum_offload(sbif)) {
6255 				error = bridge_verify_checksum(&mc,
6256 				    &dbif->bif_stats);
6257 				if (error != 0) {
6258 					if (mc != NULL) {
6259 						m_freem(mc);
6260 					}
6261 					mc = NULL;
6262 				}
6263 			}
6264 			if (mc != NULL) {
6265 				(void) bridge_enqueue(bridge_ifp,
6266 				    NULL, dst_if, etypef, mc, cksum_op,
6267 				    pkt_direction_TX);
6268 			}
6269 		}
6270 
6271 		/* in */
6272 		if (mc_in == NULL) {
6273 			continue;
6274 		}
6275 		BRIDGE_BPF_TAP_IN(dst_if, mc_in);
6276 		prepare_input_packet(dst_if, mc_in);
6277 		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6278 		dlil_input_packet_list(dst_if, mc_in);
6279 	}
6280 	if (used == 0) {
6281 		m_freem(m);
6282 	}
6283 
6284 
6285 	BRIDGE_UNREF(sc);
6286 }
6287 
6288 static mbuf_t
6289 copy_packet_list(mbuf_t m)
6290 {
6291 	mblist  ret;
6292 	mbuf_t  next_packet;
6293 
6294 	mblist_init(&ret);
6295 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
6296 		mbuf_t  copy_m;
6297 
6298 		/* take it out of the list */
6299 		next_packet = scan->m_nextpkt;
6300 		scan->m_nextpkt = NULL;
6301 
6302 		/* create a copy and add it to the new list */
6303 		copy_m = m_dup(scan, M_DONTWAIT);
6304 		if (copy_m != NULL) {
6305 			mblist_append(&ret, copy_m);
6306 		}
6307 
6308 		/* put it back in the original list */
6309 		scan->m_nextpkt = next_packet;
6310 	}
6311 	return ret.head;
6312 }
6313 
/*
 * bridge_broadcast_list:
 *
 *      Broadcast a list of packets to all members except `sbif`.
 *      Consumes `m` before returning.
 *
 *	sc		the bridge
 *	sbif		member the packets arrived on, or NULL when the
 *			packets originate from the bridge interface itself
 *	etypef		ether type classification of the packets in `m`
 *	m		packet list (linked via m_nextpkt)
 *	direction	passed through unchanged to bridge_enqueue()
 *
 *	For every eligible member a copy of the list is sent out; the
 *	original list is handed to the last member in sc_iflist when that
 *	member is eligible and is not the MAC-NAT member.  Whatever is not
 *	handed off is freed at `done`.
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ether_type_flag_t etypef, mbuf_t m, pkt_direction_t direction)
{
	ifnet_t                 bridge_ifp;
	bool                    bridge_needs_input;
	struct bridge_iflist *  dbif;
	bool                    is_bcast_mcast;
	errno_t                 error = 0;
	ChecksumOperation       cksum_op;
	struct bridge_iflist *  mac_nat_bif = sc->sc_mac_nat_bif;
	ifnet_t                 mac_nat_if = NULL;
	bool                    need_mac_nat = false;
	mbuf_t                  out_mac_nat = NULL;
	ifnet_t                 src_if;
	uint32_t                sc_filter_flags;
	bool                    used = false;   /* true once `m` itself was handed off */

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		src_if = sbif->bif_ifp;

		if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
			bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

			/* compute checksum on packets marked with offload */
			m = bridge_checksum_offload_list(bridge_ifp, sbif,
			    m, is_ipv4);
			if (m == NULL) {
				/*
				 * Nothing left to send.  The BRIDGE_LOCK2REF()
				 * below is skipped on this path, so drop the
				 * lock explicitly.
				 */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			cksum_op = CHECKSUM_OPERATION_NONE;
		} else {
			cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		}

		/*
		 * If MAC-NAT is enabled and we'll be sending the packets
		 * over it, verify that it is up and active before
		 * deciding to make a translated copy.
		 */
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			mac_nat_if = mac_nat_bif->bif_ifp;
			if ((mac_nat_if->if_flags & IFF_RUNNING) != 0 &&
			    (mac_nat_bif->bif_flags & BIFF_MEDIA_ACTIVE) != 0) {
				need_mac_nat = true;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast_list() (TBD).
		 */
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		src_if = NULL;
	}

	/*
	 * Create a translated copy for packets destined to MAC-NAT interface.
	 */
	if (need_mac_nat) {
		out_mac_nat
		        = bridge_mac_nat_copy_and_translate_list(sc, sbif,
		    mac_nat_if, m);
	}
	/* snapshot the softc state needed below before giving up the lock */
	sc_filter_flags = sc->sc_filter_flags;
	bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
	/*
	 * NOTE(review): BRIDGE_LOCK2REF() presumably trades the held lock
	 * for a reference and leaves the lock released on failure as well
	 * (the error path below does not unlock) -- confirm against the macro.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		goto done;
	}
	is_bcast_mcast = IS_BCAST_MCAST(m);

	/* make a copy for the bridge interface */
	if (sbif != NULL && is_bcast_mcast && bridge_needs_input) {
		mbuf_t  in_list;

		in_list = copy_packet_list(m);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
		    "%s mcast for us in_m %p",
		    bridge_ifp->if_xname, in_list);
		if (in_list != NULL) {
			inject_input_packet_list(bridge_ifp, in_list, false);
		}
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		ifnet_t         dst_if;
		mbuf_t          in_m = NULL;
		mbuf_t          out_m = NULL;

		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip members whose STP port state is discarding */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/*
		 * Packets that are not broadcast/multicast only go to
		 * members that have IFBIF_DISCOVER set.
		 */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    !is_bcast_mcast) {
			continue;
		}

		/* skip members that are not running */
		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		/* skip members without BIFF_MEDIA_ACTIVE */
		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
			continue;
		}
		if (dbif == mac_nat_bif) {
			/*
			 * translated copy was created above, use that;
			 * out_mac_nat is still NULL if no copy was requested
			 * (need_mac_nat false), in which case the NULL check
			 * below counts an output error and skips this member
			 */
			out_m = out_mac_nat;
			out_mac_nat = NULL;
		} else if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			/* consume `m`: this is the last member in the list */
			out_m = m;
			used = true;
		} else {
			/* needs a copy */
			out_m = copy_packet_list(m);
		}

		if (out_m == NULL) {
			ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
			continue;
		}
		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (sbif != NULL && is_bcast_mcast &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			/*
			 * copied from `m` before out_m is filtered or
			 * enqueued, so this works even when out_m == m
			 */
			in_m = copy_packet_list(m);
			/* this could fail, but we continue anyways */
		} else {
			in_m = NULL;
		}

		/* run the outbound packet filter on the member, if enabled */
		if (sbif != NULL &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			out_m = bridge_pf_list_out(out_m, dst_if,
			    sc_filter_flags);
		}
		if (out_m != NULL) {
			/* verify checksum if necessary */
			if (sbif != NULL &&
			    ether_type_flag_is_ip(etypef) &&
			    bif_has_checksum_offload(dbif) &&
			    !bif_has_checksum_offload(sbif)) {
				bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

				out_m = bridge_verify_checksum_list(bridge_ifp,
				    dbif, out_m, is_ipv4);
			}
			if (out_m != NULL) {
				bridge_enqueue(bridge_ifp, src_if, dst_if,
				    etypef, out_m, cksum_op, direction);
			}
		}

		/* in */
		if (in_m != NULL) {
			inject_input_packet_list(dst_if, in_m, true);
		}
	}

	BRIDGE_UNREF(sc);

done:
	/* free the translated copy if no member took it */
	if (out_mac_nat != NULL) {
		m_freem_list(out_mac_nat);
	}
	/* free the original list unless it was handed to the last member */
	if (!used) {
		m_freem_list(m);
	}
	return;
}
6511 
6512 #define NEEDED_CSUM_IPV4   (IF_HWASSIST_CSUM_UDP | IF_HWASSIST_CSUM_TCP)
6513 #define NEEDED_CSUM_IPV6   (IF_HWASSIST_CSUM_UDPIPV6 | IF_HWASSIST_CSUM_TCPIPV6)
6514 
6515 static bool
6516 interface_supports_hw_checksum(ifnet_t ifp, bool is_ipv4)
6517 {
6518 	uint32_t        hwcap = IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
6519 	uint32_t        needed = is_ipv4 ? NEEDED_CSUM_IPV4 : NEEDED_CSUM_IPV6;
6520 	bool            supports;
6521 
6522 	supports = (hwcap & needed) == needed;
6523 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM, "%s: does %ssupport checksum",
6524 	    ifp->if_xname, supports ? "" : "not ");
6525 	return supports;
6526 }
6527 
/*
 * bridge_forward_list:
 *
 *	Forward the packet list `m`, received on member `sbif`, to the
 *	single member interface `dst_if`.
 *
 *	The list is dropped if `dst_if` is not running, is not a member
 *	of the bridge, or if both the source and destination members are
 *	marked private.  Otherwise checksums are computed or verified as
 *	needed, MAC-NAT translation is applied when the destination is the
 *	MAC-NAT member, the outbound packet filter is run, and the result
 *	is passed to bridge_enqueue().
 *
 *	NOTE: Releases the lock on return (both the normal and the drop
 *	path call BRIDGE_UNLOCK()).
 */
static void
bridge_forward_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ifnet_t dst_if, ether_type_flag_t etypef, mbuf_t m)
{
	bool                    checksum_ok = false;
	ChecksumOperation       cksum_op;
	ifnet_t                 bridge_ifp = NULL;
	struct bridge_iflist *  dbif;
	uint32_t                sc_filter_flags;
	ifnet_t                 src_if;
	drop_reason_t           drop_reason = DROP_REASON_BRIDGE_UNSPECIFIED;

	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
		drop_reason = DROP_REASON_BRIDGE_NOT_RUNNING;
		goto drop;
	}
	dbif = bridge_lookup_member_if(sc, dst_if);
	if (dbif == NULL) {
		/* Not a member of the bridge (anymore?) */
		drop_reason = DROP_REASON_BRIDGE_NOT_A_MEMBER;
		goto drop;
	}

	/* Private segments can not talk to each other */
	if ((sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) != 0) {
		drop_reason = DROP_REASON_BRIDGE_PRIVATE_SEGMENT;
		goto drop;
	}
	bridge_ifp = sc->sc_ifp;
	src_if = sbif->bif_ifp;
	cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
	if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
		bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

		/*
		 * Packets from a virtio source may still carry checksum
		 * offload requests.  Compute the checksums here when the
		 * destination is the MAC-NAT member, is a vmnet interface
		 * that does not use virtio, or lacks hardware checksum
		 * support; otherwise leave the offload flags for the
		 * destination.
		 */
		if (dbif == sc->sc_mac_nat_bif ||
		    (IFNET_IS_VMNET(dst_if) && !bif_uses_virtio(dbif)) ||
		    !interface_supports_hw_checksum(dst_if, is_ipv4)) {
			/* compute checksums now if necessary */
			/*
			 * NOTE(review): bridge_broadcast_list() checks for a
			 * NULL result from bridge_checksum_offload_list();
			 * here the result is passed on unchecked until the
			 * final `m != NULL` test -- confirm the helpers
			 * called in between accept a NULL list.
			 */
			m = bridge_checksum_offload_list(bridge_ifp, sbif,
			    m, is_ipv4);
			checksum_ok = true;
		} else {
			cksum_op = CHECKSUM_OPERATION_NONE;
		}
	}

	if (dbif == sc->sc_mac_nat_bif) {
		/* translate the packets before forwarding them */
		if ((etypef & ETHER_TYPE_FLAG_IP_ARP) != 0) {
			m = bridge_mac_nat_translate_list(sc, sbif, dst_if, m);
		}
	} else if (!checksum_ok && ether_type_flag_is_ip(etypef) &&
	    bif_has_checksum_offload(dbif) && !bif_has_checksum_offload(sbif)) {
		bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

		/*
		 * If the destination interface has checksum offload enabled,
		 * verify the checksum now, unless the source interface also has
		 * checksum offload enabled. The checksum in that case has
		 * already just been computed and verifying it is unnecessary.
		 */
		m = bridge_verify_checksum_list(bridge_ifp, dbif, m, is_ipv4);
	}
	/* snapshot the filter flags while the lock is still held */
	sc_filter_flags = sc->sc_filter_flags;
	BRIDGE_UNLOCK(sc);
	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
		m = bridge_pf_list_out(m, dst_if, sc_filter_flags);
	}

	/*
	 * We're forwarding inbound packets for which the checksums must
	 * already have been computed and if required, verified, or
	 * packets from a virtio-enabled interface for which we rely
	 * on the packet containing appropriate offload flags.
	 */
	if (m != NULL) {
		bridge_enqueue(bridge_ifp, src_if, dst_if, etypef, m,
		    cksum_op, pkt_direction_RX);
	}
	return;

drop:
	BRIDGE_UNLOCK(sc);
	/*
	 * NOTE(review): every `goto drop` above happens before bridge_ifp
	 * is assigned, so bridge_ifp is always NULL here -- confirm that
	 * m_drop_list() is meant to be called without an interface.
	 */
	m_drop_list(m, bridge_ifp, DROPTAP_FLAG_DIR_IN, drop_reason, NULL, 0);
	return;
}
6614 
6615 /*
6616  * bridge_span:
6617  *
6618  *	Duplicate a packet out one or more interfaces that are in span mode,
6619  *	the original mbuf is unmodified.
6620  */
6621 static void
6622 bridge_span(struct bridge_softc *sc, ether_type_flag_t etypef, struct mbuf *m)
6623 {
6624 	struct bridge_iflist *bif;
6625 	struct ifnet *dst_if;
6626 	struct mbuf *mc;
6627 
6628 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6629 		return;
6630 	}
6631 
6632 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6633 		dst_if = bif->bif_ifp;
6634 
6635 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6636 			continue;
6637 		}
6638 
6639 		mc = m_copypacket(m, M_DONTWAIT);
6640 		if (mc == NULL) {
6641 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6642 			continue;
6643 		}
6644 
6645 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, etypef, mc,
6646 		    CHECKSUM_OPERATION_NONE, pkt_direction_TX);
6647 	}
6648 }
6649 
6650 /*
6651  * bridge_rtupdate:
6652  *
6653  *	Add a bridge routing entry.
6654  */
6655 static int
6656 bridge_rtupdate(struct bridge_softc *sc, const uint8_t dst[ETHER_ADDR_LEN], uint16_t vlan,
6657     struct bridge_iflist *bif, int setflags, uint8_t flags)
6658 {
6659 	struct bridge_rtnode *brt;
6660 	int error;
6661 
6662 	BRIDGE_LOCK_ASSERT_HELD(sc);
6663 
6664 	/* Check the source address is valid and not multicast. */
6665 	if (ETHER_IS_MULTICAST(dst) ||
6666 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6667 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6668 		return EINVAL;
6669 	}
6670 
6671 	/* 802.1p frames map to vlan 1 */
6672 	if (vlan == 0) {
6673 		vlan = 1;
6674 	}
6675 
6676 	/*
6677 	 * A route for this destination might already exist.  If so,
6678 	 * update it, otherwise create a new one.
6679 	 */
6680 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6681 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6682 			sc->sc_brtexceeded++;
6683 			return ENOSPC;
6684 		}
6685 		/* Check per interface address limits (if enabled) */
6686 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6687 			bif->bif_addrexceeded++;
6688 			return ENOSPC;
6689 		}
6690 
6691 		/*
6692 		 * Allocate a new bridge forwarding node, and
6693 		 * initialize the expiration time and Ethernet
6694 		 * address.
6695 		 */
6696 		brt = zalloc_noblock(bridge_rtnode_pool);
6697 		if (brt == NULL) {
6698 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6699 			    "zalloc_nolock failed");
6700 			return ENOMEM;
6701 		}
6702 		bzero(brt, sizeof(struct bridge_rtnode));
6703 
6704 		if (bif->bif_ifflags & IFBIF_STICKY) {
6705 			brt->brt_flags = IFBAF_STICKY;
6706 		} else {
6707 			brt->brt_flags = IFBAF_DYNAMIC;
6708 		}
6709 
6710 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6711 		brt->brt_vlan = vlan;
6712 
6713 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6714 			zfree(bridge_rtnode_pool, brt);
6715 			return error;
6716 		}
6717 		brt->brt_dst = bif;
6718 		bif->bif_addrcnt++;
6719 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6720 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6721 		    "on %s count %u hashsize %u",
6722 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6723 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6724 		    sc->sc_rthash_size);
6725 	}
6726 
6727 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6728 	    brt->brt_dst != bif) {
6729 		brt->brt_dst->bif_addrcnt--;
6730 		brt->brt_dst = bif;
6731 		brt->brt_dst->bif_addrcnt++;
6732 	}
6733 
6734 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6735 		unsigned long now;
6736 
6737 		now = (unsigned long) net_uptime();
6738 		brt->brt_expire = now + sc->sc_brttimeout;
6739 	}
6740 	if (setflags) {
6741 		brt->brt_flags = flags;
6742 	}
6743 
6744 	return 0;
6745 }
6746 
6747 /*
6748  * bridge_rtlookup:
6749  *
6750  *	Lookup the destination interface for an address.
6751  */
6752 static struct bridge_iflist *
6753 bridge_rtlookup_bif(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
6754     uint16_t vlan)
6755 {
6756 	struct bridge_rtnode *brt;
6757 
6758 	BRIDGE_LOCK_ASSERT_HELD(sc);
6759 
6760 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6761 		return NULL;
6762 	}
6763 
6764 	return brt->brt_dst;
6765 }
6766 
6767 /*
6768  * bridge_rttrim:
6769  *
6770  *	Trim the routine table so that we have a number
6771  *	of routing entries less than or equal to the
6772  *	maximum number.
6773  */
6774 static void
6775 bridge_rttrim(struct bridge_softc *sc)
6776 {
6777 	struct bridge_rtnode *brt, *nbrt;
6778 
6779 	BRIDGE_LOCK_ASSERT_HELD(sc);
6780 
6781 	/* Make sure we actually need to do this. */
6782 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6783 		return;
6784 	}
6785 
6786 	/* Force an aging cycle; this might trim enough addresses. */
6787 	bridge_rtage(sc);
6788 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6789 		return;
6790 	}
6791 
6792 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6793 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6794 			bridge_rtnode_destroy(sc, brt);
6795 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6796 				return;
6797 			}
6798 		}
6799 	}
6800 }
6801 
6802 /*
6803  * bridge_aging_timer:
6804  *
6805  *	Aging periodic timer for the bridge routing table.
6806  */
6807 static void
6808 bridge_aging_timer(struct bridge_softc *sc)
6809 {
6810 	BRIDGE_LOCK_ASSERT_HELD(sc);
6811 
6812 	bridge_rtage(sc);
6813 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6814 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6815 		sc->sc_aging_timer.bdc_sc = sc;
6816 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6817 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6818 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6819 	}
6820 }
6821 
6822 /*
6823  * bridge_rtage:
6824  *
6825  *	Perform an aging cycle.
6826  */
6827 static void
6828 bridge_rtage(struct bridge_softc *sc)
6829 {
6830 	struct bridge_rtnode *brt, *nbrt;
6831 	unsigned long now;
6832 
6833 	BRIDGE_LOCK_ASSERT_HELD(sc);
6834 
6835 	now = (unsigned long) net_uptime();
6836 
6837 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6838 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6839 			if (now >= brt->brt_expire) {
6840 				bridge_rtnode_destroy(sc, brt);
6841 			}
6842 		}
6843 	}
6844 	if (sc->sc_mac_nat_bif != NULL) {
6845 		bridge_mac_nat_age_entries(sc, now);
6846 	}
6847 }
6848 
6849 /*
6850  * bridge_rtflush:
6851  *
6852  *	Remove all dynamic addresses from the bridge.
6853  */
6854 static void
6855 bridge_rtflush(struct bridge_softc *sc, int full)
6856 {
6857 	struct bridge_rtnode *brt, *nbrt;
6858 
6859 	BRIDGE_LOCK_ASSERT_HELD(sc);
6860 
6861 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6862 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6863 			bridge_rtnode_destroy(sc, brt);
6864 		}
6865 	}
6866 }
6867 
6868 /*
6869  * bridge_rtdaddr:
6870  *
6871  *	Remove an address from the table.
6872  */
6873 static int
6874 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN], uint16_t vlan)
6875 {
6876 	struct bridge_rtnode *brt;
6877 	int found = 0;
6878 
6879 	BRIDGE_LOCK_ASSERT_HELD(sc);
6880 
6881 	/*
6882 	 * If vlan is zero then we want to delete for all vlans so the lookup
6883 	 * may return more than one.
6884 	 */
6885 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6886 		bridge_rtnode_destroy(sc, brt);
6887 		found = 1;
6888 	}
6889 
6890 	return found ? 0 : ENOENT;
6891 }
6892 
6893 /*
6894  * bridge_rtdelete:
6895  *
6896  *	Delete routes to a specific member interface.
6897  */
6898 static void
6899 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6900 {
6901 	struct bridge_rtnode *brt, *nbrt;
6902 
6903 	BRIDGE_LOCK_ASSERT_HELD(sc);
6904 
6905 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6906 		if (brt->brt_ifp == ifp && (full ||
6907 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6908 			bridge_rtnode_destroy(sc, brt);
6909 		}
6910 	}
6911 }
6912 
6913 /*
6914  * bridge_rtable_init:
6915  *
6916  *	Initialize the route table for this bridge.
6917  */
6918 static int
6919 bridge_rtable_init(struct bridge_softc *sc)
6920 {
6921 	u_int32_t i;
6922 
6923 	sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6924 	    BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6925 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6926 
6927 	for (i = 0; i < sc->sc_rthash_size; i++) {
6928 		LIST_INIT(&sc->sc_rthash[i]);
6929 	}
6930 
6931 	sc->sc_rthash_key = RandomULong();
6932 
6933 	LIST_INIT(&sc->sc_rtlist);
6934 
6935 	return 0;
6936 }
6937 
6938 /*
6939  * bridge_rthash_delayed_resize:
6940  *
6941  *	Resize the routing table hash on a delayed thread call.
6942  */
6943 static void
6944 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6945 {
6946 	u_int32_t new_rthash_size = 0;
6947 	u_int32_t old_rthash_size = 0;
6948 	struct _bridge_rtnode_list *new_rthash = NULL;
6949 	struct _bridge_rtnode_list *old_rthash = NULL;
6950 	u_int32_t i;
6951 	struct bridge_rtnode *brt;
6952 	int error = 0;
6953 
6954 	BRIDGE_LOCK_ASSERT_HELD(sc);
6955 
6956 	/*
6957 	 * Four entries per hash bucket is our ideal load factor
6958 	 */
6959 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6960 		goto out;
6961 	}
6962 
6963 	/*
6964 	 * Doubling the number of hash buckets may be too simplistic
6965 	 * especially when facing a spike of new entries
6966 	 */
6967 	new_rthash_size = sc->sc_rthash_size * 2;
6968 
6969 	sc->sc_flags |= SCF_RESIZING;
6970 	BRIDGE_UNLOCK(sc);
6971 
6972 	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6973 	    Z_WAITOK | Z_ZERO);
6974 
6975 	BRIDGE_LOCK(sc);
6976 	sc->sc_flags &= ~SCF_RESIZING;
6977 
6978 	if (new_rthash == NULL) {
6979 		error = ENOMEM;
6980 		goto out;
6981 	}
6982 	if ((sc->sc_flags & SCF_DETACHING)) {
6983 		error = ENODEV;
6984 		goto out;
6985 	}
6986 	/*
6987 	 * Fail safe from here on
6988 	 */
6989 	old_rthash = sc->sc_rthash;
6990 	old_rthash_size = sc->sc_rthash_size;
6991 	sc->sc_rthash = new_rthash;
6992 	sc->sc_rthash_size = new_rthash_size;
6993 
6994 	/*
6995 	 * Get a new key to force entries to be shuffled around to reduce
6996 	 * the likelihood they will land in the same buckets
6997 	 */
6998 	sc->sc_rthash_key = RandomULong();
6999 
7000 	for (i = 0; i < sc->sc_rthash_size; i++) {
7001 		LIST_INIT(&sc->sc_rthash[i]);
7002 	}
7003 
7004 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7005 		LIST_REMOVE(brt, brt_hash);
7006 		(void) bridge_rtnode_hash(sc, brt);
7007 	}
7008 out:
7009 	if (error == 0) {
7010 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7011 		    "%s new size %u",
7012 		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
7013 		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
7014 	} else {
7015 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
7016 		    "%s failed %d", sc->sc_ifp->if_xname, error);
7017 		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
7018 	}
7019 }
7020 
7021 /*
7022  * Resize the number of hash buckets based on the load factor
7023  * Currently only grow
7024  * Failing to resize the hash table is not fatal
7025  */
7026 static void
7027 bridge_rthash_resize(struct bridge_softc *sc)
7028 {
7029 	BRIDGE_LOCK_ASSERT_HELD(sc);
7030 
7031 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
7032 		return;
7033 	}
7034 
7035 	/*
7036 	 * Four entries per hash bucket is our ideal load factor
7037 	 */
7038 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
7039 		return;
7040 	}
7041 	/*
7042 	 * Hard limit on the size of the routing hash table
7043 	 */
7044 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
7045 		return;
7046 	}
7047 
7048 	sc->sc_resize_call.bdc_sc = sc;
7049 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
7050 	bridge_schedule_delayed_call(&sc->sc_resize_call);
7051 }
7052 
/*
 * bridge_rtable_fini:
 *
 *	Deconstruct the route table for this bridge: free the array of
 *	hash buckets and reset the bucket pointer and count in the softc.
 *	All routes must already have been removed (sc_brtcnt == 0).
 */
static void
bridge_rtable_fini(struct bridge_softc *sc)
{
	/* no route may still be in the table at this point */
	KASSERT(sc->sc_brtcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
	kfree_type_counted_by(struct _bridge_rtnode_list, sc->sc_rthash_size,
	    sc->sc_rthash);
	sc->sc_rthash = NULL;
	sc->sc_rthash_size = 0;
}
7068 
7069 /*
7070  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
7071  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
7072  */
7073 #define mix(a, b, c)                                                    \
7074 do {                                                                    \
7075 	a -= b; a -= c; a ^= (c >> 13);                                 \
7076 	b -= c; b -= a; b ^= (a << 8);                                  \
7077 	c -= a; c -= b; c ^= (b >> 13);                                 \
7078 	a -= b; a -= c; a ^= (c >> 12);                                 \
7079 	b -= c; b -= a; b ^= (a << 16);                                 \
7080 	c -= a; c -= b; c ^= (b >> 5);                                  \
7081 	a -= b; a -= c; a ^= (c >> 3);                                  \
7082 	b -= c; b -= a; b ^= (a << 10);                                 \
7083 	c -= a; c -= b; c ^= (b >> 15);                                 \
7084 } while ( /*CONSTCOND*/ 0)
7085 
7086 static __inline uint32_t
7087 bridge_rthash(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN])
7088 {
7089 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7090 
7091 	b += addr[5] << 8;
7092 	b += addr[4];
7093 	a += addr[3] << 24;
7094 	a += addr[2] << 16;
7095 	a += addr[1] << 8;
7096 	a += addr[0];
7097 
7098 	mix(a, b, c);
7099 
7100 	return c & BRIDGE_RTHASH_MASK(sc);
7101 }
7102 
7103 #undef mix
7104 
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns zero if
 *	they are equal, otherwise the difference a[i] - b[i] of the
 *	first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t a[ETHER_ADDR_LEN], const uint8_t b[ETHER_ADDR_LEN])
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int diff = (int)a[idx] - (int)b[idx];

		if (diff != 0) {
			return diff;
		}
	}

	return 0;
}
7116 
7117 /*
7118  * bridge_rtnode_lookup:
7119  *
7120  *	Look up a bridge route node for the specified destination. Compare the
7121  *	vlan id or if zero then just return the first match.
7122  */
7123 static struct bridge_rtnode *
7124 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
7125     uint16_t vlan)
7126 {
7127 	struct bridge_rtnode *brt;
7128 	uint32_t hash;
7129 	int dir;
7130 
7131 	BRIDGE_LOCK_ASSERT_HELD(sc);
7132 
7133 	hash = bridge_rthash(sc, addr);
7134 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7135 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7136 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7137 			return brt;
7138 		}
7139 		if (dir > 0) {
7140 			return NULL;
7141 		}
7142 	}
7143 
7144 	return NULL;
7145 }
7146 
7147 /*
7148  * bridge_rtnode_hash:
7149  *
7150  *	Insert the specified bridge node into the route hash table.
7151  *	This is used when adding a new node or to rehash when resizing
7152  *	the hash table
7153  */
7154 static int
7155 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7156 {
7157 	struct bridge_rtnode *lbrt;
7158 	uint32_t hash;
7159 	int dir;
7160 
7161 	BRIDGE_LOCK_ASSERT_HELD(sc);
7162 
7163 	hash = bridge_rthash(sc, brt->brt_addr);
7164 
7165 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7166 	if (lbrt == NULL) {
7167 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7168 		goto out;
7169 	}
7170 
7171 	do {
7172 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7173 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7174 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7175 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7176 			    sc->sc_ifp->if_xname,
7177 			    brt->brt_addr[0], brt->brt_addr[1],
7178 			    brt->brt_addr[2], brt->brt_addr[3],
7179 			    brt->brt_addr[4], brt->brt_addr[5]);
7180 			return EEXIST;
7181 		}
7182 		if (dir > 0) {
7183 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7184 			goto out;
7185 		}
7186 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7187 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7188 			goto out;
7189 		}
7190 		lbrt = LIST_NEXT(lbrt, brt_hash);
7191 	} while (lbrt != NULL);
7192 
7193 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7194 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7195 	    sc->sc_ifp->if_xname,
7196 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7197 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7198 out:
7199 	return 0;
7200 }
7201 
7202 /*
7203  * bridge_rtnode_insert:
7204  *
7205  *	Insert the specified bridge node into the route table.  We
7206  *	assume the entry is not already in the table.
7207  */
7208 static int
7209 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7210 {
7211 	int error;
7212 
7213 	error = bridge_rtnode_hash(sc, brt);
7214 	if (error != 0) {
7215 		return error;
7216 	}
7217 
7218 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7219 	sc->sc_brtcnt++;
7220 
7221 	bridge_rthash_resize(sc);
7222 
7223 	return 0;
7224 }
7225 
7226 /*
7227  * bridge_rtnode_destroy:
7228  *
7229  *	Destroy a bridge rtnode.
7230  */
7231 static void
7232 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7233 {
7234 	BRIDGE_LOCK_ASSERT_HELD(sc);
7235 
7236 	LIST_REMOVE(brt, brt_hash);
7237 
7238 	LIST_REMOVE(brt, brt_list);
7239 	sc->sc_brtcnt--;
7240 	brt->brt_dst->bif_addrcnt--;
7241 	zfree(bridge_rtnode_pool, brt);
7242 }
7243 
7244 #if BRIDGESTP
7245 /*
7246  * bridge_rtable_expire:
7247  *
7248  *	Set the expiry time for all routes on an interface.
7249  */
7250 static void
7251 bridge_rtable_expire(struct ifnet *ifp, int age)
7252 {
7253 	struct bridge_softc *sc = ifp->if_bridge;
7254 	struct bridge_rtnode *brt;
7255 
7256 	BRIDGE_LOCK(sc);
7257 
7258 	/*
7259 	 * If the age is zero then flush, otherwise set all the expiry times to
7260 	 * age for the interface
7261 	 */
7262 	if (age == 0) {
7263 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7264 	} else {
7265 		unsigned long now;
7266 
7267 		now = (unsigned long) net_uptime();
7268 
7269 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7270 			/* Cap the expiry time to 'age' */
7271 			if (brt->brt_ifp == ifp &&
7272 			    brt->brt_expire > now + age &&
7273 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7274 				brt->brt_expire = now + age;
7275 			}
7276 		}
7277 	}
7278 	BRIDGE_UNLOCK(sc);
7279 }
7280 
7281 /*
7282  * bridge_state_change:
7283  *
7284  *	Callback from the bridgestp code when a port changes states.
7285  */
7286 static void
7287 bridge_state_change(struct ifnet *ifp, int state)
7288 {
7289 	struct bridge_softc *sc = ifp->if_bridge;
7290 	static const char *stpstates[] = {
7291 		"disabled",
7292 		"listening",
7293 		"learning",
7294 		"forwarding",
7295 		"blocking",
7296 		"discarding"
7297 	};
7298 
7299 	if (log_stp) {
7300 		log(LOG_NOTICE, "%s: state changed to %s on %s",
7301 		    sc->sc_ifp->if_xname,
7302 		    stpstates[state], ifp->if_xname);
7303 	}
7304 }
7305 #endif /* BRIDGESTP */
7306 
7307 /*
7308  * bridge_detach:
7309  *
7310  *	Callback when interface has been detached.
7311  */
7312 static void
7313 bridge_detach(ifnet_t ifp)
7314 {
7315 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7316 
7317 #if BRIDGESTP
7318 	bstp_detach(&sc->sc_stp);
7319 #endif /* BRIDGESTP */
7320 
7321 	/* Tear down the routing table. */
7322 	bridge_rtable_fini(sc);
7323 
7324 	lck_mtx_lock(&bridge_list_mtx);
7325 	LIST_REMOVE(sc, sc_list);
7326 	lck_mtx_unlock(&bridge_list_mtx);
7327 
7328 	ifnet_release(ifp);
7329 
7330 	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7331 	kfree_type(struct bridge_softc, sc);
7332 }
7333 
7334 /*
7335  * bridge_link_event:
7336  *
7337  *	Report a data link event on an interface
7338  */
7339 static void
7340 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7341 {
7342 	struct event {
7343 		u_int32_t ifnet_family;
7344 		u_int32_t unit;
7345 		char if_name[IFNAMSIZ];
7346 	};
7347 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7348 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7349 	struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
7350 
7351 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7352 	    "%s event_code %u - %s", ifp->if_xname,
7353 	    event_code, dlil_kev_dl_code_str(event_code));
7354 	header->total_size   = sizeof(message);
7355 	header->vendor_code  = KEV_VENDOR_APPLE;
7356 	header->kev_class    = KEV_NETWORK_CLASS;
7357 	header->kev_subclass = KEV_DL_SUBCLASS;
7358 	header->event_code   = event_code;
7359 	data->ifnet_family   = ifnet_family(ifp);
7360 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7361 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7362 	ifnet_event(ifp, header);
7363 }
7364 
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host-filter drop: bump the named counter in
 *	bridge_hostfilter_stats, log the call site together with the
 *	reason name, and set the caller's local `error' to EINVAL.
 *	The caller must have an `error' variable in scope.
 *
 *	Wrapped in do { } while (0) so that the macro behaves as a single
 *	statement (safe in an unbraced if/else); invoke it with a
 *	trailing semicolon.
 */
#define BRIDGE_HF_DROP(reason, func, line) do {                         \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d " #reason, func, line);               \
	        error = EINVAL;                                         \
} while (0)
7371 
7372 static int
7373 bridge_host_filter_arp(struct bridge_iflist *bif, mbuf_t *data)
7374 {
7375 	struct ether_arp *ea;
7376 	struct ether_header *eh;
7377 	int error = EINVAL;
7378 	mbuf_t m = *data;
7379 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7380 
7381 	/*
7382 	 * Make the Ethernet and ARP headers contiguous
7383 	 */
7384 	if (mbuf_pkthdr_len(m) < minlen) {
7385 		BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7386 		goto done;
7387 	}
7388 	if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7389 		BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7390 		    __func__, __LINE__);
7391 		goto done;
7392 	}
7393 	m = *data;
7394 
7395 	/*
7396 	 * Restrict Ethernet protocols to ARP and IP/IPv6
7397 	 */
7398 	eh = mtod(m, struct ether_header *);
7399 	ea = (struct ether_arp *)(eh + 1);
7400 	if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7401 		BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7402 		    __func__, __LINE__);
7403 		goto done;
7404 	}
7405 	if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7406 		BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7407 		    __func__, __LINE__);
7408 		goto done;
7409 	}
7410 	/*
7411 	 * Verify the address lengths are correct
7412 	 */
7413 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7414 		BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7415 		goto done;
7416 	}
7417 	if (ea->arp_pln != sizeof(struct in_addr)) {
7418 		BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7419 		    __func__, __LINE__);
7420 		goto done;
7421 	}
7422 	/*
7423 	 * Allow only ARP request or ARP reply
7424 	 */
7425 	if (ea->arp_op != HTONS_ARPOP_REQUEST &&
7426 	    ea->arp_op != HTONS_ARPOP_REPLY) {
7427 		BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7428 		goto done;
7429 	}
7430 	if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7431 		/*
7432 		 * Verify source hardware address matches
7433 		 */
7434 		if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7435 		    ETHER_ADDR_LEN) != 0) {
7436 			BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7437 			goto done;
7438 		}
7439 	}
7440 	if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7441 		/*
7442 		 * Verify source protocol address:
7443 		 * May be null for an ARP probe
7444 		 */
7445 		if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7446 		    sizeof(struct in_addr)) != 0 &&
7447 		    bcmp(ea->arp_spa, &inaddr_any,
7448 		    sizeof(struct in_addr)) != 0) {
7449 			BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7450 			goto done;
7451 		}
7452 	}
7453 	bridge_hostfilter_stats.brhf_arp_ok += 1;
7454 	error = 0;
7455 done:
7456 	return error;
7457 }
7458 
7459 /*
7460  * MAC NAT
7461  */
7462 
7463 static errno_t
7464 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7465 {
7466 	errno_t         error = 0;
7467 
7468 	BRIDGE_LOCK_ASSERT_HELD(sc);
7469 
7470 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7471 		error = EINVAL;
7472 		goto done;
7473 	}
7474 	if (sc->sc_mac_nat_bif != NULL) {
7475 		if (sc->sc_mac_nat_bif != bif) {
7476 			error = EBUSY;
7477 		}
7478 		goto done;
7479 	}
7480 	sc->sc_mac_nat_bif = bif;
7481 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7482 	bridge_mac_nat_populate_entries(sc);
7483 
7484 done:
7485 	return error;
7486 }
7487 
7488 static void
7489 bridge_mac_nat_disable(struct bridge_softc *sc)
7490 {
7491 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7492 
7493 	assert(mac_nat_bif != NULL);
7494 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7495 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7496 	sc->sc_mac_nat_bif = NULL;
7497 	return;
7498 }
7499 
7500 static void
7501 mac_nat_entry_print2(struct mac_nat_entry *mne,
7502     const char ifname[IFNAMSIZ], const char *msg1, const char *msg2)
7503 {
7504 	int             af;
7505 	char            etopbuf[24];
7506 	char            ntopbuf[MAX_IPv6_STR_LEN];
7507 	const char      *space;
7508 
7509 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7510 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7511 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7512 	if (msg2 == NULL) {
7513 		msg2 = "";
7514 		space = "";
7515 	} else {
7516 		space = " ";
7517 	}
7518 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7519 	    "%.*s %s%s%s %p (%s, %s, %s)", IFNAMSIZ, ifname, msg1, space, msg2, mne,
7520 	    mne->mne_bif->bif_ifp->if_xname, ntopbuf, etopbuf);
7521 }
7522 
7523 static void
7524 mac_nat_entry_print(struct mac_nat_entry *mne,
7525     const char ifname[IFNAMSIZ], const char *msg)
7526 {
7527 	mac_nat_entry_print2(mne, ifname, msg, NULL);
7528 }
7529 
7530 static struct mac_nat_entry *
7531 bridge_lookup_mac_nat_entry_ipv4(const struct bridge_softc *sc, const struct in_addr *ip)
7532 {
7533 	struct mac_nat_entry    *mne;
7534 	struct mac_nat_entry    *ret_mne = NULL;
7535 
7536 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7537 		if (mne->mne_ip.s_addr == ip->s_addr) {
7538 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7539 				mac_nat_entry_print(mne, sc->sc_if_xname,
7540 				    "found");
7541 			}
7542 			ret_mne = mne;
7543 			break;
7544 		}
7545 	}
7546 
7547 	return ret_mne;
7548 }
7549 
7550 static struct mac_nat_entry *
7551 bridge_lookup_mac_nat_entry_ipv6(const struct bridge_softc *sc, const struct in6_addr *ip6)
7552 {
7553 	struct mac_nat_entry    *mne;
7554 	struct mac_nat_entry    *ret_mne = NULL;
7555 
7556 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7557 		if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7558 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7559 				mac_nat_entry_print(mne, sc->sc_if_xname,
7560 				    "found");
7561 			}
7562 			ret_mne = mne;
7563 			break;
7564 		}
7565 	}
7566 
7567 	return ret_mne;
7568 }
7569 
7570 static void
7571 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7572     struct mac_nat_entry *mne, const char *reason)
7573 {
7574 	LIST_REMOVE(mne, mne_list);
7575 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7576 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7577 	}
7578 	zfree(bridge_mne_pool, mne);
7579 	sc->sc_mne_count--;
7580 }
7581 
7582 static struct mac_nat_entry *
7583 bridge_create_mac_nat_entry_common(struct bridge_softc *sc,
7584     struct bridge_iflist *bif, const char eaddr[ETHER_ADDR_LEN])
7585 {
7586 	struct mac_nat_entry *mne;
7587 
7588 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7589 		sc->sc_mne_allocation_failures++;
7590 		return NULL;
7591 	}
7592 
7593 	mne = zalloc_noblock(bridge_mne_pool);
7594 	if (mne == NULL) {
7595 		sc->sc_mne_allocation_failures++;
7596 		return NULL;
7597 	}
7598 
7599 	sc->sc_mne_count++;
7600 	bzero(mne, sizeof(*mne));
7601 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7602 
7603 	mne->mne_bif = bif;
7604 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7605 
7606 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7607 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7608 	}
7609 
7610 	return mne;
7611 }
7612 
7613 static struct mac_nat_entry *
7614 bridge_create_mac_nat_entry_ipv4(struct bridge_softc *sc,
7615     struct bridge_iflist *bif, const struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7616 {
7617 	struct mac_nat_entry *mne;
7618 
7619 	mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7620 	if (mne == NULL) {
7621 		return NULL;
7622 	}
7623 
7624 	bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7625 	LIST_INSERT_HEAD(&sc->sc_mne_list, mne, mne_list);
7626 
7627 	return mne;
7628 }
7629 
7630 static struct mac_nat_entry *
7631 bridge_create_mac_nat_entry_ipv6(struct bridge_softc *sc,
7632     struct bridge_iflist *bif, const struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7633 {
7634 	struct mac_nat_entry *mne;
7635 
7636 	mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7637 	if (mne == NULL) {
7638 		return NULL;
7639 	}
7640 
7641 	bcopy(ip6, &mne->mne_ip6, sizeof(mne->mne_ip6));
7642 	mne->mne_flags |= MNE_FLAGS_IPV6;
7643 	LIST_INSERT_HEAD(&sc->sc_mne_list_v6, mne, mne_list);
7644 
7645 	return mne;
7646 }
7647 
7648 static struct mac_nat_entry *
7649 bridge_update_mac_nat_entry_common(struct bridge_softc *sc, struct bridge_iflist *bif,
7650     struct mac_nat_entry *mne, const char eaddr[ETHER_ADDR_LEN])
7651 {
7652 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7653 
7654 	if (mne->mne_bif == mac_nat_bif) {
7655 		/* the MAC NAT interface takes precedence */
7656 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7657 			if (mne->mne_bif != bif) {
7658 				mac_nat_entry_print2(mne,
7659 				    sc->sc_if_xname, "reject",
7660 				    bif->bif_ifp->if_xname);
7661 			}
7662 		}
7663 	} else if (mne->mne_bif != bif) {
7664 		const char *__null_terminated old_if = mne->mne_bif->bif_ifp->if_xname;
7665 
7666 		mne->mne_bif = bif;
7667 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7668 			mac_nat_entry_print2(mne,
7669 			    sc->sc_if_xname, "replaced",
7670 			    old_if);
7671 		}
7672 		bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7673 	}
7674 
7675 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7676 
7677 	return mne;
7678 }
7679 
7680 static struct mac_nat_entry *
7681 bridge_update_mac_nat_entry_ipv4(struct bridge_softc *sc,
7682     struct bridge_iflist *bif, struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7683 {
7684 	struct mac_nat_entry *mne;
7685 
7686 	mne = bridge_lookup_mac_nat_entry_ipv4(sc, ip);
7687 	if (mne != NULL) {
7688 		return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7689 	}
7690 
7691 	mne = bridge_create_mac_nat_entry_ipv4(sc, bif, ip, eaddr);
7692 	return mne;
7693 }
7694 
7695 static struct mac_nat_entry *
7696 bridge_update_mac_nat_entry_ipv6(struct bridge_softc *sc,
7697     struct bridge_iflist *bif, struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7698 {
7699 	struct mac_nat_entry *mne;
7700 
7701 	mne = bridge_lookup_mac_nat_entry_ipv6(sc, ip6);
7702 	if (mne != NULL) {
7703 		return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7704 	}
7705 
7706 	mne = bridge_create_mac_nat_entry_ipv6(sc, bif, ip6, eaddr);
7707 	return mne;
7708 }
7709 
7710 static void
7711 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7712     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7713 {
7714 	struct mac_nat_entry *mne;
7715 	struct mac_nat_entry *tmne;
7716 
7717 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7718 		if (bif != NULL && mne->mne_bif != bif) {
7719 			continue;
7720 		}
7721 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7722 	}
7723 }
7724 
7725 /*
7726  * bridge_mac_nat_flush_entries:
7727  *
7728  * Flush MAC NAT entries for the specified member. Flush all entries if
7729  * the member is the one that requires MAC NAT, otherwise just flush the
7730  * ones for the specified member.
7731  */
7732 static void
7733 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7734 {
7735 	struct bridge_iflist *flush_bif;
7736 
7737 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7738 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7739 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7740 }
7741 
7742 static void
7743 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7744 {
7745 	errno_t                 error;
7746 	ifnet_t                 ifp;
7747 	uint16_t                addresses_count = 0;
7748 	ifaddr_t                * __counted_by(addresses_count) list;
7749 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
7750 
7751 	assert(mac_nat_bif != NULL);
7752 	ifp = mac_nat_bif->bif_ifp;
7753 	error = ifnet_get_address_list_family_with_count(ifp, &list, &addresses_count, 0);
7754 	if (error != 0) {
7755 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7756 		    "ifnet_get_address_list(%s) failed %d",
7757 		    ifp->if_xname, error);
7758 		return;
7759 	}
7760 
7761 	for (uint16_t i = 0; i < addresses_count; ++i) {
7762 		sa_family_t af;
7763 
7764 		af = ifaddr_address_family(list[i]);
7765 		switch (af) {
7766 		case AF_INET: {
7767 			struct sockaddr_in sin;
7768 
7769 			error = ifaddr_address(list[i], (struct sockaddr *)&sin, sizeof(sin));
7770 			if (error != 0) {
7771 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7772 				    "ifaddr_address failed %d",
7773 				    error);
7774 				break;
7775 			}
7776 
7777 			bridge_create_mac_nat_entry_ipv4(sc, mac_nat_bif, &sin.sin_addr, IF_LLADDR(ifp));
7778 			break;
7779 		}
7780 
7781 		case AF_INET6: {
7782 			struct sockaddr_in6 sin6;
7783 
7784 			error = ifaddr_address(list[i], (struct sockaddr *)&sin6, sizeof(sin6));
7785 			if (error != 0) {
7786 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7787 				    "ifaddr_address failed %d",
7788 				    error);
7789 				break;
7790 			}
7791 
7792 			if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
7793 				/* remove scope ID */
7794 				sin6.sin6_addr.s6_addr16[1] = 0;
7795 			}
7796 
7797 			bridge_create_mac_nat_entry_ipv6(sc, mac_nat_bif, &sin6.sin6_addr, IF_LLADDR(ifp));
7798 			break;
7799 		}
7800 
7801 		default:
7802 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7803 			    "ifaddr_address_family unknown %d",
7804 			    af);
7805 			break;
7806 		}
7807 	}
7808 
7809 	ifnet_address_list_free_counted_by(list, addresses_count);
7810 	return;
7811 }
7812 
7813 static void
7814 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7815     struct mac_nat_entry_list *list, unsigned long now)
7816 {
7817 	struct mac_nat_entry *mne;
7818 	struct mac_nat_entry *tmne;
7819 
7820 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7821 		if (now >= mne->mne_expire) {
7822 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7823 		}
7824 	}
7825 }
7826 
7827 static void
7828 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7829 {
7830 	if (sc->sc_mac_nat_bif == NULL) {
7831 		return;
7832 	}
7833 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7834 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7835 }
7836 
7837 static const char *
7838 get_in_out_string(boolean_t is_output)
7839 {
7840 	return (const char * __null_terminated)(is_output ? "OUT" : "IN");
7841 }
7842 
7843 /*
7844  * is_valid_arp_packet:
7845  *	Verify that this is a valid ARP packet.
7846  *
7847  *	Returns TRUE if the packet is valid, FALSE otherwise.
7848  */
7849 static boolean_t
7850 is_valid_arp_packet(mbuf_t *data, bool is_output,
7851     struct ether_header **eh_p, struct ether_arp **ea_p)
7852 {
7853 	struct ether_arp *ea;
7854 	struct ether_header *eh;
7855 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7856 	boolean_t is_valid = FALSE;
7857 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7858 
7859 	if (mbuf_pkthdr_len(*data) < minlen) {
7860 		BRIDGE_LOG(LOG_DEBUG, flags,
7861 		    "ARP %s short frame %lu < %lu",
7862 		    get_in_out_string(is_output),
7863 		    mbuf_pkthdr_len(*data), minlen);
7864 		goto done;
7865 	}
7866 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7867 		BRIDGE_LOG(LOG_DEBUG, flags,
7868 		    "ARP %s size %lu mbuf_pullup fail",
7869 		    get_in_out_string(is_output),
7870 		    minlen);
7871 		*data = NULL;
7872 		goto done;
7873 	}
7874 
7875 	/* validate ARP packet */
7876 	eh = mtod(*data, struct ether_header *);
7877 	ea = (struct ether_arp *)(eh + 1);
7878 	if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7879 		BRIDGE_LOG(LOG_DEBUG, flags,
7880 		    "ARP %s htype not ethernet",
7881 		    get_in_out_string(is_output));
7882 		goto done;
7883 	}
7884 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7885 		BRIDGE_LOG(LOG_DEBUG, flags,
7886 		    "ARP %s hlen not ethernet",
7887 		    get_in_out_string(is_output));
7888 		goto done;
7889 	}
7890 	if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7891 		BRIDGE_LOG(LOG_DEBUG, flags,
7892 		    "ARP %s ptype not IP",
7893 		    get_in_out_string(is_output));
7894 		goto done;
7895 	}
7896 	if (ea->arp_pln != sizeof(struct in_addr)) {
7897 		BRIDGE_LOG(LOG_DEBUG, flags,
7898 		    "ARP %s plen not IP",
7899 		    get_in_out_string(is_output));
7900 		goto done;
7901 	}
7902 	is_valid = TRUE;
7903 	*ea_p = ea;
7904 	*eh_p = eh;
7905 done:
7906 	return is_valid;
7907 }
7908 
7909 static struct mac_nat_entry *
7910 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7911 {
7912 	struct ether_arp        * __single ea;
7913 	struct ether_header     * __single eh;
7914 	struct mac_nat_entry    *mne = NULL;
7915 	u_short                 op;
7916 	struct in_addr          tpa;
7917 
7918 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7919 		goto done;
7920 	}
7921 	op = ea->arp_op;
7922 	switch (op) {
7923 	case HTONS_ARPOP_REQUEST:
7924 	case HTONS_ARPOP_REPLY:
7925 		/* only care about REQUEST and REPLY */
7926 		break;
7927 	default:
7928 		goto done;
7929 	}
7930 
7931 	/* check the target IP address for a NAT entry */
7932 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7933 	if (tpa.s_addr != 0) {
7934 		mne = bridge_lookup_mac_nat_entry_ipv4(sc, &tpa);
7935 	}
7936 	if (mne != NULL) {
7937 		if (op == HTONS_ARPOP_REPLY) {
7938 			/* translate the MAC address */
7939 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7940 				char    mac_src[24];
7941 				char    mac_dst[24];
7942 
7943 				ether_ntop(mac_src, sizeof(mac_src),
7944 				    ea->arp_tha);
7945 				ether_ntop(mac_dst, sizeof(mac_dst),
7946 				    mne->mne_mac);
7947 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7948 				    "%s %s ARP %s -> %s",
7949 				    sc->sc_if_xname,
7950 				    mne->mne_bif->bif_ifp->if_xname,
7951 				    mac_src, mac_dst);
7952 			}
7953 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7954 		}
7955 	} else {
7956 		/* handle conflicting ARP (sender matches mne) */
7957 		struct in_addr spa;
7958 
7959 		bcopy(ea->arp_spa, &spa, sizeof(spa));
7960 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7961 			/* check the source IP for a NAT entry */
7962 			mne = bridge_lookup_mac_nat_entry_ipv4(sc, &spa);
7963 		}
7964 	}
7965 
7966 done:
7967 	return mne;
7968 }
7969 
7970 static boolean_t
7971 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7972     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7973 {
7974 	struct ether_arp        * __single ea;
7975 	struct ether_header     * __single eh;
7976 	struct in_addr          ip;
7977 	struct mac_nat_entry    *mne = NULL;
7978 	u_short                 op;
7979 	boolean_t               translate = FALSE;
7980 
7981 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7982 		goto done;
7983 	}
7984 	op = ea->arp_op;
7985 	switch (op) {
7986 	case HTONS_ARPOP_REQUEST:
7987 	case HTONS_ARPOP_REPLY:
7988 		/* only care about REQUEST and REPLY */
7989 		break;
7990 	default:
7991 		goto done;
7992 	}
7993 
7994 	bcopy(ea->arp_spa, &ip, sizeof(ip));
7995 	if (ip.s_addr == 0) {
7996 		goto done;
7997 	}
7998 	/* XXX validate IP address: no multicast/broadcast */
7999 	mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8000 	    (const char *)ea->arp_sha);
8001 	if (mnr != NULL && mne != NULL) {
8002 		/* record the offset to do the replacement */
8003 		translate = TRUE;
8004 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8005 	}
8006 
8007 done:
8008 	return translate;
8009 }
8010 
8011 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
8012 	                         + sizeof(struct ip))
8013 static uint8_t * __indexable
8014 get_ether_ip_header_ptr(mbuf_t *data, boolean_t is_output)
8015 {
8016 	uint8_t         *header = NULL;
8017 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8018 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
8019 
8020 	if (mbuf_pkthdr_len(*data) < minlen) {
8021 		BRIDGE_LOG(LOG_DEBUG, flags,
8022 		    "IP %s short frame %lu < %lu",
8023 		    get_in_out_string(is_output),
8024 		    mbuf_pkthdr_len(*data), minlen);
8025 		goto done;
8026 	}
8027 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8028 		BRIDGE_LOG(LOG_DEBUG, flags,
8029 		    "IP %s size %lu mbuf_pullup fail",
8030 		    get_in_out_string(is_output),
8031 		    minlen);
8032 		*data = NULL;
8033 		goto done;
8034 	}
8035 	header = mtod(*data, uint8_t *);
8036 done:
8037 	return header;
8038 }
8039 
8040 static struct mac_nat_entry *
8041 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8042 {
8043 	struct in_addr          dst;
8044 	uint8_t                 *header;
8045 	struct ip               *iphdr;
8046 	struct mac_nat_entry    *mne = NULL;
8047 
8048 	header = get_ether_ip_header_ptr(data, FALSE);
8049 	if (header == NULL) {
8050 		goto done;
8051 	}
8052 	iphdr = (struct ip *)(void *)(header + sizeof(struct ether_header));
8053 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8054 	/* XXX validate IP address */
8055 	if (dst.s_addr == 0) {
8056 		goto done;
8057 	}
8058 	mne = bridge_lookup_mac_nat_entry_ipv4(sc, &dst);
8059 done:
8060 	return mne;
8061 }
8062 
8063 static void
8064 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8065     struct bridge_iflist *bif, mbuf_t m,
8066     uint8_t ip_header_len, struct mac_nat_record *mnr)
8067 {
8068 	uint16_t        dp_flags;
8069 	errno_t         error;
8070 	size_t          offset;
8071 	struct udphdr   udphdr;
8072 
8073 	/* copy the UDP header */
8074 	offset = sizeof(struct ether_header) + ip_header_len;
8075 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8076 	if (error != 0) {
8077 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8078 		    "mbuf_copydata udphdr failed %d",
8079 		    error);
8080 		return;
8081 	}
8082 	if (udphdr.uh_sport != HTONS_IPPORT_BOOTPC ||
8083 	    udphdr.uh_dport != HTONS_IPPORT_BOOTPS) {
8084 		/* not a BOOTP/DHCP packet */
8085 		return;
8086 	}
8087 	/* check whether the broadcast bit is already set */
8088 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8089 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8090 	if (error != 0) {
8091 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8092 		    "mbuf_copydata dp_flags failed %d",
8093 		    error);
8094 		return;
8095 	}
8096 	if ((dp_flags & HTONS_DHCP_FLAGS_BROADCAST) != 0) {
8097 		/* it's already set, nothing to do */
8098 		return;
8099 	}
8100 	/* broadcast bit needs to be set */
8101 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8102 	mnr->mnr_ip_header_len = ip_header_len;
8103 	if (udphdr.uh_sum != 0) {
8104 		uint16_t        delta;
8105 
8106 		/* adjust checksum to take modified dp_flags into account */
8107 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8108 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8109 	}
8110 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8111 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8112 	    sc->sc_if_xname,
8113 	    bif->bif_ifp->if_xname,
8114 	    ntohs(mnr->mnr_ip_dhcp_flags),
8115 	    ntohs(mnr->mnr_ip_udp_csum));
8116 	return;
8117 }
8118 
8119 static boolean_t
8120 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8121     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8122 {
8123 #pragma unused(mnr)
8124 	uint8_t                 *header;
8125 	struct ether_header     *eh;
8126 	struct in_addr          ip;
8127 	struct ip               *iphdr;
8128 	uint8_t                 ip_header_len;
8129 	struct mac_nat_entry    *mne = NULL;
8130 	boolean_t               translate = FALSE;
8131 
8132 	header = get_ether_ip_header_ptr(data, TRUE);
8133 	if (header == NULL) {
8134 		goto done;
8135 	}
8136 
8137 	eh = (struct ether_header *)header;
8138 	iphdr = (struct ip *)(header + sizeof(*eh));
8139 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8140 	if (ip_header_len < sizeof(ip)) {
8141 		/* bogus IP header */
8142 		goto done;
8143 	}
8144 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8145 	/* XXX validate the source address */
8146 	if (ip.s_addr != 0) {
8147 		mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8148 		    (const char *)eh->ether_shost);
8149 	}
8150 	if (mnr != NULL) {
8151 		if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8152 			/* handle DHCP must broadcast */
8153 			bridge_mac_nat_udp_output(sc, bif, *data,
8154 			    ip_header_len, mnr);
8155 		}
8156 		translate = TRUE;
8157 	}
8158 done:
8159 	return translate;
8160 }
8161 
8162 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8163 	                         + sizeof(struct ip6_hdr))
8164 static uint8_t * __indexable
8165 get_ether_ipv6_header_ptr(mbuf_t *data, size_t plen, boolean_t is_output)
8166 {
8167 	uint8_t         *header = NULL;
8168 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8169 	size_t          minlen = ETHER_IPV6_HEADER_LEN + plen;
8170 
8171 	if (mbuf_pkthdr_len(*data) < minlen) {
8172 		BRIDGE_LOG(LOG_DEBUG, flags,
8173 		    "IP %s short frame %lu < %lu",
8174 		    get_in_out_string(is_output),
8175 		    mbuf_pkthdr_len(*data), minlen);
8176 		goto done;
8177 	}
8178 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8179 		BRIDGE_LOG(LOG_DEBUG, flags,
8180 		    "IP %s size %lu mbuf_pullup fail",
8181 		    get_in_out_string(is_output),
8182 		    minlen);
8183 		*data = NULL;
8184 		goto done;
8185 	}
8186 	header = mtod(*data, uint8_t *);
8187 done:
8188 	return header;
8189 }
8190 
8191 #include <netinet/icmp6.h>
8192 #include <netinet6/nd6.h>
8193 
/* size of an ND option header plus an Ethernet address */
#define ETHER_ND_LLADDR_LEN     (sizeof(struct nd_opt_hdr) + ETHER_ADDR_LEN)
8195 
8196 static void
8197 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
8198     struct bridge_iflist *bif,
8199     mbuf_t *data, struct ip6_hdr *ip6h,
8200     struct in6_addr *saddrp,
8201     struct mac_nat_record *mnr)
8202 {
8203 	uint8_t *header;
8204 	struct ether_header *eh;
8205 	struct icmp6_hdr *icmp6;
8206 	uint8_t         icmp6_type;
8207 	uint32_t        icmp6len;
8208 	int             lladdrlen = 0;
8209 	char            *lladdr = NULL;
8210 	unsigned int    off = sizeof(*ip6h);
8211 
8212 	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
8213 	if (icmp6len < sizeof(*icmp6)) {
8214 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8215 		    "short IPv6 payload length %d < %lu",
8216 		    icmp6len, sizeof(*icmp6));
8217 		return;
8218 	}
8219 
8220 	/* pullup IP6 header + ICMPv6 header */
8221 	header = get_ether_ipv6_header_ptr(data, sizeof(*icmp6), TRUE);
8222 	if (header == NULL) {
8223 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8224 		    "failed to pullup icmp6 header");
8225 		return;
8226 	}
8227 	eh = (struct ether_header *)header;
8228 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8229 	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8230 	icmp6_type = icmp6->icmp6_type;
8231 	switch (icmp6_type) {
8232 	case ND_NEIGHBOR_SOLICIT:
8233 	case ND_NEIGHBOR_ADVERT:
8234 	case ND_ROUTER_ADVERT:
8235 	case ND_ROUTER_SOLICIT:
8236 		break;
8237 	default:
8238 		return;
8239 	}
8240 
8241 	/* pullup IP6 header + payload */
8242 	header = get_ether_ipv6_header_ptr(data, icmp6len, TRUE);
8243 	if (header == NULL) {
8244 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8245 		    "failed to pullup icmp6 + payload");
8246 		return;
8247 	}
8248 	eh = (struct ether_header *)header;
8249 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8250 	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8251 
8252 	switch (icmp6_type) {
8253 	case ND_NEIGHBOR_SOLICIT: {
8254 		struct nd_neighbor_solicit *nd_ns;
8255 		union nd_opts ndopts;
8256 		boolean_t is_dad_probe;
8257 		struct in6_addr taddr;
8258 
8259 		if (icmp6len < sizeof(*nd_ns)) {
8260 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8261 			    "short nd_ns %d < %lu",
8262 			    icmp6len, sizeof(*nd_ns));
8263 			return;
8264 		}
8265 
8266 		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8267 		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8268 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8269 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8270 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8271 			    "invalid target ignored");
8272 			return;
8273 		}
8274 
8275 		/* parse options */
8276 		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8277 		if (nd6_options(&ndopts) < 0) {
8278 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8279 			    "invalid ND6 NS option");
8280 			return;
8281 		}
8282 		if (ndopts.nd_opts_src_lladdr != NULL) {
8283 			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len,
8284 			    lladdr, lladdrlen);
8285 		}
8286 		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8287 		if (lladdr != NULL) {
8288 			if (is_dad_probe) {
8289 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8290 				    "bad ND6 DAD packet");
8291 				return;
8292 			}
8293 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8294 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8295 				    "source lladdrlen %d != %lu",
8296 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8297 				return;
8298 			}
8299 		}
8300 		if (is_dad_probe) {
8301 			/* node is trying use taddr, create an mne for taddr */
8302 			*saddrp = taddr;
8303 		}
8304 		break;
8305 	}
8306 	case ND_NEIGHBOR_ADVERT: {
8307 		struct nd_neighbor_advert *nd_na;
8308 		union nd_opts ndopts;
8309 		struct in6_addr taddr;
8310 
8311 
8312 		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8313 
8314 		if (icmp6len < sizeof(*nd_na)) {
8315 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8316 			    "short nd_na %d < %lu",
8317 			    icmp6len, sizeof(*nd_na));
8318 			return;
8319 		}
8320 
8321 		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8322 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8323 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8324 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8325 			    "invalid target ignored");
8326 			return;
8327 		}
8328 
8329 		/* parse options */
8330 		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8331 		if (nd6_options(&ndopts) < 0) {
8332 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8333 			    "invalid ND6 NA option");
8334 			return;
8335 		}
8336 		if (ndopts.nd_opts_tgt_lladdr == NULL) {
8337 			/* target linklayer, nothing to do */
8338 			return;
8339 		}
8340 
8341 		ND_OPT_LLADDR(ndopts.nd_opts_tgt_lladdr, nd_opt_len, lladdr, lladdrlen);
8342 		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8343 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8344 			    "target lladdrlen %d != %lu",
8345 			    lladdrlen, ETHER_ND_LLADDR_LEN);
8346 			return;
8347 		}
8348 		break;
8349 	}
8350 	case ND_ROUTER_ADVERT:
8351 	case ND_ROUTER_SOLICIT: {
8352 		union nd_opts ndopts;
8353 		uint32_t type_length;
8354 		const char *description;
8355 
8356 		if (icmp6_type == ND_ROUTER_ADVERT) {
8357 			type_length = sizeof(struct nd_router_advert);
8358 			description = "RA";
8359 		} else {
8360 			type_length = sizeof(struct nd_router_solicit);
8361 			description = "RS";
8362 		}
8363 		if (icmp6len < type_length) {
8364 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8365 			    "short ND6 %s %d < %d",
8366 			    description, icmp6len, type_length);
8367 			return;
8368 		}
8369 
8370 		/* parse options */
8371 		nd6_option_init(((uint8_t *)icmp6) + type_length,
8372 		    icmp6len - type_length, &ndopts);
8373 		if (nd6_options(&ndopts) < 0) {
8374 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8375 			    "invalid ND6 %s option", description);
8376 			return;
8377 		}
8378 		if (ndopts.nd_opts_src_lladdr != NULL) {
8379 			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len, lladdr, lladdrlen);
8380 
8381 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8382 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8383 				    "source lladdrlen %d != %lu",
8384 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8385 				return;
8386 			}
8387 		}
8388 		break;
8389 	}
8390 	default:
8391 		break;
8392 	}
8393 
8394 	if (lladdr != NULL) {
8395 		mnr->mnr_ip6_lladdr_offset = (uint16_t)
8396 		    ((uintptr_t)lladdr - (uintptr_t)eh);
8397 		mnr->mnr_ip6_icmp6_len = icmp6len;
8398 		mnr->mnr_ip6_icmp6_type = icmp6_type;
8399 		mnr->mnr_ip6_header_len = off;
8400 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8401 			const char *str;
8402 
8403 			switch (mnr->mnr_ip6_icmp6_type) {
8404 			case ND_ROUTER_ADVERT:
8405 				str = "ROUTER ADVERT";
8406 				break;
8407 			case ND_ROUTER_SOLICIT:
8408 				str = "ROUTER SOLICIT";
8409 				break;
8410 			case ND_NEIGHBOR_ADVERT:
8411 				str = "NEIGHBOR ADVERT";
8412 				break;
8413 			case ND_NEIGHBOR_SOLICIT:
8414 				str = "NEIGHBOR SOLICIT";
8415 				break;
8416 			default:
8417 				str = "";
8418 				break;
8419 			}
8420 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8421 			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8422 			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8423 			    mnr->mnr_ip6_header_len,
8424 			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8425 		}
8426 	}
8427 }
8428 
8429 static struct mac_nat_entry *
8430 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8431 {
8432 	struct in6_addr         dst;
8433 	uint8_t                 *header;
8434 	struct ether_header     *eh;
8435 	struct ip6_hdr          *ip6h;
8436 	struct mac_nat_entry    *mne = NULL;
8437 
8438 	header = get_ether_ipv6_header_ptr(data, 0, FALSE);
8439 	if (header == NULL) {
8440 		goto done;
8441 	}
8442 	eh = (struct ether_header *)header;
8443 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8444 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8445 	/* XXX validate IPv6 address */
8446 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8447 		goto done;
8448 	}
8449 	mne = bridge_lookup_mac_nat_entry_ipv6(sc, &dst);
8450 
8451 done:
8452 	return mne;
8453 }
8454 
8455 static boolean_t
8456 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8457     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8458 {
8459 	uint8_t                 *header;
8460 	struct ether_header     *eh;
8461 	ether_addr_t            ether_shost;
8462 	struct ip6_hdr          *ip6h;
8463 	struct in6_addr         saddr;
8464 	boolean_t               translate;
8465 
8466 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8467 	header = get_ether_ipv6_header_ptr(data, 0, TRUE);
8468 	if (header == NULL) {
8469 		translate = FALSE;
8470 		goto done;
8471 	}
8472 	eh = (struct ether_header *)header;
8473 	bcopy(eh->ether_shost, &ether_shost, sizeof(ether_shost));
8474 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8475 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8476 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8477 		bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8478 	}
8479 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8480 		goto done;
8481 	}
8482 	(void)bridge_update_mac_nat_entry_ipv6(sc, bif, &saddr,
8483 	    (const char *)ether_shost.octet);
8484 
8485 done:
8486 	return translate;
8487 }
8488 
8489 /*
8490  * Function: bridge_mac_nat_input:
8491  *
8492  * Purpose:
8493  *   Process a unicast packet arriving on the external interface `external_ifp`.
8494  *
8495  *   If the packet is ARP, IPv4, or IPv6, lookup the address from the packet in
8496  *   the mac_nat_entry table. If an entry is found, and the interface is
8497  *   not `external_ifp`, replace the destination MAC address in the
8498  *   ethernet header with the corresponding internal MAC address, and return
8499  *   the interface via `*dst_if`.
8500  *
8501  * Returns:
8502  *   NULL if the packet was deallocated during processing.
8503  *
8504  *   Otherwise, returns non-NULL packet that should:
8505  *   1) if `*dst_if` is NULL, continue on as an input packet
8506  *      over `external_ifp`, OR
8507  *   2) if `*dst_if` is not NULL, be delivered as an output packet
8508  *      over `*dst_if`.
8509  */
8510 static mbuf_t
8511 bridge_mac_nat_input(struct bridge_softc *sc, ifnet_t external_ifp,
8512     mbuf_t m, ifnet_t * dst_if)
8513 {
8514 	struct ether_header     *eh;
8515 	mbuf_t                  m0 = m;
8516 	struct mac_nat_entry    *mne = NULL;
8517 
8518 	BRIDGE_LOCK_ASSERT_HELD(sc);
8519 	*dst_if = NULL;
8520 	eh = mtod(m, struct ether_header *);
8521 	switch (eh->ether_type) {
8522 	case HTONS_ETHERTYPE_ARP:
8523 		mne = bridge_mac_nat_arp_input(sc, &m);
8524 		break;
8525 	case HTONS_ETHERTYPE_IP:
8526 		mne = bridge_mac_nat_ip_input(sc, &m);
8527 		break;
8528 	case HTONS_ETHERTYPE_IPV6:
8529 		mne = bridge_mac_nat_ipv6_input(sc, &m);
8530 		break;
8531 	default:
8532 		break;
8533 	}
8534 	if (m != NULL & mne != NULL) {
8535 		*dst_if = mne->mne_bif->bif_ifp;
8536 		if (*dst_if == external_ifp) {
8537 			/* receive packet for ifp */
8538 			*dst_if = NULL;
8539 		} else {
8540 			/* replace the destination MAC with internal one */
8541 			if (m != m0) {
8542 				/* it may have changed */
8543 				eh = mtod(m, struct ether_header *);
8544 			}
8545 			bcopy(mne->mne_mac, eh->ether_dhost,
8546 			    sizeof(eh->ether_dhost));
8547 		}
8548 	}
8549 	return m;
8550 }
8551 
8552 
8553 static mblist
8554 bridge_mac_nat_input_list(struct bridge_softc *sc, ifnet_t external_ifp,
8555     mbuf_t m, mbuf_t * forward_head)
8556 {
8557 	mblist          forward;
8558 	mbuf_t          next_packet;
8559 	mblist          ret;
8560 
8561 	mblist_init(&ret);
8562 	mblist_init(&forward);
8563 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8564 		ifnet_ref_t     dst_if;
8565 
8566 		/* take packet out of the list */
8567 		next_packet = scan->m_nextpkt;
8568 		scan->m_nextpkt = NULL;
8569 
8570 		scan = bridge_mac_nat_input(sc, external_ifp, scan, &dst_if);
8571 		if (scan != NULL) {
8572 			if (dst_if != NULL) {
8573 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8574 				    "%s MAC-NAT input translate to %s",
8575 				    sc->sc_if_xname, dst_if->if_xname);
8576 				/* use rcvif to store the egress interface */
8577 				mbuf_pkthdr_setrcvif(scan, dst_if);
8578 				/* add it to the forwarding list */
8579 				mblist_append(&forward, scan);
8580 			} else {
8581 				/* add it to the "continue on as input" list */
8582 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8583 				    "%s MAC-NAT input for %s",
8584 				    sc->sc_if_xname,
8585 				    external_ifp->if_xname);
8586 				mblist_append(&ret, scan);
8587 			}
8588 		}
8589 	}
8590 	*forward_head = forward.head;
8591 	return ret;
8592 }
8593 
8594 /*
8595  * bridge_mac_nat_translate_list:
8596  * Process a list of packets destined to the MAC-NAT interface `dst_if`
8597  * from the bridge member `sbif`.
8598  *
8599  * For each packet in the list, update the MAC-NAT record, and if
8600  * translation is required, translate it.
8601  *
8602  * Returns the list of packets that should be delivered to the MAC-NAT
8603  * interface.
8604  */
8605 static mbuf_t
8606 bridge_mac_nat_translate_list(struct bridge_softc * sc,
8607     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8608 {
8609 	mbuf_t          next_packet;
8610 	mblist          ret;
8611 
8612 	mblist_init(&ret);
8613 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
8614 		struct mac_nat_record   mnr;
8615 		bool                    translate_mac;
8616 
8617 		/* take packet out of the list */
8618 		next_packet = scan->m_nextpkt;
8619 		scan->m_nextpkt = NULL;
8620 		translate_mac = bridge_mac_nat_output(sc, sbif, &scan, &mnr);
8621 		if (scan != NULL) {
8622 			if (translate_mac) {
8623 				bridge_mac_nat_translate(&scan, &mnr,
8624 				    IF_LLADDR(dst_if));
8625 			}
8626 			if (scan != NULL) {
8627 				/* add it back to the list */
8628 				mblist_append(&ret, scan);
8629 			}
8630 		}
8631 	}
8632 	return ret.head;
8633 }
8634 
8635 /*
8636  * bridge_mac_nat_copy_and_translate_list:
8637  * Same as bridge_mac_nat_translate_list() except that a copy of the
8638  * packet list is returned instead.
8639  *
8640  * The packet list `m` is left unaltered.
8641  */
8642 static mbuf_t
8643 bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
8644     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8645 {
8646 	mbuf_t          next_packet;
8647 	mblist          ret;
8648 
8649 	mblist_init(&ret);
8650 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8651 		mbuf_ref_t              mc = NULL;
8652 		struct mac_nat_record   mnr;
8653 		bool                    translate_mac;
8654 
8655 		/* take packet out of the list, make a copy, put it back */
8656 		next_packet = scan->m_nextpkt;
8657 		scan->m_nextpkt = NULL;
8658 		mc = m_dup(scan, M_DONTWAIT);
8659 		scan->m_nextpkt = next_packet;
8660 		if (mc == NULL) {
8661 			continue;
8662 		}
8663 		translate_mac = bridge_mac_nat_output(sc, sbif, &mc, &mnr);
8664 		if (mc != NULL) {
8665 			if (translate_mac) {
8666 				bridge_mac_nat_translate(&mc, &mnr,
8667 				    IF_LLADDR(dst_if));
8668 			}
8669 			if (mc != NULL) {
8670 				/* add it to the new list */
8671 				mblist_append(&ret, mc);
8672 			}
8673 		}
8674 	}
8675 	return ret.head;
8676 }
8677 
8678 static void
8679 bridge_mac_nat_forward_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
8680     mbuf_t m)
8681 {
8682 	int             count = 0;
8683 	ifnet_t         dst_if;
8684 	mblist          list;
8685 	int             n_lists = 0;
8686 	mbuf_t          next_packet;
8687 
8688 	mblist_init(&list);
8689 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8690 		ifnet_t         this_if;
8691 
8692 		next_packet = scan->m_nextpkt;
8693 		this_if = mbuf_pkthdr_rcvif(scan);
8694 		mbuf_pkthdr_setrcvif(scan, NULL);
8695 		if (list.head == NULL) {
8696 			/* start a new list */
8697 			list.head = list.tail = scan;
8698 			count = 1;
8699 			dst_if = this_if;
8700 		} else if (dst_if != this_if) {
8701 			/* send up the previous chain */
8702 			if (list.tail != NULL) {
8703 				/* terminate the list */
8704 				list.tail->m_nextpkt = NULL;
8705 			}
8706 			n_lists++;
8707 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8708 			    "(%s): sublist %u pkts %u",
8709 			    dst_if->if_xname, n_lists, count);
8710 			bridge_enqueue(bridge_ifp, NULL,
8711 			    dst_if, etypef, list.head,
8712 			    CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8713 
8714 			/* start new list */
8715 			list.head = list.tail = scan;
8716 			count = 1;
8717 			dst_if = this_if;
8718 		} else {
8719 			count++;
8720 			list.tail = scan;
8721 		}
8722 		if (next_packet == NULL) {
8723 			/* last list */
8724 			n_lists++;
8725 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8726 			    "(%s): sublist %u pkts %u",
8727 			    dst_if->if_xname, n_lists, count);
8728 			bridge_enqueue(bridge_ifp, NULL,
8729 			    dst_if, etypef, list.head,
8730 			    CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8731 		}
8732 	}
8733 	return;
8734 }
8735 
8736 /*
8737  * bridge_mac_nat_output:
8738  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8739  * from the interface 'bif'.
8740  *
8741  * Create a mac_nat_entry containing the source IP address and MAC address
8742  * from the packet. Populate a mac_nat_record with information detailing
8743  * how to translate the packet. Translation takes place later by calling
8744  * `bridge_mac_nat_translate()`.
8745  *
8746  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8747  * interface is generating an output packet. No translation is required in this
8748  * case, we just record the IP address used to prevent another bif from
8749  * claiming our IP address.
8750  *
8751  * Returns:
8752  * TRUE if the packet should be translated (*mnr updated as well),
8753  * FALSE otherwise.
8754  *
8755  * *data may be updated to point at a different mbuf chain or NULL if
8756  * the chain was deallocated during processing.
8757  */
8758 
8759 static boolean_t
8760 bridge_mac_nat_output(struct bridge_softc *sc,
8761     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8762 {
8763 	struct ether_header     *eh;
8764 	boolean_t               translate = FALSE;
8765 
8766 	BRIDGE_LOCK_ASSERT_HELD(sc);
8767 	assert(sc->sc_mac_nat_bif != NULL);
8768 
8769 	eh = mtod(*data, struct ether_header *);
8770 	if (mnr != NULL) {
8771 		bzero(mnr, sizeof(*mnr));
8772 		mnr->mnr_ether_type = eh->ether_type;
8773 	}
8774 	switch (eh->ether_type) {
8775 	case HTONS_ETHERTYPE_ARP:
8776 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8777 		break;
8778 	case HTONS_ETHERTYPE_IP:
8779 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8780 		break;
8781 	case HTONS_ETHERTYPE_IPV6:
8782 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8783 		break;
8784 	default:
8785 		break;
8786 	}
8787 	return translate;
8788 }
8789 
8790 static void
8791 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8792     const char eaddr[ETHER_ADDR_LEN])
8793 {
8794 	errno_t                 error;
8795 
8796 	if (mnr->mnr_arp_offset == 0) {
8797 		return;
8798 	}
8799 	/* replace the source hardware address */
8800 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8801 	    ETHER_ADDR_LEN, eaddr,
8802 	    MBUF_DONTWAIT);
8803 	if (error != 0) {
8804 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8805 		    "mbuf_copyback failed");
8806 		m_drop(*data, DROPTAP_FLAG_DIR_IN,
8807 		    DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8808 		*data = NULL;
8809 	}
8810 	return;
8811 }
8812 
8813 static void
8814 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8815 {
8816 	errno_t         error;
8817 	size_t          offset;
8818 
8819 	if (mnr->mnr_ip_header_len == 0) {
8820 		return;
8821 	}
8822 	/* update the UDP checksum */
8823 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8824 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8825 	    sizeof(mnr->mnr_ip_udp_csum),
8826 	    &mnr->mnr_ip_udp_csum,
8827 	    MBUF_DONTWAIT);
8828 	if (error != 0) {
8829 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8830 		    "mbuf_copyback uh_sum failed");
8831 		m_drop(*data, DROPTAP_FLAG_DIR_IN,
8832 		    DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8833 		*data = NULL;
8834 	}
8835 	/* update the DHCP must broadcast flag */
8836 	offset += sizeof(struct udphdr);
8837 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8838 	    sizeof(mnr->mnr_ip_dhcp_flags),
8839 	    &mnr->mnr_ip_dhcp_flags,
8840 	    MBUF_DONTWAIT);
8841 	if (error != 0) {
8842 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8843 		    "mbuf_copyback dp_flags failed");
8844 		m_drop(*data, DROPTAP_FLAG_DIR_IN,
8845 		    DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8846 		*data = NULL;
8847 	}
8848 }
8849 
8850 static void
8851 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8852     const char eaddr[ETHER_ADDR_LEN])
8853 {
8854 	uint16_t        cksum;
8855 	errno_t         error;
8856 	mbuf_t          m = *data;
8857 
8858 	if (mnr->mnr_ip6_header_len == 0) {
8859 		return;
8860 	}
8861 	switch (mnr->mnr_ip6_icmp6_type) {
8862 	case ND_ROUTER_ADVERT:
8863 	case ND_ROUTER_SOLICIT:
8864 	case ND_NEIGHBOR_SOLICIT:
8865 	case ND_NEIGHBOR_ADVERT:
8866 		if (mnr->mnr_ip6_lladdr_offset == 0) {
8867 			/* nothing to do */
8868 			return;
8869 		}
8870 		break;
8871 	default:
8872 		return;
8873 	}
8874 
8875 	/*
8876 	 * replace the lladdr
8877 	 */
8878 	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8879 	    ETHER_ADDR_LEN, eaddr,
8880 	    MBUF_DONTWAIT);
8881 	if (error != 0) {
8882 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8883 		    "mbuf_copyback lladdr failed");
8884 		m_drop(m, DROPTAP_FLAG_DIR_IN,
8885 		    DROP_REASON_BRIDGE_MAC_NAT_FAILURE, NULL, 0);
8886 		*data = NULL;
8887 		return;
8888 	}
8889 
8890 	/*
8891 	 * recompute the icmp6 checksum
8892 	 */
8893 
8894 	/* skip past the ethernet header */
8895 	_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
8896 
8897 #define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
8898 	/* set the checksum to zero */
8899 	cksum = 0;
8900 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8901 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8902 	if (error != 0) {
8903 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8904 		    "mbuf_copyback cksum=0 failed");
8905 		m_drop(m, DROPTAP_FLAG_DIR_IN,
8906 		    DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
8907 		*data = NULL;
8908 		return;
8909 	}
8910 	/* compute and set the new checksum */
8911 	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8912 	    mnr->mnr_ip6_icmp6_len);
8913 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8914 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8915 	if (error != 0) {
8916 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8917 		    "mbuf_copyback cksum failed");
8918 		m_drop(m, DROPTAP_FLAG_DIR_IN,
8919 		    DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
8920 		*data = NULL;
8921 		return;
8922 	}
8923 	/* restore the ethernet header */
8924 	_mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
8925 	return;
8926 }
8927 
8928 static void
8929 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8930     const char eaddr[ETHER_ADDR_LEN])
8931 {
8932 	struct ether_header     *eh;
8933 
8934 	/* replace the source ethernet address with the single MAC */
8935 	eh = mtod(*data, struct ether_header *);
8936 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8937 	switch (mnr->mnr_ether_type) {
8938 	case HTONS_ETHERTYPE_ARP:
8939 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
8940 		break;
8941 
8942 	case HTONS_ETHERTYPE_IP:
8943 		bridge_mac_nat_ip_translate(data, mnr);
8944 		break;
8945 
8946 	case HTONS_ETHERTYPE_IPV6:
8947 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8948 		break;
8949 
8950 	default:
8951 		break;
8952 	}
8953 	return;
8954 }
8955 
8956 /*
8957  * bridge packet filtering
8958  */
8959 
8960 /*
8961  * Perform basic checks on header size since
8962  * pfil assumes ip_input has already processed
8963  * it for it.  Cut-and-pasted from ip_input.c.
8964  * Given how simple the IPv6 version is,
8965  * does the IPv4 version really need to be
8966  * this complicated?
8967  *
8968  * XXX Should we update ipstat here, or not?
8969  * XXX Right now we update ipstat but not
8970  * XXX csum_counter.
8971  */
8972 static int
8973 bridge_ip_checkbasic(struct mbuf **mp)
8974 {
8975 	struct mbuf *m = *mp;
8976 	struct ip *ip;
8977 	int len, hlen;
8978 	u_short sum;
8979 
8980 	if (*mp == NULL) {
8981 		return -1;
8982 	}
8983 
8984 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8985 		/* max_linkhdr is already rounded up to nearest 4-byte */
8986 		if ((m = m_copyup(m, sizeof(struct ip),
8987 		    max_linkhdr)) == NULL) {
8988 			/* XXXJRT new stat, please */
8989 			ipstat.ips_toosmall++;
8990 			goto bad;
8991 		}
8992 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8993 		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8994 			ipstat.ips_toosmall++;
8995 			goto bad;
8996 		}
8997 	}
8998 	ip = mtod(m, struct ip *);
8999 	if (ip == NULL) {
9000 		goto bad;
9001 	}
9002 
9003 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
9004 		ipstat.ips_badvers++;
9005 		goto bad;
9006 	}
9007 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9008 	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
9009 		ipstat.ips_badhlen++;
9010 		goto bad;
9011 	}
9012 	if (hlen > m->m_len) {
9013 		if ((m = m_pullup(m, hlen)) == 0) {
9014 			ipstat.ips_badhlen++;
9015 			goto bad;
9016 		}
9017 		ip = mtod(m, struct ip *);
9018 		if (ip == NULL) {
9019 			goto bad;
9020 		}
9021 	}
9022 
9023 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
9024 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
9025 	} else {
9026 		if (hlen == sizeof(struct ip)) {
9027 			sum = in_cksum_hdr(ip);
9028 		} else {
9029 			sum = in_cksum(m, hlen);
9030 		}
9031 	}
9032 	if (sum) {
9033 		ipstat.ips_badsum++;
9034 		goto bad;
9035 	}
9036 
9037 	/* Retrieve the packet length. */
9038 	len = ntohs(ip->ip_len);
9039 
9040 	/*
9041 	 * Check for additional length bogosity
9042 	 */
9043 	if (len < hlen) {
9044 		ipstat.ips_badlen++;
9045 		goto bad;
9046 	}
9047 
9048 	/*
9049 	 * Check that the amount of data in the buffers
9050 	 * is as at least much as the IP header would have us expect.
9051 	 * Drop packet if shorter than we expect.
9052 	 */
9053 	if (m->m_pkthdr.len < len) {
9054 		ipstat.ips_tooshort++;
9055 		goto bad;
9056 	}
9057 
9058 	/* Checks out, proceed */
9059 	*mp = m;
9060 	return 0;
9061 
9062 bad:
9063 	*mp = m;
9064 	return -1;
9065 }
9066 
9067 /*
9068  * Same as above, but for IPv6.
9069  * Cut-and-pasted from ip6_input.c.
9070  * XXX Should we update ip6stat, or not?
9071  */
9072 static int
9073 bridge_ip6_checkbasic(struct mbuf **mp)
9074 {
9075 	struct mbuf *m = *mp;
9076 	struct ip6_hdr *ip6;
9077 
9078 	/*
9079 	 * If the IPv6 header is not aligned, slurp it up into a new
9080 	 * mbuf with space for link headers, in the event we forward
9081 	 * it.  Otherwise, if it is aligned, make sure the entire base
9082 	 * IPv6 header is in the first mbuf of the chain.
9083 	 */
9084 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9085 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9086 		/* max_linkhdr is already rounded up to nearest 4-byte */
9087 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9088 		    max_linkhdr)) == NULL) {
9089 			/* XXXJRT new stat, please */
9090 			ip6stat.ip6s_toosmall++;
9091 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9092 			goto bad;
9093 		}
9094 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9095 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9096 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9097 			ip6stat.ip6s_toosmall++;
9098 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9099 			goto bad;
9100 		}
9101 	}
9102 
9103 	ip6 = mtod(m, struct ip6_hdr *);
9104 
9105 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9106 		ip6stat.ip6s_badvers++;
9107 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9108 		goto bad;
9109 	}
9110 
9111 	/* Checks out, proceed */
9112 	*mp = m;
9113 	return 0;
9114 
9115 bad:
9116 	*mp = m;
9117 	return -1;
9118 }
9119 
/*
 * bridge_pf:
 * Run the packet `*mp` (which starts with its ethernet header) through
 * the PF hooks on behalf of interface `ifp`.
 *
 * The PF routines expect to be called from ip_input, so some of the same
 * processing has to be done here before the hook and undone after it:
 * the ethernet (and SNAP/LLC, if present) header is stripped and saved,
 * the IP header is sanity checked, for IPv4 ip_len/ip_off are swapped
 * to host order around the hook and the header checksum is recomputed,
 * and finally the saved headers are prepended again.
 *
 * Parameters:
 *   mp               in/out packet; set to NULL when the packet is gone
 *   ifp              interface passed to pf_af_hook(); the hook is
 *                    skipped when it is NULL
 *   sc_filter_flags  IFBF_FILT_* flags; nothing is done unless
 *                    IFBF_FILT_MEMBER is set
 *   input            direction flag handed to pf_af_hook()
 *
 * Returns 0 when the packet passed (or filtering is disabled, or the
 * packet is ARP/REVARP), otherwise an error value.
 *
 * NOTE(review): a return value of 0 does not by itself guarantee that
 * `*mp` is still valid: the hook may consume the packet while reporting
 * 0, and the header length check performed after the hook reaches `bad`
 * while `error` is 0. Callers presumably have to test `*mp` as well --
 * confirm against the call sites.
 *
 * XXX : this is heavily inspired by bridge_pfil()
 */
static int
bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
    bool input)
{
	/*
	 * XXX : mpetit : heavily inspired by bridge_pfil()
	 */

	int snap, error, i, hlen;
	struct ether_header *eh1, eh2;
	struct ip *ip;
	struct llc llc1;
	u_int16_t ether_type;

	snap = 0;
	error = -1;     /* Default error if not error == 0 */

	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
		return 0; /* filtering is disabled */
	}
	/*
	 * Make the leading bytes (up to max_protohdr, or the whole packet
	 * if it is shorter) contiguous before looking at the headers.
	 */
	i = min((*mp)->m_pkthdr.len, max_protohdr);
	if ((*mp)->m_len < i) {
		*mp = m_pullup(*mp, i);
		if (*mp == NULL) {
			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
			return -1;
		}
	}

	eh1 = mtod(*mp, struct ether_header *);
	ether_type = ntohs(eh1->ether_type);

	/*
	 * Check for SNAP/LLC.
	 * A type field below ETHERMTU is treated as a length, in which
	 * case the real ethernet type is taken from the SNAP header.
	 */
	if (ether_type < ETHERMTU) {
		struct llc *llc2 = (struct llc *)(eh1 + 1);

		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
		    llc2->llc_dsap == LLC_SNAP_LSAP &&
		    llc2->llc_ssap == LLC_SNAP_LSAP &&
		    llc2->llc_control == LLC_UI) {
			/*
			 * htons() performs the same transformation as
			 * ntohs(); the latter would express the intent
			 * (network to host order) more clearly.
			 */
			ether_type = htons(llc2->llc_un.type_snap.ether_type);
			snap = 1;
		}
	}

	/*
	 * If we're trying to filter bridge traffic, don't look at anything
	 * other than IP and ARP traffic.  If the filter doesn't understand
	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
	 * but of course we don't have an AppleTalk filter to begin with.
	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
	 * ARP traffic.)
	 */
	switch (ether_type) {
	case ETHERTYPE_ARP:
	case ETHERTYPE_REVARP:
		return 0;         /* Automatically pass */

	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		break;
	default:
		/*
		 * Check to see if the user wants to pass non-ip
		 * packets, these will not be checked by pf and
		 * passed unconditionally so the default is to drop.
		 */
		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
			goto bad;
		}
		break;
	}

	/* Strip off the Ethernet header and keep a copy. */
	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
	m_adj(*mp, ETHER_HDR_LEN);

	/* Strip off snap header, if present */
	if (snap) {
		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
		m_adj(*mp, sizeof(struct llc));
	}

	/*
	 * Check the IP header for alignment and errors
	 *
	 * NOTE(review): bridge_ip_checkbasic() and bridge_ip6_checkbasic()
	 * store NULL in *mp when their copy-up/pull-up fails, so `bad` can
	 * be reached with *mp == NULL -- confirm m_drop() tolerates NULL.
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		error = bridge_ip_checkbasic(mp);
		break;
	case ETHERTYPE_IPV6:
		error = bridge_ip6_checkbasic(mp);
		break;
	default:
		error = 0;
		break;
	}
	if (error) {
		goto bad;
	}

	error = 0;

	/*
	 * Run the packet through pf rules
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		/*
		 * before calling the firewall, swap fields the same as
		 * IP does. here we assume the header is contiguous
		 */
		ip = mtod(*mp, struct ip *);

		ip->ip_len = ntohs(ip->ip_len);
		ip->ip_off = ntohs(ip->ip_off);

		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}

		/* Recalculate the ip checksum and restore byte ordering */
		ip = mtod(*mp, struct ip *);
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
		if (hlen < (int)sizeof(struct ip)) {
			/* error is 0 here, so `bad` returns 0 on this path */
			goto bad;
		}
		if (hlen > (*mp)->m_len) {
			if ((*mp = m_pullup(*mp, hlen)) == 0) {
				/* *mp is NULL and error is 0 on this path */
				goto bad;
			}
			ip = mtod(*mp, struct ip *);
			if (ip == NULL) {
				goto bad;
			}
		}
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);
		/* the checksum field must be zero while it is recomputed */
		ip->ip_sum = 0;
		if (hlen == sizeof(struct ip)) {
			ip->ip_sum = in_cksum_hdr(ip);
		} else {
			ip->ip_sum = in_cksum(*mp, hlen);
		}
		break;

	case ETHERTYPE_IPV6:
		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}
		break;
	default:
		error = 0;
		break;
	}

	/* the hook took the packet: report whatever it returned */
	if (*mp == NULL) {
		return error;
	}
	if (error != 0) {
		goto bad;
	}

	/* from here on any failure is reported as -1 */
	error = -1;

	/*
	 * Finally, put everything back the way it was and return
	 */
	if (snap) {
		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
		if (*mp == NULL) {
			return error;
		}
		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
	}

	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
	if (*mp == NULL) {
		return error;
	}
	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);

	return 0;

bad:
	/* drop the packet and report the error recorded so far */
	m_drop(*mp, DROPTAP_FLAG_DIR_IN, DROP_REASON_BRIDGE_PF, NULL, 0);
	*mp = NULL;
	return error;
}
9326 
9327 #if BRIDGESTP
9328 static void
9329 bridge_bstp_input_list(struct bstp_port *bp, struct mbuf *head)
9330 {
9331 	mbuf_t  next_packet = NULL;
9332 
9333 	for (mbuf_t scan = head; scan != NULL; scan = next_packet) {
9334 		next_packet = scan->m_nextpkt;
9335 		scan->m_nextpkt = NULL;
9336 		bstp_input(bp, scan);
9337 	}
9338 }
9339 #endif /* BRIDGESTP */
9340 
9341 static mblist
9342 bridge_filter_arp_list(struct bridge_iflist * bif, mbuf_t m)
9343 {
9344 	mbuf_t          next_packet = NULL;
9345 	mblist          ret;
9346 
9347 	mblist_init(&ret);
9348 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9349 		errno_t                 error;
9350 
9351 		/* take packet out of the list */
9352 		next_packet = scan->m_nextpkt;
9353 		scan->m_nextpkt = NULL;
9354 		/* filter the ARP packet */
9355 		error = bridge_host_filter_arp(bif, &scan);
9356 		if (error != 0 && scan != NULL) {
9357 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9358 				brlog_mbuf_data(scan, 0,
9359 				    sizeof(struct ether_header) +
9360 				    sizeof(struct ip));
9361 			}
9362 			m_drop(scan, DROPTAP_FLAG_DIR_IN,
9363 			    DROP_REASON_BRIDGE_HOST_FILTER, NULL, 0);
9364 			scan = NULL;
9365 		}
9366 		if (scan != NULL) {
9367 			/* add it to the list */
9368 			mblist_append(&ret, scan);
9369 		}
9370 	}
9371 	return ret;
9372 }
9373 
9374 static mbuf_t
9375 bridge_filter_checksum(ifnet_t bridge_ifp, struct bridge_iflist * bif, mbuf_t m,
9376     bool is_ipv4, bool host_filter, bool checksum)
9377 {
9378 	uint32_t                dbgf = 0;
9379 	errno_t                 error;
9380 	ip_packet_info          info;
9381 	u_int                   mac_hlen = sizeof(struct ether_header);
9382 	drop_reason_t           drop_reason = DROP_REASON_BRIDGE_UNSPECIFIED;
9383 
9384 	if (host_filter) {
9385 		dbgf |= BR_DBGF_HOSTFILTER;
9386 	}
9387 	if (checksum) {
9388 		dbgf |= BR_DBGF_CHECKSUM;
9389 	}
9390 	/* get the IP protocol header */
9391 	error = bridge_get_ip_proto(&m, mac_hlen, is_ipv4, &info,
9392 	    &bif->bif_stats.brms_in_ip);
9393 	if (error != 0) {
9394 		BRIDGE_LOG(LOG_NOTICE, dbgf,
9395 		    "%s(%s) bridge_get_ip_proto failed %d",
9396 		    bridge_ifp->if_xname,
9397 		    bif->bif_ifp->if_xname, error);
9398 		drop_reason = DROP_REASON_BRIDGE_NO_PROTO;
9399 		goto drop;
9400 	}
9401 	if (host_filter) {
9402 		bool            drop = true;
9403 
9404 		/* restrict IP protocols */
9405 		switch (info.ip_proto) {
9406 		case IPPROTO_ICMP:
9407 		case IPPROTO_IGMP:
9408 			drop = !is_ipv4;
9409 			break;
9410 		case IPPROTO_TCP:
9411 		case IPPROTO_UDP:
9412 			drop = false;
9413 			break;
9414 		case IPPROTO_ICMPV6:
9415 			drop = is_ipv4;
9416 			break;
9417 		default:
9418 			break;
9419 		}
9420 		if (drop) {
9421 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
9422 			drop_reason = DROP_REASON_BRIDGE_BAD_PROTO;
9423 			goto drop;
9424 		}
9425 		bridge_hostfilter_stats.brhf_ip_ok += 1;
9426 	}
9427 	if (checksum) {
9428 		/* need to compute IP/UDP/TCP/checksums */
9429 		error = bridge_offload_checksum(&m, &info, &bif->bif_stats);
9430 		if (error != 0) {
9431 			BRIDGE_LOG(LOG_NOTICE, dbgf,
9432 			    "%s(%s) bridge_offload_checksum failed %d",
9433 			    bridge_ifp->if_xname,
9434 			    bif->bif_ifp->if_xname, error);
9435 			drop_reason = DROP_REASON_BRIDGE_CHECKSUM;
9436 			goto drop;
9437 		}
9438 	}
9439 	return m;
9440 
9441 drop:
9442 	/* toss the packet */
9443 	if (m != NULL) {
9444 		if (host_filter &&
9445 		    BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9446 			brlog_mbuf_data(m, 0,
9447 			    sizeof(struct ether_header) +
9448 			    sizeof(struct ip));
9449 		}
9450 		m_drop(m, DROPTAP_FLAG_DIR_IN, drop_reason, NULL, 0);
9451 		m = NULL;
9452 	}
9453 	return NULL;
9454 }
9455 
9456 static mblist
9457 bridge_filter_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9458     mbuf_t in_list, ether_type_flag_t etypef, bool host_filter, bool checksum)
9459 {
9460 	bool                    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
9461 	mbuf_t                  next_packet = NULL;
9462 	mblist                  ret;
9463 
9464 	mblist_init(&ret);
9465 	for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9466 		/* take packet out of the list */
9467 		next_packet = scan->m_nextpkt;
9468 		scan->m_nextpkt = NULL;
9469 		scan = bridge_filter_checksum(bridge_ifp, bif,
9470 		    scan, is_ipv4, host_filter, checksum);
9471 		if (scan != NULL) {
9472 			/* add packet to the list */
9473 			mblist_append(&ret, scan);
9474 		}
9475 	}
9476 	return ret;
9477 }
9478 
9479 static mbuf_t
9480 bridge_checksum_offload_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9481     mbuf_t m, bool is_ipv4)
9482 {
9483 	mblist          ret;
9484 	mbuf_t          next_packet;
9485 
9486 	mblist_init(&ret);
9487 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
9488 		uint32_t        csum_flags;
9489 
9490 		/* take it out of the list */
9491 		next_packet = scan->m_nextpkt;
9492 		scan->m_nextpkt = NULL;
9493 
9494 		csum_flags = scan->m_pkthdr.csum_flags;
9495 		if ((csum_flags & checksum_request_flags) != 0) {
9496 			/* compute the checksum now */
9497 			scan = bridge_filter_checksum(bridge_ifp, bif, scan,
9498 			    is_ipv4, false, true);
9499 			if (scan != NULL) {
9500 				/* clear offload now */
9501 				scan->m_pkthdr.csum_flags &= csum_flags;
9502 			}
9503 		}
9504 		if (scan != NULL) {
9505 			mblist_append(&ret, scan);
9506 		}
9507 	}
9508 	return ret.head;
9509 }
9510 
9511 static mbuf_t
9512 copy_broadcast_packet(mbuf_t m)
9513 {
9514 	mbuf_t  mc;
9515 
9516 	/* make a copy of the packet */
9517 	mc = m_dup(m, M_DONTWAIT);
9518 	if (mc != NULL) {
9519 		struct ether_header *eh;
9520 
9521 		/* make copy look like it is broadcast */
9522 		mc->m_flags |= M_BCAST;
9523 		eh = mtod(mc, struct ether_header *);
9524 		bcopy(etherbroadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN);
9525 	}
9526 	return mc;
9527 }
9528 
9529 static mblist
9530 bridge_find_broadcast_ipv4(mbuf_t in_list, mbuf_t * ip_bcast_head)
9531 {
9532 	mblist          ip_bcast;
9533 	mbuf_t          next_packet = NULL;
9534 	mblist          ret;
9535 
9536 	mblist_init(&ret);
9537 	mblist_init(&ip_bcast);
9538 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
9539 		mbuf_t  bcast_pkt = NULL;
9540 		uint8_t *header;
9541 
9542 		/* take packet out of the list */
9543 		next_packet = scan->m_nextpkt;
9544 		scan->m_nextpkt = NULL;
9545 
9546 		header = get_ether_ip_header_ptr(&scan, FALSE);
9547 		if (header != NULL) {
9548 			struct in_addr  dst;
9549 			struct ip       *iphdr;
9550 
9551 			iphdr = (struct ip *)(header + sizeof(struct ether_header));
9552 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
9553 			if (dst.s_addr == INADDR_BROADCAST) {
9554 				bcast_pkt = copy_broadcast_packet(scan);
9555 			}
9556 		}
9557 		if (bcast_pkt != NULL) {
9558 			/* add packet to broadcast list */
9559 			mblist_append(&ip_bcast, bcast_pkt);
9560 		}
9561 		if (scan != NULL) {
9562 			/* add packet back into the list */
9563 			mblist_append(&ret, scan);
9564 		}
9565 	}
9566 	*ip_bcast_head = ip_bcast.head;
9567 	return ret;
9568 }
9569 
9570 static ifnet_t
9571 bridge_find_member(struct bridge_softc * sc, uint8_t * lladdr,
9572     struct bridge_iflist * sbif)
9573 {
9574 	struct bridge_iflist * bif;
9575 
9576 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
9577 		if (bif == sbif) {
9578 			/* skip the input member */
9579 			continue;
9580 		}
9581 		if (_ether_cmp(IF_LLADDR(bif->bif_ifp), lladdr) == 0) {
9582 			return bif->bif_ifp;
9583 		}
9584 	}
9585 	return NULL;
9586 }
9587 
9588 
9589 /*
9590  * Function: bridge_input_list
9591  *
9592  * Purpose:
9593  *   Process a list of input packets through the bridge.
9594  *   The caller ensures that all of the packets in the list
9595  *  `list_head` .. `list_tail` have the same ethernet header.
9596  *
9597  * Returns:
9598  *    Non-NULL head of the chain of packets that were not consumed/freed,
9599  *    *tail_p set to the tail of that chain.
9600  *
9601  *    NULL if all of the packets were consumed.
9602  */
9603 static mblist
9604 bridge_input_list(struct bridge_softc * sc, ifnet_t ifp,
9605     struct ether_header * eh_in_p, mblist list, bool is_promisc)
9606 {
9607 	struct bridge_iflist *  bif;
9608 	ifnet_t                 bridge_ifp;
9609 	bool                    bridge_needs_input;
9610 	bool                    checksum_offload;
9611 	uint8_t *               dhost;
9612 #if BRIDGESTP
9613 	bool                    discarding = false;
9614 #endif /* BRIDGESTP */
9615 	ifnet_t                 dst_if = NULL;
9616 	errno_t                 error;
9617 	ether_type_flag_t       etypef;
9618 	bool                    host_filter;
9619 	bool                    host_filter_drop = false;
9620 	mbuf_ref_t              ip_bcast = NULL;
9621 	bool                    is_bridge_mac = false;
9622 	bool                    is_broadcast;
9623 	bool                    is_ifp_mac;
9624 	ifnet_t                 member_input = NULL;
9625 	uint8_t *               shost;
9626 	bool                    uses_virtio = false;
9627 	uint16_t                vlan;
9628 
9629 	if (ifp->if_bridge == NULL) {
9630 		/* no longer part of bridge */
9631 		goto done;
9632 	}
9633 	bridge_ifp = sc->sc_ifp;
9634 	is_broadcast = IS_BCAST_MCAST(list.head);
9635 	is_ifp_mac = (!is_broadcast && !is_promisc);
9636 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9637 	    "%s from %s count %d head 0x%llx.0x%llx tail 0x%llx.0x%llx",
9638 	    bridge_ifp->if_xname, ifp->if_xname, list.count,
9639 	    (uint64_t)VM_KERNEL_ADDRPERM(list.head),
9640 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.head, void *)),
9641 	    (uint64_t)VM_KERNEL_ADDRPERM(list.tail),
9642 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.tail, void *)));
9643 
9644 	/* assume we'll return all packets */
9645 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
9646 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9647 		    "%s not running passing along",
9648 		    bridge_ifp->if_xname);
9649 		goto done;
9650 	}
9651 
9652 	vlan = VLANTAGOF(m);
9653 
9654 	/* lookup the bridge member */
9655 	BRIDGE_LOCK(sc);
9656 	bif = bridge_lookup_member_if(sc, ifp);
9657 	if (bif == NULL) {
9658 		BRIDGE_UNLOCK(sc);
9659 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9660 		    "%s bridge_lookup_member_if failed",
9661 		    bridge_ifp->if_xname);
9662 		goto done;
9663 	}
9664 
9665 	uses_virtio = bif_uses_virtio(bif);
9666 
9667 	/*
9668 	 * host filter drops packets that:
9669 	 * - are not ARP, IPv4, or IPv6
9670 	 * - have incorrect source MAC address
9671 	 */
9672 	host_filter = (bif->bif_flags & BIFF_HOST_FILTER) != 0;
9673 	etypef = ether_type_flag_get(eh_in_p->ether_type);
9674 	if (host_filter
9675 	    && (etypef & ETHER_TYPE_FLAG_IP_ARP) == 0) {
9676 		/* ether type not one of ARP, IPv4, or IPv6 */
9677 		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
9678 		host_filter_drop = true;
9679 	} else if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
9680 	    bcmp(eh_in_p->ether_shost, bif->bif_hf_hwsrc, ETHER_ADDR_LEN)
9681 	    != 0) {
9682 		/* only allow the single source MAC address */
9683 		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr,
9684 		    __func__, __LINE__);
9685 		host_filter_drop = true;
9686 	}
9687 	if (host_filter_drop) {
9688 		BRIDGE_UNLOCK(sc);
9689 		m_drop_list(list.head, bridge_ifp, DROPTAP_FLAG_DIR_IN,
9690 		    DROP_REASON_BRIDGE_HOST_FILTER, NULL, 0);
9691 		list.head = list.tail = NULL;
9692 		goto done;
9693 	}
9694 
9695 #if BRIDGESTP
9696 	discarding = (bif->bif_ifflags & IFBIF_STP) != 0 &&
9697 	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING;
9698 #endif /* BRIDGESTP */
9699 
9700 	dhost = eh_in_p->ether_dhost;
9701 	shost = eh_in_p->ether_shost;
9702 	/*
9703 	 * Reserved multicast address listed in 802.1D section 7.12.6
9704 	 * must not be forwarded by the bridge.
9705 	 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
9706 	 */
9707 	if (is_broadcast) {
9708 		if (IS_MCAST(list.head)) {
9709 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
9710 			    " multicast: "
9711 			    "%02x:%02x:%02x:%02x:%02x:%02x",
9712 			    dhost[0], dhost[1],
9713 			    dhost[2], dhost[3],
9714 			    dhost[4], dhost[5]);
9715 		}
9716 		if (bcmp(dhost, bstp_etheraddr, (ETHER_ADDR_LEN - 1)) == 0) {
9717 			if (dhost[5] == BSTP_ETHERADDR_RANGE_FIRST) {
9718 				/* multicast for spanning tree */
9719 #if BRIDGESTP
9720 				bridge_bstp_input_list(&bif->bif_stp, list.head);
9721 #else /* BRIDGESTP */
9722 				m_freem_list(list.head);
9723 #endif /* BRIDGESTP */
9724 				list.head = list.tail = NULL;
9725 				BRIDGE_UNLOCK(sc);
9726 				goto done;
9727 			}
9728 			if (dhost[5] <= BSTP_ETHERADDR_RANGE_LAST) {
9729 				/* allow packet to continue up the stack */
9730 				BRIDGE_UNLOCK(sc);
9731 				goto done;
9732 			}
9733 		}
9734 		/* broadcast to all members */
9735 		os_atomic_add(&bridge_ifp->if_imcasts, list.count, relaxed);
9736 	}
9737 
9738 #if BRIDGESTP
9739 	if (discarding) {
9740 		BRIDGE_UNLOCK(sc);
9741 		goto done;
9742 	}
9743 #endif /* BRIDGESTP */
9744 
9745 	/* If the interface is learning, record the address. */
9746 	if ((bif->bif_ifflags & IFBIF_LEARNING) != 0) {
9747 		error = bridge_rtupdate(sc, shost, vlan, bif, 0, IFBAF_DYNAMIC);
9748 		/*
9749 		 * If the interface has addresses limits then deny any source
9750 		 * that is not in the cache.
9751 		 */
9752 		if (error != 0 && bif->bif_addrmax) {
9753 			BRIDGE_UNLOCK(sc);
9754 			goto done;
9755 		}
9756 	}
9757 #if BRIDGESTP
9758 	if ((bif->bif_ifflags & IFBIF_STP) != 0 &&
9759 	    bif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
9760 		BRIDGE_UNLOCK(sc);
9761 		goto done;
9762 	}
9763 #endif /* BRIDGESTP */
9764 
9765 	/*
9766 	 * If the packet is not IP, let the host filter drop ARP packets.
9767 	 * Otherwise, if the host filter is enabled or we need to compute
9768 	 * checksums, do that.
9769 	 * Otherwise, if MAC-NAT is enabled and this is an IPv4 packet,
9770 	 * check for IPv4 broadcast packets. Accumulate those in a separate
9771 	 * list `ip_bcast`.
9772 	 */
9773 	checksum_offload = bif_has_checksum_offload(bif);
9774 	if (!ether_type_flag_is_ip(etypef)) {
9775 		/* host filter process ARP */
9776 		if (host_filter) {
9777 			/* host filter check earlier means this must be ARP */
9778 			VERIFY(etypef == ETHER_TYPE_FLAG_ARP);
9779 			list = bridge_filter_arp_list(bif, list.head);
9780 			if (list.head == NULL) {
9781 				VERIFY(list.tail == NULL);
9782 				BRIDGE_UNLOCK(sc);
9783 				goto done;
9784 			}
9785 		}
9786 	} else if (host_filter || checksum_offload) {
9787 		/* host filter and/or checksum */
9788 		list = bridge_filter_checksum_list(bridge_ifp, bif,
9789 		    list.head, etypef, host_filter, checksum_offload);
9790 		if (list.head == NULL) {
9791 			VERIFY(list.tail == NULL);
9792 			BRIDGE_UNLOCK(sc);
9793 			goto done;
9794 		}
9795 	} else if (is_ifp_mac && bif == sc->sc_mac_nat_bif &&
9796 	    etypef == ETHER_TYPE_FLAG_IPV4) {
9797 		/* look for broadcast IPv4 packet */
9798 		list = bridge_find_broadcast_ipv4(list.head, &ip_bcast);
9799 		if (list.head == NULL && ip_bcast == NULL) {
9800 			/* all packets were consumed */
9801 			BRIDGE_UNLOCK(sc);
9802 			goto done;
9803 		}
9804 	}
9805 
9806 	/*
9807 	 * If the bridge has ULP attached, and the destination MAC
9808 	 * matches the bridge interface, claim the packets for the bridge
9809 	 * interface.
9810 	 */
9811 	bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
9812 	if (bridge_needs_input &&
9813 	    !is_broadcast && _ether_cmp(dhost, IF_LLADDR(bridge_ifp)) == 0) {
9814 		is_bridge_mac = true;
9815 	}
9816 	if (is_ifp_mac) {
9817 		/* unicast to the interface */
9818 		if (sc->sc_mac_nat_bif == bif) {
9819 			mbuf_ref_t  forward = NULL;
9820 
9821 			if (list.head != NULL) {
9822 				/* handle MAC-NAT if enabled */
9823 				list = bridge_mac_nat_input_list(sc, ifp,
9824 				    list.head, &forward);
9825 			}
9826 			if (ip_bcast != NULL) {
9827 				/* forward to all members except this one */
9828 				/* bridge_broadcast_list unlocks */
9829 				bridge_broadcast_list(sc, bif, etypef,
9830 				    ip_bcast, pkt_direction_RX);
9831 			} else {
9832 				BRIDGE_UNLOCK(sc);
9833 			}
9834 			if (forward != NULL) {
9835 				bridge_mac_nat_forward_list(bridge_ifp, etypef,
9836 				    forward);
9837 			}
9838 		} else {
9839 			BRIDGE_UNLOCK(sc);
9840 		}
9841 		/* unicast packets for this interface do not get forwarded */
9842 		goto done;
9843 	}
9844 	if (is_bridge_mac || list.head == NULL) {
9845 		BRIDGE_UNLOCK(sc);
9846 		goto done;
9847 	}
9848 	if (!is_broadcast) {
9849 		/* find where to send the packet */
9850 		dst_if = bridge_rtlookup(sc, dhost, vlan);
9851 		if (ifp == dst_if) {
9852 			/* nothing to forward */
9853 			BRIDGE_UNLOCK(sc);
9854 			goto done;
9855 		}
9856 		if (dst_if == NULL) {
9857 			/* if a member is the dhost, deliver as input */
9858 			member_input = bridge_find_member(sc, dhost, bif);
9859 			if (member_input != NULL) {
9860 				/* grab packets destined to member */
9861 				BRIDGE_UNLOCK(sc);
9862 				goto done;
9863 			}
9864 			/* if a member is shost, there's a loop, drop it */
9865 			if (bridge_find_member(sc, shost, bif) != NULL) {
9866 				BRIDGE_UNLOCK(sc);
9867 				m_drop_list(list.head, bridge_ifp, DROPTAP_FLAG_DIR_IN,
9868 				    DROP_REASON_BRIDGE_LOOP, NULL, 0);
9869 				list.head = list.tail = NULL;
9870 				goto done;
9871 			}
9872 		}
9873 	}
9874 	if (dst_if == NULL) {
9875 		mbuf_t  m;
9876 
9877 		m = copy_packet_list(list.head);
9878 		if (m != NULL) {
9879 			/* bridge_broadcast_list unlocks */
9880 			bridge_broadcast_list(sc, bif, etypef, m,
9881 			    pkt_direction_RX);
9882 		} else {
9883 			BRIDGE_UNLOCK(sc);
9884 		}
9885 	} else {
9886 		/* bridge_forward_list() consumes list and unlocks */
9887 		bridge_forward_list(sc, bif, dst_if, etypef, list.head);
9888 		list.head = list.tail = NULL;
9889 	}
9890 
9891 done:
9892 	if (list.head != NULL) {
9893 		if (member_input != NULL) {
9894 			/* member gets the packets */
9895 			inject_input_packet_list(member_input, list.head, true);
9896 			list.head = list.tail = NULL;
9897 		} else if (is_bridge_mac) {
9898 			/* bridge consumes all the unicast packets */
9899 			bridge_interface_input_list(bridge_ifp, etypef, list,
9900 			    uses_virtio);
9901 			list.head = list.tail = NULL;
9902 		} else {
9903 			adjust_input_packet_list(list.head);
9904 		}
9905 	}
9906 	return list;
9907 }
9908 
9909 static inline void
9910 update_mbuf_flags(struct ifnet * ifp, mbuf_t m, struct ether_header * eh)
9911 {
9912 	/* duplicate some of the work done in ether_demux */
9913 	if ((eh->ether_dhost[0] & 1) == 0) {
9914 		if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp)) != 0) {
9915 			m->m_flags |= M_PROMISC;
9916 		}
9917 	} else {
9918 		/* Check for broadcast */
9919 		if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
9920 			m->m_flags |= M_BCAST;
9921 		} else {
9922 			m->m_flags |= M_MCAST;
9923 		}
9924 	}
9925 	if (m->m_flags & M_HASFCS) {
9926 		/*
9927 		 * If the M_HASFCS is set by the driver we want to make sure
9928 		 * that we strip off the trailing FCS data before handing it
9929 		 * up the stack.
9930 		 */
9931 		m_adj(m, -ETHER_CRC_LEN);
9932 		m->m_flags &= ~M_HASFCS;
9933 	}
9934 	return;
9935 }
9936 
9937 static mbuf_t
9938 bridge_pf_list_out(mbuf_t m, ifnet_t ifp, uint32_t sc_filter_flags)
9939 {
9940 	mbuf_t  next_packet = NULL;
9941 	mblist  ret;
9942 
9943 	mblist_init(&ret);
9944 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9945 		next_packet = scan->m_nextpkt;
9946 
9947 		/* remove packet from list, and pass through PF */
9948 		scan->m_nextpkt = NULL;
9949 		bridge_pf(&scan, ifp, sc_filter_flags, false);
9950 		if (scan != NULL) {
9951 			/* add packet back to the list */
9952 			mblist_append(&ret, scan);
9953 		}
9954 	}
9955 	return ret.head;
9956 }
9957 
9958 static inline bool
9959 bridge_check_frame_header(struct bridge_softc * sc, ifnet_t ifp, mbuf_t m)
9960 {
9961 	bool                    included = false;
9962 	char * __single         header;
9963 	size_t                  header_length = 0;
9964 
9965 	header = m->m_pkthdr.pkt_hdr;
9966 	if (header >= (char *)mbuf_datastart(m) &&
9967 	    header <= mtod(m, char *)) {
9968 		header_length = mtod(m, char *) - header;
9969 		if (header_length >= ETHER_HDR_LEN) {
9970 			included = true;
9971 		}
9972 	}
9973 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9974 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
9975 	    "header length %lu", sc->sc_ifp->if_xname,
9976 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
9977 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
9978 	    (uint64_t)VM_KERNEL_ADDRPERM(header),
9979 	    included ? "inside" : "outside", header_length);
9980 	if (!included) {
9981 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9982 		    "%s: frame_header outside mbuf", ifp->if_xname);
9983 	}
9984 	return included;
9985 }
9986 
9987 
9988 mbuf_t
9989 bridge_early_input(struct ifnet *ifp, mbuf_t in_list, u_int32_t cnt)
9990 {
9991 	struct ether_header eh;
9992 	mblist          list;
9993 	volatile bool   list_is_promisc;
9994 	int             n_lists = 0;
9995 	bool            need_pf;
9996 	mbuf_t          next_packet = NULL;
9997 	mblist          ret;
9998 	struct bridge_softc * __single sc = ifp->if_bridge;
9999 	uint32_t        sc_filter_flags;
10000 
10001 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10002 	    "(%s): count %u", ifp->if_xname, cnt);
10003 
10004 	sc_filter_flags = sc->sc_filter_flags;
10005 	need_pf = (sc_filter_flags & IFBF_FILT_MEMBER) != 0 && PF_IS_ENABLED;
10006 
10007 	/* form sublists with the same ethernet header */
10008 	mblist_init(&list);
10009 	mblist_init(&ret);
10010 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
10011 		struct ether_header *   eh_p;
10012 		volatile bool           is_promisc;
10013 		mblist                  resid;
10014 
10015 		/* take it out of the list */
10016 		next_packet = scan->m_nextpkt;
10017 		scan->m_nextpkt = NULL;
10018 
10019 		/* don't loop the packet */
10020 		if ((scan->m_flags & M_PROTO1) != 0) {
10021 			mblist_append(&ret, scan);
10022 			continue;
10023 		}
10024 		/* Check if this mbuf looks valid */
10025 		MBUF_INPUT_CHECK(scan, ifp);
10026 
10027 		/* if the frame header isn't in the first mbuf, ignore */
10028 		if (!bridge_check_frame_header(sc, ifp, scan)) {
10029 			mblist_append(&ret, scan);
10030 			continue;
10031 		}
10032 		/* set start back to include ether header */
10033 		_mbuf_adjust_pkthdr_and_data(scan, -ETHER_HDR_LEN);
10034 		eh_p = mtod(scan, struct ether_header *);
10035 		update_mbuf_flags(ifp, scan, eh_p);
10036 
10037 		/* pass through PF if required */
10038 		if (need_pf) {
10039 			bridge_pf(&scan, ifp, sc_filter_flags, true);
10040 			if (scan == NULL) {
10041 				continue;
10042 			}
10043 			/* `eh_p` could have changed */
10044 			eh_p = mtod(scan, struct ether_header *);
10045 		}
10046 
10047 		is_promisc = get_and_clear_promisc(scan);
10048 		if (list.head == NULL) {
10049 			/* start a new list */
10050 			mblist_append(&list, scan);
10051 			bcopy(eh_p, &eh, sizeof(eh));
10052 			list_is_promisc = is_promisc;
10053 		} else if (bcmp(eh_p, &eh, sizeof(eh)) != 0) {
10054 			n_lists++;
10055 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10056 			    "(%s): sublist %u pkts %u",
10057 			    ifp->if_xname, n_lists, list.count);
10058 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10059 				brlog_ether_header(&eh);
10060 			}
10061 			resid = bridge_input_list(sc, ifp, &eh, list,
10062 			    list_is_promisc);
10063 			if (resid.head != NULL) {
10064 				/* add to the packets to be returned */
10065 				mblist_append_list(&ret, resid);
10066 			}
10067 			/* start new list */
10068 			mblist_init(&list);
10069 			mblist_append(&list, scan);
10070 			list_is_promisc = is_promisc;
10071 			bcopy(eh_p, &eh, sizeof(eh));
10072 		} else {
10073 			mblist_append(&list, scan);
10074 			VERIFY(is_promisc == list_is_promisc);
10075 		}
10076 		if (next_packet == NULL) {
10077 			/* last list */
10078 			n_lists++;
10079 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10080 			    "(%s): sublist %u pkts %u",
10081 			    ifp->if_xname, n_lists, list.count);
10082 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10083 				brlog_ether_header(&eh);
10084 			}
10085 			resid = bridge_input_list(sc, ifp, &eh, list,
10086 			    list_is_promisc);
10087 			if (resid.head != NULL) {
10088 				/* add to the packets to be returned */
10089 				mblist_append_list(&ret, resid);
10090 			}
10091 		}
10092 	}
10093 	return ret.head;
10094 }
10095 
10096 /*
10097  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
10098  * All rights reserved.
10099  *
10100  * Redistribution and use in source and binary forms, with or without
10101  * modification, are permitted provided that the following conditions
10102  * are met:
10103  *   1. Redistributions of source code must retain the above copyright
10104  *      notice, this list of conditions and the following disclaimer.
10105  *   2. Redistributions in binary form must reproduce the above copyright
10106  *      notice, this list of conditions and the following disclaimer in the
10107  *      documentation and/or other materials provided with the distribution.
10108  *
10109  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
10110  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
10111  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
10112  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
10113  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
10114  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
10115  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
10116  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
10117  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
10118  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
10119  * SUCH DAMAGE.
10120  */
10121 
/*
 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
 *
 * Create a queue of packets/segments which fit the given mss + hdr_len.
 * m0 points to the mbuf chain to be segmented.
 * This function splits the payload (m0->m_pkthdr.len - hdr_len)
 * into segments of length MSS bytes and then copies the first hdr_len bytes
 * from m0 to the top of each segment.
 * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
 * into each segment after the first hdr_len bytes.
 *
 * m0 itself is reused as the first segment; the additional segments are
 * newly allocated packets appended after it.
 *
 * Returns an mblist holding the segments on success. On failure every
 * packet gathered so far, including m0, is freed and a freshly
 * initialized mblist is returned.
 * If the packet already fits in hdr_len + mss, the returned list holds
 * just m0, unmodified.
 */

static mblist
m_seg(struct mbuf *m0, int hdr_len, int mss, char * hdr2_buf __sized_by_or_null(hdr2_len), int hdr2_len)
{
	/* `n` counts segments; it is not read after being updated */
	int off = 0, n, firstlen;
	struct mbuf *mseg;
	int total_len = m0->m_pkthdr.len;
	mblist ret;

	mblist_init(&ret);
	mblist_append(&ret, m0);

	/*
	 * Segmentation useless
	 */
	if (total_len <= hdr_len + mss) {
		n = 1;
		goto done;
	}
	/* normalize: treat an empty or missing optional header as absent */
	if (hdr2_buf == NULL || hdr2_len <= 0) {
		hdr2_buf = NULL;
		hdr2_len = 0;
	}

	/* `off` is the offset in m0 of the payload for the next segment */
	off = hdr_len + mss;
	firstlen = mss; /* first segment stored in the original mbuf */
	/* NOTE(review): presumably resets the byte count to the size the
	 * first segment will have once m0 is trimmed below — confirm
	 * against the mblist helpers */
	ret.bytes = off;
	for (n = 1; off < total_len; off += mss, n++) {
		struct mbuf *m;
		/*
		 * Copy the header from the original packet
		 * and create a new mbuf chain
		 */
		if (MHLEN < hdr_len) {
			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		} else {
			m = m_gethdr(M_NOWAIT, MT_DATA);
		}

		if (m == NULL) {
#ifdef GSO_DEBUG
			D("MGETHDR error\n");
#endif
			goto err;
		}

		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));

		m->m_len = hdr_len;
		/*
		 * if the optional header is present, copy it
		 */
		if (hdr2_buf != NULL) {
			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
		}

		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
		/* the final segment carries whatever payload is left */
		if (off + mss >= total_len) {           /* last segment */
			mss = total_len - off;
		}
		/*
		 * Copy the payload from original packet
		 */
		mseg = m_copym(m0, off, mss, M_NOWAIT);
		if (mseg == NULL) {
			/* `m` is not on the list yet, free it separately */
			m_freem(m);
#ifdef GSO_DEBUG
			D("m_copym error\n");
#endif
			goto err;
		}
		m_cat(m, mseg);

		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		/*
		 * Copy the checksum flags and data (in_cksum() need this)
		 */
		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;

		mblist_append(&ret, m);
	}

	/*
	 * Update first segment.
	 * If the optional header is present, it is necessary
	 * to insert it into the first segment.
	 */
	if (hdr2_buf == NULL) {
		/* negative length: trim everything past the first segment */
		m_adj(m0, hdr_len + firstlen - total_len);
		m0->m_pkthdr.len = hdr_len + firstlen;
	} else {
		/* save the first payload, cut m0 down to the header, then
		 * rebuild it as header + optional header + payload */
		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
		if (mseg == NULL) {
#ifdef GSO_DEBUG
			D("m_copym error\n");
#endif
			goto err;
		}
		m_adj(m0, hdr_len - total_len);
		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
		m_cat(m0, mseg);
		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
	}

done:
	return ret;

err:
	/* free every packet on the list (m0 included), return an empty list */
	if (ret.head != NULL) {
		m_freem_list(ret.head);
		mblist_init(&ret);
	}
	return ret;
}
10253 
/*
 * Wrappers of IPv4 checksum functions
 */

/*
 * Run in_delayed_cksum() on `m` with the first `mac_hlen` bytes hidden:
 * the data pointer and the lengths are advanced past the MAC header for
 * the duration of the call and restored afterwards. CSUM_DELAY_DATA is
 * cleared from the packet's checksum flags once the call returns.
 * `ip` points to the packet's IPv4 header.
 */
static inline void
gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
{
	/* temporarily skip over the MAC header */
	m->m_data += mac_hlen;
	m->m_len -= mac_hlen;
	m->m_pkthdr.len -= mac_hlen;
	/*
	 * NOTE(review): an undefined __FreeBSD_version evaluates to 0 in
	 * #if, which would compile the byte-order swaps below in — confirm
	 * that is what this platform's in_delayed_cksum() expects.
	 */
#if __FreeBSD_version < 1000000
	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
#endif

	in_delayed_cksum(m);

#if __FreeBSD_version < 1000000
	ip->ip_len = htons(ip->ip_len);
#endif
	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	/* put the MAC header back */
	m->m_len += mac_hlen;
	m->m_pkthdr.len += mac_hlen;
	m->m_data -= mac_hlen;
}
10277 
/*
 * Compute the IPv4 header checksum over `ip_hlen` bytes and store it in
 * `ip->ip_sum`, then clear CSUM_IP from the packet's checksum flags.
 * The data pointer is advanced past the MAC header for the in_cksum()
 * call and restored afterwards; unlike gso_ipv4_data_cksum(), the mbuf
 * lengths are left untouched here.
 */
static inline void
gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
{
	/* temporarily skip over the MAC header */
	m->m_data += mac_hlen;

	ip->ip_sum = in_cksum(m, ip_hlen);

	m->m_pkthdr.csum_flags &= ~CSUM_IP;
	/* put the MAC header back */
	m->m_data -= mac_hlen;
}
10288 
/*
 * Structure that contains the state during the TCP segmentation
 */
struct gso_ip_tcp_state {
	/* re-derives hdr/tcp/pay_len from a given packet (IPv4 or IPv6) */
	void    (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* fixes up the IP and TCP headers of a given packet (IPv4 or IPv6) */
	void    (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* number of bytes accessible through `hdr` */
	u_int ip_m0_len;
	/* start of the IP header */
	uint8_t * __counted_by(ip_m0_len) hdr;
	/* TCP header, located `ip_hlen` bytes after `hdr` */
	struct tcphdr *tcp;
	/* length of the MAC header */
	int mac_hlen;
	/* length of the IP header */
	int ip_hlen;
	/* length of the TCP header (th_off << 2) */
	int tcp_hlen;
	/* mac_hlen + ip_hlen + tcp_hlen */
	int hlen;
	/* payload length of the current packet (packet length - hlen) */
	int pay_len;
	/* CSUM_* flags for the checksums that are computed in software */
	int sw_csum;
	/* TCP sequence number in host byte order, advanced by pay_len */
	uint32_t tcp_seq;
	/* IPv4 identification in host byte order, incremented per packet */
	uint16_t ip_id;
	/* NOTE(review): presumably the transmit/receive direction — not set
	 * by the init/update/internal helpers, confirm against the users */
	boolean_t is_tx;
};
10310 
10311 /*
10312  * Update the pointers to TCP and IPv4 headers
10313  */
10314 static inline void
10315 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10316 {
10317 	state->hdr = mtodo(m, state->mac_hlen);
10318 	state->ip_m0_len = m->m_len - state->mac_hlen;
10319 	state->ip_hlen = state->ip_hlen;
10320 	state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10321 	state->pay_len = m->m_pkthdr.len - state->hlen;
10322 }
10323 
10324 /*
10325  * Set properly the TCP and IPv4 headers
10326  */
10327 static inline void
10328 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10329 {
10330 	struct ip *ip;
10331 	/*
10332 	 * Update IP header
10333 	 */
10334 	ip = (struct ip *)state->hdr;
10335 	ip->ip_id = htons((state->ip_id)++);
10336 	ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
10337 	/*
10338 	 * TCP Checksum
10339 	 */
10340 	state->tcp->th_sum = 0;
10341 	state->tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
10342 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
10343 	/*
10344 	 * Checksum HW not supported (TCP)
10345 	 */
10346 	if (state->sw_csum & CSUM_DELAY_DATA) {
10347 		gso_ipv4_data_cksum(m, ip, state->mac_hlen);
10348 	}
10349 
10350 	state->tcp_seq += state->pay_len;
10351 	/*
10352 	 * IP Checksum
10353 	 */
10354 	ip->ip_sum = 0;
10355 	/*
10356 	 * Checksum HW not supported (IP)
10357 	 */
10358 	if (state->sw_csum & CSUM_IP) {
10359 		gso_ipv4_hdr_cksum(m, ip, state->mac_hlen, state->ip_hlen);
10360 	}
10361 }
10362 
10363 
10364 /*
10365  * Updates the pointers to TCP and IPv6 headers
10366  */
10367 static inline void
10368 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10369 {
10370 	state->hdr = mtodo(m, state->mac_hlen);
10371 	state->ip_m0_len = m->m_len - state->mac_hlen;
10372 	state->ip_hlen = state->ip_hlen;
10373 	state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10374 	state->pay_len = m->m_pkthdr.len - state->hlen;
10375 }
10376 
10377 /*
10378  * Sets properly the TCP and IPv6 headers
10379  */
10380 static inline void
10381 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10382 {
10383 	struct ip6_hdr *ip6;
10384 
10385 	ip6 = (struct ip6_hdr *)state->hdr;
10386 	ip6->ip6_plen = htons(m->m_pkthdr.len - state->mac_hlen - state->ip_hlen);
10387 	/*
10388 	 * TCP Checksum
10389 	 */
10390 	state->tcp->th_sum = 0;
10391 	state->tcp->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst,
10392 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
10393 	/*
10394 	 * Checksum HW not supported (TCP)
10395 	 */
10396 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
10397 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
10398 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
10399 	}
10400 	state->tcp_seq += state->pay_len;
10401 }
10402 
10403 /*
10404  * Init the state during the TCP segmentation
10405  */
10406 static void
10407 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
10408     bool is_ipv4, int mac_hlen, int ip_hlen,
10409     uint8_t *__counted_by(ip_m0_len) ip_hdr, u_int ip_m0_len,
10410     struct tcphdr * tcp_hdr)
10411 {
10412 #pragma unused(ifp)
10413 
10414 	state->hdr = ip_hdr;
10415 	state->ip_m0_len = ip_m0_len;
10416 	state->ip_hlen = ip_hlen;
10417 	state->tcp = tcp_hdr;
10418 	if (is_ipv4) {
10419 		state->ip_id = ntohs(((struct ip *)state->hdr)->ip_id);
10420 		state->update = gso_ipv4_tcp_update;
10421 		state->internal = gso_ipv4_tcp_internal;
10422 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
10423 	} else {
10424 		state->update = gso_ipv6_tcp_update;
10425 		state->internal = gso_ipv6_tcp_internal;
10426 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
10427 	}
10428 	state->mac_hlen = mac_hlen;
10429 	state->tcp_hlen = state->tcp->th_off << 2;
10430 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
10431 	state->tcp_seq = ntohl(state->tcp->th_seq);
10432 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
10433 	return;
10434 }
10435 
10436 /*
10437  * GSO on TCP/IP (v4 or v6)
10438  *
10439  * Segment the given mbuf and return the list of packets.
10440  *
10441  */
10442 static mblist
10443 gso_ip_tcp(ifnet_t ifp, mbuf_t m0, struct gso_ip_tcp_state *state, bool is_tx)
10444 {
10445 	struct mbuf *m;
10446 	int orig_mss;
10447 	int mss = 0;
10448 #ifdef GSO_STATS
10449 	int total_len = m0->m_pkthdr.len;
10450 #endif /* GSO_STATS */
10451 	mblist  seg;
10452 	bool tso_with_gso = false;
10453 
10454 	orig_mss = mss = _mbuf_get_tso_mss(m0);
10455 	if (mss == 0 && !is_tx) {
10456 		uint8_t seg_cnt = m0->m_pkthdr.rx_seg_cnt;
10457 
10458 		if (seg_cnt != 0) {
10459 			uint32_t        hdr_len;
10460 			uint32_t        len;
10461 
10462 			/* approximate the MSS using LRO seg cnt */
10463 			hdr_len = state->ip_hlen + state->tcp_hlen;
10464 			len = mbuf_pkthdr_len(m0) - hdr_len - ETHER_HDR_LEN;
10465 			mss = len / seg_cnt;
10466 			m0->m_pkthdr.rx_seg_cnt = 0;
10467 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10468 			    "%s: mss %d = len %d / seg cnt %d",
10469 			    ifp->if_xname, mss, len, seg_cnt);
10470 		}
10471 	}
10472 	if (mss == 0) {
10473 		/* hack: we don't have the actual MSS */
10474 		u_int reduce_mss;
10475 
10476 		reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
10477 		    : if_bridge_tso_reduce_mss_forwarding;
10478 		mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen -
10479 		    reduce_mss;
10480 		assert(mss > 0);
10481 	} else if (is_tx) {
10482 		bool    is_ipv4;
10483 		bool    do_tso = true;
10484 
10485 		if (TSO_IPV4_OK(ifp, m0)) {
10486 			is_ipv4 = true;
10487 		} else if (TSO_IPV6_OK(ifp, m0)) {
10488 			is_ipv4 = false;
10489 		} else {
10490 			do_tso = false;
10491 		}
10492 		if (do_tso) { /* TSO with GSO */
10493 			uint32_t        if_tso_max;
10494 
10495 			if_tso_max = get_if_tso_mtu(ifp, is_ipv4);
10496 			mss = if_tso_max - state->ip_hlen - state->tcp_hlen
10497 			    - ETHER_HDR_LEN;
10498 			tso_with_gso = true;
10499 		}
10500 	}
10501 	if (!tso_with_gso) {
10502 		/* clear TSO flags */
10503 		m0->m_pkthdr.csum_flags &= ~_TSO_CSUM;
10504 	}
10505 	seg = m_seg(m0, state->hlen, mss, 0, 0);
10506 	if (seg.head == NULL || seg.head->m_nextpkt == NULL) {
10507 		return seg;
10508 	}
10509 	if (tso_with_gso) {
10510 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10511 		    "%s TX gso size %d mss %d nsegs %d",
10512 		    ifp->if_xname,
10513 		    mss, orig_mss, seg.count);
10514 	} else {
10515 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10516 		    "%s %s mss %d nsegs %d",
10517 		    ifp->if_xname,
10518 		    is_tx ? "TX" : "RX",
10519 		    mss, seg.count);
10520 	}
10521 #ifdef GSO_STATS
10522 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
10523 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
10524 	GSOSTAT_ADD(tcp.gsos_osegments, seg.count);
10525 #endif /* GSO_STATS */
10526 
10527 	/* first pkt */
10528 	VERIFY(seg.head == m0);
10529 	m = m0;
10530 
10531 	state->update(state, m);
10532 
10533 	do {
10534 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
10535 
10536 		state->internal(state, m);
10537 		m = m->m_nextpkt;
10538 		state->update(state, m);
10539 		state->tcp->th_flags &= ~TH_CWR;
10540 		state->tcp->th_seq = htonl(state->tcp_seq);
10541 	} while (m->m_nextpkt);
10542 
10543 	/* last pkt */
10544 	state->internal(state, m);
10545 
10546 #ifdef GSO_STATS
10547 	if (!error) {
10548 		GSOSTAT_INC(tcp.gsos_segmented);
10549 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
10550 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
10551 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
10552 	}
10553 #endif /* GSO_STATS */
10554 	return seg;
10555 }
10556 
10557 /*
10558  * GSO for TCP/IPv[46]
10559  */
10560 static mblist
10561 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
10562     u_int mac_hlen, bool is_ipv4, bool is_tx)
10563 {
10564 	uint32_t csum_flags;
10565 	struct gso_ip_tcp_state state;
10566 	struct tcphdr *tcp;
10567 
10568 	assert(info_p->ip_proto_hdr != NULL);
10569 	tcp = (struct tcphdr *)(void *)info_p->ip_proto_hdr;
10570 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
10571 	    info_p->ip_hlen + info_p->ip_opt_len,
10572 	    info_p->ip_hdr, info_p->ip_m0_len, tcp);
10573 	csum_flags = is_ipv4 ? CSUM_DELAY_DATA : CSUM_DELAY_IPV6_DATA; /* XXX */
10574 	m->m_pkthdr.csum_flags |= csum_flags;
10575 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
10576 	return gso_ip_tcp(ifp, m, &state, is_tx);
10577 }
10578 
10579 static mblist
10580 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx)
10581 {
10582 	int error;
10583 	ip_packet_info info;
10584 	struct bripstats stats; /* XXX ignored */
10585 	mblist ret;
10586 
10587 	error = bridge_get_tcp_header(&m, mac_hlen, is_ipv4, &info, &stats);
10588 	if (error != 0) {
10589 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10590 		    "%s bridge_get_tcp_header failed %d (%s)",
10591 		    ifp->if_xname, error,
10592 		    is_tx ? "TX" : "RX");
10593 		if (m != NULL) {
10594 			m_drop(m, DROPTAP_FLAG_DIR_IN,
10595 			    DROP_REASON_BRIDGE_CHECKSUM, NULL, 0);
10596 			m = NULL;
10597 		}
10598 		goto no_segment;
10599 	}
10600 	if (info.ip_proto_hdr == NULL) {
10601 		/* not actually a TCP packet, no segmentation */
10602 		goto no_segment;
10603 	}
10604 	if (!is_tx && ip_packet_info_dst_is_our_ip(&info, ifp->if_index)) {
10605 		goto no_segment;
10606 	}
10607 	return gso_tcp_with_info(ifp, m, &info, mac_hlen, is_ipv4, is_tx);
10608 
10609 no_segment:
10610 	mblist_init(&ret);
10611 	if (m != NULL) {
10612 		mblist_append(&ret, m);
10613 	}
10614 	return ret;
10615 }
10616