xref: /xnu-11417.121.6/bsd/net/if_bridge.c (revision a1e26a70f38d1d7daa7b49b258e2f8538ad81650)
1 /*
2  * Copyright (c) 2004-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
66  * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
101  *	  consider heterogeneous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/zalloc.h>
131 
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 
142 #include <netinet/in.h> /* for struct arpcom */
143 #include <netinet/tcp.h> /* for struct tcphdr */
144 #include <netinet/in_systm.h>
145 #include <netinet/in_var.h>
146 #define _IP_VHL
147 #include <netinet/ip.h>
148 #include <netinet/ip_var.h>
149 #include <netinet/ip6.h>
150 #include <netinet6/ip6_var.h>
151 #include <netinet/if_ether.h> /* for struct arpcom */
152 #include <net/bridgestp.h>
153 #include <net/if_bridgevar.h>
154 #include <net/if_llc.h>
155 #if NVLAN > 0
156 #include <net/if_vlan_var.h>
157 #endif /* NVLAN > 0 */
158 
159 #include <net/if_ether.h>
160 #include <net/dlil.h>
161 #include <net/kpi_interfacefilter.h>
162 #include <net/pfvar.h>
163 
164 #include <net/route.h>
165 #include <dev/random/randomdev.h>
166 
167 #include <netinet/bootp.h>
168 #include <netinet/dhcp.h>
169 
170 #if SKYWALK
171 #include <skywalk/nexus/netif/nx_netif.h>
172 #endif /* SKYWALK */
173 
174 #include <net/sockaddr_utils.h>
175 #include <net/mblist.h>
176 
177 #include <os/log.h>
178 
/* Union of the IPv4 and IPv6 CSUM_TSO_* bits. */
#define _TSO_CSUM       (CSUM_TSO_IPV4 | CSUM_TSO_IPV6)

/* File-local IPv4 address initialized to INADDR_ANY. */
static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };

/*
 * mbuf flag predicates
 * - __M_FLAGS_ARE_SET() is true when at least one bit of 'flags' is set
 *   in (m)->m_flags
 * - the IS_*() wrappers apply it to M_BCAST and/or M_MCAST
 */
#define __M_FLAGS_ARE_SET(m, flags)     (((m)->m_flags & (flags)) != 0)
#define IS_BCAST(m)                     __M_FLAGS_ARE_SET(m, M_BCAST)
#define IS_MCAST(m)                     __M_FLAGS_ARE_SET(m, M_MCAST)
#define IS_BCAST_MCAST(m)               __M_FLAGS_ARE_SET(m, M_BCAST | M_MCAST)

/* Protocol constants wrapped in htons(). */
#define HTONS_ETHERTYPE_ARP             htons(ETHERTYPE_ARP)
#define HTONS_ETHERTYPE_IP              htons(ETHERTYPE_IP)
#define HTONS_ETHERTYPE_IPV6            htons(ETHERTYPE_IPV6)
#define HTONS_ARPHRD_ETHER              htons(ARPHRD_ETHER)
#define HTONS_ARPOP_REQUEST             htons(ARPOP_REQUEST)
#define HTONS_ARPOP_REPLY               htons(ARPOP_REPLY)
#define HTONS_IPPORT_BOOTPC             htons(IPPORT_BOOTPC)
#define HTONS_IPPORT_BOOTPS             htons(IPPORT_BOOTPS)
#define HTONS_DHCP_FLAGS_BROADCAST      htons(DHCP_FLAGS_BROADCAST)
198 
199 /*
200  * if_bridge_debug, BR_DBGF_*
201  * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
202  *   to enable additional logs for the corresponding bridge function
203  * - "sysctl net.link.bridge.debug" controls the value of
204  *   'if_bridge_debug'
205  */
/* Bitmask of the BR_DBGF_* flags below; no flag is set initially. */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       (1 << 0)        /* 0x0001 */
#define BR_DBGF_INPUT           (1 << 1)        /* 0x0002 */
#define BR_DBGF_OUTPUT          (1 << 2)        /* 0x0004 */
#define BR_DBGF_RT_TABLE        (1 << 3)        /* 0x0008 */
#define BR_DBGF_DELAYED_CALL    (1 << 4)        /* 0x0010 */
#define BR_DBGF_IOCTL           (1 << 5)        /* 0x0020 */
#define BR_DBGF_MBUF            (1 << 6)        /* 0x0040 */
#define BR_DBGF_MCAST           (1 << 7)        /* 0x0080 */
#define BR_DBGF_HOSTFILTER      (1 << 8)        /* 0x0100 */
#define BR_DBGF_CHECKSUM        (1 << 9)        /* 0x0200 */
#define BR_DBGF_MAC_NAT         (1 << 10)       /* 0x0400 */
#define BR_DBGF_INPUT_LIST      (1 << 11)       /* 0x0800 */
219 
220 /*
221  * if_bridge_log_level
222  * - 'if_bridge_log_level' ensures that by default important logs are
223  *   logged regardless of if_bridge_debug by comparing the log level
224  *   in BRIDGE_LOG to if_bridge_log_level
225  * - "sysctl net.link.bridge.log_level" controls the value of
226  *   'if_bridge_log_level'
227  * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
228  *   logs must use LOG_NOTICE to ensure they appear by default
229  */
/* Log threshold compared against the level passed to BRIDGE_LOG. */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * True when at least one bit of __flag is set in if_bridge_debug.
 * The argument is parenthesized so that a compound mask such as
 * (A | B) is tested as a whole instead of being split by operator
 * precedence.
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
233 
/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - emit the log via os_log() when __level <= if_bridge_log_level or
 *   when BRIDGE_DBGF_ENABLED(__dbgf) is true
 * - BRIDGE_LOG prefixes the message with the calling function's name
 *   (__func__); BRIDGE_LOG_SIMPLE does not
 * - __string must be a string literal (it is pasted onto "%s: ")
 * - __level and __dbgf are parenthesized where used so that compound
 *   expressions (e.g. "a ? b : c" or "X | Y") are evaluated as a unit
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)                      \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                    __func__, ## __VA_ARGS__);                  \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                       \
	} while (0)
255 
/* Direct acquire/release of the bridge mutex (sc_mtx). */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)

/* Ownership assertions on sc_mtx. */
#define BRIDGE_LOCK_ASSERT_HELD(_sc)                                    \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)                                 \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
262 
/*
 * BRIDGE_LOCK_DEBUG selects the implementation of the locking macros:
 * - non-zero: each macro calls the matching bridge_*() function
 * - zero: each macro expands inline to the code below
 */
#define BRIDGE_LOCK_DEBUG      1
#if BRIDGE_LOCK_DEBUG

/* Number of slots in the lock_lr[]/unlock_lr[] arrays of bridge_softc. */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
/* _err is parenthesized to match the inline variant below */
#define BRIDGE_LOCK2REF(_sc, _err)      (_err) = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * With the lock held: set _err to EBUSY when sc_iflist_xcnt is non-zero,
 * otherwise take a reference on sc_iflist_ref and set _err to 0.
 * The lock is released in both cases.
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0) {                                \
	        (_err) = EBUSY;                                         \
	} else {                                                        \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
/*
 * Drop a reference on sc_iflist_ref; when that was the last reference
 * and sc_iflist_xcnt is non-zero, wake up sleepers on sc_cv after
 * releasing the lock.
 */
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else {                                                        \
	        _BRIDGE_UNLOCK(_sc);                                    \
	}                                                               \
} while (0)
/*
 * With the lock held: bump sc_iflist_xcnt, then sleep on sc_cv until
 * sc_iflist_ref drops to zero.
 */
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0) {                              \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
	}                                                               \
} while (0)
/* With the lock held: undo the sc_iflist_xcnt bump of BRIDGE_XLOCK. */
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
311 
/*
 * BRIDGE_BPF_TAP_IN, BRIDGE_BPF_TAP_OUT
 * - hand mbuf 'm' to bpf_tap_in()/bpf_tap_out() with DLT_EN10MB, but only
 *   when the interface's if_bpf pointer is non-NULL
 * - 'ifp' and 'm' are parenthesized so that any pointer expression
 *   (e.g. "&x" or "p + 1") can be passed; note 'ifp' is evaluated twice
 */
#define BRIDGE_BPF_TAP_IN(ifp, m)                                       \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_in((ifp), DLT_EN10MB, (m), NULL, 0);    \
	        }                                                       \
	} while (0)

#define BRIDGE_BPF_TAP_OUT(ifp, m)                                      \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_out((ifp), DLT_EN10MB, (m), NULL, 0);   \
	        }                                                       \
	} while (0)
325 
326 
/*
 * Initial size of the route hash table.  Must be a power of two.
 * The value may be overridden at build time, so the requirement is
 * checked at compile time.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

_Static_assert((BRIDGE_RTHASH_SIZE) > 0 &&
    ((BRIDGE_RTHASH_SIZE) & ((BRIDGE_RTHASH_SIZE) - 1)) == 0,
    "BRIDGE_RTHASH_SIZE must be a power of two");

/*
 * Maximum size of the routing hash table
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/*
 * Bit mask derived from the current table size (sc_rthash_size - 1);
 * only meaningful while the size is a power of two.
 */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)
340 
/* Maximum number of addresses to cache. */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif /* BRIDGE_RTABLE_MAX */

/* Timeout, in seconds, for dynamically learned entries (same as ARP). */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)
#endif /* BRIDGE_RTABLE_TIMEOUT */

/* Number of seconds between walks of the route list. */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif /* BRIDGE_RTABLE_PRUNE_PERIOD */

/*
 * Number of MAC NAT entries
 * - sized for 16 clients (including the MAC NAT interface), each with
 *   4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/* Capabilities that may be masked on the member interface. */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)

/* Capabilities that are disabled on the member interface. */
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
379 
380 /*
381  * Bridge interface list entry.
382  */
383 struct bridge_iflist {
384 	TAILQ_ENTRY(bridge_iflist) bif_next;
385 	struct ifnet            *bif_ifp;       /* member if */
386 	struct bstp_port        bif_stp;        /* STP state */
387 	uint32_t                bif_ifflags;    /* member if flags */
388 	int                     bif_savedcaps;  /* saved capabilities */
389 	uint32_t                bif_addrmax;    /* max # of addresses */
390 	uint32_t                bif_addrcnt;    /* cur. # of addresses */
391 	uint32_t                bif_addrexceeded; /* # of address violations */
392 
393 	interface_filter_t      bif_iff_ref;
394 	struct bridge_softc     *bif_sc;
395 	uint32_t                bif_flags;
396 
397 	/* host filter */
398 	struct in_addr          bif_hf_ipsrc;
399 	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN];
400 
401 	struct ifbrmstats       bif_stats;
402 };
403 
404 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)405 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
406 {
407 	return (bif->bif_ifflags & flags) != 0;
408 }
409 
410 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)411 bif_has_checksum_offload(struct bridge_iflist * bif)
412 {
413 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
414 }
415 
416 static inline bool
bif_has_mac_nat(struct bridge_iflist * bif)417 bif_has_mac_nat(struct bridge_iflist * bif)
418 {
419 	return bif_ifflags_are_set(bif, IFBIF_MAC_NAT);
420 }
421 
422 static inline bool
bif_uses_virtio(struct bridge_iflist * bif)423 bif_uses_virtio(struct bridge_iflist * bif)
424 {
425 	return bif_ifflags_are_set(bif, IFBIF_USES_VIRTIO);
426 }
427 
/*
 * Fake errors to make the code clearer.
 * - every name below is an alias of EJUSTRETURN; the names only record
 *   why a packet was rejected
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN
436 
/* BIFF_* flag bits (one bit each, written with a uniform width). */
#define BIFF_PROMISC            0x0001  /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x0002  /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x0004  /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x0008  /* interface media active */
#define BIFF_HOST_FILTER        0x0010  /* host filter enabled */
#define BIFF_HF_HWSRC           0x0020  /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x0040  /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x0080  /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x0100  /* added to the member list */
#define BIFF_WIFI_INFRA         0x0200  /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x0400  /* allmulti set */
#define BIFF_LRO_DISABLED       0x0800  /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000 /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED    0x2000 /* we removed the netagent */
#endif /* SKYWALK */
453 
/*
 * mac_nat_entry
 * - maps an IP address (IPv4 or IPv6) to a MAC address on a specific
 *   bridge interface member
 */
struct mac_nat_entry {
	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
	struct bridge_iflist    *mne_bif;       /* originating interface */
	unsigned long           mne_expire;     /* expiration time */
	union {
		struct in_addr  mneu_ip;        /* originating IPv4 address */
		struct in6_addr mneu_ip6;       /* originating IPv6 address */
	} mne_u;
	uint8_t                 mne_mac[ETHER_ADDR_LEN]; /* MAC address */
	uint8_t                 mne_flags;      /* MNE_FLAGS_* */
	uint8_t                 mne_reserved;
};

/* shorthand for the members of mne_u */
#define mne_ip                  mne_u.mneu_ip
#define mne_ip6                 mne_u.mneu_ip6

#define MNE_FLAGS_IPV6          0x01    /* IPv6 address */

/* list head type for mac_nat_entry lists */
LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
477 
/*
 * mac_nat_record
 * - filled in by bridge_mac_nat_output() to describe the translation
 *   that bridge_mac_nat_translate needs to perform
 * - carries enough information for the translation to be done later,
 *   when the destination interface is the MAC-NAT interface
 * - mnr_ether_type is stored next to a union with one arm each for
 *   ARP, IP, and IPv6
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		/* ARP */
		uint16_t        mnru_arp_offset;
		/* IP */
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t  mnruip_header_len;
		} mnru_ip;
		/* IPv6 */
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t  mnruip6_icmp6_type;
			uint8_t  mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand for the ARP arm of mnr_u */
#define mnr_arp_offset          mnr_u.mnru_arp_offset

/* shorthand for the IP arm of mnr_u */
#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

/* shorthand for the IPv6 arm of mnr_u */
#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
513 
/*
 * Bridge route node.
 * - linked twice: once through brt_hash and once through brt_list
 */
struct bridge_rtnode {
	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
	struct bridge_iflist    *brt_dst;       /* destination if */
	unsigned long           brt_expire;     /* expiration time */
	uint8_t                 brt_flags;      /* address flags */
	uint8_t                 brt_addr[ETHER_ADDR_LEN]; /* MAC address */
	uint16_t                brt_vlan;       /* vlan id */
};

/* the ifnet of the destination member (dereferences brt_dst) */
#define brt_ifp                 brt_dst->bif_ifp
528 
529 /*
530  * Bridge delayed function call context
531  */
532 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
533 
534 struct bridge_delayed_call {
535 	struct bridge_softc     *bdc_sc;
536 	bridge_delayed_func_t   bdc_func; /* Function to call */
537 	struct timespec         bdc_ts; /* Time to call */
538 	u_int32_t               bdc_flags;
539 	thread_call_t           bdc_thread_call;
540 };
541 
542 #define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
543 #define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
544 
545 /*
546  * Software state for each bridge.
547  */
548 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
549 
550 struct bridge_softc {
551 	struct ifnet            *sc_ifp;        /* make this an interface */
552 	uint32_t                sc_flags;
553 	LIST_ENTRY(bridge_softc) sc_list;
554 	decl_lck_mtx_data(, sc_mtx);
555 	struct _bridge_rtnode_list * __counted_by(sc_rthash_size) sc_rthash;  /* our forwarding table */
556 	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
557 	uint32_t                sc_rthash_key;  /* key for hash */
558 	uint32_t                sc_rthash_size; /* size of the hash table */
559 	struct bridge_delayed_call sc_aging_timer;
560 	struct bridge_delayed_call sc_resize_call;
561 	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
562 	struct bstp_state       sc_stp;         /* STP state */
563 	void                    *sc_cv;
564 	uint32_t                sc_brtmax;      /* max # of addresses */
565 	uint32_t                sc_brtcnt;      /* cur. # of addresses */
566 	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
567 	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
568 	uint32_t                sc_iflist_xcnt; /* refcount for sc_iflist */
569 	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
570 	uint32_t                sc_brtexceeded; /* # of cache drops */
571 	uint32_t                sc_filter_flags; /* ipf and flags */
572 	struct ifnet            *sc_ifaddr;     /* member mac copied from */
573 	u_char                  sc_defaddr[6];  /* Default MAC address */
574 	char                    sc_if_xname[IFNAMSIZ];
575 
576 	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
577 	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
578 	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
579 	uint32_t                sc_mne_max;      /* max # of entries */
580 	uint32_t                sc_mne_count;    /* cur. # of entries */
581 	uint32_t                sc_mne_allocation_failures;
582 #if BRIDGE_LOCK_DEBUG
583 	/*
584 	 * Locking and unlocking calling history
585 	 */
586 	void                    *lock_lr[BR_LCKDBG_MAX];
587 	int                     next_lock_lr;
588 	void                    *unlock_lr[BR_LCKDBG_MAX];
589 	int                     next_unlock_lr;
590 #endif /* BRIDGE_LOCK_DEBUG */
591 };
592 
/* SCF_* flag bits (one bit each). */
#define SCF_DETACHING           0x01
#define SCF_RESIZING            0x02
#define SCF_MEDIA_ACTIVE        0x04
#define SCF_ADDRESS_ASSIGNED    0x08
597 
/*
 * Checksum operation selector; the enumerators are numbered
 * consecutively from 0.
 */
typedef enum {
	CHECKSUM_OPERATION_NONE,                /* 0 */
	CHECKSUM_OPERATION_CLEAR_OFFLOAD,       /* 1 */
	CHECKSUM_OPERATION_FINALIZE,            /* 2 */
	CHECKSUM_OPERATION_COMPUTE,             /* 3 */
} ChecksumOperation;
604 
605 typedef struct {
606 	u_int           ip_hlen;        /* IP header length */
607 	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
608 	u_int           ip_m0_len;      /* bytes available at ip_hdr (without jumping mbufs) */
609 	u_int           ip_opt_len;     /* IPv6 options headers length */
610 	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
611 	bool            ip_is_ipv4;
612 	bool            ip_is_fragmented;
613 	uint8_t         *__sized_by(ip_m0_len) ip_hdr;   /* pointer to IP header */
614 	uint8_t         *__indexable ip_proto_hdr;   /* ptr to protocol header (TCP) */
615 } ip_packet_info, *ip_packet_info_t;
616 
617 struct bridge_hostfilter_stats bridge_hostfilter_stats;
618 
619 typedef uint8_t ether_type_flag_t;
620 
621 typedef enum {
622 	pkt_direction_RX,
623 	pkt_direction_TX
624 } pkt_direction_t;
625 
626 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
627 #if BRIDGE_LOCK_DEBUG
628 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
629 #else
630 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
631 #endif
632 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
633 
634 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
635 
636 static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
637 static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
638 
639 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
640 static int      bridge_clone_destroy(struct ifnet *);
641 
642 static errno_t  bridge_ioctl(struct ifnet *, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)));
643 #if HAS_IF_CAP
644 static void     bridge_mutecaps(struct bridge_softc *);
645 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
646     int);
647 #endif
648 static errno_t bridge_set_tso(struct bridge_softc *);
649 static void     bridge_proto_attach_changed(struct ifnet *);
650 static int      bridge_init(struct ifnet *);
651 static void     bridge_ifstop(struct ifnet *, int);
652 static int      bridge_output(struct ifnet *, struct mbuf *);
653 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
654 static void     bridge_start(struct ifnet *);
655 static mblist   bridge_input_list(struct bridge_softc *, ifnet_t,
656     struct ether_header *, mblist, bool);
657 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
658     mbuf_t *, char **);
659 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
660     mbuf_t *);
661 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
662     mbuf_t *m);
663 static int      bridge_enqueue(ifnet_t, ifnet_t, ifnet_t,
664     ether_type_flag_t, mbuf_t, ChecksumOperation, pkt_direction_t);
665 static mbuf_t   bridge_checksum_offload_list(ifnet_t, struct bridge_iflist *,
666     mbuf_t, bool);
667 static mbuf_t   bridge_filter_checksum(ifnet_t, struct bridge_iflist * bif,
668     mbuf_t m, bool, bool, bool);
669 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
670 
671 static void     bridge_aging_timer(struct bridge_softc *sc);
672 
673 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
674     ether_type_flag_t, mbuf_t);
675 static void     bridge_broadcast_list(struct bridge_softc *,
676     struct bridge_iflist *, ether_type_flag_t, mbuf_t, pkt_direction_t);
677 
678 static void     bridge_span(struct bridge_softc *, ether_type_flag_t, struct mbuf *);
679 
680 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
681     uint16_t, struct bridge_iflist *, int, uint8_t);
682 static struct bridge_iflist * bridge_rtlookup_bif(struct bridge_softc *,
683     const uint8_t[ETHER_ADDR_LEN], uint16_t);
684 static void     bridge_rttrim(struct bridge_softc *);
685 static void     bridge_rtage(struct bridge_softc *);
686 static void     bridge_rtflush(struct bridge_softc *, int);
687 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
688     uint16_t);
689 
690 static int      bridge_rtable_init(struct bridge_softc *);
691 static void     bridge_rtable_fini(struct bridge_softc *);
692 
693 static void     bridge_rthash_resize(struct bridge_softc *);
694 
695 static int      bridge_rtnode_addr_cmp(const uint8_t[ETHER_ADDR_LEN], const uint8_t[ETHER_ADDR_LEN]);
696 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
697     const uint8_t[ETHER_ADDR_LEN], uint16_t);
698 static int      bridge_rtnode_hash(struct bridge_softc *,
699     struct bridge_rtnode *);
700 static int      bridge_rtnode_insert(struct bridge_softc *,
701     struct bridge_rtnode *);
702 static void     bridge_rtnode_destroy(struct bridge_softc *,
703     struct bridge_rtnode *);
704 #if BRIDGESTP
705 static void     bridge_rtable_expire(struct ifnet *, int);
706 static void     bridge_state_change(struct ifnet *, int);
707 #endif /* BRIDGESTP */
708 
709 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
710     char * __sized_by(IFNAMSIZ) name);
711 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
712     struct ifnet *ifp);
713 static void     bridge_delete_member(struct bridge_softc *,
714     struct bridge_iflist *);
715 static void     bridge_delete_span(struct bridge_softc *,
716     struct bridge_iflist *);
717 
718 static int      bridge_ioctl_add(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
719 static int      bridge_ioctl_del(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
720 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
721 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
722 static int      bridge_ioctl_scache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
723 static int      bridge_ioctl_gcache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
724 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
725 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
726 static int      bridge_ioctl_rts32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
727 static int      bridge_ioctl_rts64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
728 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
729 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
730 static int      bridge_ioctl_sto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
731 static int      bridge_ioctl_gto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
732 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
733 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
734 static int      bridge_ioctl_flush(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
735 static int      bridge_ioctl_gpri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
736 static int      bridge_ioctl_spri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
737 static int      bridge_ioctl_ght(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
738 static int      bridge_ioctl_sht(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
739 static int      bridge_ioctl_gfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
740 static int      bridge_ioctl_sfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
741 static int      bridge_ioctl_gma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
742 static int      bridge_ioctl_sma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
743 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
744 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
745 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
746 static int      bridge_ioctl_addspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
747 static int      bridge_ioctl_delspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
748 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
749 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
750 static int      bridge_ioctl_grte(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
751 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
752 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
753 static int      bridge_ioctl_sproto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
754 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
755 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len);
756 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
757 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
758 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
759 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
760 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
761 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
762 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
763 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
764 
765 static int      bridge_pf(struct mbuf **, struct ifnet *,
766     uint32_t sc_filter_flags, bool input);
767 static int bridge_ip_checkbasic(struct mbuf **);
768 static int bridge_ip6_checkbasic(struct mbuf **);
769 
770 static void bridge_detach(ifnet_t);
771 static void bridge_link_event(struct ifnet *, u_int32_t);
772 static void bridge_iflinkevent(struct ifnet *);
773 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
774 static int interface_media_active(struct ifnet *);
775 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
776 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
777 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
778 
779 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
780     struct bridge_iflist *);
781 static void bridge_mac_nat_disable(struct bridge_softc *sc);
782 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
783 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
784 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
785     struct bridge_iflist *);
786 static mbuf_t bridge_mac_nat_input(struct bridge_softc *, ifnet_t, mbuf_t,
787     ifnet_t * dst_if);
788 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
789     struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
790 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
791     const char[ETHER_ADDR_LEN]);
792 
793 static mblist bridge_mac_nat_input_list(struct bridge_softc *sc,
794     ifnet_t external_ifp, mbuf_t m, mbuf_t * forward_head);
795 static mbuf_t bridge_mac_nat_translate_list(struct bridge_softc * sc,
796     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
797 static mbuf_t bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
798     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
799 
800 static mbuf_t   bridge_pf_list(mbuf_t m, ifnet_t ifp,
801     uint32_t sc_filter_flags, bool input);
802 
803 static inline ifnet_t
bridge_rtlookup(struct bridge_softc * sc,const uint8_t addr[ETHER_ADDR_LEN],uint16_t vlan)804 bridge_rtlookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
805     uint16_t vlan)
806 {
807 	struct bridge_iflist *  bif;
808 	ifnet_t                 ifp = NULL;
809 
810 	bif = bridge_rtlookup_bif(sc, addr, vlan);
811 	if (bif != NULL) {
812 		ifp = bif->bif_ifp;
813 	}
814 	return ifp;
815 }
816 
817 static bool in_addr_is_ours(const struct in_addr);
818 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
819 
820 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
821 
822 static mblist
823 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx);
824 
825 static mblist
826 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
827     u_int mac_hlen, bool is_ipv4, bool is_tx);
828 
829 static inline mblist
gso_tcp_transmit(ifnet_t ifp,mbuf_t m,u_int mac_hlen,bool is_ipv4)830 gso_tcp_transmit(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4)
831 {
832 	return gso_tcp(ifp, m, mac_hlen, is_ipv4, true);
833 }
834 
835 /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
/*
 * NOTE(review): as defined here, VLANTAGOF() does not examine its argument
 * and always evaluates to 0, which does not match the "default vlan is 1"
 * statement above -- confirm which of the two is intended.
 */
836 #define VLANTAGOF(_m)   0
837 
838 #define BSTP_ETHERADDR_RANGE_FIRST      0x00
839 #define BSTP_ETHERADDR_RANGE_LAST       0x0f
840 
841 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
842 { 0x01, 0x80, 0xc2, 0x00, 0x00, BSTP_ETHERADDR_RANGE_FIRST };
843 
844 
845 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
846 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
847 
848 #if BRIDGESTP
849 static struct bstp_cb_ops bridge_ops = {
850 	.bcb_state = bridge_state_change,
851 	.bcb_rtage = bridge_rtable_expire
852 };
853 #endif /* BRIDGESTP */
854 
855 SYSCTL_DECL(_net_link);
856 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
857     "Bridge");
858 
859 static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
860 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
861     CTLFLAG_RW | CTLFLAG_LOCKED,
862     &bridge_inherit_mac, 0,
863     "Inherit MAC address from the first bridge member");
864 
865 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
866     CTLFLAG_RW | CTLFLAG_LOCKED,
867     &bridge_rtable_prune_period, 0,
868     "Interval between pruning of routing table");
869 
870 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
871 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
872     CTLFLAG_RW | CTLFLAG_LOCKED,
873     &bridge_rtable_hash_size_max, 0,
874     "Maximum size of the routing hash table");
875 
876 #if BRIDGE_DELAYED_CALLBACK_DEBUG
877 static int bridge_delayed_callback_delay = 0;
878 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
879     CTLFLAG_RW | CTLFLAG_LOCKED,
880     &bridge_delayed_callback_delay, 0,
881     "Delay before calling delayed function");
882 #endif
883 
884 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
885     hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
886     &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
887 
888 #if BRIDGESTP
889 static int log_stp   = 0;   /* log STP state changes */
890 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
891     &log_stp, 0, "Log STP state changes");
892 #endif /* BRIDGESTP */
893 
/*
 * One entry of a bridge control table (bridge_control_table32 /
 * bridge_control_table64): the handler for a command together with the
 * size of its argument structure and the BC_F_* flags that apply to it.
 */
894 struct bridge_control {
	/* handler; receives the softc plus an argument buffer of arg_len bytes */
895 	int             (*bc_func)(struct bridge_softc *, void *__sized_by(arg_len) args, size_t arg_len);
	/* size of the argument structure (the tables fill this in with sizeof()) */
896 	unsigned int    bc_argsize;
	/* bitwise OR of the BC_F_* flags defined below */
897 	unsigned int    bc_flags;
898 };
899 
/* Flag values for bc_flags. */
900 #define BC_F_COPYIN             0x01    /* copy arguments in */
901 #define BC_F_COPYOUT            0x02    /* copy arguments out */
902 #define BC_F_SUSER              0x04    /* do super-user check */
903 
/*
 * Bridge control table using the 32-bit argument layouts (ifbifconf32,
 * ifbaconf32, ifbareq32, ifbropreq32, ifbpstpconf32, ifbrmnelist32,
 * ifbrmreq32) wherever a command has a size-specific variant.
 *
 * Entry order is significant: the numeric markers on the right count entry
 * positions, and bridge_control_table64 lists the same commands in the same
 * order. bridge_control_table_size is computed from this table alone, so
 * both tables must keep the same number of entries (38).
 *
 * NOTE(review): presumably indexed by the command number supplied by the
 * caller; the lookup code is not part of this section -- confirm.
 */
904 static const struct bridge_control bridge_control_table32[] = {
905 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
906 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
907 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
908 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
909 
910 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
911 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
912 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
913 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
914 
915 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
916 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
917 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
918 	  .bc_flags = BC_F_COPYOUT },
919 
920 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
921 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
922 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
923 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
924 
925 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
926 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
927 
928 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
929 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
930 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
931 	  .bc_flags = BC_F_COPYOUT },
932 
933 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
934 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
935 
936 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
937 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
938 
939 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
940 	  .bc_flags = BC_F_COPYOUT },
941 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
942 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
943 
944 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
945 	  .bc_flags = BC_F_COPYOUT },
946 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
947 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
948 
949 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
950 	  .bc_flags = BC_F_COPYOUT },
951 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
952 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
953 
954 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
955 	  .bc_flags = BC_F_COPYOUT },
956 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
957 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
958 
959 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
960 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
961 
962 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
963 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
964 
965 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
966 	  .bc_flags = BC_F_COPYOUT },
967 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
968 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
969 
970 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
971 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
972 
973 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
974 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
975 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
976 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
977 
978 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
979 	  .bc_flags = BC_F_COPYOUT },
980 
981 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
982 	  .bc_flags = BC_F_COPYOUT },
983 
984 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
985 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
986 
987 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
988 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
989 
990 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
991 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
992 
993 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
994 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
995 
996 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
997 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
998 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
999 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1000 
1001 	{ .bc_func = bridge_ioctl_gmnelist32,
1002 	  .bc_argsize = sizeof(struct ifbrmnelist32),
1003 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1004 	{ .bc_func = bridge_ioctl_gifstats32,
1005 	  .bc_argsize = sizeof(struct ifbrmreq32),
1006 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1007 };
1008 
/*
 * Bridge control table using the 64-bit argument layouts (ifbifconf64,
 * ifbaconf64, ifbareq64, ifbropreq64, ifbpstpconf64, ifbrmnelist64,
 * ifbrmreq64) wherever a command has a size-specific variant.
 *
 * Must stay entry-for-entry parallel with bridge_control_table32: same
 * commands, same order, same flags, same number of entries (38). The shared
 * bridge_control_table_size is derived from the 32-bit table only.
 *
 * NOTE(review): presumably indexed by the command number supplied by the
 * caller; the lookup code is not part of this section -- confirm.
 */
1009 static const struct bridge_control bridge_control_table64[] = {
1010 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
1011 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1012 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
1013 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1014 
1015 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
1016 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1017 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
1018 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1019 
1020 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
1021 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1022 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
1023 	  .bc_flags = BC_F_COPYOUT },
1024 
1025 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
1026 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1027 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
1028 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1029 
1030 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
1031 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1032 
1033 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
1034 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1035 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
1036 	  .bc_flags = BC_F_COPYOUT },
1037 
1038 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1039 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1040 
1041 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1042 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1043 
1044 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1045 	  .bc_flags = BC_F_COPYOUT },
1046 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1047 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1048 
1049 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1050 	  .bc_flags = BC_F_COPYOUT },
1051 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1052 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1053 
1054 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1055 	  .bc_flags = BC_F_COPYOUT },
1056 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1057 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1058 
1059 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1060 	  .bc_flags = BC_F_COPYOUT },
1061 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
1062 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1063 
1064 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1065 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1066 
1067 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1068 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1069 
1070 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1071 	  .bc_flags = BC_F_COPYOUT },
1072 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1073 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1074 
1075 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1076 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1077 
1078 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1079 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1080 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1081 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1082 
1083 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1084 	  .bc_flags = BC_F_COPYOUT },
1085 
1086 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1087 	  .bc_flags = BC_F_COPYOUT },
1088 
1089 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1090 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1091 
1092 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1093 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1094 
1095 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1096 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1097 
1098 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1099 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1100 
1101 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1102 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1103 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1104 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1105 
1106 	{ .bc_func = bridge_ioctl_gmnelist64,
1107 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1108 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1109 	{ .bc_func = bridge_ioctl_gifstats64,
1110 	  .bc_argsize = sizeof(struct ifbrmreq64),
1111 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1112 };
1113 
/*
 * Number of entries in the control tables. It is computed from
 * bridge_control_table32 only; bridge_control_table64 is expected to have
 * exactly the same number of entries.
 */
1114 static const unsigned int bridge_control_table_size =
1115     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1116 
1117 static LIST_HEAD(, bridge_softc) bridge_list =
1118     LIST_HEAD_INITIALIZER(bridge_list);
1119 
1120 #define BRIDGENAME      "bridge"
1121 #define BRIDGES_MAX     IF_MAXUNIT
1122 #define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)
1123 
1124 static struct if_clone bridge_cloner =
1125     IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
1126     0, BRIDGES_MAX);
1127 
1128 static int if_bridge_txstart = 0;
1129 SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
1130     &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
1131 
1132 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1133     &if_bridge_debug, 0, "Bridge debug flags");
1134 
1135 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
1136     CTLFLAG_RW | CTLFLAG_LOCKED,
1137     &if_bridge_log_level, 0, "Bridge log level");
1138 
1139 static int if_bridge_output_skip_filters = 1;
1140 SYSCTL_INT(_net_link_bridge, OID_AUTO, output_skip_filters,
1141     CTLFLAG_RW | CTLFLAG_LOCKED,
1142     &if_bridge_output_skip_filters, 0, "Bridge skip output filters");
1143 
1144 int bridge_enable_early_input = 1;   /* DLIL early input */
1145 SYSCTL_INT(_net_link_bridge, OID_AUTO, enable_early_input,
1146     CTLFLAG_RW | CTLFLAG_LOCKED,
1147     &bridge_enable_early_input, 0,
1148     "Bridge enable early input");
1149 
1150 int bridge_allow_lro_num_seg = 1;   /* allow LRO_NUM_SEG to keep LRO enabled */
1151 SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_lro_num_seg,
1152     CTLFLAG_RW | CTLFLAG_LOCKED,
1153     &bridge_allow_lro_num_seg, 0,
1154     "Bridge allow LRO_NUM_SEG to keep LRO enabled");
1155 
1156 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
1157 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
1158 #define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
1159 #define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0
1160 
1161 static u_int if_bridge_tso_reduce_mss_forwarding
1162         = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
1163 static u_int if_bridge_tso_reduce_mss_tx
1164         = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1165 
1166 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1167 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1168 {
1169 	int     changed;
1170 	int     error;
1171 	u_int   new_value;
1172 
1173 	error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1174 	    &changed);
1175 	if (error == 0 && changed != 0) {
1176 		if (new_value > val_max) {
1177 			return EINVAL;
1178 		}
1179 		*val = new_value;
1180 	}
1181 	return error;
1182 }
1183 
1184 static int
1185 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1186 {
1187 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1188     BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1189 }
1190 
1191 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1192     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1193     0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1194     "Bridge tso reduce mss when forwarding");
1195 
1196 static int
1197 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1198 {
1199 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1200     BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1201 }
1202 
1203 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1204     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1205     0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1206     "Bridge tso reduce mss on transmit");
1207 
1208 #if DEBUG || DEVELOPMENT
1209 /*
1210  * net.link.bridge.reduce_tso_mtu
1211  * - when non-zero, the bridge overrides the interface TSO MTU to a lower
1212  *   value (i.e. 16K) to enable testing the "use GSO instead" path
1213  */
1214 static int if_bridge_reduce_tso_mtu = 0;
1215 SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
1216     CTLFLAG_RW | CTLFLAG_LOCKED,
1217     &if_bridge_reduce_tso_mtu, 0, "Bridge interface reduce TSO MTU");
1218 
1219 #endif /* DEBUG || DEVELOPMENT */
1220 
1221 static void brlog_ether_header(struct ether_header *);
1222 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1223 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1224 static void brlog_mbuf(mbuf_t, const char *, const char *);
1225 static void brlog_link(struct bridge_softc * sc);
1226 
1227 #if BRIDGE_LOCK_DEBUG
1228 static void bridge_lock(struct bridge_softc *);
1229 static void bridge_unlock(struct bridge_softc *);
1230 static int bridge_lock2ref(struct bridge_softc *);
1231 static void bridge_unref(struct bridge_softc *);
1232 static void bridge_xlock(struct bridge_softc *);
1233 static void bridge_xdrop(struct bridge_softc *);
1234 
1235 #define DECL_RETURN_ADDR(v) void * __single v = __unsafe_forge_single(void *, __builtin_return_address(0))
1236 
1237 static void
bridge_lock(struct bridge_softc * sc)1238 bridge_lock(struct bridge_softc *sc)
1239 {
1240 	DECL_RETURN_ADDR(lr_saved);
1241 
1242 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1243 
1244 	_BRIDGE_LOCK(sc);
1245 
1246 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1247 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1248 }
1249 
1250 static void
bridge_unlock(struct bridge_softc * sc)1251 bridge_unlock(struct bridge_softc *sc)
1252 {
1253 	DECL_RETURN_ADDR(lr_saved);
1254 
1255 	BRIDGE_LOCK_ASSERT_HELD(sc);
1256 
1257 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1258 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1259 
1260 	_BRIDGE_UNLOCK(sc);
1261 }
1262 
1263 static int
bridge_lock2ref(struct bridge_softc * sc)1264 bridge_lock2ref(struct bridge_softc *sc)
1265 {
1266 	int error = 0;
1267 	DECL_RETURN_ADDR(lr_saved);
1268 
1269 	BRIDGE_LOCK_ASSERT_HELD(sc);
1270 
1271 	if (sc->sc_iflist_xcnt > 0) {
1272 		error = EBUSY;
1273 	} else {
1274 		sc->sc_iflist_ref++;
1275 	}
1276 
1277 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1278 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1279 
1280 	_BRIDGE_UNLOCK(sc);
1281 
1282 	return error;
1283 }
1284 
1285 static void
bridge_unref(struct bridge_softc * sc)1286 bridge_unref(struct bridge_softc *sc)
1287 {
1288 	DECL_RETURN_ADDR(lr_saved);
1289 
1290 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1291 
1292 	_BRIDGE_LOCK(sc);
1293 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1294 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1295 
1296 	sc->sc_iflist_ref--;
1297 
1298 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1299 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1300 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1301 		_BRIDGE_UNLOCK(sc);
1302 		wakeup(&sc->sc_cv);
1303 	} else {
1304 		_BRIDGE_UNLOCK(sc);
1305 	}
1306 }
1307 
1308 static void
bridge_xlock(struct bridge_softc * sc)1309 bridge_xlock(struct bridge_softc *sc)
1310 {
1311 	DECL_RETURN_ADDR(lr_saved);
1312 
1313 	BRIDGE_LOCK_ASSERT_HELD(sc);
1314 
1315 	sc->sc_iflist_xcnt++;
1316 	while (sc->sc_iflist_ref > 0) {
1317 		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1318 		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1319 
1320 		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1321 
1322 		sc->lock_lr[sc->next_lock_lr] = lr_saved;
1323 		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1324 	}
1325 }
1326 
1327 #undef DECL_RETURN_ADDR
1328 
1329 static void
bridge_xdrop(struct bridge_softc * sc)1330 bridge_xdrop(struct bridge_softc *sc)
1331 {
1332 	BRIDGE_LOCK_ASSERT_HELD(sc);
1333 
1334 	sc->sc_iflist_xcnt--;
1335 }
1336 
1337 #endif /* BRIDGE_LOCK_DEBUG */
1338 
1339 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1340 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1341 {
1342 	if (m) {
1343 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1344 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1345 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1346 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1347 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1348 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1349 		    suffix ? suffix : "");
1350 	} else {
1351 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1352 	}
1353 }
1354 
1355 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1356 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1357 {
1358 	if (m) {
1359 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1360 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1361 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1362 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1363 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1364 		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
1365 		    (unsigned int)mbuf_maxlen(m),
1366 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1367 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1368 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1369 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1370 			brlog_mbuf_pkthdr(m, "", suffix);
1371 		}
1372 	} else {
1373 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1374 	}
1375 }
1376 
1377 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1378 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1379 {
1380 	mbuf_t                  n;
1381 	size_t                  i, j;
1382 	size_t                  pktlen, mlen, maxlen;
1383 	unsigned char   *ptr;
1384 
1385 	pktlen = mbuf_pkthdr_len(m);
1386 
1387 	if (offset > pktlen) {
1388 		return;
1389 	}
1390 
1391 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1392 	n = m;
1393 	mlen = mbuf_len(n);
1394 	ptr = mtod(n, unsigned char *);
1395 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1396 		if (j >= mlen) {
1397 			n = mbuf_next(n);
1398 			if (n == 0) {
1399 				break;
1400 			}
1401 			ptr = mtod(n, unsigned char *);
1402 			mlen = mbuf_len(n);
1403 			j = 0;
1404 		}
1405 		if (i >= offset) {
1406 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1407 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1408 		}
1409 	}
1410 }
1411 
1412 static void
brlog_ether_header(struct ether_header * eh)1413 brlog_ether_header(struct ether_header *eh)
1414 {
1415 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1416 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1417 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1418 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1419 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1420 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1421 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1422 	    ntohs(eh->ether_type));
1423 }
1424 
1425 static char *
ether_ntop(char * __sized_by (len)buf,size_t len,const u_char ap[ETHER_ADDR_LEN])1426 ether_ntop(char * __sized_by(len) buf, size_t len, const u_char ap[ETHER_ADDR_LEN])
1427 {
1428 	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
1429 	    ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
1430 
1431 	return buf;
1432 }
1433 
1434 static void
brlog_link(struct bridge_softc * sc)1435 brlog_link(struct bridge_softc * sc)
1436 {
1437 	int i;
1438 	uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1439 	IFNAMSIZ + ETHER_ADDR_LEN)];
1440 	struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1441 	const u_char * lladdr;
1442 	char lladdr_str[48];
1443 
1444 	memset(sdl_buffer, 0, sizeof(sdl_buffer));
1445 	sdl->sdl_family = AF_LINK;
1446 	sdl->sdl_nlen = strbuflen(sc->sc_if_xname);
1447 	sdl->sdl_alen = ETHER_ADDR_LEN;
1448 	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1449 	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1450 	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1451 	lladdr_str[0] = '\0';
1452 	for (i = 0, lladdr = CONST_LLADDR(sdl);
1453 	    i < sdl->sdl_alen;
1454 	    i++, lladdr++) {
1455 		char    byte_str[4];
1456 
1457 		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1458 		    *lladdr);
1459 		strbufcat(lladdr_str, byte_str);
1460 	}
1461 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1462 	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1463 	    " slen %d addr %s", sc->sc_if_xname,
1464 	    sdl->sdl_len, sdl->sdl_index,
1465 	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1466 	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1467 }
1468 
1469 static int
_mbuf_get_tso_mss(mbuf_t m)1470 _mbuf_get_tso_mss(mbuf_t m)
1471 {
1472 	int     mss = 0;
1473 
1474 	if ((m->m_pkthdr.csum_flags & _TSO_CSUM) != 0) {
1475 		mss = m->m_pkthdr.tso_segsz;
1476 	}
1477 	return mss;
1478 }
1479 
1480 /*
1481  * bridgeattach:
1482  *
1483  *	Pseudo-device attach routine.
1484  */
1485 __private_extern__ int
bridgeattach(int n)1486 bridgeattach(int n)
1487 {
1488 #pragma unused(n)
1489 	int error;
1490 
1491 	LIST_INIT(&bridge_list);
1492 
1493 #if BRIDGESTP
1494 	bstp_sys_init();
1495 #endif /* BRIDGESTP */
1496 
1497 	error = if_clone_attach(&bridge_cloner);
1498 	if (error != 0) {
1499 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1500 	}
1501 	return error;
1502 }
1503 
1504 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1505 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1506 {
1507 	mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1508 	mbuf_pkthdr_adjustlen(m, -len);
1509 }
1510 
1511 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1512 bridge_ifnet_set_attrs(struct ifnet * ifp)
1513 {
1514 	errno_t         error;
1515 
1516 	error = ifnet_set_mtu(ifp, ETHERMTU);
1517 	if (error != 0) {
1518 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1519 		goto done;
1520 	}
1521 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1522 	if (error != 0) {
1523 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1524 		goto done;
1525 	}
1526 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1527 	if (error != 0) {
1528 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1529 		goto done;
1530 	}
1531 	error = ifnet_set_flags(ifp,
1532 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1533 	    0xffff);
1534 
1535 	if (error != 0) {
1536 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1537 		goto done;
1538 	}
1539 done:
1540 	return error;
1541 }
1542 
1543 /*
1544  * bridge_clone_create:
1545  *
1546  *	Create a new bridge instance.
1547  */
1548 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1549 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1550 {
1551 #pragma unused(params)
1552 	ifnet_ref_t ifp = NULL;
1553 	struct bridge_softc *sc = NULL;
1554 	struct bridge_softc *sc2 = NULL;
1555 	struct ifnet_init_eparams init_params;
1556 	errno_t error = 0;
1557 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1558 	int fb, retry, has_hostid;
1559 
1560 	sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1561 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1562 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1563 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1564 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1565 	sc->sc_filter_flags = 0;
1566 
1567 	TAILQ_INIT(&sc->sc_iflist);
1568 
1569 	/* use the interface name as the unique id for ifp recycle */
1570 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1571 	    ifc->ifc_name, unit);
1572 	bzero(&init_params, sizeof(init_params));
1573 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1574 	init_params.len                 = sizeof(init_params);
1575 	/* Initialize our routing table. */
1576 	error = bridge_rtable_init(sc);
1577 	if (error != 0) {
1578 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1579 		goto done;
1580 	}
1581 	TAILQ_INIT(&sc->sc_spanlist);
1582 	if (if_bridge_txstart) {
1583 		init_params.start = bridge_start;
1584 	} else {
1585 		init_params.flags = IFNET_INIT_LEGACY;
1586 		init_params.output = bridge_output;
1587 	}
1588 	init_params.uniqueid_len        = strbuflen(sc->sc_if_xname);
1589 	init_params.uniqueid            = sc->sc_if_xname;
1590 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1591 	init_params.name                = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1592 	init_params.unit                = unit;
1593 	init_params.family              = IFNET_FAMILY_ETHERNET;
1594 	init_params.type                = IFT_BRIDGE;
1595 	init_params.demux               = ether_demux;
1596 	init_params.add_proto           = ether_add_proto;
1597 	init_params.del_proto           = ether_del_proto;
1598 	init_params.check_multi         = ether_check_multi;
1599 	init_params.framer_extended     = ether_frameout_extended;
1600 	init_params.softc               = sc;
1601 	init_params.ioctl               = bridge_ioctl;
1602 	init_params.detach              = bridge_detach;
1603 	init_params.broadcast_addr      = etherbroadcastaddr;
1604 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1605 
1606 	error = ifnet_allocate_extended(&init_params, &ifp);
1607 	if (error != 0) {
1608 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1609 		goto done;
1610 	}
1611 	LIST_INIT(&sc->sc_mne_list);
1612 	LIST_INIT(&sc->sc_mne_list_v6);
1613 	sc->sc_ifp = ifp;
1614 	error = bridge_ifnet_set_attrs(ifp);
1615 	if (error != 0) {
1616 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1617 		    error);
1618 		goto done;
1619 	}
1620 	/*
1621 	 * Generate an ethernet address with a locally administered address.
1622 	 *
1623 	 * Since we are using random ethernet addresses for the bridge, it is
1624 	 * possible that we might have address collisions, so make sure that
1625 	 * this hardware address isn't already in use on another bridge.
1626 	 * The first try uses the "hostid" and falls back to read_frandom();
1627 	 * for "hostid", we use the MAC address of the first-encountered
1628 	 * Ethernet-type interface that is currently configured.
1629 	 */
1630 	fb = 0;
1631 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1632 	for (retry = 1; retry != 0;) {
1633 		if (fb || has_hostid == 0) {
1634 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1635 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1636 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1637 		} else {
1638 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1639 			    ETHER_ADDR_LEN);
1640 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1641 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1642 			sc->sc_defaddr[3] =     /* stir it up a bit */
1643 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1644 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1645 			/*
1646 			 * Mix in the LSB as it's actually pretty significant,
1647 			 * see rdar://14076061
1648 			 */
1649 			sc->sc_defaddr[4] =
1650 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1651 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1652 			    sc->sc_defaddr[5];
1653 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1654 		}
1655 
1656 		fb = 1;
1657 		retry = 0;
1658 		lck_mtx_lock(&bridge_list_mtx);
1659 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1660 			if (_ether_cmp(sc->sc_defaddr,
1661 			    IF_LLADDR(sc2->sc_ifp)) == 0) {
1662 				retry = 1;
1663 			}
1664 		}
1665 		lck_mtx_unlock(&bridge_list_mtx);
1666 	}
1667 
1668 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1669 
1670 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1671 		brlog_link(sc);
1672 	}
1673 	error = ifnet_attach(ifp, NULL);
1674 	if (error != 0) {
1675 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1676 		goto done;
1677 	}
1678 
1679 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1680 	    IFT_ETHER);
1681 	if (error != 0) {
1682 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1683 		    error);
1684 		goto done;
1685 	}
1686 
1687 	ifnet_set_offload(ifp,
1688 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1689 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1690 	error = bridge_set_tso(sc);
1691 	if (error != 0) {
1692 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1693 		goto done;
1694 	}
1695 #if BRIDGESTP
1696 	bstp_attach(&sc->sc_stp, &bridge_ops);
1697 #endif /* BRIDGESTP */
1698 
1699 	lck_mtx_lock(&bridge_list_mtx);
1700 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1701 	lck_mtx_unlock(&bridge_list_mtx);
1702 
1703 	/* attach as ethernet */
1704 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1705 	    NULL, NULL);
1706 
1707 done:
1708 	if (error != 0) {
1709 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1710 		/* TBD: Clean up: sc, sc_rthash etc */
1711 	}
1712 
1713 	return error;
1714 }
1715 
1716 /*
1717  * bridge_clone_destroy:
1718  *
1719  *	Destroy a bridge instance.
1720  */
1721 static int
bridge_clone_destroy(struct ifnet * ifp)1722 bridge_clone_destroy(struct ifnet *ifp)
1723 {
1724 	struct bridge_softc * __single sc = ifp->if_softc;
1725 	struct bridge_iflist *bif;
1726 	errno_t error;
1727 
1728 	BRIDGE_LOCK(sc);
1729 	if ((sc->sc_flags & SCF_DETACHING)) {
1730 		BRIDGE_UNLOCK(sc);
1731 		return 0;
1732 	}
1733 	sc->sc_flags |= SCF_DETACHING;
1734 
1735 	bridge_ifstop(ifp, 1);
1736 
1737 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1738 
1739 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1740 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1741 
1742 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1743 	if (error != 0) {
1744 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1745 	}
1746 
1747 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1748 		bridge_delete_member(sc, bif);
1749 	}
1750 
1751 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1752 		bridge_delete_span(sc, bif);
1753 	}
1754 	BRIDGE_UNLOCK(sc);
1755 
1756 	error = ifnet_detach(ifp);
1757 	if (error != 0) {
1758 		panic("%s (%d): ifnet_detach(%p) failed %d",
1759 		    __func__, __LINE__, ifp, error);
1760 	}
1761 	return 0;
1762 }
1763 
/*
 * DRVSPEC:
 *
 *	Shared body of the SIOC[SG]DRVSPEC cases of bridge_ioctl().  It must
 *	be expanded in a scope that provides `ifd', `bc',
 *	`bridge_control_table', `args', `cmd', `sc' and `error'.
 *
 *	Validates the sub-command index, its direction (get vs. set), the
 *	caller's privilege and the argument size; copies the argument in
 *	when BC_F_COPYIN is set; runs the handler with the bridge lock
 *	held; copies the result out when BC_F_COPYOUT is set.  The first
 *	failing step sets `error' and skips the rest.
 */
#define DRVSPEC do { \
	/* the sub-command indexes the control table */                 \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	/* "get" requires a copy-out command, "set" forbids one */      \
	if ((cmd == SIOCGDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) ||                      \
	    (cmd == SIOCSDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) {                      \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if ((bc->bc_flags & BC_F_SUSER) != 0) {                         \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	/* exact size for this command, and it must fit in `args' */    \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) {                        \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args, sizeof(args));                \
	BRIDGE_UNLOCK(sc);                                              \
	if (error != 0) {                                               \
	        break;                                                  \
	}                                                               \
                                                                        \
	if ((bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
	}                                                               \
} while (0)
1810 
1811 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1812 interface_needs_input_broadcast(struct ifnet * ifp)
1813 {
1814 	/*
1815 	 * Selectively enable input broadcast only when necessary.
1816 	 * The bridge interface itself attaches a fake protocol
1817 	 * so checking for at least two protocols means that the
1818 	 * interface is being used for something besides bridging
1819 	 * and needs to see broadcast packets from other members.
1820 	 */
1821 	return if_get_protolist(ifp, NULL, 0) >= 2;
1822 }
1823 
1824 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1825 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1826 {
1827 	boolean_t       old_input_broadcast;
1828 
1829 	old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1830 	if (input_broadcast) {
1831 		bif->bif_flags |= BIFF_INPUT_BROADCAST;
1832 	} else {
1833 		bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1834 	}
1835 	return old_input_broadcast != input_broadcast;
1836 }
1837 
1838 /*
1839  * bridge_ioctl:
1840  *
1841  *	Handle a control request from the operator.
1842  */
1843 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * __sized_by (IOCPARM_LEN (cmd))data)1844 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)) data)
1845 {
1846 	struct bridge_softc * __single sc = ifp->if_softc;
1847 	struct ifreq *ifr = (struct ifreq *)data;
1848 	struct bridge_iflist *bif;
1849 	int error = 0;
1850 
1851 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1852 
1853 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1854 	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1855 	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1856 	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1857 	    (char)IOCGROUP(cmd), cmd & 0xff);
1858 
1859 	switch (cmd) {
1860 	case SIOCAIFADDR_IN6_32:
1861 	case SIOCAIFADDR_IN6_64:
1862 	case SIOCSIFADDR:
1863 	case SIOCAIFADDR:
1864 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1865 		BRIDGE_LOCK(sc);
1866 		sc->sc_flags |= SCF_ADDRESS_ASSIGNED;
1867 		BRIDGE_UNLOCK(sc);
1868 		BRIDGE_LOG(LOG_NOTICE, 0,
1869 		    "ifp %s has address", ifp->if_xname);
1870 		break;
1871 
1872 	case SIOCGIFMEDIA32:
1873 	case SIOCGIFMEDIA64: {
1874 		// cast to 32bit version to work within bounds with 32bit userspace
1875 		struct ifmediareq32 *ifmr = (struct ifmediareq32 *)data;
1876 		user_addr_t user_addr;
1877 
1878 		user_addr = (cmd == SIOCGIFMEDIA64) ?
1879 		    ((struct ifmediareq64 *)data)->ifmu_ulist :
1880 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
1881 
1882 		ifmr->ifm_status = IFM_AVALID;
1883 		ifmr->ifm_mask = 0;
1884 		ifmr->ifm_count = 1;
1885 
1886 		BRIDGE_LOCK(sc);
1887 		if (!(sc->sc_flags & SCF_DETACHING) &&
1888 		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1889 			ifmr->ifm_status |= IFM_ACTIVE;
1890 			ifmr->ifm_active = ifmr->ifm_current =
1891 			    IFM_ETHER | IFM_AUTO;
1892 		} else {
1893 			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1894 		}
1895 		BRIDGE_UNLOCK(sc);
1896 
1897 		if (user_addr != USER_ADDR_NULL) {
1898 			error = copyout(&ifmr->ifm_current, user_addr,
1899 			    sizeof(int));
1900 		}
1901 		break;
1902 	}
1903 
1904 	case SIOCADDMULTI:
1905 	case SIOCDELMULTI:
1906 		break;
1907 
1908 	case SIOCSDRVSPEC32:
1909 	case SIOCGDRVSPEC32: {
1910 		union {
1911 			struct ifbreq ifbreq;
1912 			struct ifbifconf32 ifbifconf;
1913 			struct ifbareq32 ifbareq;
1914 			struct ifbaconf32 ifbaconf;
1915 			struct ifbrparam ifbrparam;
1916 			struct ifbropreq32 ifbropreq;
1917 		} args;
1918 		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1919 		const struct bridge_control *bridge_control_table =
1920 		    bridge_control_table32, *bc;
1921 
1922 		DRVSPEC;
1923 
1924 		break;
1925 	}
1926 	case SIOCSDRVSPEC64:
1927 	case SIOCGDRVSPEC64: {
1928 		union {
1929 			struct ifbreq ifbreq;
1930 			struct ifbifconf64 ifbifconf;
1931 			struct ifbareq64 ifbareq;
1932 			struct ifbaconf64 ifbaconf;
1933 			struct ifbrparam ifbrparam;
1934 			struct ifbropreq64 ifbropreq;
1935 		} args;
1936 		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1937 		const struct bridge_control *bridge_control_table =
1938 		    bridge_control_table64, *bc;
1939 
1940 		DRVSPEC;
1941 
1942 		break;
1943 	}
1944 
1945 	case SIOCSIFFLAGS:
1946 		if (!(ifp->if_flags & IFF_UP) &&
1947 		    (ifp->if_flags & IFF_RUNNING)) {
1948 			/*
1949 			 * If interface is marked down and it is running,
1950 			 * then stop and disable it.
1951 			 */
1952 			BRIDGE_LOCK(sc);
1953 			bridge_ifstop(ifp, 1);
1954 			BRIDGE_UNLOCK(sc);
1955 		} else if ((ifp->if_flags & IFF_UP) &&
1956 		    !(ifp->if_flags & IFF_RUNNING)) {
1957 			/*
1958 			 * If interface is marked up and it is stopped, then
1959 			 * start it.
1960 			 */
1961 			BRIDGE_LOCK(sc);
1962 			error = bridge_init(ifp);
1963 			BRIDGE_UNLOCK(sc);
1964 		}
1965 		break;
1966 
1967 	case SIOCSIFLLADDR:
1968 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1969 		    ifr->ifr_addr.sa_len);
1970 		if (error != 0) {
1971 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1972 			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1973 			    error);
1974 		}
1975 		break;
1976 
1977 	case SIOCSIFMTU:
1978 		if (ifr->ifr_mtu < 576) {
1979 			error = EINVAL;
1980 			break;
1981 		}
1982 		BRIDGE_LOCK(sc);
1983 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
1984 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1985 			BRIDGE_UNLOCK(sc);
1986 			break;
1987 		}
1988 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1989 			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1990 				BRIDGE_LOG(LOG_NOTICE, 0,
1991 				    "%s invalid MTU: %u(%s) != %d",
1992 				    sc->sc_ifp->if_xname,
1993 				    bif->bif_ifp->if_mtu,
1994 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
1995 				error = EINVAL;
1996 				break;
1997 			}
1998 		}
1999 		if (!error) {
2000 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2001 		}
2002 		BRIDGE_UNLOCK(sc);
2003 		break;
2004 
2005 	default:
2006 		error = ether_ioctl(ifp, cmd, data);
2007 		if (error != 0 && error != EOPNOTSUPP) {
2008 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2009 			    "ifp %s cmd 0x%08lx "
2010 			    "(%c%c [%lu] %c %lu) failed error: %d",
2011 			    ifp->if_xname, cmd,
2012 			    (cmd & IOC_IN) ? 'I' : ' ',
2013 			    (cmd & IOC_OUT) ? 'O' : ' ',
2014 			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
2015 			    cmd & 0xff, error);
2016 		}
2017 		break;
2018 	}
2019 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2020 
2021 	return error;
2022 }
2023 
2024 #if HAS_IF_CAP
2025 /*
2026  * bridge_mutecaps:
2027  *
2028  *	Clear or restore unwanted capabilities on the member interface
2029  */
2030 static void
bridge_mutecaps(struct bridge_softc * sc)2031 bridge_mutecaps(struct bridge_softc *sc)
2032 {
2033 	struct bridge_iflist *bif;
2034 	int enabled, mask;
2035 
2036 	/* Initial bitmask of capabilities to test */
2037 	mask = BRIDGE_IFCAPS_MASK;
2038 
2039 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2040 		/* Every member must support it or its disabled */
2041 		mask &= bif->bif_savedcaps;
2042 	}
2043 
2044 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2045 		enabled = bif->bif_ifp->if_capenable;
2046 		enabled &= ~BRIDGE_IFCAPS_STRIP;
2047 		/* strip off mask bits and enable them again if allowed */
2048 		enabled &= ~BRIDGE_IFCAPS_MASK;
2049 		enabled |= mask;
2050 
2051 		bridge_set_ifcap(sc, bif, enabled);
2052 	}
2053 }
2054 
2055 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2056 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2057 {
2058 	struct ifnet *ifp = bif->bif_ifp;
2059 	struct ifreq ifr;
2060 	int error;
2061 
2062 	bzero(&ifr, sizeof(ifr));
2063 	ifr.ifr_reqcap = set;
2064 
2065 	if (ifp->if_capenable != set) {
2066 		IFF_LOCKGIANT(ifp);
2067 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2068 		IFF_UNLOCKGIANT(ifp);
2069 		if (error) {
2070 			BRIDGE_LOG(LOG_NOTICE, 0,
2071 			    "%s error setting interface capabilities on %s",
2072 			    sc->sc_ifp->if_xname, ifp->if_xname);
2073 		}
2074 	}
2075 }
2076 #endif /* HAS_IF_CAP */
2077 
2078 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2079 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2080 {
2081 	struct ifreq    ifr;
2082 
2083 	bzero(&ifr, sizeof(ifr));
2084 	ifr.ifr_reqcap = cap_enable;
2085 	return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2086 }
2087 
2088 static const char *
enable_disable_str(boolean_t enable)2089 enable_disable_str(boolean_t enable)
2090 {
2091 	return (const char * __null_terminated)(enable ? "enable" : "disable");
2092 }
2093 
2094 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2095 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2096 {
2097 	uint32_t        cap_enable;
2098 	uint32_t        cap_supported;
2099 	boolean_t       changed = FALSE;
2100 	boolean_t       lro_enabled;
2101 
2102 	cap_supported = ifnet_capabilities_supported(ifp);
2103 	if ((cap_supported & IFCAP_LRO) == 0) {
2104 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2105 		    "%s doesn't support LRO",
2106 		    ifp->if_xname);
2107 		goto done;
2108 	}
2109 	if (bridge_allow_lro_num_seg != 0 &&
2110 	    (cap_supported & IFCAP_LRO_NUM_SEG) != 0) {
2111 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2112 		    "%s supports LRO_NUM_SEG, leaving LRO enabled",
2113 		    ifp->if_xname);
2114 		goto done;
2115 	}
2116 	cap_enable = ifnet_capabilities_enabled(ifp);
2117 	lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2118 	if (lro_enabled != enable) {
2119 		errno_t         error;
2120 
2121 		if (enable) {
2122 			cap_enable |= IFCAP_LRO;
2123 		} else {
2124 			cap_enable &= ~IFCAP_LRO;
2125 		}
2126 		error = siocsifcap(ifp, cap_enable);
2127 		if (error != 0) {
2128 			BRIDGE_LOG(LOG_NOTICE, 0,
2129 			    "%s %s failed (cap 0x%x) %d",
2130 			    ifp->if_xname,
2131 			    enable_disable_str(enable),
2132 			    cap_enable,
2133 			    error);
2134 		} else {
2135 			changed = TRUE;
2136 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2137 			    "%s %s success (cap 0x%x)",
2138 			    ifp->if_xname,
2139 			    enable_disable_str(enable),
2140 			    cap_enable);
2141 		}
2142 	}
2143 done:
2144 	return changed;
2145 }
2146 
2147 static errno_t
bridge_set_tso(struct bridge_softc * sc)2148 bridge_set_tso(struct bridge_softc *sc)
2149 {
2150 	struct bridge_iflist *bif;
2151 	u_int32_t tso_v4_mtu;
2152 	u_int32_t tso_v6_mtu;
2153 	ifnet_offload_t offload;
2154 	errno_t error = 0;
2155 
2156 	/* By default, support TSO */
2157 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2158 	tso_v4_mtu = IP_MAXPACKET;
2159 	tso_v6_mtu = IP_MAXPACKET;
2160 
2161 	/* Use the lowest common denominator of the members */
2162 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2163 		ifnet_t ifp = bif->bif_ifp;
2164 
2165 		if (ifp == NULL) {
2166 			continue;
2167 		}
2168 
2169 		if (offload & IFNET_TSO_IPV4) {
2170 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2171 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2172 					tso_v4_mtu = ifp->if_tso_v4_mtu;
2173 				}
2174 			} else {
2175 				offload &= ~IFNET_TSO_IPV4;
2176 				tso_v4_mtu = 0;
2177 			}
2178 		}
2179 		if (offload & IFNET_TSO_IPV6) {
2180 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2181 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2182 					tso_v6_mtu = ifp->if_tso_v6_mtu;
2183 				}
2184 			} else {
2185 				offload &= ~IFNET_TSO_IPV6;
2186 				tso_v6_mtu = 0;
2187 			}
2188 		}
2189 	}
2190 
2191 	if (offload != sc->sc_ifp->if_hwassist) {
2192 		error = ifnet_set_offload(sc->sc_ifp, offload);
2193 		if (error != 0) {
2194 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2195 			    "ifnet_set_offload(%s, 0x%x) failed %d",
2196 			    sc->sc_ifp->if_xname, offload, error);
2197 			goto done;
2198 		}
2199 		/*
2200 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2201 		 * as large as the interface MTU
2202 		 */
2203 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2204 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2205 				tso_v4_mtu = sc->sc_ifp->if_mtu;
2206 			}
2207 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2208 			    tso_v4_mtu);
2209 			if (error != 0) {
2210 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2211 				    "ifnet_set_tso_mtu(%s, "
2212 				    "AF_INET, %u) failed %d",
2213 				    sc->sc_ifp->if_xname,
2214 				    tso_v4_mtu, error);
2215 				goto done;
2216 			}
2217 		}
2218 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2219 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2220 				tso_v6_mtu = sc->sc_ifp->if_mtu;
2221 			}
2222 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2223 			    tso_v6_mtu);
2224 			if (error != 0) {
2225 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2226 				    "ifnet_set_tso_mtu(%s, "
2227 				    "AF_INET6, %u) failed %d",
2228 				    sc->sc_ifp->if_xname,
2229 				    tso_v6_mtu, error);
2230 				goto done;
2231 			}
2232 		}
2233 	}
2234 done:
2235 	return error;
2236 }
2237 
2238 static const char *
sanitize_ifname(char * __sized_by (IFNAMSIZ)ifname)2239 sanitize_ifname(char * __sized_by(IFNAMSIZ) ifname)
2240 {
2241 	ifname[IFNAMSIZ - 1] = '\0';
2242 	return __unsafe_null_terminated_from_indexable(ifname, &ifname[IFNAMSIZ - 1]);
2243 }
2244 
2245 /*
2246  * bridge_lookup_member:
2247  *
2248  *	Lookup a bridge member interface.
2249  */
2250 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,char * __sized_by (IFNAMSIZ)name_unsanitized)2251 bridge_lookup_member(struct bridge_softc *sc, char * __sized_by(IFNAMSIZ) name_unsanitized)
2252 {
2253 	struct bridge_iflist *bif;
2254 	struct ifnet *ifp;
2255 	const char * __null_terminated name = sanitize_ifname(name_unsanitized);
2256 
2257 	BRIDGE_LOCK_ASSERT_HELD(sc);
2258 
2259 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2260 		ifp = bif->bif_ifp;
2261 		if (strcmp(ifp->if_xname, name) == 0) {
2262 			return bif;
2263 		}
2264 	}
2265 
2266 	return NULL;
2267 }
2268 
2269 /*
2270  * bridge_lookup_member_if:
2271  *
2272  *	Lookup a bridge member interface by ifnet*.
2273  */
2274 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2275 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2276 {
2277 	struct bridge_iflist *bif;
2278 
2279 	BRIDGE_LOCK_ASSERT_HELD(sc);
2280 
2281 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2282 		if (bif->bif_ifp == member_ifp) {
2283 			return bif;
2284 		}
2285 	}
2286 
2287 	return NULL;
2288 }
2289 
2290 static inline bool
get_and_clear_promisc(mbuf_t m)2291 get_and_clear_promisc(mbuf_t m)
2292 {
2293 	bool    is_promisc;
2294 
2295 	/*
2296 	 * Need to clear the promiscuous flag otherwise the packet will be
2297 	 * dropped by DLIL after processing filters
2298 	 */
2299 	is_promisc = (mbuf_flags(m) & MBUF_PROMISC) != 0;
2300 	if (is_promisc) {
2301 		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
2302 	}
2303 	return is_promisc;
2304 }
2305 
2306 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2307 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2308     mbuf_t *data, char **frame_ptr)
2309 {
2310 #pragma unused(protocol)
2311 	errno_t error = 0;
2312 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2313 	struct bridge_softc *sc = bif->bif_sc;
2314 	int included = 0;
2315 	struct ether_header * eh_p;
2316 	size_t frmlen = 0;
2317 	bool is_promisc;
2318 	mblist list;
2319 	mbuf_t m = *data;
2320 
2321 	if ((m->m_flags & M_PROTO1)) {
2322 		goto out;
2323 	}
2324 
2325 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2326 	    *frame_ptr <= mtod(m, char *)) {
2327 		included = 1;
2328 		frmlen = mtod(m, char *) - *frame_ptr;
2329 	}
2330 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2331 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2332 	    "frmlen %lu", sc->sc_ifp->if_xname,
2333 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2334 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
2335 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2336 	    included ? "inside" : "outside", frmlen);
2337 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2338 		brlog_mbuf(m, "bridge_iff_input[", "");
2339 		brlog_ether_header((struct ether_header *)
2340 		    (void *)*frame_ptr);
2341 		brlog_mbuf_data(m, 0, 20);
2342 	}
2343 	if (included == 0) {
2344 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2345 		goto out;
2346 	}
2347 
2348 	/* Move data pointer to start of frame to the link layer header */
2349 	_mbuf_adjust_pkthdr_and_data(m, -frmlen);
2350 
2351 	/* make sure we can access the ethernet header */
2352 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2353 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2354 		    "short frame %lu < %lu",
2355 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2356 		goto out;
2357 	}
2358 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2359 		error = mbuf_pullup(data, sizeof(struct ether_header));
2360 		if (error != 0) {
2361 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2362 			    "mbuf_pullup(%lu) failed %d",
2363 			    sizeof(struct ether_header),
2364 			    error);
2365 			error = EJUSTRETURN;
2366 			goto out;
2367 		}
2368 		if (m != *data) {
2369 			m = *data;
2370 			*frame_ptr = mtod(m, char *);
2371 		}
2372 	}
2373 	mblist_init(&list);
2374 	mblist_append(&list, m);
2375 	is_promisc = get_and_clear_promisc(m);
2376 	eh_p = __unsafe_forge_single(struct ether_header *, *frame_ptr);
2377 	list = bridge_input_list(sc, ifp, eh_p, list, is_promisc);
2378 	m = *data = list.head;
2379 	if (m == NULL) {
2380 		error = EJUSTRETURN;
2381 	}
2382 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2383 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2384 		brlog_mbuf(m, "bridge_iff_input]", "");
2385 	}
2386 
2387 out:
2388 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2389 
2390 	return error;
2391 }
2392 
2393 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2394 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2395     mbuf_t *data)
2396 {
2397 #pragma unused(protocol)
2398 	errno_t error = 0;
2399 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2400 	struct bridge_softc *sc = bif->bif_sc;
2401 	mbuf_t m = *data;
2402 
2403 	if ((m->m_flags & M_PROTO1)) {
2404 		goto out;
2405 	}
2406 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2407 	    "%s from %s m 0x%llx data 0x%llx",
2408 	    sc->sc_ifp->if_xname, ifp->if_xname,
2409 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2410 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)));
2411 
2412 	error = bridge_member_output(sc, ifp, data);
2413 	if (error != 0 && error != EJUSTRETURN) {
2414 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2415 		    "bridge_member_output failed error %d",
2416 		    error);
2417 	}
2418 out:
2419 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2420 
2421 	return error;
2422 }
2423 
2424 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2425 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2426     const struct kev_msg *event_msg)
2427 {
2428 #pragma unused(protocol)
2429 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2430 	struct bridge_softc *sc = bif->bif_sc;
2431 
2432 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2433 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2434 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2435 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2436 		    "%s event_code %u - %s",
2437 		    ifp->if_xname, event_msg->event_code,
2438 		    dlil_kev_dl_code_str(event_msg->event_code));
2439 
2440 		switch (event_msg->event_code) {
2441 		case KEV_DL_LINK_OFF:
2442 		case KEV_DL_LINK_ON: {
2443 			bridge_iflinkevent(ifp);
2444 #if BRIDGESTP
2445 			bstp_linkstate(ifp, event_msg->event_code);
2446 #endif /* BRIDGESTP */
2447 			break;
2448 		}
2449 		case KEV_DL_SIFFLAGS: {
2450 			if ((ifp->if_flags & IFF_UP) == 0) {
2451 				break;
2452 			}
2453 			if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2454 				errno_t error;
2455 
2456 				error = ifnet_set_promiscuous(ifp, 1);
2457 				if (error != 0) {
2458 					BRIDGE_LOG(LOG_NOTICE, 0,
2459 					    "ifnet_set_promiscuous (%s)"
2460 					    " failed %d", ifp->if_xname,
2461 					    error);
2462 				} else {
2463 					bif->bif_flags |= BIFF_PROMISC;
2464 				}
2465 			}
2466 			if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2467 			    (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2468 				errno_t error;
2469 
2470 				error = if_allmulti(ifp, 1);
2471 				if (error != 0) {
2472 					BRIDGE_LOG(LOG_NOTICE, 0,
2473 					    "if_allmulti (%s)"
2474 					    " failed %d", ifp->if_xname,
2475 					    error);
2476 				} else {
2477 					bif->bif_flags |= BIFF_ALL_MULTI;
2478 #ifdef XNU_PLATFORM_AppleTVOS
2479 					ip6_forwarding = 1;
2480 #endif /* XNU_PLATFORM_AppleTVOS */
2481 				}
2482 			}
2483 			break;
2484 		}
2485 		case KEV_DL_IFCAP_CHANGED: {
2486 			BRIDGE_LOCK(sc);
2487 			bridge_set_tso(sc);
2488 			BRIDGE_UNLOCK(sc);
2489 			break;
2490 		}
2491 		case KEV_DL_PROTO_DETACHED:
2492 		case KEV_DL_PROTO_ATTACHED: {
2493 			bridge_proto_attach_changed(ifp);
2494 			break;
2495 		}
2496 		default:
2497 			break;
2498 		}
2499 	}
2500 }
2501 
2502 /*
2503  * bridge_iff_detached:
2504  *
2505  *      Called when our interface filter has been detached from a
2506  *      member interface.
2507  */
2508 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2509 bridge_iff_detached(void *cookie, ifnet_t ifp)
2510 {
2511 #pragma unused(cookie)
2512 	struct bridge_iflist *bif;
2513 	struct bridge_softc * __single sc = ifp->if_bridge;
2514 
2515 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2516 
2517 	/* Check if the interface is a bridge member */
2518 	if (sc != NULL) {
2519 		BRIDGE_LOCK(sc);
2520 		bif = bridge_lookup_member_if(sc, ifp);
2521 		if (bif != NULL) {
2522 			bridge_delete_member(sc, bif);
2523 		}
2524 		BRIDGE_UNLOCK(sc);
2525 		return;
2526 	}
2527 	/* Check if the interface is a span port */
2528 	lck_mtx_lock(&bridge_list_mtx);
2529 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2530 		BRIDGE_LOCK(sc);
2531 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2532 		if (ifp == bif->bif_ifp) {
2533 			bridge_delete_span(sc, bif);
2534 			break;
2535 		}
2536 		BRIDGE_UNLOCK(sc);
2537 	}
2538 	lck_mtx_unlock(&bridge_list_mtx);
2539 }
2540 
2541 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2542 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2543     char *header)
2544 {
2545 #pragma unused(protocol, packet, header)
2546 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2547 	    ifp->if_xname);
2548 	return 0;
2549 }
2550 
2551 static int
bridge_attach_protocol(struct ifnet * ifp)2552 bridge_attach_protocol(struct ifnet *ifp)
2553 {
2554 	int     error;
2555 	struct ifnet_attach_proto_param reg;
2556 
2557 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2558 	bzero(&reg, sizeof(reg));
2559 	reg.input = bridge_proto_input;
2560 
2561 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2562 	if (error) {
2563 		BRIDGE_LOG(LOG_NOTICE, 0,
2564 		    "ifnet_attach_protocol(%s) failed, %d",
2565 		    ifp->if_xname, error);
2566 	}
2567 
2568 	return error;
2569 }
2570 
2571 static int
bridge_detach_protocol(struct ifnet * ifp)2572 bridge_detach_protocol(struct ifnet *ifp)
2573 {
2574 	int     error;
2575 
2576 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2577 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2578 	if (error) {
2579 		BRIDGE_LOG(LOG_NOTICE, 0,
2580 		    "ifnet_detach_protocol(%s) failed, %d",
2581 		    ifp->if_xname, error);
2582 	}
2583 
2584 	return error;
2585 }
2586 
2587 /*
2588  * bridge_delete_member:
2589  *
2590  *	Delete the specified member interface.
2591  */
2592 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2593 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2594 {
2595 #if SKYWALK
2596 	boolean_t add_netagent = FALSE;
2597 #endif /* SKYWALK */
2598 	uint32_t    bif_flags;
2599 	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2600 	int lladdr_changed = 0, error;
2601 	uint8_t eaddr[ETHER_ADDR_LEN];
2602 	u_int32_t event_code = 0;
2603 
2604 	BRIDGE_LOCK_ASSERT_HELD(sc);
2605 	VERIFY(ifs != NULL);
2606 
2607 	/*
2608 	 * Remove the member from the list first so it cannot be found anymore
2609 	 * when we release the bridge lock below
2610 	 */
2611 	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2612 		bif->bif_flags &= ~BIFF_IN_MEMBER_LIST;
2613 		BRIDGE_XLOCK(sc);
2614 		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2615 		BRIDGE_XDROP(sc);
2616 	}
2617 	if (sc->sc_mac_nat_bif != NULL) {
2618 		if (bif == sc->sc_mac_nat_bif) {
2619 			bridge_mac_nat_disable(sc);
2620 		} else {
2621 			bridge_mac_nat_flush_entries(sc, bif);
2622 		}
2623 	}
2624 #if BRIDGESTP
2625 	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2626 		bstp_disable(&bif->bif_stp);
2627 	}
2628 #endif /* BRIDGESTP */
2629 
2630 	/*
2631 	 * If removing the interface that gave the bridge its mac address, set
2632 	 * the mac address of the bridge to the address of the next member, or
2633 	 * to its default address if no members are left.
2634 	 */
2635 	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2636 		ifnet_release(sc->sc_ifaddr);
2637 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2638 			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2639 			sc->sc_ifaddr = NULL;
2640 		} else {
2641 			struct ifnet *fif =
2642 			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2643 			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2644 			sc->sc_ifaddr = fif;
2645 			ifnet_reference(fif);   /* for sc_ifaddr */
2646 		}
2647 		lladdr_changed = 1;
2648 	}
2649 
2650 #if HAS_IF_CAP
2651 	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
2652 #endif /* HAS_IF_CAP */
2653 
2654 	error = bridge_set_tso(sc);
2655 	if (error != 0) {
2656 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2657 	}
2658 
2659 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2660 
2661 	KASSERT(bif->bif_addrcnt == 0,
2662 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2663 
2664 	/*
2665 	 * Update link status of the bridge based on its remaining members
2666 	 */
2667 	event_code = bridge_updatelinkstatus(sc);
2668 	bif_flags = bif->bif_flags;
2669 	BRIDGE_UNLOCK(sc);
2670 
2671 	/* only perform these steps if the interface is still attached */
2672 	if (ifnet_is_attached(ifs, 1)) {
2673 #if SKYWALK
2674 		add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2675 
2676 		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2677 			ifnet_detach_flowswitch_nexus(ifs);
2678 		}
2679 #endif /* SKYWALK */
2680 		/* disable promiscuous mode */
2681 		if ((bif_flags & BIFF_PROMISC) != 0) {
2682 			(void) ifnet_set_promiscuous(ifs, 0);
2683 		}
2684 		/* disable all multi */
2685 		if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2686 			(void)if_allmulti(ifs, 0);
2687 		}
2688 #if HAS_IF_CAP
2689 		/* re-enable any interface capabilities */
2690 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2691 #endif
2692 		/* detach bridge "protocol" */
2693 		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2694 			(void)bridge_detach_protocol(ifs);
2695 		}
2696 		/* detach interface filter */
2697 		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2698 			iflt_detach(bif->bif_iff_ref);
2699 		}
2700 		/* re-enable LRO */
2701 		if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2702 			(void)bridge_set_lro(ifs, TRUE);
2703 		}
2704 		ifnet_decr_iorefcnt(ifs);
2705 	}
2706 
2707 	if (lladdr_changed &&
2708 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2709 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2710 	}
2711 
2712 	if (event_code != 0) {
2713 		bridge_link_event(bifp, event_code);
2714 	}
2715 
2716 #if BRIDGESTP
2717 	bstp_destroy(&bif->bif_stp);    /* prepare to free */
2718 #endif /* BRIDGESTP */
2719 
2720 	kfree_type(struct bridge_iflist, bif);
2721 	ifs->if_bridge = NULL;
2722 #if SKYWALK
2723 	if (add_netagent && ifnet_is_attached(ifs, 1)) {
2724 		(void)ifnet_add_netagent(ifs);
2725 		ifnet_decr_iorefcnt(ifs);
2726 	}
2727 #endif /* SKYWALK */
2728 
2729 	ifnet_release(ifs);
2730 
2731 	BRIDGE_LOCK(sc);
2732 }
2733 
2734 /*
2735  * bridge_delete_span:
2736  *
2737  *	Delete the specified span interface.
2738  */
2739 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2740 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2741 {
2742 	BRIDGE_LOCK_ASSERT_HELD(sc);
2743 
2744 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2745 	    ("%s: not a span interface", __func__));
2746 
2747 	ifnet_release(bif->bif_ifp);
2748 
2749 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2750 	kfree_type(struct bridge_iflist, bif);
2751 }
2752 
2753 static int
bridge_ioctl_add(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)2754 bridge_ioctl_add(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
2755 {
2756 	struct ifbreq * __single req = arg;
2757 	struct bridge_iflist *bif = NULL;
2758 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2759 	int error = 0, lladdr_changed = 0;
2760 	uint8_t eaddr[ETHER_ADDR_LEN];
2761 	struct iff_filter iff;
2762 	u_int32_t event_code = 0;
2763 	boolean_t input_broadcast;
2764 	int media_active;
2765 	boolean_t wifi_infra = FALSE;
2766 
2767 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
2768 	if (ifs == NULL) {
2769 		return ENOENT;
2770 	}
2771 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2772 		return EINVAL;
2773 	}
2774 
2775 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2776 		return EINVAL;
2777 	}
2778 
2779 	/* If it's in the span list, it can't be a member. */
2780 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2781 		if (ifs == bif->bif_ifp) {
2782 			return EBUSY;
2783 		}
2784 	}
2785 
2786 	if (ifs->if_bridge == sc) {
2787 		return EEXIST;
2788 	}
2789 
2790 	if (ifs->if_bridge != NULL) {
2791 		return EBUSY;
2792 	}
2793 
2794 	switch (ifs->if_type) {
2795 	case IFT_ETHER:
2796 		if (strcmp(ifs->if_name, "en") == 0 &&
2797 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2798 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2799 			/* XXX is there a better way to identify Wi-Fi STA? */
2800 			wifi_infra = TRUE;
2801 		}
2802 		break;
2803 	case IFT_L2VLAN:
2804 	case IFT_IEEE8023ADLAG:
2805 		break;
2806 	default:
2807 		return EINVAL;
2808 	}
2809 
2810 	/* fail to add the interface if the MTU doesn't match */
2811 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2812 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2813 		    sc->sc_ifp->if_xname,
2814 		    ifs->if_xname);
2815 		return EINVAL;
2816 	}
2817 
2818 	if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2819 		/* there's already an interface that's doing MAC NAT */
2820 		return EBUSY;
2821 	}
2822 
2823 	/* prevent the interface from detaching while we add the member */
2824 	if (!ifnet_is_attached(ifs, 1)) {
2825 		return ENXIO;
2826 	}
2827 
2828 	/* allocate a new member */
2829 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2830 	bif->bif_ifp = ifs;
2831 	ifnet_reference(ifs);
2832 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2833 #if HAS_IF_CAP
2834 	bif->bif_savedcaps = ifs->if_capenable;
2835 #endif /* HAS_IF_CAP */
2836 	bif->bif_sc = sc;
2837 	if (wifi_infra) {
2838 		(void)bridge_mac_nat_enable(sc, bif);
2839 	}
2840 
2841 	/* Allow the first Ethernet member to define the MTU */
2842 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2843 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2844 	}
2845 
2846 	/*
2847 	 * Assign the interface's MAC address to the bridge if it's the first
2848 	 * member and the MAC address of the bridge has not been changed from
2849 	 * the default (randomly) generated one.
2850 	 */
2851 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2852 	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2853 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2854 		sc->sc_ifaddr = ifs;
2855 		ifnet_reference(ifs);   /* for sc_ifaddr */
2856 		lladdr_changed = 1;
2857 	}
2858 
2859 	ifs->if_bridge = sc;
2860 #if BRIDGESTP
2861 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2862 #endif /* BRIDGESTP */
2863 
2864 #if HAS_IF_CAP
2865 	/* Set interface capabilities to the intersection set of all members */
2866 	bridge_mutecaps(sc);
2867 #endif /* HAS_IF_CAP */
2868 
2869 	/*
2870 	 * Respect lock ordering with DLIL lock for the following operations
2871 	 */
2872 	BRIDGE_UNLOCK(sc);
2873 
2874 	/* enable promiscuous mode */
2875 	error = ifnet_set_promiscuous(ifs, 1);
2876 	switch (error) {
2877 	case 0:
2878 		bif->bif_flags |= BIFF_PROMISC;
2879 		break;
2880 	case ENETDOWN:
2881 	case EPWROFF:
2882 		BRIDGE_LOG(LOG_NOTICE, 0,
2883 		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2884 		    ifs->if_xname, error);
2885 		/* Ignore error when device is not up */
2886 		error = 0;
2887 		break;
2888 	default:
2889 		BRIDGE_LOG(LOG_NOTICE, 0,
2890 		    "ifnet_set_promiscuous(%s) failed %d",
2891 		    ifs->if_xname, error);
2892 		BRIDGE_LOCK(sc);
2893 		goto out;
2894 	}
2895 	if (wifi_infra) {
2896 		int this_error;
2897 
2898 		/* Wi-Fi doesn't really support promiscuous, set allmulti */
2899 		bif->bif_flags |= BIFF_WIFI_INFRA;
2900 		this_error = if_allmulti(ifs, 1);
2901 		if (this_error == 0) {
2902 			bif->bif_flags |= BIFF_ALL_MULTI;
2903 #ifdef XNU_PLATFORM_AppleTVOS
2904 			ip6_forwarding = 1;
2905 #endif /* XNU_PLATFORM_AppleTVOS */
2906 		} else {
2907 			BRIDGE_LOG(LOG_NOTICE, 0,
2908 			    "if_allmulti(%s) failed %d, ignoring",
2909 			    ifs->if_xname, this_error);
2910 		}
2911 	}
2912 #if SKYWALK
2913 	/* ensure that the flowswitch is present for native interface */
2914 	if (SKYWALK_NATIVE(ifs)) {
2915 		if (ifnet_attach_flowswitch_nexus(ifs)) {
2916 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2917 		}
2918 	}
2919 	/* remove the netagent on the flowswitch (rdar://75050182) */
2920 	if (if_is_fsw_netagent_enabled()) {
2921 		(void)ifnet_remove_netagent(ifs);
2922 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2923 	}
2924 #endif /* SKYWALK */
2925 
2926 	/*
2927 	 * install an interface filter
2928 	 */
2929 	memset(&iff, 0, sizeof(struct iff_filter));
2930 	iff.iff_cookie = bif;
2931 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2932 	iff.iff_input = bridge_iff_input;
2933 	iff.iff_output = bridge_iff_output;
2934 	iff.iff_event = bridge_iff_event;
2935 	iff.iff_detached = bridge_iff_detached;
2936 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2937 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL | DLIL_IFF_BRIDGE);
2938 	if (error != 0) {
2939 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2940 		BRIDGE_LOCK(sc);
2941 		goto out;
2942 	}
2943 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
2944 
2945 	/*
2946 	 * install a dummy "bridge" protocol
2947 	 */
2948 	if ((error = bridge_attach_protocol(ifs)) != 0) {
2949 		if (error != 0) {
2950 			BRIDGE_LOG(LOG_NOTICE, 0,
2951 			    "bridge_attach_protocol failed %d", error);
2952 			BRIDGE_LOCK(sc);
2953 			goto out;
2954 		}
2955 	}
2956 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
2957 
2958 	if (lladdr_changed &&
2959 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2960 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2961 	}
2962 
2963 	media_active = interface_media_active(ifs);
2964 
2965 	/* disable LRO if needed */
2966 	if (bridge_set_lro(ifs, FALSE)) {
2967 		bif->bif_flags |= BIFF_LRO_DISABLED;
2968 	}
2969 
2970 	/*
2971 	 * No failures past this point. Add the member to the list.
2972 	 */
2973 	BRIDGE_LOCK(sc);
2974 	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2975 	BRIDGE_XLOCK(sc);
2976 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2977 	BRIDGE_XDROP(sc);
2978 
2979 	/* cache the member link status */
2980 	if (media_active != 0) {
2981 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2982 	} else {
2983 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2984 	}
2985 
2986 	/* the new member may change the link status of the bridge interface */
2987 	event_code = bridge_updatelinkstatus(sc);
2988 
2989 	/* check whether we need input broadcast or not */
2990 	input_broadcast = interface_needs_input_broadcast(ifs);
2991 	bif_set_input_broadcast(bif, input_broadcast);
2992 	BRIDGE_UNLOCK(sc);
2993 
2994 	if (event_code != 0) {
2995 		bridge_link_event(bifp, event_code);
2996 	}
2997 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2998 	    "%s input broadcast %s", ifs->if_xname,
2999 	    input_broadcast ? "ENABLED" : "DISABLED");
3000 
3001 	BRIDGE_LOCK(sc);
3002 	bridge_set_tso(sc);
3003 
3004 out:
3005 	/* allow the interface to detach */
3006 	ifnet_decr_iorefcnt(ifs);
3007 
3008 	if (error != 0) {
3009 		if (bif != NULL) {
3010 			bridge_delete_member(sc, bif);
3011 		}
3012 	} else if (IFNET_IS_VMNET(ifs)) {
3013 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
3014 	}
3015 
3016 	return error;
3017 }
3018 
3019 static int
bridge_ioctl_del(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3020 bridge_ioctl_del(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3021 {
3022 	struct ifbreq * __single req = arg;
3023 	struct bridge_iflist *bif;
3024 
3025 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3026 	if (bif == NULL) {
3027 		return ENOENT;
3028 	}
3029 
3030 	bridge_delete_member(sc, bif);
3031 
3032 	return 0;
3033 }
3034 
3035 static int
bridge_ioctl_purge(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3036 bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3037 {
3038 #pragma unused(sc, arg, arg_len)
3039 	return 0;
3040 }
3041 
3042 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3043 bridge_ioctl_gifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3044 {
3045 	struct ifbreq * __single req = arg;
3046 	struct bridge_iflist *bif;
3047 
3048 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3049 	if (bif == NULL) {
3050 		return ENOENT;
3051 	}
3052 
3053 	struct bstp_port *bp;
3054 
3055 	bp = &bif->bif_stp;
3056 	req->ifbr_state = bp->bp_state;
3057 	req->ifbr_priority = bp->bp_priority;
3058 	req->ifbr_path_cost = bp->bp_path_cost;
3059 	req->ifbr_proto = bp->bp_protover;
3060 	req->ifbr_role = bp->bp_role;
3061 	req->ifbr_stpflags = bp->bp_flags;
3062 	req->ifbr_ifsflags = bif->bif_ifflags;
3063 
3064 	/* Copy STP state options as flags */
3065 	if (bp->bp_operedge) {
3066 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
3067 	}
3068 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3069 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3070 	}
3071 	if (bp->bp_ptp_link) {
3072 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3073 	}
3074 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3075 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3076 	}
3077 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3078 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3079 	}
3080 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3081 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3082 	}
3083 
3084 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3085 	req->ifbr_addrcnt = bif->bif_addrcnt;
3086 	req->ifbr_addrmax = bif->bif_addrmax;
3087 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
3088 
3089 	return 0;
3090 }
3091 
3092 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3093 bridge_ioctl_sifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3094 {
3095 	struct ifbreq * __single req = arg;
3096 	struct bridge_iflist *bif;
3097 #if BRIDGESTP
3098 	struct bstp_port *bp;
3099 #endif /* BRIDGESTP */
3100 	errno_t error;
3101 	uint32_t ifsflags;
3102 
3103 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3104 	if (bif == NULL) {
3105 		return ENOENT;
3106 	}
3107 
3108 	ifsflags = req->ifbr_ifsflags;
3109 	if (ifsflags & IFBIF_SPAN) {
3110 		/* SPAN is readonly */
3111 		return EINVAL;
3112 	}
3113 #define CHECKSUM_VIRTIO (IFBIF_CHECKSUM_OFFLOAD | IFBIF_USES_VIRTIO)
3114 	if ((ifsflags & CHECKSUM_VIRTIO) == CHECKSUM_VIRTIO) {
3115 		/* can't specify checksum and virtio */
3116 		return EINVAL;
3117 	}
3118 	if ((ifsflags & IFBIF_MAC_NAT) != 0 &&
3119 	    ((ifsflags & CHECKSUM_VIRTIO) != 0 ||
3120 	    (bif->bif_flags & BIFF_HOST_FILTER) != 0)) {
3121 		/* MAC-NAT can't be used with checksum, host filter, or virtio */
3122 		return EINVAL;
3123 	}
3124 	if ((ifsflags & IFBIF_MAC_NAT) != 0) {
3125 		error = bridge_mac_nat_enable(sc, bif);
3126 		if (error != 0) {
3127 			return error;
3128 		}
3129 	} else if (sc->sc_mac_nat_bif == bif) {
3130 		bridge_mac_nat_disable(sc);
3131 	}
3132 
3133 #if BRIDGESTP
3134 	if (ifsflags & IFBIF_STP) {
3135 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3136 			error = bstp_enable(&bif->bif_stp);
3137 			if (error) {
3138 				return error;
3139 			}
3140 		}
3141 	} else {
3142 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3143 			bstp_disable(&bif->bif_stp);
3144 		}
3145 	}
3146 
3147 	/* Pass on STP flags */
3148 	bp = &bif->bif_stp;
3149 	bstp_set_edge(bp, ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3150 	bstp_set_autoedge(bp, ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3151 	bstp_set_ptp(bp, ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3152 	bstp_set_autoptp(bp, ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3153 #else /* !BRIDGESTP */
3154 	if (ifsflags & IFBIF_STP) {
3155 		return EOPNOTSUPP;
3156 	}
3157 #endif /* !BRIDGESTP */
3158 
3159 	/* Save the bits relating to the bridge */
3160 	bif->bif_ifflags = ifsflags & IFBIFMASK;
3161 
3162 	return 0;
3163 }
3164 
3165 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3166 bridge_ioctl_scache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3167 {
3168 	struct ifbrparam * __single param = arg;
3169 
3170 	sc->sc_brtmax = param->ifbrp_csize;
3171 	bridge_rttrim(sc);
3172 	return 0;
3173 }
3174 
3175 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3176 bridge_ioctl_gcache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3177 {
3178 	struct ifbrparam * __single param = arg;
3179 
3180 	param->ifbrp_csize = sc->sc_brtmax;
3181 
3182 	return 0;
3183 }
3184 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Common body of bridge_ioctl_gifs32() and bridge_ioctl_gifs64().
 *	The expanding function must have `sc' (the bridge), `bifc' (the
 *	ifbifconf32/64 request) and `error' (int) in scope.
 *
 *	When bifc->ifbic_len is 0 only the required buffer size is stored
 *	in ifbic_len.  Otherwise one struct ifbreq per member, followed by
 *	one per span port, is copied out to bifc->ifbic_req, limited to the
 *	space the caller supplied, and ifbic_len is set to the number of
 *	bytes produced.
 *
 *	The bridge lock is dropped around the allocation and the copyout,
 *	so the lists may change in between; `len' is capped by the size of
 *	the allocation to cope with that.  ENOMEM is returned when the
 *	temporary buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
	        count++;                                                \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
	        count++;                                                \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                \
	BRIDGE_LOCK(sc);                                                \
	/* a non-empty request that could not be allocated */           \
	if (outbuf == NULL && buflen != 0) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	/* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq, sizeof(breq)); \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	/* start clean: do not report a member's fields for a span */ \
	        bzero(&breq, sizeof (breq));                            \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	if (bifc->ifbic_len > 0) {                                      \
	        error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);\
	}                                                               \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
3249 
3250 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3251 bridge_ioctl_gifs64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3252 {
3253 	struct ifbifconf64 * __single bifc = arg;
3254 	int error = 0;
3255 
3256 	BRIDGE_IOCTL_GIFS;
3257 
3258 	return error;
3259 }
3260 
3261 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3262 bridge_ioctl_gifs32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3263 {
3264 	struct ifbifconf32 * __single bifc = arg;
3265 	int error = 0;
3266 
3267 	BRIDGE_IOCTL_GIFS;
3268 
3269 	return error;
3270 }
3271 
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Common body of bridge_ioctl_rts32() and bridge_ioctl_rts64().
 *	The expanding function must have `sc' (the bridge), `bac' (the
 *	ifbaconf32/64 request), `bareq' (an ifbareq32/64 scratch record)
 *	and `error' (int) in scope.  The macro returns from the expanding
 *	function.
 *
 *	One record per entry of sc_rtlist is copied out to bac->ifbac_req,
 *	limited to the space the caller supplied, and ifbac_len is set to
 *	the number of bytes produced.  ifba_expire is the remaining
 *	lifetime of a dynamic entry and 0 for any other entry or for a
 *	dynamic entry whose expiry time has already been reached.
 *
 *	The bridge lock is dropped around the allocation and the copyout,
 *	so the list may change in between; `len' is capped by the size of
 *	the allocation to cope with that.  ENOMEM is returned when the
 *	temporary buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                    \
	BRIDGE_LOCK(sc);                                                    \
                                                                            \
	count = 0;                                                          \
	/* a non-empty request that could not be allocated */               \
	if (outbuf == NULL && buflen != 0) {                                \
	        error = ENOMEM;                                             \
	        goto out;                                                   \
	}                                                                   \
	buf = outbuf;                                                       \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	/* reset first: bareq is reused from one entry to the next */      \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire)                          \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        if (bac->ifbac_len > 0) {                                   \
	                error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);\
	        }                                                           \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3328 
3329 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3330 bridge_ioctl_rts64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3331 {
3332 	struct ifbaconf64 * __single bac = arg;
3333 	struct ifbareq64 bareq;
3334 	int error = 0;
3335 
3336 	BRIDGE_IOCTL_RTS;
3337 	return error;
3338 }
3339 
3340 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3341 bridge_ioctl_rts32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3342 {
3343 	struct ifbaconf32 * __single bac = arg;
3344 	struct ifbareq32 bareq;
3345 	int error = 0;
3346 
3347 	BRIDGE_IOCTL_RTS;
3348 	return error;
3349 }
3350 
3351 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3352 bridge_ioctl_saddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3353 {
3354 	struct ifbareq32 * __single req = arg;
3355 	struct bridge_iflist *bif;
3356 	int error;
3357 
3358 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3359 	if (bif == NULL) {
3360 		return ENOENT;
3361 	}
3362 
3363 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3364 	    req->ifba_flags);
3365 
3366 	return error;
3367 }
3368 
3369 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3370 bridge_ioctl_saddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3371 {
3372 	struct ifbareq64 * __single req = arg;
3373 	struct bridge_iflist *bif;
3374 	int error;
3375 
3376 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3377 	if (bif == NULL) {
3378 		return ENOENT;
3379 	}
3380 
3381 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3382 	    req->ifba_flags);
3383 
3384 	return error;
3385 }
3386 
3387 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3388 bridge_ioctl_sto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3389 {
3390 	struct ifbrparam * __single param = arg;
3391 
3392 	sc->sc_brttimeout = param->ifbrp_ctime;
3393 	return 0;
3394 }
3395 
3396 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3397 bridge_ioctl_gto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3398 {
3399 	struct ifbrparam * __single param = arg;
3400 
3401 	param->ifbrp_ctime = sc->sc_brttimeout;
3402 	return 0;
3403 }
3404 
3405 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3406 bridge_ioctl_daddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3407 {
3408 	struct ifbareq32 * __single req = arg;
3409 
3410 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3411 }
3412 
3413 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3414 bridge_ioctl_daddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3415 {
3416 	struct ifbareq64 * __single req = arg;
3417 
3418 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3419 }
3420 
3421 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3422 bridge_ioctl_flush(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3423 {
3424 	struct ifbreq * __single req = arg;
3425 
3426 	bridge_rtflush(sc, req->ifbr_ifsflags);
3427 	return 0;
3428 }
3429 
3430 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3431 bridge_ioctl_gpri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3432 {
3433 	struct ifbrparam * __single param = arg;
3434 	struct bstp_state *bs = &sc->sc_stp;
3435 
3436 	param->ifbrp_prio = bs->bs_bridge_priority;
3437 	return 0;
3438 }
3439 
3440 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3441 bridge_ioctl_spri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3442 {
3443 #if BRIDGESTP
3444 	struct ifbrparam *param = arg;
3445 
3446 	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3447 #else /* !BRIDGESTP */
3448 #pragma unused(sc, arg)
3449 	return EOPNOTSUPP;
3450 #endif /* !BRIDGESTP */
3451 }
3452 
3453 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3454 bridge_ioctl_ght(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3455 {
3456 	struct ifbrparam * __single param = arg;
3457 	struct bstp_state *bs = &sc->sc_stp;
3458 
3459 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3460 	return 0;
3461 }
3462 
3463 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3464 bridge_ioctl_sht(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3465 {
3466 #if BRIDGESTP
3467 	struct ifbrparam *param = arg;
3468 
3469 	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3470 #else /* !BRIDGESTP */
3471 #pragma unused(sc, arg)
3472 	return EOPNOTSUPP;
3473 #endif /* !BRIDGESTP */
3474 }
3475 
3476 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3477 bridge_ioctl_gfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3478 {
3479 	struct ifbrparam * __single param;
3480 	struct bstp_state *bs;
3481 
3482 	param = arg;
3483 	bs = &sc->sc_stp;
3484 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3485 	return 0;
3486 }
3487 
3488 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3489 bridge_ioctl_sfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3490 {
3491 #if BRIDGESTP
3492 	struct ifbrparam *param = arg;
3493 
3494 	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3495 #else /* !BRIDGESTP */
3496 #pragma unused(sc, arg)
3497 	return EOPNOTSUPP;
3498 #endif /* !BRIDGESTP */
3499 }
3500 
3501 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3502 bridge_ioctl_gma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3503 {
3504 	struct ifbrparam * __single param;
3505 	struct bstp_state *bs;
3506 
3507 	param = arg;
3508 	bs = &sc->sc_stp;
3509 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3510 	return 0;
3511 }
3512 
3513 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3514 bridge_ioctl_sma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3515 {
3516 #if BRIDGESTP
3517 	struct ifbrparam *param = arg;
3518 
3519 	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3520 #else /* !BRIDGESTP */
3521 #pragma unused(sc, arg)
3522 	return EOPNOTSUPP;
3523 #endif /* !BRIDGESTP */
3524 }
3525 
3526 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3527 bridge_ioctl_sifprio(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3528 {
3529 #if BRIDGESTP
3530 	struct ifbreq *req = arg;
3531 	struct bridge_iflist *bif;
3532 
3533 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3534 	if (bif == NULL) {
3535 		return ENOENT;
3536 	}
3537 
3538 	return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3539 #else /* !BRIDGESTP */
3540 #pragma unused(sc, arg)
3541 	return EOPNOTSUPP;
3542 #endif /* !BRIDGESTP */
3543 }
3544 
3545 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3546 bridge_ioctl_sifcost(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3547 {
3548 #if BRIDGESTP
3549 	struct ifbreq *req = arg;
3550 	struct bridge_iflist *bif;
3551 
3552 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3553 	if (bif == NULL) {
3554 		return ENOENT;
3555 	}
3556 
3557 	return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3558 #else /* !BRIDGESTP */
3559 #pragma unused(sc, arg)
3560 	return EOPNOTSUPP;
3561 #endif /* !BRIDGESTP */
3562 }
3563 
3564 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3565 bridge_ioctl_gfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3566 {
3567 	struct ifbrparam * __single param = arg;
3568 
3569 	param->ifbrp_filter = sc->sc_filter_flags;
3570 
3571 	return 0;
3572 }
3573 
3574 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3575 bridge_ioctl_sfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3576 {
3577 	struct ifbrparam * __single param = arg;
3578 
3579 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3580 		return EINVAL;
3581 	}
3582 
3583 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3584 		return EINVAL;
3585 	}
3586 
3587 	sc->sc_filter_flags = param->ifbrp_filter;
3588 
3589 	return 0;
3590 }
3591 
3592 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3593 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3594 {
3595 	struct ifbreq * __single req = arg;
3596 	struct bridge_iflist *bif;
3597 
3598 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3599 	if (bif == NULL) {
3600 		return ENOENT;
3601 	}
3602 
3603 	bif->bif_addrmax = req->ifbr_addrmax;
3604 	return 0;
3605 }
3606 
3607 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3608 bridge_ioctl_addspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3609 {
3610 	struct ifbreq * __single req = arg;
3611 	struct bridge_iflist *bif = NULL;
3612 	struct ifnet *ifs;
3613 
3614 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3615 	if (ifs == NULL) {
3616 		return ENOENT;
3617 	}
3618 
3619 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3620 		return EINVAL;
3621 	}
3622 
3623 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3624 	if (ifs == bif->bif_ifp) {
3625 		return EBUSY;
3626 	}
3627 
3628 	if (ifs->if_bridge != NULL) {
3629 		return EBUSY;
3630 	}
3631 
3632 	switch (ifs->if_type) {
3633 	case IFT_ETHER:
3634 	case IFT_L2VLAN:
3635 	case IFT_IEEE8023ADLAG:
3636 		break;
3637 	default:
3638 		return EINVAL;
3639 	}
3640 
3641 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3642 
3643 	bif->bif_ifp = ifs;
3644 	bif->bif_ifflags = IFBIF_SPAN;
3645 
3646 	ifnet_reference(bif->bif_ifp);
3647 
3648 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3649 
3650 	return 0;
3651 }
3652 
3653 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3654 bridge_ioctl_delspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3655 {
3656 	struct ifbreq * __single req = arg;
3657 	struct bridge_iflist *bif;
3658 	struct ifnet *ifs;
3659 
3660 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3661 	if (ifs == NULL) {
3662 		return ENOENT;
3663 	}
3664 
3665 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3666 	if (ifs == bif->bif_ifp) {
3667 		break;
3668 	}
3669 
3670 	if (bif == NULL) {
3671 		return ENOENT;
3672 	}
3673 
3674 	bridge_delete_span(sc, bif);
3675 
3676 	return 0;
3677 }
3678 
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of the gbparam32/gbparam64 handlers.  Expands in place
 *	and expects `sc' (the bridge softc) and `req' (the ifbropreq32 or
 *	ifbropreq64 being filled in) to be in scope.  Copies the bridge's
 *	STP parameters from sc->sc_stp into the request; the three timer
 *	values are reported shifted right by 8 bits, and the root port is
 *	reported as that port's if_index, or 0 when there is no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *stp_state = &sc->sc_stp;                     \
	struct bstp_port *rootp = stp_state->bs_root_port;              \
                                                                        \
	if (rootp != NULL) {                                            \
	        req->ifbop_root_port = rootp->bp_ifp->if_index;         \
	} else {                                                        \
	        req->ifbop_root_port = 0;                               \
	}                                                               \
                                                                        \
	req->ifbop_maxage = stp_state->bs_bridge_max_age >> 8;          \
	req->ifbop_hellotime = stp_state->bs_bridge_htime >> 8;         \
	req->ifbop_fwddelay = stp_state->bs_bridge_fdelay >> 8;         \
                                                                        \
	req->ifbop_holdcount = stp_state->bs_txholdcount;               \
	req->ifbop_priority = stp_state->bs_bridge_priority;            \
	req->ifbop_protocol = stp_state->bs_protover;                   \
	req->ifbop_root_path_cost = stp_state->bs_root_pv.pv_cost;      \
	req->ifbop_bridgeid = stp_state->bs_bridge_pv.pv_dbridge_id;    \
	req->ifbop_designated_root = stp_state->bs_root_pv.pv_root_id;  \
	req->ifbop_designated_bridge =                                  \
	    stp_state->bs_root_pv.pv_dbridge_id;                        \
	req->ifbop_last_tc_time.tv_sec =                                \
	    stp_state->bs_last_tc_time.tv_sec;                          \
	req->ifbop_last_tc_time.tv_usec =                               \
	    stp_state->bs_last_tc_time.tv_usec;                         \
} while (0)
3703 
3704 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3705 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3706 {
3707 	struct ifbropreq32 * __single req = arg;
3708 
3709 	BRIDGE_IOCTL_GBPARAM;
3710 	return 0;
3711 }
3712 
3713 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3714 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3715 {
3716 	struct ifbropreq64 * __single req = arg;
3717 
3718 	BRIDGE_IOCTL_GBPARAM;
3719 	return 0;
3720 }
3721 
3722 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3723 bridge_ioctl_grte(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3724 {
3725 	struct ifbrparam * __single param = arg;
3726 
3727 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3728 	return 0;
3729 }
3730 
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of the gifsstp32/gifsstp64 handlers.  Expands in place
 *	and expects `sc' (the bridge softc), `bifstp' (the ifbpstpconf32 or
 *	ifbpstpconf64 request) and `error' (int, initially 0) to be in
 *	scope.  Every path through the macro ends in a `return'.
 *
 *	One struct ifbpstpreq is produced for each member that has
 *	IFBIF_STP set.  If the caller passes ifbpstp_len == 0, or there is
 *	nothing to report, only the required length is written back.
 *	Otherwise the records are staged in a temporary kernel buffer and
 *	copied out, and ifbpstp_len is set to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout and is held again on every return.  Because the member list
 *	can change while the lock is dropped, the fill loop is bounded by
 *	`len', which never exceeds the allocated size.
 *
 *	Returns 0, ENOMEM if the staging buffer cannot be allocated, or the
 *	error from copyout().
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0)                \
	                count++;                                        \
	}                                                               \
                                                                        \
	buflen = sizeof (bpreq) * count;                                \
	/* size query, or nothing to report: skip the allocation */     \
	if (buflen == 0 || bifstp->ifbpstp_len == 0) {                  \
	        bifstp->ifbpstp_len = buflen;                           \
	        return (0);                                             \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                \
	BRIDGE_LOCK(sc);                                                \
	/* Z_WAITOK without Z_NOFAIL may still return NULL */           \
	if (outbuf == NULL)                                             \
	        return (ENOMEM);                                        \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (bpreq))                               \
	                break;                                          \
                                                                        \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0)                \
	                continue;                                       \
                                                                        \
	        bp = &bif->bif_stp;                                     \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
                                                                        \
	        memcpy(buf, &bpreq, sizeof (bpreq));                    \
	        count++;                                                \
	        buf += sizeof (bpreq);                                  \
	        len -= sizeof (bpreq);                                  \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	if (bifstp->ifbpstp_len > 0) {                                  \
	        error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);\
	}                                                               \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3788 
3789 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3790 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3791 {
3792 	struct ifbpstpconf32 * __single bifstp = arg;
3793 	int error = 0;
3794 
3795 	BRIDGE_IOCTL_GIFSSTP;
3796 	return error;
3797 }
3798 
3799 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3800 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3801 {
3802 	struct ifbpstpconf64 * __single bifstp = arg;
3803 	int error = 0;
3804 
3805 	BRIDGE_IOCTL_GIFSSTP;
3806 	return error;
3807 }
3808 
3809 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3810 bridge_ioctl_sproto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3811 {
3812 #if BRIDGESTP
3813 	struct ifbrparam *param = arg;
3814 
3815 	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3816 #else /* !BRIDGESTP */
3817 #pragma unused(sc, arg)
3818 	return EOPNOTSUPP;
3819 #endif /* !BRIDGESTP */
3820 }
3821 
3822 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3823 bridge_ioctl_stxhc(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3824 {
3825 #if BRIDGESTP
3826 	struct ifbrparam *param = arg;
3827 
3828 	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3829 #else /* !BRIDGESTP */
3830 #pragma unused(sc, arg)
3831 	return EOPNOTSUPP;
3832 #endif /* !BRIDGESTP */
3833 }
3834 
3835 
3836 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3837 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3838 {
3839 	struct ifbrhostfilter * __single req = arg;
3840 	struct bridge_iflist *bif;
3841 
3842 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3843 	if (bif == NULL) {
3844 		return ENOENT;
3845 	}
3846 
3847 	bzero(req, sizeof(struct ifbrhostfilter));
3848 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3849 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3850 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3851 		    ETHER_ADDR_LEN);
3852 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3853 	}
3854 	return 0;
3855 }
3856 
3857 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3858 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3859 {
3860 	struct ifbrhostfilter * __single req = arg;
3861 	struct bridge_iflist *bif;
3862 
3863 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3864 	if (bif == NULL) {
3865 		return ENOENT;
3866 	}
3867 	if (bif_has_mac_nat(bif)) {
3868 		/* no host filter with MAC-NAT */
3869 		return EINVAL;
3870 	}
3871 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3872 		bif->bif_flags |= BIFF_HOST_FILTER;
3873 
3874 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3875 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3876 			    ETHER_ADDR_LEN);
3877 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3878 			    ETHER_ADDR_LEN) != 0) {
3879 				bif->bif_flags |= BIFF_HF_HWSRC;
3880 			} else {
3881 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3882 			}
3883 		}
3884 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3885 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3886 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3887 				bif->bif_flags |= BIFF_HF_IPSRC;
3888 			} else {
3889 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3890 			}
3891 		}
3892 	} else {
3893 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3894 		    BIFF_HF_IPSRC);
3895 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3896 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3897 	}
3898 
3899 	return 0;
3900 }
3901 
3902 static char *__indexable
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * __indexable buf,unsigned int * len_p)3903 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3904     unsigned int * count_p, char *__indexable buf,
3905     unsigned int * len_p)
3906 {
3907 	unsigned int            count = *count_p;
3908 	struct ifbrmne          ifbmne;
3909 	unsigned int            len = *len_p;
3910 	struct mac_nat_entry    *mne;
3911 	unsigned long           now;
3912 
3913 	bzero(&ifbmne, sizeof(ifbmne));
3914 	LIST_FOREACH(mne, list, mne_list) {
3915 		if (len < sizeof(ifbmne)) {
3916 			break;
3917 		}
3918 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3919 		    "%s", mne->mne_bif->bif_ifp->if_xname);
3920 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3921 		    sizeof(ifbmne.ifbmne_mac));
3922 		now = (unsigned long) net_uptime();
3923 		if (now < mne->mne_expire) {
3924 			ifbmne.ifbmne_expire = mne->mne_expire - now;
3925 		} else {
3926 			ifbmne.ifbmne_expire = 0;
3927 		}
3928 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3929 			ifbmne.ifbmne_af = AF_INET6;
3930 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3931 		} else {
3932 			ifbmne.ifbmne_af = AF_INET;
3933 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
3934 		}
3935 		memcpy(buf, &ifbmne, sizeof(ifbmne));
3936 		count++;
3937 		buf += sizeof(ifbmne);
3938 		len -= sizeof(ifbmne);
3939 	}
3940 	*count_p = count;
3941 	*len_p = len;
3942 	return buf;
3943 }
3944 
3945 /*
3946  * bridge_ioctl_gmnelist()
3947  *   Perform the get mac_nat_entry list ioctl.
3948  *
3949  * Note:
3950  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3951  *   field size/layout except for the last field ifbml_buf, the user-supplied
3952  *   buffer pointer. That is passed in separately via the 'user_addr'
3953  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3954  */
3955 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3956 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3957     user_addr_t user_addr)
3958 {
3959 	unsigned int            count;
3960 	char                    *buf;
3961 	int                     error = 0;
3962 	char                    *outbuf = NULL;
3963 	struct mac_nat_entry    *mne;
3964 	unsigned int            buflen;
3965 	unsigned int            len;
3966 
3967 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
3968 	count = 0;
3969 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3970 		count++;
3971 	}
3972 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3973 		count++;
3974 	}
3975 	buflen = sizeof(struct ifbrmne) * count;
3976 	if (buflen == 0 || mnl->ifbml_len == 0) {
3977 		mnl->ifbml_len = buflen;
3978 		return error;
3979 	}
3980 	BRIDGE_UNLOCK(sc);
3981 	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3982 	BRIDGE_LOCK(sc);
3983 	count = 0;
3984 	buf = outbuf;
3985 	len = min(mnl->ifbml_len, buflen);
3986 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3987 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3988 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
3989 	BRIDGE_UNLOCK(sc);
3990 	if (mnl->ifbml_len > 0) {
3991 		error = copyout(outbuf, user_addr, mnl->ifbml_len);
3992 	}
3993 	kfree_data(outbuf, buflen);
3994 	BRIDGE_LOCK(sc);
3995 	return error;
3996 }
3997 
3998 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3999 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4000 {
4001 	struct ifbrmnelist64 * __single mnl = arg;
4002 
4003 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
4004 }
4005 
4006 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4007 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4008 {
4009 	struct ifbrmnelist32 * __single mnl = arg;
4010 
4011 	return bridge_ioctl_gmnelist(sc, arg,
4012 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
4013 }
4014 
4015 /*
4016  * bridge_ioctl_gifstats()
4017  *   Return per-member stats.
4018  *
4019  * Note:
4020  *   The ifbrmreq32 and ifbrmreq64 structures have the same
4021  *   field size/layout except for the last field brmr_buf, the user-supplied
4022  *   buffer pointer. That is passed in separately via the 'user_addr'
4023  *   parameter from the respective 32-bit or 64-bit ioctl routine.
4024  */
4025 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)4026 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
4027     user_addr_t user_addr)
4028 {
4029 	struct bridge_iflist    *bif;
4030 	int                     error = 0;
4031 	unsigned int            buflen;
4032 
4033 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
4034 	if (bif == NULL) {
4035 		error = ENOENT;
4036 		goto done;
4037 	}
4038 
4039 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
4040 	if (buflen == 0 || mreq->brmr_len == 0) {
4041 		mreq->brmr_len = buflen;
4042 		goto done;
4043 	}
4044 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
4045 		error = ENOBUFS;
4046 		goto done;
4047 	}
4048 	mreq->brmr_len = buflen;
4049 	error = copyout(&bif->bif_stats, user_addr, buflen);
4050 done:
4051 	return error;
4052 }
4053 
4054 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4055 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4056 {
4057 	struct ifbrmreq32 * __single mreq = arg;
4058 
4059 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4060 }
4061 
4062 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4063 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4064 {
4065 	struct ifbrmreq64 * __single mreq = arg;
4066 
4067 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4068 }
4069 
4070 /*
4071  * bridge_proto_attach_changed
4072  *
4073  *	Called when protocol attachment on the interface changes.
4074  */
4075 static void
bridge_proto_attach_changed(struct ifnet * ifp)4076 bridge_proto_attach_changed(struct ifnet *ifp)
4077 {
4078 	boolean_t changed = FALSE;
4079 	struct bridge_iflist *bif;
4080 	boolean_t input_broadcast;
4081 	struct bridge_softc * __single sc = ifp->if_bridge;
4082 
4083 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4084 	if (sc == NULL) {
4085 		return;
4086 	}
4087 	input_broadcast = interface_needs_input_broadcast(ifp);
4088 	BRIDGE_LOCK(sc);
4089 	bif = bridge_lookup_member_if(sc, ifp);
4090 	if (bif != NULL) {
4091 		changed = bif_set_input_broadcast(bif, input_broadcast);
4092 	}
4093 	BRIDGE_UNLOCK(sc);
4094 	if (changed) {
4095 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4096 		    "%s input broadcast %s", ifp->if_xname,
4097 		    input_broadcast ? "ENABLED" : "DISABLED");
4098 	}
4099 	return;
4100 }
4101 
4102 /*
4103  * interface_media_active:
4104  *
4105  *	Tells if an interface media is active.
4106  */
4107 static int
interface_media_active(struct ifnet * ifp)4108 interface_media_active(struct ifnet *ifp)
4109 {
4110 	struct ifmediareq   ifmr;
4111 	int status = 0;
4112 
4113 	bzero(&ifmr, sizeof(ifmr));
4114 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4115 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4116 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4117 		}
4118 	}
4119 
4120 	return status;
4121 }
4122 
4123 /*
4124  * bridge_updatelinkstatus:
4125  *
4126  *      Update the media active status of the bridge based on the
4127  *	media active status of its member.
4128  *	If changed, return the corresponding onf/off link event.
4129  */
4130 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4131 bridge_updatelinkstatus(struct bridge_softc *sc)
4132 {
4133 	struct bridge_iflist *bif;
4134 	int active_member = 0;
4135 	u_int32_t event_code = 0;
4136 
4137 	BRIDGE_LOCK_ASSERT_HELD(sc);
4138 
4139 	/*
4140 	 * Find out if we have an active interface
4141 	 */
4142 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4143 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4144 			active_member = 1;
4145 			break;
4146 		}
4147 	}
4148 
4149 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4150 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
4151 		event_code = KEV_DL_LINK_ON;
4152 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4153 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4154 		event_code = KEV_DL_LINK_OFF;
4155 	}
4156 
4157 	return event_code;
4158 }
4159 
4160 /*
4161  * bridge_iflinkevent:
4162  */
4163 static void
bridge_iflinkevent(struct ifnet * ifp)4164 bridge_iflinkevent(struct ifnet *ifp)
4165 {
4166 	struct bridge_softc * __single sc = ifp->if_bridge;
4167 	struct bridge_iflist *bif;
4168 	u_int32_t event_code = 0;
4169 	int media_active;
4170 
4171 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4172 
4173 	/* Check if the interface is a bridge member */
4174 	if (sc == NULL) {
4175 		return;
4176 	}
4177 
4178 	media_active = interface_media_active(ifp);
4179 	BRIDGE_LOCK(sc);
4180 	bif = bridge_lookup_member_if(sc, ifp);
4181 	if (bif != NULL) {
4182 		if (media_active) {
4183 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4184 		} else {
4185 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4186 		}
4187 		if (sc->sc_mac_nat_bif != NULL) {
4188 			bridge_mac_nat_flush_entries(sc, bif);
4189 		}
4190 
4191 		event_code = bridge_updatelinkstatus(sc);
4192 	}
4193 	BRIDGE_UNLOCK(sc);
4194 
4195 	if (event_code != 0) {
4196 		bridge_link_event(sc->sc_ifp, event_code);
4197 	}
4198 }
4199 
4200 /*
4201  * bridge_delayed_callback:
4202  *
4203  *	Makes a delayed call
4204  */
4205 static void
bridge_delayed_callback(void * param,__unused void * param2)4206 bridge_delayed_callback(void *param, __unused void *param2)
4207 {
4208 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4209 	struct bridge_softc *sc = call->bdc_sc;
4210 
4211 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4212 	if (bridge_delayed_callback_delay > 0) {
4213 		struct timespec ts;
4214 
4215 		ts.tv_sec = bridge_delayed_callback_delay;
4216 		ts.tv_nsec = 0;
4217 
4218 		BRIDGE_LOG(LOG_NOTICE, 0,
4219 		    "sleeping for %d seconds",
4220 		    bridge_delayed_callback_delay);
4221 
4222 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4223 		    __func__, &ts);
4224 
4225 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4226 	}
4227 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4228 
4229 	BRIDGE_LOCK(sc);
4230 
4231 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4232 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4233 	    "%s call 0x%llx flags 0x%x",
4234 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4235 	    call->bdc_flags);
4236 }
4237 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4238 
4239 	if (call->bdc_flags & BDCF_CANCELLING) {
4240 		wakeup(call);
4241 	} else {
4242 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
4243 			(*call->bdc_func)(sc);
4244 		}
4245 	}
4246 	call->bdc_flags &= ~BDCF_OUTSTANDING;
4247 	BRIDGE_UNLOCK(sc);
4248 }
4249 
4250 /*
4251  * bridge_schedule_delayed_call:
4252  *
4253  *	Schedule a function to be called on a separate thread
4254  *      The actual call may be scheduled to run at a given time or ASAP.
4255  */
4256 static void
4257 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4258 {
4259 	uint64_t deadline = 0;
4260 	struct bridge_softc *sc = call->bdc_sc;
4261 
4262 	BRIDGE_LOCK_ASSERT_HELD(sc);
4263 
4264 	if ((sc->sc_flags & SCF_DETACHING) ||
4265 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4266 		return;
4267 	}
4268 
4269 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4270 		nanoseconds_to_absolutetime(
4271 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4272 			call->bdc_ts.tv_nsec, &deadline);
4273 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
4274 	}
4275 
4276 	call->bdc_flags = BDCF_OUTSTANDING;
4277 
4278 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4279 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4280 	    "%s call 0x%llx flags 0x%x",
4281 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4282 	    call->bdc_flags);
4283 }
4284 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4285 
4286 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4287 		thread_call_func_delayed(
4288 			(thread_call_func_t)bridge_delayed_callback,
4289 			call, deadline);
4290 	} else {
4291 		if (call->bdc_thread_call == NULL) {
4292 			call->bdc_thread_call = thread_call_allocate(
4293 				(thread_call_func_t)bridge_delayed_callback,
4294 				call);
4295 		}
4296 		thread_call_enter(call->bdc_thread_call);
4297 	}
4298 }
4299 
4300 /*
4301  * bridge_cancel_delayed_call:
4302  *
4303  *	Cancel a queued or running delayed call.
4304  *	If call is running, does not return until the call is done to
4305  *	prevent race condition with the brigde interface getting destroyed
4306  */
4307 static void
4308 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4309 {
4310 	boolean_t result;
4311 	struct bridge_softc *sc = call->bdc_sc;
4312 
4313 	/*
4314 	 * The call was never scheduled
4315 	 */
4316 	if (sc == NULL) {
4317 		return;
4318 	}
4319 
4320 	BRIDGE_LOCK_ASSERT_HELD(sc);
4321 
4322 	call->bdc_flags |= BDCF_CANCELLING;
4323 
4324 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4325 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4326 		    "%s call 0x%llx flags 0x%x",
4327 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4328 		    call->bdc_flags);
4329 		result = thread_call_func_cancel(
4330 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4331 
4332 		if (result) {
4333 			/*
4334 			 * We managed to dequeue the delayed call
4335 			 */
4336 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4337 		} else {
4338 			/*
4339 			 * Wait for delayed call do be done running
4340 			 */
4341 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4342 		}
4343 	}
4344 	call->bdc_flags &= ~BDCF_CANCELLING;
4345 }
4346 
4347 /*
4348  * bridge_cleanup_delayed_call:
4349  *
4350  *	Dispose resource allocated for a delayed call
4351  *	Assume the delayed call is not queued or running .
4352  */
4353 static void
4354 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4355 {
4356 	boolean_t result;
4357 	struct bridge_softc *sc = call->bdc_sc;
4358 
4359 	/*
4360 	 * The call was never scheduled
4361 	 */
4362 	if (sc == NULL) {
4363 		return;
4364 	}
4365 
4366 	BRIDGE_LOCK_ASSERT_HELD(sc);
4367 
4368 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4369 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4370 
4371 	if (call->bdc_thread_call != NULL) {
4372 		result = thread_call_free(call->bdc_thread_call);
4373 		if (result == FALSE) {
4374 			panic("%s thread_call_free() failed for call %p",
4375 			    __func__, call);
4376 		}
4377 		call->bdc_thread_call = NULL;
4378 	}
4379 }
4380 
4381 /*
4382  * bridge_init:
4383  *
4384  *	Initialize a bridge interface.
4385  */
4386 static int
4387 bridge_init(struct ifnet *ifp)
4388 {
4389 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4390 	errno_t error;
4391 
4392 	BRIDGE_LOCK_ASSERT_HELD(sc);
4393 
4394 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4395 		return 0;
4396 	}
4397 
4398 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4399 
4400 	/*
4401 	 * Calling bridge_aging_timer() is OK as there are no entries to
4402 	 * age so we're just going to arm the timer
4403 	 */
4404 	bridge_aging_timer(sc);
4405 #if BRIDGESTP
4406 	if (error == 0) {
4407 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4408 	}
4409 #endif /* BRIDGESTP */
4410 	return error;
4411 }
4412 
4413 /*
4414  * bridge_ifstop:
4415  *
4416  *	Stop the bridge interface.
4417  */
4418 static void
4419 bridge_ifstop(struct ifnet *ifp, int disable)
4420 {
4421 #pragma unused(disable)
4422 	struct bridge_softc * __single sc = ifp->if_softc;
4423 
4424 	BRIDGE_LOCK_ASSERT_HELD(sc);
4425 
4426 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4427 		return;
4428 	}
4429 
4430 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4431 
4432 #if BRIDGESTP
4433 	bstp_stop(&sc->sc_stp);
4434 #endif /* BRIDGESTP */
4435 
4436 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4437 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4438 }
4439 
/*
 * Union of the TCP and UDP checksum-request flags, for both the IPv4
 * and the IPv6 variants.
 */
static const uint32_t checksum_request_flags = (MBUF_CSUM_REQ_TCP |
    MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6);

/*
 * Flag set handed to mbuf_set_csum_performed() by
 * bridge_offload_checksum() once the checksums have been computed in
 * software, to indicate that the checksum is good.
 */
static const mbuf_csum_performed_flags_t checksum_performed_all_good =
    (MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD
    | MBUF_CSUM_DID_DATA | MBUF_CSUM_PSEUDO_HDR);
4446 
4447 /*
4448  * bridge_compute_cksum:
4449  *
4450  *	If the packet has checksum flags, compare the hardware checksum
4451  *	capabilities of the source and destination interfaces. If they
4452  *	are the same, there's nothing to do. If they are different,
4453  *	finalize the checksum so that it can be sent on the destination
4454  *	interface.
4455  */
4456 static void
4457 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4458 {
4459 	uint32_t csum_flags;
4460 	uint16_t dst_hw_csum;
4461 	uint32_t did_sw = 0;
4462 	struct ether_header *eh;
4463 	uint16_t src_hw_csum;
4464 
4465 	if (src_if == dst_if) {
4466 		return;
4467 	}
4468 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4469 	if (csum_flags == 0) {
4470 		/* no checksum offload */
4471 		return;
4472 	}
4473 
4474 	/*
4475 	 * if destination/source differ in checksum offload
4476 	 * capabilities, finalize/compute the checksum
4477 	 */
4478 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4479 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4480 	if (dst_hw_csum == src_hw_csum) {
4481 		return;
4482 	}
4483 	eh = mtod(m, struct ether_header *);
4484 	switch (eh->ether_type) {
4485 	case HTONS_ETHERTYPE_IP:
4486 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4487 		break;
4488 	case HTONS_ETHERTYPE_IPV6:
4489 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4490 		break;
4491 	}
4492 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4493 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4494 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4495 	    m->m_pkthdr.csum_flags);
4496 }
4497 
4498 static inline errno_t
4499 bridge_transmit(ifnet_t ifp, mbuf_t m)
4500 {
4501 	struct flowadv  adv = { .code = FADV_SUCCESS };
4502 	errno_t         error;
4503 	int             flags = DLIL_OUTPUT_FLAGS_RAW;
4504 
4505 	flags = (if_bridge_output_skip_filters != 0)
4506 	    ? (DLIL_OUTPUT_FLAGS_RAW | DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS)
4507 	    : DLIL_OUTPUT_FLAGS_RAW;
4508 	error = dlil_output(ifp, 0, m, NULL, NULL, flags, &adv);
4509 	if (error == 0) {
4510 		if (adv.code == FADV_FLOW_CONTROLLED) {
4511 			error = EQFULL;
4512 		} else if (adv.code == FADV_SUSPENDED) {
4513 			error = EQSUSPENDED;
4514 		}
4515 	}
4516 	return error;
4517 }
4518 
4519 static int
4520 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4521     bool *is_fragmented)
4522 {
4523 	int newoff;
4524 
4525 	*is_fragmented = false;
4526 	while (1) {
4527 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4528 		if (newoff < 0) {
4529 			return off;
4530 		} else if (newoff < off) {
4531 			return -1;    /* invalid */
4532 		} else if (newoff == off) {
4533 			return newoff;
4534 		}
4535 		off = newoff;
4536 		proto = *nxtp;
4537 		if (proto == IPPROTO_FRAGMENT) {
4538 			*is_fragmented = true;
4539 		}
4540 	}
4541 }
4542 
/* Increment the counter `s' atomically using relaxed ordering. */
#define __ATOMIC_INC(s) os_atomic_inc(&s, relaxed)
4544 
4545 static int
4546 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4547     ip_packet_info_t info_p, struct bripstats * stats_p)
4548 {
4549 	int             error = 0;
4550 	u_int           hlen;
4551 	u_int           ip_hlen;
4552 	u_int           ip_pay_len;
4553 	struct mbuf *   m0 = *mp;
4554 	int             off;
4555 	int             opt_len = 0;
4556 	int             proto = 0;
4557 
4558 	bzero(info_p, sizeof(*info_p));
4559 	if (is_ipv4) {
4560 		struct ip *     ip;
4561 		u_int           ip_total_len;
4562 
4563 		/* IPv4 */
4564 		hlen = mac_hlen + sizeof(struct ip);
4565 		if (m0->m_pkthdr.len < hlen) {
4566 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4567 			    "Short IP packet %d < %d",
4568 			    m0->m_pkthdr.len, hlen);
4569 			error = _EBADIP;
4570 			__ATOMIC_INC(stats_p->bips_bad_ip);
4571 			goto done;
4572 		}
4573 		if (m0->m_len < hlen) {
4574 			*mp = m0 = m_pullup(m0, hlen);
4575 			if (m0 == NULL) {
4576 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4577 				    "m_pullup failed hlen %d",
4578 				    hlen);
4579 				error = ENOBUFS;
4580 				__ATOMIC_INC(stats_p->bips_bad_ip);
4581 				goto done;
4582 			}
4583 		}
4584 		ip = (struct ip *)mtodo(m0, mac_hlen);
4585 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4586 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4587 			    "bad IP version");
4588 			error = _EBADIP;
4589 			__ATOMIC_INC(stats_p->bips_bad_ip);
4590 			goto done;
4591 		}
4592 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4593 		if (ip_hlen < sizeof(struct ip)) {
4594 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4595 			    "bad IP header length %d < %d",
4596 			    ip_hlen,
4597 			    (int)sizeof(struct ip));
4598 			error = _EBADIP;
4599 			__ATOMIC_INC(stats_p->bips_bad_ip);
4600 			goto done;
4601 		}
4602 		hlen = mac_hlen + ip_hlen;
4603 		if (m0->m_len < hlen) {
4604 			*mp = m0 = m_pullup(m0, hlen);
4605 			if (m0 == NULL) {
4606 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4607 				    "m_pullup failed hlen %d",
4608 				    hlen);
4609 				error = ENOBUFS;
4610 				__ATOMIC_INC(stats_p->bips_bad_ip);
4611 				goto done;
4612 			}
4613 			ip = (struct ip *)mtodo(m0, mac_hlen);
4614 		}
4615 
4616 		ip_total_len = ntohs(ip->ip_len);
4617 		if (ip_total_len < ip_hlen) {
4618 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4619 			    "IP total len %d < header len %d",
4620 			    ip_total_len, ip_hlen);
4621 			error = _EBADIP;
4622 			__ATOMIC_INC(stats_p->bips_bad_ip);
4623 			goto done;
4624 		}
4625 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4626 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4627 			    "invalid IP payload length %d > %d",
4628 			    ip_total_len,
4629 			    (m0->m_pkthdr.len - mac_hlen));
4630 			error = _EBADIP;
4631 			__ATOMIC_INC(stats_p->bips_bad_ip);
4632 			goto done;
4633 		}
4634 		ip_pay_len = ip_total_len - ip_hlen;
4635 		info_p->ip_proto = ip->ip_p;
4636 		info_p->ip_hdr = mtodo(m0, mac_hlen);
4637 		info_p->ip_m0_len = m0->m_len - mac_hlen;
4638 		info_p->ip_hlen = ip_hlen;
4639 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4640 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4641 			info_p->ip_is_fragmented = true;
4642 		}
4643 		__ATOMIC_INC(stats_p->bips_ip);
4644 	} else {
4645 		struct ip6_hdr *ip6;
4646 
4647 		/* IPv6 */
4648 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4649 		if (m0->m_pkthdr.len < hlen) {
4650 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4651 			    "short IPv6 packet %d < %d",
4652 			    m0->m_pkthdr.len, hlen);
4653 			error = _EBADIPV6;
4654 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4655 			goto done;
4656 		}
4657 		if (m0->m_len < hlen) {
4658 			*mp = m0 = m_pullup(m0, hlen);
4659 			if (m0 == NULL) {
4660 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4661 				    "m_pullup failed hlen %d",
4662 				    hlen);
4663 				error = ENOBUFS;
4664 				__ATOMIC_INC(stats_p->bips_bad_ip6);
4665 				goto done;
4666 			}
4667 		}
4668 		ip6 = (struct ip6_hdr *)(mtodo(m0, mac_hlen));
4669 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4670 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4671 			    "bad IPv6 version");
4672 			error = _EBADIPV6;
4673 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4674 			goto done;
4675 		}
4676 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4677 		    &info_p->ip_is_fragmented);
4678 		if (off < 0 || m0->m_pkthdr.len < off) {
4679 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4680 			    "ip6_lasthdr() returned %d",
4681 			    off);
4682 			error = _EBADIPV6;
4683 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4684 			goto done;
4685 		}
4686 		ip_hlen = sizeof(*ip6);
4687 		opt_len = off - mac_hlen - ip_hlen;
4688 		if (opt_len < 0) {
4689 			error = _EBADIPV6;
4690 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4691 			goto done;
4692 		}
4693 		ip_pay_len = ntohs(ip6->ip6_plen);
4694 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4695 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4696 			    "invalid IPv6 payload length %d > %d",
4697 			    ip_pay_len,
4698 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4699 			error = _EBADIPV6;
4700 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4701 			goto done;
4702 		}
4703 		info_p->ip_proto = proto;
4704 		info_p->ip_hdr = mtodo(m0, mac_hlen);
4705 		info_p->ip_m0_len = m0->m_len - mac_hlen;
4706 		info_p->ip_hlen = ip_hlen;
4707 		__ATOMIC_INC(stats_p->bips_ip6);
4708 	}
4709 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4710 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4711 	    is_ipv4 ? '4' : '6',
4712 	    proto, ip_hlen, ip_pay_len, opt_len,
4713 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4714 	info_p->ip_pay_len = ip_pay_len;
4715 	info_p->ip_opt_len = opt_len;
4716 	info_p->ip_is_ipv4 = is_ipv4;
4717 done:
4718 	return error;
4719 }
4720 
4721 static int
4722 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4723     ip_packet_info_t info_p, struct bripstats * stats_p)
4724 {
4725 	int             error;
4726 	u_int           hlen;
4727 
4728 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4729 	if (error != 0) {
4730 		goto done;
4731 	}
4732 	if (info_p->ip_proto != IPPROTO_TCP) {
4733 		/* not a TCP frame, not an error, just a bad guess */
4734 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4735 		    "non-TCP (%d) IPv%c frame %d bytes",
4736 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4737 		    (*mp)->m_pkthdr.len);
4738 		goto done;
4739 	}
4740 	if (info_p->ip_is_fragmented) {
4741 		/* both TSO and IP fragmentation don't make sense */
4742 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4743 		    "fragmented TSO packet?");
4744 		__ATOMIC_INC(stats_p->bips_bad_tcp);
4745 		error = _EBADTCP;
4746 		goto done;
4747 	}
4748 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4749 	    info_p->ip_opt_len;
4750 	if ((*mp)->m_len < hlen) {
4751 		*mp = m_pullup(*mp, hlen);
4752 		if (*mp == NULL) {
4753 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4754 			    "m_pullup %d failed",
4755 			    hlen);
4756 			__ATOMIC_INC(stats_p->bips_bad_tcp);
4757 			error = _EBADTCP;
4758 			goto done;
4759 		}
4760 	}
4761 	info_p->ip_proto_hdr = info_p->ip_hdr + info_p->ip_hlen +
4762 	    info_p->ip_opt_len;
4763 done:
4764 	return error;
4765 }
4766 
4767 static inline void
4768 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4769 {
4770 	if (proto == IPPROTO_TCP) {
4771 		__ATOMIC_INC(stats_p->brcs_tcp_checksum);
4772 	} else {
4773 		__ATOMIC_INC(stats_p->brcs_udp_checksum);
4774 	}
4775 	return;
4776 }
4777 
/*
 * Bit flags classifying a frame by its ether type; see
 * ether_type_flag_get() for the mapping from ether type to flag.
 * The _IP and _IP_ARP values are masks combining the single-bit flags.
 */
#define ETHER_TYPE_FLAG_NONE    0x00
#define ETHER_TYPE_FLAG_IPV4    0x01
#define ETHER_TYPE_FLAG_IPV6    0x02
#define ETHER_TYPE_FLAG_ARP     0x04
#define ETHER_TYPE_FLAG_IP      (ETHER_TYPE_FLAG_IPV4 | ETHER_TYPE_FLAG_IPV6)
#define ETHER_TYPE_FLAG_IP_ARP  (ETHER_TYPE_FLAG_IP | ETHER_TYPE_FLAG_ARP)
4784 
4785 static inline bool
4786 ether_type_flag_is_ip(ether_type_flag_t flag)
4787 {
4788 	return (flag & ETHER_TYPE_FLAG_IP) != 0;
4789 }
4790 
4791 static inline ether_type_flag_t
4792 ether_type_flag_get(uint16_t ether_type)
4793 {
4794 	ether_type_flag_t flag = ETHER_TYPE_FLAG_NONE;
4795 
4796 	switch (ether_type) {
4797 	case HTONS_ETHERTYPE_IP:
4798 		flag = ETHER_TYPE_FLAG_IPV4;
4799 		break;
4800 	case HTONS_ETHERTYPE_IPV6:
4801 		flag = ETHER_TYPE_FLAG_IPV6;
4802 		break;
4803 	case HTONS_ETHERTYPE_ARP:
4804 		flag = ETHER_TYPE_FLAG_ARP;
4805 		break;
4806 	default:
4807 		break;
4808 	}
4809 	return flag;
4810 }
4811 
4812 static bool
4813 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4814 {
4815 	uint16_t        ether_type;
4816 	bool            is_ip = TRUE;
4817 
4818 	ether_type = ntohs(eh->ether_type);
4819 	switch (ether_type) {
4820 	case ETHERTYPE_IP:
4821 		*is_ipv4 = TRUE;
4822 		break;
4823 	case ETHERTYPE_IPV6:
4824 		*is_ipv4 = FALSE;
4825 		break;
4826 	default:
4827 		is_ip = FALSE;
4828 		break;
4829 	}
4830 	return is_ip;
4831 }
4832 
4833 static errno_t
4834 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4835 {
4836 	struct brcsumstats *csum_stats_p;
4837 	struct ether_header     *eh;
4838 	errno_t         error = 0;
4839 	ip_packet_info  info;
4840 	bool            is_ipv4;
4841 	struct mbuf *   m;
4842 	u_int           mac_hlen = sizeof(struct ether_header);
4843 	uint16_t        sum;
4844 	bool            valid;
4845 
4846 	eh = mtod(*mp, struct ether_header *);
4847 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4848 		goto done;
4849 	}
4850 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4851 	    &stats_p->brms_out_ip);
4852 	m = *mp;
4853 	if (error != 0) {
4854 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4855 		    "bridge_get_ip_proto failed %d",
4856 		    error);
4857 		goto done;
4858 	}
4859 	if (is_ipv4) {
4860 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4861 			/* hardware offloaded IP header checksum */
4862 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4863 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4864 			    "IP checksum HW %svalid",
4865 			    valid ? "" : "in");
4866 			if (!valid) {
4867 				__ATOMIC_INC(stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum);
4868 				error = _EBADIPCHECKSUM;
4869 				goto done;
4870 			}
4871 			__ATOMIC_INC(stats_p->brms_out_cksum_good_hw.brcs_ip_checksum);
4872 		} else {
4873 			/* verify */
4874 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4875 			valid = (sum == 0);
4876 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4877 			    "IP checksum SW %svalid",
4878 			    valid ? "" : "in");
4879 			if (!valid) {
4880 				__ATOMIC_INC(stats_p->brms_out_cksum_bad.brcs_ip_checksum);
4881 				error = _EBADIPCHECKSUM;
4882 				goto done;
4883 			}
4884 			__ATOMIC_INC(stats_p->brms_out_cksum_good.brcs_ip_checksum);
4885 		}
4886 	}
4887 	if (info.ip_is_fragmented) {
4888 		/* can't verify checksum on fragmented packets */
4889 		goto done;
4890 	}
4891 	switch (info.ip_proto) {
4892 	case IPPROTO_TCP:
4893 		__ATOMIC_INC(stats_p->brms_out_ip.bips_tcp);
4894 		break;
4895 	case IPPROTO_UDP:
4896 		__ATOMIC_INC(stats_p->brms_out_ip.bips_udp);
4897 		break;
4898 	default:
4899 		goto done;
4900 	}
4901 	/* check for hardware offloaded UDP/TCP checksum */
4902 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4903 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4904 		/* checksum verified by hardware */
4905 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4906 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4907 		    "IPv%c %s checksum HW 0x%x %svalid",
4908 		    is_ipv4 ? '4' : '6',
4909 		    (info.ip_proto == IPPROTO_TCP)
4910 		    ? "TCP" : "UDP",
4911 		    m->m_pkthdr.csum_data,
4912 		    valid ? "" : "in" );
4913 		if (!valid) {
4914 			/* bad checksum */
4915 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4916 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4917 			    : _EBADTCPCHECKSUM;
4918 		} else {
4919 			/* good checksum */
4920 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4921 		}
4922 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4923 		goto done;
4924 	}
4925 	/* adjust frame to skip mac-layer header */
4926 	_mbuf_adjust_pkthdr_and_data(m, mac_hlen);
4927 	if (is_ipv4) {
4928 		sum = inet_cksum(m, info.ip_proto,
4929 		    info.ip_hlen,
4930 		    info.ip_pay_len);
4931 	} else {
4932 		sum = inet6_cksum(m, info.ip_proto,
4933 		    info.ip_hlen + info.ip_opt_len,
4934 		    info.ip_pay_len - info.ip_opt_len);
4935 	}
4936 	valid = (sum == 0);
4937 	if (valid) {
4938 		csum_stats_p = &stats_p->brms_out_cksum_good;
4939 	} else {
4940 		csum_stats_p = &stats_p->brms_out_cksum_bad;
4941 		error = (info.ip_proto == IPPROTO_TCP)
4942 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4943 	}
4944 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4945 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4946 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4947 	    is_ipv4 ? '4' : '6',
4948 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4949 	    valid ? "" : "in",
4950 	    sum, info.ip_hlen, info.ip_pay_len);
4951 	/* adjust frame back to start of mac-layer header */
4952 	_mbuf_adjust_pkthdr_and_data(m, -mac_hlen);
4953 
4954 done:
4955 	return error;
4956 }
4957 
4958 static mbuf_t
4959 bridge_verify_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * dbif,
4960     mbuf_t in_list, bool is_ipv4)
4961 {
4962 	mbuf_t          next_packet;
4963 	mblist          ret;
4964 
4965 	mblist_init(&ret);
4966 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
4967 		errno_t         error;
4968 
4969 		/* take packet out of the list */
4970 		next_packet = scan->m_nextpkt;
4971 		scan->m_nextpkt = NULL;
4972 
4973 		if (scan->m_pkthdr.rx_seg_cnt > 1) {
4974 			/* LRO packet, compute checksum on large packet */
4975 			scan = bridge_filter_checksum(bridge_ifp, dbif, scan,
4976 			    is_ipv4, false, true);
4977 		} else {
4978 			/* verify checksum */
4979 			error = bridge_verify_checksum(&scan, &dbif->bif_stats);
4980 			if (error != 0) {
4981 				if (scan != NULL) {
4982 					m_freem(scan);
4983 					scan = NULL;
4984 				}
4985 			}
4986 		}
4987 
4988 		/* add it back to the list */
4989 		if (scan != NULL) {
4990 			mblist_append(&ret, scan);
4991 		}
4992 	}
4993 	return ret.head;
4994 }
4995 
4996 
4997 static errno_t
4998 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4999     struct ifbrmstats * stats_p)
5000 {
5001 	uint16_t *      csum_p;
5002 	errno_t         error = 0;
5003 	u_int           hlen;
5004 	struct mbuf *   m0 = *mp;
5005 	u_int           mac_hlen = sizeof(struct ether_header);
5006 	u_int           pkt_hdr_len;
5007 	struct tcphdr * tcp;
5008 	u_int           tcp_hlen;
5009 	struct udphdr * udp;
5010 
5011 	if (info_p->ip_is_ipv4) {
5012 		/* compute IP header checksum */
5013 		struct ip *ip = (struct ip *)info_p->ip_hdr;
5014 		ip->ip_sum = 0;
5015 		ip->ip_sum = inet_cksum(m0, 0, mac_hlen, info_p->ip_hlen);
5016 		__ATOMIC_INC(stats_p->brms_in_computed_cksum.brcs_ip_checksum);
5017 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5018 		    "IPv4 checksum 0x%x",
5019 		    ntohs(ip->ip_sum));
5020 	}
5021 	if (info_p->ip_is_fragmented) {
5022 		/* can't compute checksum on fragmented packets */
5023 		goto done;
5024 	}
5025 	pkt_hdr_len = m0->m_pkthdr.len;
5026 	switch (info_p->ip_proto) {
5027 	case IPPROTO_TCP:
5028 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
5029 		    + sizeof(struct tcphdr);
5030 		if (m0->m_len < hlen) {
5031 			*mp = m0 = m_pullup(m0, hlen);
5032 			if (m0 == NULL) {
5033 				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5034 				error = _EBADTCP;
5035 				goto done;
5036 			}
5037 		}
5038 		tcp = (struct tcphdr *)(info_p->ip_hdr + info_p->ip_hlen
5039 		    + info_p->ip_opt_len);
5040 		tcp_hlen = tcp->th_off << 2;
5041 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
5042 		if (hlen > pkt_hdr_len) {
5043 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5044 			    "bad tcp header length %u",
5045 			    tcp_hlen);
5046 			__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5047 			error = _EBADTCP;
5048 			goto done;
5049 		}
5050 		csum_p = &tcp->th_sum;
5051 		__ATOMIC_INC(stats_p->brms_in_ip.bips_tcp);
5052 		break;
5053 	case IPPROTO_UDP:
5054 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
5055 		if (m0->m_len < hlen) {
5056 			*mp = m0 = m_pullup(m0, hlen);
5057 			if (m0 == NULL) {
5058 				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_udp);
5059 				error = ENOBUFS;
5060 				goto done;
5061 			}
5062 		}
5063 		udp = (struct udphdr *)(info_p->ip_hdr + info_p->ip_hlen
5064 		    + info_p->ip_opt_len);
5065 		csum_p = &udp->uh_sum;
5066 		__ATOMIC_INC(stats_p->brms_in_ip.bips_udp);
5067 		break;
5068 	default:
5069 		/* not TCP or UDP */
5070 		goto done;
5071 	}
5072 	*csum_p = 0;
5073 	/* adjust frame to skip mac-layer header */
5074 	_mbuf_adjust_pkthdr_and_data(m0, mac_hlen);
5075 	if (info_p->ip_is_ipv4) {
5076 		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
5077 		    info_p->ip_pay_len);
5078 	} else {
5079 		*csum_p = inet6_cksum(m0, info_p->ip_proto,
5080 		    info_p->ip_hlen + info_p->ip_opt_len,
5081 		    info_p->ip_pay_len - info_p->ip_opt_len);
5082 	}
5083 	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
5084 		/* RFC 1122 4.1.3.4 */
5085 		*csum_p = 0xffff;
5086 	}
5087 	/* adjust frame back to start of mac-layer header */
5088 	_mbuf_adjust_pkthdr_and_data(m0, -mac_hlen);
5089 	proto_csum_stats_increment(info_p->ip_proto,
5090 	    &stats_p->brms_in_computed_cksum);
5091 
5092 	/* indicate that the checksum is good */
5093 	mbuf_set_csum_performed(m0, checksum_performed_all_good, 0xffff);
5094 
5095 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5096 	    "IPv%c %s set checksum 0x%x",
5097 	    info_p->ip_is_ipv4 ? '4' : '6',
5098 	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5099 	    ntohs(*csum_p));
5100 done:
5101 	return error;
5102 }
5103 
5104 static inline void
5105 bridge_handle_checksum_op(ifnet_t src_ifp, ifnet_t dst_ifp,
5106     mbuf_t m, ChecksumOperation cksum_op)
5107 {
5108 	switch (cksum_op) {
5109 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
5110 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5111 		break;
5112 	case CHECKSUM_OPERATION_FINALIZE:
5113 		/* the checksum might not be correct, finalize now */
5114 		VERIFY(dst_ifp != NULL);
5115 		bridge_finalize_cksum(dst_ifp, m);
5116 		break;
5117 	case CHECKSUM_OPERATION_COMPUTE:
5118 		VERIFY(dst_ifp != NULL && src_ifp != NULL);
5119 		bridge_compute_cksum(src_ifp, dst_ifp, m);
5120 		break;
5121 	default:
5122 		break;
5123 	}
5124 	return;
5125 }
5126 
5127 static uint32_t
5128 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5129 {
5130 	uint32_t tso_mtu;
5131 
5132 	tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5133 	if (tso_mtu == 0) {
5134 		tso_mtu = IP_MAXPACKET;
5135 	}
5136 
5137 #if DEBUG || DEVELOPMENT
5138 #define REDUCED_TSO_MTU         (16 * 1024)
5139 	if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5140 		tso_mtu = REDUCED_TSO_MTU;
5141 	}
5142 #endif /* DEBUG || DEVELOPMENT */
5143 	return tso_mtu;
5144 }
5145 
5146 /*
5147  * tso_hwassist:
5148  * - determine whether the destination interface supports TSO offload
5149  * - if the packet is already marked for offload and the hardware supports
5150  *   it, just allow the packet to continue on
5151  * - if not, parse the packet headers to verify that this is a large TCP
5152  *   packet requiring segmentation; if the hardware doesn't support it
5153  *   set need_sw_tso; otherwise, mark the packet for TSO offload
5154  */
5155 static int
5156 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5157     int * mss_p, bool * need_gso, bool * is_large_tcp)
5158 {
5159 	uint32_t                csum_flags;
5160 	int                     error = 0;
5161 	ip_packet_info          info;
5162 	u_int32_t               if_csum;
5163 	u_int32_t               if_tso;
5164 	u_int32_t               mbuf_tso;
5165 	int                     mss = *mss_p;
5166 	uint8_t                 seg_cnt = 0;
5167 	bool                    supports_cksum = false;
5168 	uint32_t                pkt_mtu;
5169 	struct bripstats        stats;
5170 
5171 	*need_gso = false;
5172 	*is_large_tcp = false;
5173 	if (is_ipv4) {
5174 		/*
5175 		 * Enable both TCP and IP offload if the hardware supports it.
5176 		 * If the hardware doesn't support TCP offload, supports_cksum
5177 		 * will be false so we won't set either offload.
5178 		 */
5179 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5180 		supports_cksum = (if_csum & CSUM_TCP) != 0;
5181 		if_tso = IFNET_TSO_IPV4;
5182 		mbuf_tso = CSUM_TSO_IPV4;
5183 	} else {
5184 		if_csum = (ifp->if_hwassist & CSUM_TCPIPV6);
5185 		supports_cksum = (if_csum & CSUM_TCPIPV6) != 0;
5186 		if_tso = IFNET_TSO_IPV6;
5187 		mbuf_tso = CSUM_TSO_IPV6;
5188 	}
5189 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5190 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
5191 	    ifp->if_xname, supports_cksum ? "" : " not",
5192 	    ifp->if_hwassist, if_csum);
5193 
5194 	/* verify that this is a large TCP frame */
5195 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5196 	    &info, &stats);
5197 	if (error != 0) {
5198 		/* bad packet */
5199 		goto done;
5200 	}
5201 	if (info.ip_proto_hdr == NULL) {
5202 		/* not a TCP packet */
5203 		goto done;
5204 	}
5205 	pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
5206 	if (mss == 0) {
5207 		/* check for LRO */
5208 		seg_cnt = (*mp)->m_pkthdr.rx_seg_cnt;
5209 		if (seg_cnt == 1 || (seg_cnt == 0 && pkt_mtu <= ifp->if_mtu)) {
5210 			/* not actually a large packet */
5211 			goto done;
5212 		}
5213 	}
5214 	if (mss == 0) {
5215 		uint32_t            hdr_len;
5216 		struct tcphdr *     tcp;
5217 
5218 		tcp = (struct tcphdr *)info.ip_proto_hdr;
5219 		hdr_len = info.ip_hlen + info.ip_opt_len + (tcp->th_off << 2);
5220 
5221 		/* packet isn't marked, mark it now */
5222 		if (seg_cnt != 0) {
5223 			uint32_t    len;
5224 
5225 			/* approximate the MSS using the LRO seg cnt */
5226 			len = mbuf_pkthdr_len(*mp) - hdr_len - ETHER_HDR_LEN;
5227 			mss = len / seg_cnt;
5228 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5229 			    "%s: mss %d = len %d / seg cnt %d",
5230 			    ifp->if_xname, mss, len, seg_cnt);
5231 			if (mss <= 0) {
5232 				/* unexpected value */
5233 				mss = 0;
5234 				goto done;
5235 			}
5236 		} else {
5237 			mss = ifp->if_mtu - hdr_len
5238 			    - if_bridge_tso_reduce_mss_tx;
5239 			assert(mss > 0);
5240 		}
5241 		csum_flags = mbuf_tso;
5242 		if (supports_cksum) {
5243 			csum_flags |= if_csum;
5244 		}
5245 		(*mp)->m_pkthdr.tso_segsz = mss;
5246 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
5247 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5248 	}
5249 	*is_large_tcp = true;
5250 	(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5251 	if ((ifp->if_hwassist & if_tso) == 0) {
5252 		/* need gso if no hardware support */
5253 		*need_gso = true;
5254 	} else {
5255 		uint32_t                tso_mtu = 0;
5256 
5257 		tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
5258 		if (pkt_mtu > tso_mtu) {
5259 			/* need gso if tso_mtu too small */
5260 			*need_gso = true;
5261 		}
5262 	}
5263 done:
5264 	*mss_p = mss;
5265 	return error;
5266 }
5267 
5268 /*
5269  * bridge_enqueue:
5270  *
5271  *	Enqueue a packet list on a bridge member interface.
5272  *
5273  */
5274 static int
5275 bridge_enqueue(ifnet_t bridge_ifp, ifnet_t src_if, ifnet_t dst_if,
5276     ether_type_flag_t etypef, mbuf_t in_list, ChecksumOperation orig_cksum_op,
5277     pkt_direction_t direction)
5278 {
5279 	int             enqueue_error = 0;
5280 	mbuf_t          next_packet;
5281 	uint32_t        out_errors = 0;
5282 	mblist          out_list;
5283 
5284 	VERIFY(dst_if != NULL);
5285 
5286 	mblist_init(&out_list);
5287 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5288 		bool            check_gso = false;
5289 		ChecksumOperation cksum_op = orig_cksum_op;
5290 		errno_t         error = 0;
5291 		bool            is_ipv4 = false;
5292 		int             len;
5293 		int             mss = 0;
5294 		bool            need_gso = false;
5295 
5296 		scan->m_flags |= M_PROTO1; /* set to avoid loops */
5297 		next_packet = scan->m_nextpkt;
5298 		scan->m_nextpkt = NULL;
5299 		len = mbuf_pkthdr_len(scan);
5300 		is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
5301 		mss = _mbuf_get_tso_mss(scan);
5302 		if (mss != 0) {
5303 			/* packet is marked for segmentation */
5304 			check_gso = true;
5305 		} else if (direction == pkt_direction_RX &&
5306 		    scan->m_pkthdr.rx_seg_cnt != 0) {
5307 			/* LRO packet */
5308 			check_gso = true;
5309 		} else if (ether_type_flag_is_ip(etypef) &&
5310 		    len > (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5311 			/*
5312 			 * Need to segment the packet if it is a large frame
5313 			 * and the destination interface does not support TSO.
5314 			 *
5315 			 * Note that with trailers, it's possible for a packet to
5316 			 * be large but not actually require segmentation.
5317 			 */
5318 			check_gso = true;
5319 		}
5320 		if (check_gso) {
5321 			bool    is_large_tcp = false;
5322 
5323 			error = tso_hwassist(&scan, is_ipv4,
5324 			    dst_if, sizeof(struct ether_header), &mss,
5325 			    &need_gso, &is_large_tcp);
5326 			if (is_large_tcp &&
5327 			    cksum_op == CHECKSUM_OPERATION_CLEAR_OFFLOAD) {
5328 				cksum_op = CHECKSUM_OPERATION_NONE;
5329 			}
5330 		}
5331 		if (error != 0) {
5332 			if (scan != NULL) {
5333 				m_freem(scan);
5334 				scan = NULL;
5335 			}
5336 			out_errors++;
5337 		} else if (need_gso) {
5338 			int             mac_hlen = sizeof(struct ether_header);
5339 			mblist          segs;
5340 
5341 			/* segment packets, add to list */
5342 			segs = gso_tcp_transmit(dst_if, scan, mac_hlen,
5343 			    is_ipv4);
5344 			if (segs.head != NULL) {
5345 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5346 				    "%s (%s) append gso #segs %u bytes %u",
5347 				    bridge_ifp->if_xname,
5348 				    dst_if->if_xname,
5349 				    segs.count, segs.bytes);
5350 				mblist_append_list(&out_list, segs);
5351 			} else {
5352 				out_errors++;
5353 			}
5354 		} else {
5355 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5356 			    "%s (%s) append %d bytes mss %d op %d",
5357 			    bridge_ifp->if_xname,
5358 			    dst_if->if_xname,
5359 			    len, mss, cksum_op);
5360 			bridge_handle_checksum_op(src_if, dst_if,
5361 			    scan, cksum_op);
5362 			mblist_append(&out_list, scan);
5363 		}
5364 	}
5365 	if (out_list.head != NULL) {
5366 		enqueue_error = bridge_transmit(dst_if, out_list.head);
5367 		if (enqueue_error != 0) {
5368 			out_errors++;
5369 		}
5370 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5371 		    "%s (%s) bridge_transmit packets %u bytes %u error %d",
5372 		    bridge_ifp->if_xname,
5373 		    dst_if->if_xname,
5374 		    out_list.count, out_list.bytes, enqueue_error);
5375 	}
5376 	if (out_list.count != 0 || out_errors != 0) {
5377 		ifnet_stat_increment_out(bridge_ifp, out_list.count,
5378 		    out_list.bytes, out_errors);
5379 	}
5380 	return enqueue_error;
5381 }
5382 
5383 /*
5384  * bridge_member_output:
5385  *
5386  *	Send output from a bridge member interface.  This
5387  *	performs the bridging function for locally originated
5388  *	packets.
5389  *
5390  *	The mbuf has the Ethernet header already attached.
5391  */
5392 static errno_t
5393 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5394 {
5395 	struct bridge_iflist * bif = NULL;
5396 	ifnet_t bridge_ifp;
5397 	struct ether_header *eh;
5398 	ether_type_flag_t etypef;
5399 	struct ifnet *dst_if = NULL;
5400 	uint16_t vlan;
5401 	struct bridge_iflist *mac_nat_bif;
5402 	ifnet_t mac_nat_ifp;
5403 	mbuf_t m = *data;
5404 
5405 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5406 	    "ifp %s", ifp->if_xname);
5407 	if (m->m_len < ETHER_HDR_LEN) {
5408 		m = m_pullup(m, ETHER_HDR_LEN);
5409 		if (m == NULL) {
5410 			*data = NULL;
5411 			return EJUSTRETURN;
5412 		}
5413 	}
5414 
5415 	eh = mtod(m, struct ether_header *);
5416 	vlan = VLANTAGOF(m);
5417 	etypef = ether_type_flag_get(eh->ether_type);
5418 
5419 	BRIDGE_LOCK(sc);
5420 	mac_nat_bif = sc->sc_mac_nat_bif;
5421 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5422 	if (mac_nat_ifp == ifp) {
5423 		/* record the IP address used by the MAC NAT interface */
5424 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5425 		m = *data;
5426 		if (m == NULL) {
5427 			/* packet was deallocated */
5428 			BRIDGE_UNLOCK(sc);
5429 			return EJUSTRETURN;
5430 		}
5431 	}
5432 	bridge_ifp = sc->sc_ifp;
5433 
5434 	/*
5435 	 * APPLE MODIFICATION
5436 	 * If the packet is an 802.1X ethertype, then only send on the
5437 	 * original output interface.
5438 	 */
5439 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5440 		dst_if = ifp;
5441 		goto sendunicast;
5442 	}
5443 
5444 	/*
5445 	 * If bridge is down, but the original output interface is up,
5446 	 * go ahead and send out that interface.  Otherwise, the packet
5447 	 * is dropped below.
5448 	 */
5449 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5450 		dst_if = ifp;
5451 		goto sendunicast;
5452 	}
5453 
5454 	/*
5455 	 * If the packet is a multicast, or we don't know a better way to
5456 	 * get there, send to all interfaces.
5457 	 */
5458 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5459 		dst_if = NULL;
5460 	} else {
5461 		bif = bridge_rtlookup_bif(sc, eh->ether_dhost, vlan);
5462 		if (bif != NULL) {
5463 			dst_if = bif->bif_ifp;
5464 		}
5465 	}
5466 	if (dst_if == NULL) {
5467 		struct mbuf *mc;
5468 		errno_t error;
5469 
5470 
5471 		bridge_span(sc, etypef, m);
5472 
5473 		BRIDGE_LOCK2REF(sc, error);
5474 		if (error != 0) {
5475 			m_freem(m);
5476 			return EJUSTRETURN;
5477 		}
5478 
5479 		/*
5480 		 * Duplicate and send the packet across all member interfaces
5481 		 * except the originating interface.
5482 		 */
5483 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5484 			dst_if = bif->bif_ifp;
5485 			if (dst_if == ifp) {
5486 				/* skip the originating interface */
5487 				continue;
5488 			}
5489 			/* skip interface with inactive link status */
5490 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5491 				continue;
5492 			}
5493 
5494 			/* skip interface that isn't running */
5495 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5496 				continue;
5497 			}
5498 			/*
5499 			 * If the interface is participating in spanning
5500 			 * tree, make sure the port is in a state that
5501 			 * allows forwarding.
5502 			 */
5503 			if ((bif->bif_ifflags & IFBIF_STP) &&
5504 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5505 				continue;
5506 			}
5507 			/*
5508 			 * If the destination is the MAC NAT interface,
5509 			 * skip sending the packet. The packet can't be sent
5510 			 * if the source MAC is incorrect.
5511 			 */
5512 			if (dst_if == mac_nat_ifp) {
5513 				continue;
5514 			}
5515 
5516 			/* make a deep copy to send on this member interface */
5517 			mc = m_dup(m, M_DONTWAIT);
5518 			if (mc == NULL) {
5519 				(void)ifnet_stat_increment_out(bridge_ifp,
5520 				    0, 0, 1);
5521 				continue;
5522 			}
5523 			(void)bridge_enqueue(bridge_ifp, ifp, dst_if, etypef,
5524 			    mc, CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5525 		}
5526 		BRIDGE_UNREF(sc);
5527 
5528 		if ((ifp->if_flags & IFF_RUNNING) == 0) {
5529 			m_freem(m);
5530 			return EJUSTRETURN;
5531 		}
5532 		/* allow packet to continue on the originating interface */
5533 		return 0;
5534 	}
5535 
5536 sendunicast:
5537 	/*
5538 	 * XXX Spanning tree consideration here?
5539 	 */
5540 
5541 	bridge_span(sc, etypef, m);
5542 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5543 		m_freem(m);
5544 		BRIDGE_UNLOCK(sc);
5545 		return EJUSTRETURN;
5546 	}
5547 
5548 	BRIDGE_UNLOCK(sc);
5549 	if (dst_if == ifp) {
5550 		/* allow packet to continue on the originating interface */
5551 		return 0;
5552 	}
5553 	if (dst_if != mac_nat_ifp) {
5554 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, etypef, m,
5555 		    CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5556 	} else {
5557 		/*
5558 		 * This is not the original output interface
5559 		 * and the destination is the MAC NAT interface.
5560 		 * Drop the packet because the packet can't be sent
5561 		 * if the source MAC is incorrect.
5562 		 */
5563 		m_freem(m);
5564 	}
5565 	return EJUSTRETURN;
5566 }
5567 
5568 /*
5569  * Output callback.
5570  *
5571  * This routine is called externally from above only when if_bridge_txstart
5572  * is disabled; otherwise it is called internally by bridge_start().
5573  */
5574 static int
5575 bridge_output(struct ifnet *ifp, struct mbuf *m)
5576 {
5577 	struct bridge_iflist *bif;
5578 	struct bridge_softc * __single sc = ifnet_softc(ifp);
5579 	struct ether_header *eh;
5580 	ether_type_flag_t etypef;
5581 	struct ifnet *dst_if = NULL;
5582 	int error = 0;
5583 
5584 	eh = mtod(m, struct ether_header *);
5585 	etypef = ether_type_flag_get(eh->ether_type);
5586 	BRIDGE_LOCK(sc);
5587 
5588 	if (!IS_BCAST_MCAST(m)) {
5589 		bif = bridge_rtlookup_bif(sc, eh->ether_dhost, 0);
5590 		if (bif != NULL) {
5591 			dst_if = bif->bif_ifp;
5592 		}
5593 	}
5594 
5595 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5596 
5597 	BRIDGE_BPF_TAP_OUT(ifp, m);
5598 
5599 	if (dst_if == NULL) {
5600 		/* callee will unlock */
5601 		bridge_broadcast(sc, NULL, etypef, m);
5602 	} else {
5603 		ifnet_t bridge_ifp;
5604 
5605 		bridge_ifp = sc->sc_ifp;
5606 		BRIDGE_UNLOCK(sc);
5607 
5608 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, etypef, m,
5609 		    CHECKSUM_OPERATION_FINALIZE, pkt_direction_TX);
5610 	}
5611 
5612 	return error;
5613 }
5614 
5615 static void
5616 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5617 {
5618 	struct ether_header *eh;
5619 	bool is_ipv4;
5620 	uint32_t sw_csum, hwcap;
5621 	uint32_t did_sw;
5622 	uint32_t csum_flags;
5623 
5624 	eh = mtod(m, struct ether_header *);
5625 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5626 		return;
5627 	}
5628 
5629 	/* do in software what the hardware cannot */
5630 	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5631 	csum_flags = m->m_pkthdr.csum_flags;
5632 	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5633 	sw_csum &= IF_HWASSIST_CSUM_MASK;
5634 
5635 	if (is_ipv4) {
5636 		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5637 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5638 			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5639 				uint16_t start =
5640 				    sizeof(*eh) + sizeof(struct ip);
5641 				uint16_t ulpoff =
5642 				    m->m_pkthdr.csum_data & 0xffff;
5643 				m->m_pkthdr.csum_flags |=
5644 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5645 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5646 				m->m_pkthdr.csum_tx_start = start;
5647 			} else {
5648 				sw_csum |= (CSUM_DELAY_DATA &
5649 				    m->m_pkthdr.csum_flags);
5650 			}
5651 		}
5652 		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5653 	} else {
5654 		if ((hwcap & CSUM_PARTIAL) &&
5655 		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5656 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5657 			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5658 				uint16_t start =
5659 				    sizeof(*eh) + sizeof(struct ip6_hdr);
5660 				uint16_t ulpoff =
5661 				    m->m_pkthdr.csum_data & 0xffff;
5662 				m->m_pkthdr.csum_flags |=
5663 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5664 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5665 				m->m_pkthdr.csum_tx_start = start;
5666 			} else {
5667 				sw_csum |= (CSUM_DELAY_IPV6_DATA &
5668 				    m->m_pkthdr.csum_flags);
5669 			}
5670 		}
5671 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5672 	}
5673 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5674 	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5675 	    ifp->if_xname, csum_flags, hwcap, sw_csum,
5676 	    did_sw, m->m_pkthdr.csum_flags);
5677 }
5678 
5679 /*
5680  * bridge_start:
5681  *
5682  *	Start output on a bridge.
5683  *
5684  * This routine is invoked by the start worker thread; because we never call
5685  * it directly, there is no need do deploy any serialization mechanism other
5686  * than what's already used by the worker thread, i.e. this is already single
5687  * threaded.
5688  *
5689  * This routine is called only when if_bridge_txstart is enabled.
5690  */
5691 static void
5692 bridge_start(struct ifnet *ifp)
5693 {
5694 	mbuf_ref_t m;
5695 
5696 	for (;;) {
5697 		if (ifnet_dequeue(ifp, &m) != 0) {
5698 			break;
5699 		}
5700 
5701 		(void) bridge_output(ifp, m);
5702 	}
5703 }
5704 
5705 static void
5706 prepare_input_packet(ifnet_t ifp, mbuf_t m)
5707 {
5708 	mbuf_pkthdr_setrcvif(m, ifp);
5709 	mbuf_pkthdr_setheader(m, mtod(m, void *));
5710 	/* adjust frame to skip mac-layer header */
5711 	_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
5712 }
5713 
5714 static void
5715 mark_tso_checksum_ok(mbuf_t m)
5716 {
5717 	if (_mbuf_get_tso_mss(m) != 0 ||
5718 	    (m->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5719 		mbuf_set_csum_performed(m, checksum_performed_all_good, 0xffff);
5720 	}
5721 }
5722 
5723 static void
5724 inject_input_packet_list(ifnet_t ifp, mbuf_t in_list, bool m_proto1)
5725 {
5726 	for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5727 		/* mark the packets as arriving on the interface */
5728 		BRIDGE_BPF_TAP_IN(ifp, scan);
5729 		if (m_proto1) {
5730 			scan->m_flags |= M_PROTO1; /* set to avoid loops */
5731 		}
5732 		prepare_input_packet(ifp, scan);
5733 		mark_tso_checksum_ok(scan);
5734 	}
5735 	dlil_input_packet_list(ifp, in_list);
5736 	return;
5737 }
5738 
5739 static void
5740 adjust_input_packet_list(mbuf_t in_list)
5741 {
5742 	for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5743 		mbuf_pkthdr_setheader(scan, mtod(scan, void *));
5744 		_mbuf_adjust_pkthdr_and_data(scan, ETHER_HDR_LEN);
5745 	}
5746 }
5747 
5748 static bool
5749 in_addr_is_ours(struct in_addr ip)
5750 {
5751 	struct in_ifaddr *ia;
5752 	bool             ours = false;
5753 
5754 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5755 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5756 		if (ia->ia_addr.sin_addr.s_addr == ip.s_addr) {
5757 			ours = true;
5758 			break;
5759 		}
5760 	}
5761 	lck_rw_done(&in_ifaddr_rwlock);
5762 	return ours;
5763 }
5764 
5765 static bool
5766 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5767 {
5768 	struct in6_addr         dst_ip;
5769 	struct in6_ifaddr       *ia6;
5770 	bool                    ours = false;
5771 
5772 	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5773 		/* need to embed scope ID for comparison */
5774 		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5775 		dst_ip.s6_addr16[1] = htons(ifscope);
5776 		ip6_p = &dst_ip;
5777 	}
5778 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5779 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5780 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5781 		    ia6->ia_addr.sin6_scope_id, ifscope)) {
5782 			ours = true;
5783 			break;
5784 		}
5785 	}
5786 	lck_rw_done(&in6_ifaddr_rwlock);
5787 	return ours;
5788 }
5789 
5790 static bool
5791 ip_packet_info_dst_is_our_ip(ip_packet_info_t info_p, int index)
5792 {
5793 	/* if the destination is our IP address, don't segment */
5794 	bool    our_ip = false;
5795 
5796 	if (info_p->ip_is_ipv4) {
5797 		struct ip *     hdr;
5798 		struct in_addr  dst_ip;
5799 
5800 		hdr = (struct ip *)(info_p->ip_hdr);
5801 		bcopy(&hdr->ip_dst, &dst_ip, sizeof(dst_ip));
5802 		our_ip = in_addr_is_ours(dst_ip);
5803 	} else {
5804 		struct ip6_hdr *        hdr;
5805 
5806 		hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5807 		our_ip = in6_addr_is_ours(&hdr->ip6_dst, index);
5808 	}
5809 	return our_ip;
5810 }
5811 
5812 typedef union {
5813 	struct in_addr  ip;
5814 	struct in6_addr ip6;
5815 } ip_addr, *ip_addr_t;
5816 
5817 static void
5818 ip_packet_info_copy_dst_ip_addr(ip_packet_info_t info_p, ip_addr_t ipaddr)
5819 {
5820 	if (info_p->ip_is_ipv4) {
5821 		struct ip *     hdr;
5822 
5823 		hdr = (struct ip *)(info_p->ip_hdr);
5824 		bcopy(&hdr->ip_dst, &ipaddr->ip, sizeof(ipaddr->ip));
5825 	} else {
5826 		struct ip6_hdr *        hdr;
5827 
5828 		hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5829 		bcopy(&hdr->ip6_dst, &ipaddr->ip6, sizeof(ipaddr->ip6));
5830 	}
5831 }
5832 
5833 static bool
5834 ip_addr_are_equal(ip_addr_t addr1, ip_addr_t addr2, bool is_ipv4)
5835 {
5836 	bool    equal;
5837 
5838 	if (is_ipv4) {
5839 		equal = addr1->ip.s_addr == addr2->ip.s_addr;
5840 	} else {
5841 		equal = IN6_ARE_ADDR_EQUAL(&addr1->ip6, &addr2->ip6);
5842 	}
5843 	return equal;
5844 }
5845 
5846 static bool
5847 ip_addr_is_ours(ip_addr_t ipaddr, int index, bool is_ipv4)
5848 {
5849 	bool    our_ip;
5850 
5851 	if (is_ipv4) {
5852 		our_ip = in_addr_is_ours(ipaddr->ip);
5853 	} else {
5854 		our_ip = in6_addr_is_ours(&ipaddr->ip6, index);
5855 	}
5856 	return our_ip;
5857 }
5858 
5859 static void
5860 bridge_interface_input_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
5861     mblist list, bool bif_uses_virtio)
5862 {
5863 	uint32_t        in_errors = 0;
5864 	bool            is_ipv4;
5865 	mblist          in_list;
5866 	ip_addr         last_ip;
5867 	bool            last_ip_ours = false;
5868 	bool            last_ip_valid = false;
5869 	u_int           mac_hlen;
5870 	bool            may_forward = false;
5871 	mbuf_t          next_packet;
5872 
5873 	switch (etypef) {
5874 	case ETHER_TYPE_FLAG_IPV4:
5875 		is_ipv4 = true;
5876 		may_forward = (ipforwarding != 0);
5877 		break;
5878 	case ETHER_TYPE_FLAG_IPV6:
5879 		is_ipv4 = false;
5880 		may_forward = (ip6_forwarding != 0);
5881 		break;
5882 	}
5883 	if (!may_forward) {
5884 		in_list = list;
5885 		goto done;
5886 	}
5887 
5888 	mblist_init(&in_list);
5889 	mac_hlen = sizeof(struct ether_header);
5890 	bzero(&last_ip, sizeof(last_ip));
5891 	for (mbuf_ref_t scan = list.head; scan != NULL; scan = next_packet) {
5892 		int             error;
5893 		ip_packet_info  info;
5894 		bool            ip_ours;
5895 		struct ifbrmstats stats; /* XXX should really be accounted */
5896 		ip_addr         this_ip;
5897 
5898 		/* take it out of the list */
5899 		next_packet = scan->m_nextpkt;
5900 		scan->m_nextpkt = NULL;
5901 
5902 		/* check for TCP packet and get IP header */
5903 		error = bridge_get_tcp_header(&scan, mac_hlen, is_ipv4,
5904 		    &info, &stats.brms_in_ip);
5905 		if (error != 0) {
5906 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5907 			    "%s bridge_get_tcp_header failed %d",
5908 			    bridge_ifp->if_xname, error);
5909 			if (scan != NULL) {
5910 				m_freem(scan);
5911 				scan = NULL;
5912 			}
5913 			in_errors++;
5914 			continue;
5915 		}
5916 		ip_packet_info_copy_dst_ip_addr(&info, &this_ip);
5917 		if (last_ip_valid &&
5918 		    ip_addr_are_equal(&last_ip, &this_ip, is_ipv4)) {
5919 			/* use cached result */
5920 			ip_ours = last_ip_ours;
5921 		} else {
5922 			ip_ours = ip_addr_is_ours(&this_ip,
5923 			    bridge_ifp->if_index,
5924 			    is_ipv4);
5925 			/* cache the result */
5926 			last_ip_valid = true;
5927 			last_ip_ours = ip_ours;
5928 			last_ip = this_ip;
5929 		}
5930 
5931 		/* if the packet is destined to us, just send it up */
5932 		if (ip_ours) {
5933 			mblist_append(&in_list, scan);
5934 			continue;
5935 		}
5936 		/*
5937 		 * If this is a TCP packet that's marked for TSO or LRO, or
5938 		 * we think it's a large packet, segment it.
5939 		 */
5940 		if (info.ip_proto_hdr != NULL &&
5941 		    ((bif_uses_virtio && _mbuf_get_tso_mss(scan) != 0) ||
5942 		    (!bif_uses_virtio &&
5943 		    (scan->m_pkthdr.rx_seg_cnt > 1 ||
5944 		    (mbuf_pkthdr_len(scan) >
5945 		    (bridge_ifp->if_mtu + ETHER_HDR_LEN)))))) {
5946 			mblist          seg;
5947 
5948 			seg = gso_tcp_with_info(bridge_ifp, scan, &info,
5949 			    mac_hlen, is_ipv4, false);
5950 			if (seg.head == NULL) {
5951 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5952 				    "gso_tcp returned no packets");
5953 				in_errors++;
5954 				continue;
5955 			}
5956 			if (seg.count > 1) {
5957 				/* packet was segmented+checksummed */
5958 				mblist_append_list(&in_list, seg);
5959 				continue;
5960 			}
5961 			/* there's just one packet, no segmentation */
5962 			scan = seg.head;
5963 		}
5964 		/* need checksum if it's marked for checksum offload */
5965 		if (bif_uses_virtio &&
5966 		    (scan->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5967 			error = bridge_offload_checksum(&scan, &info, &stats);
5968 			if (error != 0) {
5969 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
5970 				    "%s bridge_offload_checksum failed %d",
5971 				    bridge_ifp->if_xname, error);
5972 				if (scan != NULL) {
5973 					m_freem(scan);
5974 					scan = NULL;
5975 				}
5976 				in_errors++;
5977 				continue;
5978 			}
5979 		}
5980 		mblist_append(&in_list, scan);
5981 	}
5982 
5983 done:
5984 	if (in_list.head != NULL) {
5985 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5986 		    "%s packets %d bytes %d",
5987 		    bridge_ifp->if_xname,
5988 		    in_list.count, in_list.bytes);
5989 		/* Mark the packets as arriving on the bridge interface */
5990 		inject_input_packet_list(bridge_ifp, in_list.head, false);
5991 		ifnet_stat_increment_in(bridge_ifp, in_list.count,
5992 		    in_list.bytes, in_errors);
5993 	} else if (in_errors != 0) {
5994 		ifnet_stat_increment_in(bridge_ifp, 0, 0, in_errors);
5995 	}
5996 	return;
5997 }
5998 
5999 /*
6000  * bridge_broadcast:
6001  *
6002  *	Send a frame to all interfaces that are members of
6003  *	the bridge, except for the one on which the packet
6004  *	arrived.
6005  *
6006  *	NOTE: Releases the lock on return.
6007  */
6008 static void
6009 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6010     ether_type_flag_t etypef, mbuf_t m)
6011 {
6012 	ifnet_t bridge_ifp;
6013 	struct bridge_iflist *dbif;
6014 	struct ifnet * src_if;
6015 	mbuf_ref_t mc;
6016 	struct mbuf *mc_in;
6017 	int error = 0, used = 0;
6018 	ChecksumOperation cksum_op;
6019 	struct mac_nat_record mnr;
6020 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6021 	boolean_t translate_mac = FALSE;
6022 	uint32_t sc_filter_flags;
6023 	bool is_bcast_mcast;
6024 
6025 	bridge_ifp = sc->sc_ifp;
6026 	if (sbif != NULL) {
6027 		src_if = sbif->bif_ifp;
6028 		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6029 		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6030 			/* get the translation record */
6031 			translate_mac
6032 			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6033 			if (m == NULL) {
6034 				/* packet was deallocated */
6035 				BRIDGE_UNLOCK(sc);
6036 				return;
6037 			}
6038 		}
6039 	} else {
6040 		/*
6041 		 * sbif is NULL when the bridge interface calls
6042 		 * bridge_broadcast().
6043 		 */
6044 		cksum_op = CHECKSUM_OPERATION_FINALIZE;
6045 		src_if = NULL;
6046 	}
6047 
6048 	BRIDGE_LOCK2REF(sc, error);
6049 	if (error) {
6050 		m_freem(m);
6051 		return;
6052 	}
6053 	is_bcast_mcast = IS_BCAST_MCAST(m);
6054 	sc_filter_flags = sc->sc_filter_flags;
6055 	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6056 		ifnet_t         dst_if;
6057 
6058 		dst_if = dbif->bif_ifp;
6059 		if (dst_if == src_if) {
6060 			/* skip the interface that the packet came in on */
6061 			continue;
6062 		}
6063 
6064 		/* Private segments can not talk to each other */
6065 		if (sbif != NULL &&
6066 		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6067 			continue;
6068 		}
6069 
6070 		if ((dbif->bif_ifflags & IFBIF_STP) &&
6071 		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6072 			continue;
6073 		}
6074 
6075 		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6076 		    !is_bcast_mcast) {
6077 			continue;
6078 		}
6079 
6080 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6081 			continue;
6082 		}
6083 
6084 		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
6085 			continue;
6086 		}
6087 
6088 		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6089 			mc = m;
6090 			used = 1;
6091 		} else {
6092 			mc = m_dup(m, M_DONTWAIT);
6093 			if (mc == NULL) {
6094 				(void) ifnet_stat_increment_out(bridge_ifp,
6095 				    0, 0, 1);
6096 				continue;
6097 			}
6098 		}
6099 
6100 		/*
6101 		 * If broadcast input is enabled, do so only if this
6102 		 * is an input packet.
6103 		 */
6104 		if (sbif != NULL && is_bcast_mcast &&
6105 		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6106 			mc_in = m_dup(mc, M_DONTWAIT);
6107 			/* this could fail, but we continue anyways */
6108 		} else {
6109 			mc_in = NULL;
6110 		}
6111 
6112 		/* out */
6113 		if (translate_mac && mac_nat_bif == dbif) {
6114 			/* translate the packet */
6115 			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6116 		}
6117 
6118 		if (mc != NULL && sbif != NULL &&
6119 		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6120 			if (used == 0) {
6121 				/* Keep the layer3 header aligned */
6122 				int i = min(mc->m_pkthdr.len, max_protohdr);
6123 				mc = m_copyup(mc, i, ETHER_ALIGN);
6124 				if (mc == NULL) {
6125 					(void) ifnet_stat_increment_out(
6126 						sc->sc_ifp, 0, 0, 1);
6127 					if (mc_in != NULL) {
6128 						m_freem(mc_in);
6129 						mc_in = NULL;
6130 					}
6131 					continue;
6132 				}
6133 			}
6134 			if (bridge_pf(&mc, dst_if, sc_filter_flags, false) != 0) {
6135 				if (mc_in != NULL) {
6136 					m_freem(mc_in);
6137 					mc_in = NULL;
6138 				}
6139 				continue;
6140 			}
6141 			if (mc == NULL) {
6142 				if (mc_in != NULL) {
6143 					m_freem(mc_in);
6144 					mc_in = NULL;
6145 				}
6146 				continue;
6147 			}
6148 		}
6149 
6150 		if (mc != NULL) {
6151 			/* verify checksum if necessary */
6152 			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6153 			    !bif_has_checksum_offload(sbif)) {
6154 				error = bridge_verify_checksum(&mc,
6155 				    &dbif->bif_stats);
6156 				if (error != 0) {
6157 					if (mc != NULL) {
6158 						m_freem(mc);
6159 					}
6160 					mc = NULL;
6161 				}
6162 			}
6163 			if (mc != NULL) {
6164 				(void) bridge_enqueue(bridge_ifp,
6165 				    NULL, dst_if, etypef, mc, cksum_op,
6166 				    pkt_direction_TX);
6167 			}
6168 		}
6169 
6170 		/* in */
6171 		if (mc_in == NULL) {
6172 			continue;
6173 		}
6174 		BRIDGE_BPF_TAP_IN(dst_if, mc_in);
6175 		prepare_input_packet(dst_if, mc_in);
6176 		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6177 		dlil_input_packet_list(dst_if, mc_in);
6178 	}
6179 	if (used == 0) {
6180 		m_freem(m);
6181 	}
6182 
6183 
6184 	BRIDGE_UNREF(sc);
6185 }
6186 
6187 static mbuf_t
6188 copy_packet_list(mbuf_t m)
6189 {
6190 	mblist  ret;
6191 	mbuf_t  next_packet;
6192 
6193 	mblist_init(&ret);
6194 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
6195 		mbuf_t  copy_m;
6196 
6197 		/* take it out of the list */
6198 		next_packet = scan->m_nextpkt;
6199 		scan->m_nextpkt = NULL;
6200 
6201 		/* create a copy and add it to the new list */
6202 		copy_m = m_dup(scan, M_DONTWAIT);
6203 		if (copy_m != NULL) {
6204 			mblist_append(&ret, copy_m);
6205 		}
6206 
6207 		/* put it back in the original list */
6208 		scan->m_nextpkt = next_packet;
6209 	}
6210 	return ret.head;
6211 }
6212 
/*
 * bridge_broadcast_list:
 *
 *	Broadcast a list of packets to all members except `sbif`.
 *	Consumes `m` before returning.
 *
 *	`sbif` is the member the packets came in on, or NULL when the
 *	bridge interface itself is the originator. `etypef` describes the
 *	ethernet type of the packets and, together with `direction`, is
 *	passed through to bridge_enqueue().
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ether_type_flag_t etypef, mbuf_t m, pkt_direction_t direction)
{
	bool                    bridge_has_address;
	ifnet_t                 bridge_ifp;
	struct bridge_iflist *  dbif;
	bool                    is_bcast_mcast;
	errno_t                 error = 0;
	ChecksumOperation       cksum_op;
	struct bridge_iflist *  mac_nat_bif = sc->sc_mac_nat_bif;
	ifnet_t                 mac_nat_if = NULL;
	bool                    need_mac_nat = false;
	mbuf_t                  out_mac_nat = NULL;
	ifnet_t                 src_if;
	uint32_t                sc_filter_flags;
	/* set once `m` itself (not a copy) has been handed to a member */
	bool                    used = false;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		src_if = sbif->bif_ifp;

		if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
			bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

			/* compute checksum on packets marked with offload */
			m = bridge_checksum_offload_list(bridge_ifp, sbif,
			    m, is_ipv4);
			if (m == NULL) {
				/* nothing left to send; drop the lock and clean up */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			cksum_op = CHECKSUM_OPERATION_NONE;
		} else {
			cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		}

		/*
		 * If MAC-NAT is enabled and we'll be sending the packets
		 * over it, verify that it is up and active before
		 * deciding to make a translated copy.
		 */
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			mac_nat_if = mac_nat_bif->bif_ifp;
			if ((mac_nat_if->if_flags & IFF_RUNNING) != 0 &&
			    (mac_nat_bif->bif_flags & BIFF_MEDIA_ACTIVE) != 0) {
				need_mac_nat = true;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast_list() (TBD).
		 */
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		src_if = NULL;
	}

	/*
	 * Create a translated copy for packets destined to MAC-NAT interface.
	 */
	if (need_mac_nat) {
		out_mac_nat
		        = bridge_mac_nat_copy_and_translate_list(sc, sbif,
		    mac_nat_if, m);
	}
	/* read the softc fields needed by the loop before BRIDGE_LOCK2REF */
	sc_filter_flags = sc->sc_filter_flags;
	bridge_has_address = (sc->sc_flags & SCF_ADDRESS_ASSIGNED) != 0;
	/*
	 * NOTE(review): BRIDGE_LOCK2REF is defined elsewhere; presumably it
	 * exchanges the bridge lock for a reference that BRIDGE_UNREF below
	 * releases, and reports failure through `error` -- confirm.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		goto done;
	}
	is_bcast_mcast = IS_BCAST_MCAST(m);

	/* make a copy for the bridge interface */
	if (sbif != NULL && is_bcast_mcast && bridge_has_address) {
		mbuf_t  in_list;

		in_list = copy_packet_list(m);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
		    "%s mcast for us in_m %p",
		    bridge_ifp->if_xname, in_list);
		if (in_list != NULL) {
			inject_input_packet_list(bridge_ifp, in_list, false);
		}
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		ifnet_t         dst_if;
		mbuf_t          in_m = NULL;
		mbuf_t          out_m = NULL;

		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip members whose spanning tree state is discarding */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/* non-broadcast/multicast only goes to IFBIF_DISCOVER members */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    !is_bcast_mcast) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
			continue;
		}
		if (dbif == mac_nat_bif) {
			/* translated copy was created above, use that */
			out_m = out_mac_nat;
			out_mac_nat = NULL;
		} else if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			/* consume `m` */
			out_m = m;
			used = true;
		} else {
			/* needs a copy */
			out_m = copy_packet_list(m);
		}

		if (out_m == NULL) {
			/* no list to send on this member: count an output error */
			ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
			continue;
		}
		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 *
		 * The input copy is taken from `m` here, before out_m is
		 * filtered and enqueued below.
		 */
		if (sbif != NULL && is_bcast_mcast &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			in_m = copy_packet_list(m);
			/* this could fail, but we continue anyways */
		} else {
			in_m = NULL;
		}

		/* run the outbound list through PF when member filtering is on */
		if (sbif != NULL &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			out_m = bridge_pf_list(out_m, dst_if,
			    sc_filter_flags, false);
		}
		if (out_m != NULL) {
			/* verify checksum if necessary */
			if (sbif != NULL &&
			    ether_type_flag_is_ip(etypef) &&
			    bif_has_checksum_offload(dbif) &&
			    !bif_has_checksum_offload(sbif)) {
				bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

				out_m = bridge_verify_checksum_list(bridge_ifp,
				    dbif, out_m, is_ipv4);
			}
			if (out_m != NULL) {
				bridge_enqueue(bridge_ifp, src_if, dst_if,
				    etypef, out_m, cksum_op, direction);
			}
		}

		/* in */
		if (in_m != NULL) {
			inject_input_packet_list(dst_if, in_m, true);
		}
	}

	BRIDGE_UNREF(sc);

done:
	/* free the MAC-NAT copy if no member picked it up */
	if (out_mac_nat != NULL) {
		m_freem_list(out_mac_nat);
	}
	/* free the original list unless it was handed to the last member */
	if (!used) {
		m_freem_list(m);
	}
	return;
}
6410 
6411 #define NEEDED_CSUM_IPV4   (IF_HWASSIST_CSUM_UDP | IF_HWASSIST_CSUM_TCP)
6412 #define NEEDED_CSUM_IPV6   (IF_HWASSIST_CSUM_UDPIPV6 | IF_HWASSIST_CSUM_TCPIPV6)
6413 
6414 static bool
6415 interface_supports_hw_checksum(ifnet_t ifp, bool is_ipv4)
6416 {
6417 	uint32_t        hwcap = IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
6418 	uint32_t        needed = is_ipv4 ? NEEDED_CSUM_IPV4 : NEEDED_CSUM_IPV6;
6419 	bool            supports;
6420 
6421 	supports = (hwcap & needed) == needed;
6422 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM, "%s: does %ssupport checksum",
6423 	    ifp->if_xname, supports ? "" : "not ");
6424 	return supports;
6425 }
6426 
6427 static void
6428 bridge_forward_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6429     ifnet_t dst_if, ether_type_flag_t etypef, mbuf_t m)
6430 {
6431 	bool                    checksum_ok = false;
6432 	ChecksumOperation       cksum_op;
6433 	ifnet_t                 bridge_ifp;
6434 	struct bridge_iflist *  dbif;
6435 	uint32_t                sc_filter_flags;
6436 	ifnet_t                 src_if;
6437 
6438 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6439 		goto drop;
6440 	}
6441 	dbif = bridge_lookup_member_if(sc, dst_if);
6442 	if (dbif == NULL) {
6443 		/* Not a member of the bridge (anymore?) */
6444 		goto drop;
6445 	}
6446 
6447 	/* Private segments can not talk to each other */
6448 	if ((sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) != 0) {
6449 		goto drop;
6450 	}
6451 	bridge_ifp = sc->sc_ifp;
6452 	src_if = sbif->bif_ifp;
6453 	cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6454 	if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6455 		bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6456 
6457 		if (dbif == sc->sc_mac_nat_bif ||
6458 		    (IFNET_IS_VMNET(dst_if) && !bif_uses_virtio(dbif)) ||
6459 		    !interface_supports_hw_checksum(dst_if, is_ipv4)) {
6460 			/* compute checksums now if necessary */
6461 			m = bridge_checksum_offload_list(bridge_ifp, sbif,
6462 			    m, is_ipv4);
6463 			checksum_ok = true;
6464 		} else {
6465 			cksum_op = CHECKSUM_OPERATION_NONE;
6466 		}
6467 	}
6468 
6469 	if (dbif == sc->sc_mac_nat_bif) {
6470 		/* translate the packets before forwarding them */
6471 		if ((etypef & ETHER_TYPE_FLAG_IP_ARP) != 0) {
6472 			m = bridge_mac_nat_translate_list(sc, sbif, dst_if, m);
6473 		}
6474 	} else if (!checksum_ok && ether_type_flag_is_ip(etypef) &&
6475 	    bif_has_checksum_offload(dbif) && !bif_has_checksum_offload(sbif)) {
6476 		bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6477 
6478 		/*
6479 		 * If the destination interface has checksum offload enabled,
6480 		 * verify the checksum now, unless the source interface also has
6481 		 * checksum offload enabled. The checksum in that case has
6482 		 * already just been computed and verifying it is unnecessary.
6483 		 */
6484 		m = bridge_verify_checksum_list(bridge_ifp, dbif, m, is_ipv4);
6485 	}
6486 	sc_filter_flags = sc->sc_filter_flags;
6487 	BRIDGE_UNLOCK(sc);
6488 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6489 		m = bridge_pf_list(m, dst_if, sc_filter_flags, false);
6490 	}
6491 
6492 	/*
6493 	 * We're forwarding inbound packets for which the checksums must
6494 	 * already have been computed and if required, verified, or
6495 	 * packets from a virtio-enabled interface for which we rely
6496 	 * on the packet containing appropriate offload flags.
6497 	 */
6498 	if (m != NULL) {
6499 		bridge_enqueue(bridge_ifp, src_if, dst_if, etypef, m,
6500 		    cksum_op, pkt_direction_RX);
6501 	}
6502 	return;
6503 
6504 drop:
6505 	BRIDGE_UNLOCK(sc);
6506 	m_freem_list(m);
6507 	return;
6508 }
6509 
6510 /*
6511  * bridge_span:
6512  *
6513  *	Duplicate a packet out one or more interfaces that are in span mode,
6514  *	the original mbuf is unmodified.
6515  */
6516 static void
6517 bridge_span(struct bridge_softc *sc, ether_type_flag_t etypef, struct mbuf *m)
6518 {
6519 	struct bridge_iflist *bif;
6520 	struct ifnet *dst_if;
6521 	struct mbuf *mc;
6522 
6523 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6524 		return;
6525 	}
6526 
6527 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6528 		dst_if = bif->bif_ifp;
6529 
6530 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6531 			continue;
6532 		}
6533 
6534 		mc = m_copypacket(m, M_DONTWAIT);
6535 		if (mc == NULL) {
6536 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6537 			continue;
6538 		}
6539 
6540 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, etypef, mc,
6541 		    CHECKSUM_OPERATION_NONE, pkt_direction_TX);
6542 	}
6543 }
6544 
6545 /*
6546  * bridge_rtupdate:
6547  *
6548  *	Add a bridge routing entry.
6549  */
6550 static int
6551 bridge_rtupdate(struct bridge_softc *sc, const uint8_t dst[ETHER_ADDR_LEN], uint16_t vlan,
6552     struct bridge_iflist *bif, int setflags, uint8_t flags)
6553 {
6554 	struct bridge_rtnode *brt;
6555 	int error;
6556 
6557 	BRIDGE_LOCK_ASSERT_HELD(sc);
6558 
6559 	/* Check the source address is valid and not multicast. */
6560 	if (ETHER_IS_MULTICAST(dst) ||
6561 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6562 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6563 		return EINVAL;
6564 	}
6565 
6566 	/* 802.1p frames map to vlan 1 */
6567 	if (vlan == 0) {
6568 		vlan = 1;
6569 	}
6570 
6571 	/*
6572 	 * A route for this destination might already exist.  If so,
6573 	 * update it, otherwise create a new one.
6574 	 */
6575 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6576 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6577 			sc->sc_brtexceeded++;
6578 			return ENOSPC;
6579 		}
6580 		/* Check per interface address limits (if enabled) */
6581 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6582 			bif->bif_addrexceeded++;
6583 			return ENOSPC;
6584 		}
6585 
6586 		/*
6587 		 * Allocate a new bridge forwarding node, and
6588 		 * initialize the expiration time and Ethernet
6589 		 * address.
6590 		 */
6591 		brt = zalloc_noblock(bridge_rtnode_pool);
6592 		if (brt == NULL) {
6593 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6594 			    "zalloc_nolock failed");
6595 			return ENOMEM;
6596 		}
6597 		bzero(brt, sizeof(struct bridge_rtnode));
6598 
6599 		if (bif->bif_ifflags & IFBIF_STICKY) {
6600 			brt->brt_flags = IFBAF_STICKY;
6601 		} else {
6602 			brt->brt_flags = IFBAF_DYNAMIC;
6603 		}
6604 
6605 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6606 		brt->brt_vlan = vlan;
6607 
6608 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6609 			zfree(bridge_rtnode_pool, brt);
6610 			return error;
6611 		}
6612 		brt->brt_dst = bif;
6613 		bif->bif_addrcnt++;
6614 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6615 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6616 		    "on %s count %u hashsize %u",
6617 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6618 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6619 		    sc->sc_rthash_size);
6620 	}
6621 
6622 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6623 	    brt->brt_dst != bif) {
6624 		brt->brt_dst->bif_addrcnt--;
6625 		brt->brt_dst = bif;
6626 		brt->brt_dst->bif_addrcnt++;
6627 	}
6628 
6629 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6630 		unsigned long now;
6631 
6632 		now = (unsigned long) net_uptime();
6633 		brt->brt_expire = now + sc->sc_brttimeout;
6634 	}
6635 	if (setflags) {
6636 		brt->brt_flags = flags;
6637 	}
6638 
6639 	return 0;
6640 }
6641 
6642 /*
6643  * bridge_rtlookup:
6644  *
6645  *	Lookup the destination interface for an address.
6646  */
6647 static struct bridge_iflist *
6648 bridge_rtlookup_bif(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
6649     uint16_t vlan)
6650 {
6651 	struct bridge_rtnode *brt;
6652 
6653 	BRIDGE_LOCK_ASSERT_HELD(sc);
6654 
6655 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6656 		return NULL;
6657 	}
6658 
6659 	return brt->brt_dst;
6660 }
6661 
6662 /*
6663  * bridge_rttrim:
6664  *
6665  *	Trim the routine table so that we have a number
6666  *	of routing entries less than or equal to the
6667  *	maximum number.
6668  */
6669 static void
6670 bridge_rttrim(struct bridge_softc *sc)
6671 {
6672 	struct bridge_rtnode *brt, *nbrt;
6673 
6674 	BRIDGE_LOCK_ASSERT_HELD(sc);
6675 
6676 	/* Make sure we actually need to do this. */
6677 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6678 		return;
6679 	}
6680 
6681 	/* Force an aging cycle; this might trim enough addresses. */
6682 	bridge_rtage(sc);
6683 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6684 		return;
6685 	}
6686 
6687 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6688 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6689 			bridge_rtnode_destroy(sc, brt);
6690 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6691 				return;
6692 			}
6693 		}
6694 	}
6695 }
6696 
6697 /*
6698  * bridge_aging_timer:
6699  *
6700  *	Aging periodic timer for the bridge routing table.
6701  */
6702 static void
6703 bridge_aging_timer(struct bridge_softc *sc)
6704 {
6705 	BRIDGE_LOCK_ASSERT_HELD(sc);
6706 
6707 	bridge_rtage(sc);
6708 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6709 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6710 		sc->sc_aging_timer.bdc_sc = sc;
6711 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6712 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6713 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6714 	}
6715 }
6716 
6717 /*
6718  * bridge_rtage:
6719  *
6720  *	Perform an aging cycle.
6721  */
6722 static void
6723 bridge_rtage(struct bridge_softc *sc)
6724 {
6725 	struct bridge_rtnode *brt, *nbrt;
6726 	unsigned long now;
6727 
6728 	BRIDGE_LOCK_ASSERT_HELD(sc);
6729 
6730 	now = (unsigned long) net_uptime();
6731 
6732 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6733 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6734 			if (now >= brt->brt_expire) {
6735 				bridge_rtnode_destroy(sc, brt);
6736 			}
6737 		}
6738 	}
6739 	if (sc->sc_mac_nat_bif != NULL) {
6740 		bridge_mac_nat_age_entries(sc, now);
6741 	}
6742 }
6743 
6744 /*
6745  * bridge_rtflush:
6746  *
6747  *	Remove all dynamic addresses from the bridge.
6748  */
6749 static void
6750 bridge_rtflush(struct bridge_softc *sc, int full)
6751 {
6752 	struct bridge_rtnode *brt, *nbrt;
6753 
6754 	BRIDGE_LOCK_ASSERT_HELD(sc);
6755 
6756 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6757 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6758 			bridge_rtnode_destroy(sc, brt);
6759 		}
6760 	}
6761 }
6762 
6763 /*
6764  * bridge_rtdaddr:
6765  *
6766  *	Remove an address from the table.
6767  */
6768 static int
6769 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN], uint16_t vlan)
6770 {
6771 	struct bridge_rtnode *brt;
6772 	int found = 0;
6773 
6774 	BRIDGE_LOCK_ASSERT_HELD(sc);
6775 
6776 	/*
6777 	 * If vlan is zero then we want to delete for all vlans so the lookup
6778 	 * may return more than one.
6779 	 */
6780 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6781 		bridge_rtnode_destroy(sc, brt);
6782 		found = 1;
6783 	}
6784 
6785 	return found ? 0 : ENOENT;
6786 }
6787 
6788 /*
6789  * bridge_rtdelete:
6790  *
6791  *	Delete routes to a specific member interface.
6792  */
6793 static void
6794 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6795 {
6796 	struct bridge_rtnode *brt, *nbrt;
6797 
6798 	BRIDGE_LOCK_ASSERT_HELD(sc);
6799 
6800 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6801 		if (brt->brt_ifp == ifp && (full ||
6802 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6803 			bridge_rtnode_destroy(sc, brt);
6804 		}
6805 	}
6806 }
6807 
6808 /*
6809  * bridge_rtable_init:
6810  *
6811  *	Initialize the route table for this bridge.
6812  */
6813 static int
6814 bridge_rtable_init(struct bridge_softc *sc)
6815 {
6816 	u_int32_t i;
6817 
6818 	sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6819 	    BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6820 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6821 
6822 	for (i = 0; i < sc->sc_rthash_size; i++) {
6823 		LIST_INIT(&sc->sc_rthash[i]);
6824 	}
6825 
6826 	sc->sc_rthash_key = RandomULong();
6827 
6828 	LIST_INIT(&sc->sc_rtlist);
6829 
6830 	return 0;
6831 }
6832 
/*
 * bridge_rthash_delayed_resize:
 *
 *	Resize the routing table hash on a delayed thread call.
 *
 *	Doubles the number of hash buckets when the table holds at least
 *	four entries per bucket. The bridge lock is held on entry, dropped
 *	around the allocation of the new bucket array (with SCF_RESIZING
 *	set in sc_flags for that window) and held again on return.
 */
static void
bridge_rthash_delayed_resize(struct bridge_softc *sc)
{
	u_int32_t new_rthash_size = 0;
	u_int32_t old_rthash_size = 0;
	struct _bridge_rtnode_list *new_rthash = NULL;
	struct _bridge_rtnode_list *old_rthash = NULL;
	u_int32_t i;
	struct bridge_rtnode *brt;
	int error = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/*
	 * Four entries per hash bucket is our ideal load factor
	 *
	 * NOTE(review): this early exit reaches `out` with error == 0, so
	 * it takes the success branch there: the "new size" message is
	 * logged and kfree_type() is called with a NULL table and a size
	 * of 0 -- confirm that is intended.
	 */
	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
		goto out;
	}

	/*
	 * Doubling the number of hash buckets may be too simplistic
	 * especially when facing a spike of new entries
	 */
	new_rthash_size = sc->sc_rthash_size * 2;

	/* flag the resize as in progress while the lock is dropped */
	sc->sc_flags |= SCF_RESIZING;
	BRIDGE_UNLOCK(sc);

	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
	    Z_WAITOK | Z_ZERO);

	BRIDGE_LOCK(sc);
	sc->sc_flags &= ~SCF_RESIZING;

	if (new_rthash == NULL) {
		error = ENOMEM;
		goto out;
	}
	/* the bridge may have started detaching while the lock was dropped */
	if ((sc->sc_flags & SCF_DETACHING)) {
		error = ENODEV;
		goto out;
	}
	/*
	 * Fail safe from here on
	 */
	old_rthash = sc->sc_rthash;
	old_rthash_size = sc->sc_rthash_size;
	sc->sc_rthash = new_rthash;
	sc->sc_rthash_size = new_rthash_size;

	/*
	 * Get a new key to force entries to be shuffled around to reduce
	 * the likelihood they will land in the same buckets
	 */
	sc->sc_rthash_key = RandomULong();

	for (i = 0; i < sc->sc_rthash_size; i++) {
		LIST_INIT(&sc->sc_rthash[i]);
	}

	/* move every node from its old bucket into the new table */
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
		LIST_REMOVE(brt, brt_hash);
		(void) bridge_rtnode_hash(sc, brt);
	}
out:
	if (error == 0) {
		/* success: the previous bucket array is released */
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
		    "%s new size %u",
		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
	} else {
		/* failure: the table is unchanged, release the new array */
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
		    "%s failed %d", sc->sc_ifp->if_xname, error);
		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
	}
}
6915 
6916 /*
6917  * Resize the number of hash buckets based on the load factor
6918  * Currently only grow
6919  * Failing to resize the hash table is not fatal
6920  */
6921 static void
6922 bridge_rthash_resize(struct bridge_softc *sc)
6923 {
6924 	BRIDGE_LOCK_ASSERT_HELD(sc);
6925 
6926 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6927 		return;
6928 	}
6929 
6930 	/*
6931 	 * Four entries per hash bucket is our ideal load factor
6932 	 */
6933 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6934 		return;
6935 	}
6936 	/*
6937 	 * Hard limit on the size of the routing hash table
6938 	 */
6939 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6940 		return;
6941 	}
6942 
6943 	sc->sc_resize_call.bdc_sc = sc;
6944 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6945 	bridge_schedule_delayed_call(&sc->sc_resize_call);
6946 }
6947 
/*
 * bridge_rtable_fini:
 *
 *	Deconstruct the route table for this bridge: release the hash
 *	bucket array and reset the softc's table pointer and size.
 *	The table is expected to hold no routes at this point (asserted).
 */
static void
bridge_rtable_fini(struct bridge_softc *sc)
{
	/* every route must already have been destroyed */
	KASSERT(sc->sc_brtcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
	kfree_type_counted_by(struct _bridge_rtnode_list, sc->sc_rthash_size,
	    sc->sc_rthash);
	/* leave no stale pointer or size behind */
	sc->sc_rthash = NULL;
	sc->sc_rthash_size = 0;
}
6963 
6964 /*
6965  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6966  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6967  */
6968 #define mix(a, b, c)                                                    \
6969 do {                                                                    \
6970 	a -= b; a -= c; a ^= (c >> 13);                                 \
6971 	b -= c; b -= a; b ^= (a << 8);                                  \
6972 	c -= a; c -= b; c ^= (b >> 13);                                 \
6973 	a -= b; a -= c; a ^= (c >> 12);                                 \
6974 	b -= c; b -= a; b ^= (a << 16);                                 \
6975 	c -= a; c -= b; c ^= (b >> 5);                                  \
6976 	a -= b; a -= c; a ^= (c >> 3);                                  \
6977 	b -= c; b -= a; b ^= (a << 10);                                 \
6978 	c -= a; c -= b; c ^= (b >> 15);                                 \
6979 } while ( /*CONSTCOND*/ 0)
6980 
6981 static __inline uint32_t
6982 bridge_rthash(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN])
6983 {
6984 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6985 
6986 	b += addr[5] << 8;
6987 	b += addr[4];
6988 	a += addr[3] << 24;
6989 	a += addr[2] << 16;
6990 	a += addr[1] << 8;
6991 	a += addr[0];
6992 
6993 	mix(a, b, c);
6994 
6995 	return c & BRIDGE_RTHASH_MASK(sc);
6996 }
6997 
6998 #undef mix
6999 
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte. Returns zero when
 *	they are equal; otherwise the difference a[i] - b[i] of the first
 *	pair of bytes that differ (negative when `a` sorts first).
 */
static int
bridge_rtnode_addr_cmp(const uint8_t a[ETHER_ADDR_LEN], const uint8_t b[ETHER_ADDR_LEN])
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}
	return 0;
}
7011 
7012 /*
7013  * bridge_rtnode_lookup:
7014  *
7015  *	Look up a bridge route node for the specified destination. Compare the
7016  *	vlan id or if zero then just return the first match.
7017  */
7018 static struct bridge_rtnode *
7019 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
7020     uint16_t vlan)
7021 {
7022 	struct bridge_rtnode *brt;
7023 	uint32_t hash;
7024 	int dir;
7025 
7026 	BRIDGE_LOCK_ASSERT_HELD(sc);
7027 
7028 	hash = bridge_rthash(sc, addr);
7029 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7030 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7031 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7032 			return brt;
7033 		}
7034 		if (dir > 0) {
7035 			return NULL;
7036 		}
7037 	}
7038 
7039 	return NULL;
7040 }
7041 
7042 /*
7043  * bridge_rtnode_hash:
7044  *
7045  *	Insert the specified bridge node into the route hash table.
7046  *	This is used when adding a new node or to rehash when resizing
7047  *	the hash table
7048  */
7049 static int
7050 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7051 {
7052 	struct bridge_rtnode *lbrt;
7053 	uint32_t hash;
7054 	int dir;
7055 
7056 	BRIDGE_LOCK_ASSERT_HELD(sc);
7057 
7058 	hash = bridge_rthash(sc, brt->brt_addr);
7059 
7060 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7061 	if (lbrt == NULL) {
7062 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7063 		goto out;
7064 	}
7065 
7066 	do {
7067 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7068 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7069 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7070 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7071 			    sc->sc_ifp->if_xname,
7072 			    brt->brt_addr[0], brt->brt_addr[1],
7073 			    brt->brt_addr[2], brt->brt_addr[3],
7074 			    brt->brt_addr[4], brt->brt_addr[5]);
7075 			return EEXIST;
7076 		}
7077 		if (dir > 0) {
7078 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7079 			goto out;
7080 		}
7081 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7082 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7083 			goto out;
7084 		}
7085 		lbrt = LIST_NEXT(lbrt, brt_hash);
7086 	} while (lbrt != NULL);
7087 
7088 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7089 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7090 	    sc->sc_ifp->if_xname,
7091 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7092 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7093 out:
7094 	return 0;
7095 }
7096 
7097 /*
7098  * bridge_rtnode_insert:
7099  *
7100  *	Insert the specified bridge node into the route table.  We
7101  *	assume the entry is not already in the table.
7102  */
7103 static int
7104 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7105 {
7106 	int error;
7107 
7108 	error = bridge_rtnode_hash(sc, brt);
7109 	if (error != 0) {
7110 		return error;
7111 	}
7112 
7113 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7114 	sc->sc_brtcnt++;
7115 
7116 	bridge_rthash_resize(sc);
7117 
7118 	return 0;
7119 }
7120 
7121 /*
7122  * bridge_rtnode_destroy:
7123  *
7124  *	Destroy a bridge rtnode.
7125  */
7126 static void
7127 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7128 {
7129 	BRIDGE_LOCK_ASSERT_HELD(sc);
7130 
7131 	LIST_REMOVE(brt, brt_hash);
7132 
7133 	LIST_REMOVE(brt, brt_list);
7134 	sc->sc_brtcnt--;
7135 	brt->brt_dst->bif_addrcnt--;
7136 	zfree(bridge_rtnode_pool, brt);
7137 }
7138 
7139 #if BRIDGESTP
7140 /*
7141  * bridge_rtable_expire:
7142  *
7143  *	Set the expiry time for all routes on an interface.
7144  */
7145 static void
7146 bridge_rtable_expire(struct ifnet *ifp, int age)
7147 {
7148 	struct bridge_softc *sc = ifp->if_bridge;
7149 	struct bridge_rtnode *brt;
7150 
7151 	BRIDGE_LOCK(sc);
7152 
7153 	/*
7154 	 * If the age is zero then flush, otherwise set all the expiry times to
7155 	 * age for the interface
7156 	 */
7157 	if (age == 0) {
7158 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7159 	} else {
7160 		unsigned long now;
7161 
7162 		now = (unsigned long) net_uptime();
7163 
7164 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7165 			/* Cap the expiry time to 'age' */
7166 			if (brt->brt_ifp == ifp &&
7167 			    brt->brt_expire > now + age &&
7168 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7169 				brt->brt_expire = now + age;
7170 			}
7171 		}
7172 	}
7173 	BRIDGE_UNLOCK(sc);
7174 }
7175 
7176 /*
7177  * bridge_state_change:
7178  *
7179  *	Callback from the bridgestp code when a port changes states.
7180  */
7181 static void
7182 bridge_state_change(struct ifnet *ifp, int state)
7183 {
7184 	struct bridge_softc *sc = ifp->if_bridge;
7185 	static const char *stpstates[] = {
7186 		"disabled",
7187 		"listening",
7188 		"learning",
7189 		"forwarding",
7190 		"blocking",
7191 		"discarding"
7192 	};
7193 
7194 	if (log_stp) {
7195 		log(LOG_NOTICE, "%s: state changed to %s on %s",
7196 		    sc->sc_ifp->if_xname,
7197 		    stpstates[state], ifp->if_xname);
7198 	}
7199 }
7200 #endif /* BRIDGESTP */
7201 
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached. Releases what is left
 *	of the bridge: the spanning tree state (when built with BRIDGESTP),
 *	the routing table, the entry on the global bridge list, the
 *	reference on `ifp`, the softc mutex and finally the softc itself.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* take the bridge off the global list, under the list mutex */
	lck_mtx_lock(&bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(&bridge_list_mtx);

	ifnet_release(ifp);

	/* `sc` must not be touched after this point */
	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
	kfree_type(struct bridge_softc, sc);
}
7228 
7229 /*
7230  * bridge_link_event:
7231  *
7232  *	Report a data link event on an interface
7233  */
7234 static void
7235 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7236 {
7237 	struct event {
7238 		u_int32_t ifnet_family;
7239 		u_int32_t unit;
7240 		char if_name[IFNAMSIZ];
7241 	};
7242 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7243 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7244 	struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
7245 
7246 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7247 	    "%s event_code %u - %s", ifp->if_xname,
7248 	    event_code, dlil_kev_dl_code_str(event_code));
7249 	header->total_size   = sizeof(message);
7250 	header->vendor_code  = KEV_VENDOR_APPLE;
7251 	header->kev_class    = KEV_NETWORK_CLASS;
7252 	header->kev_subclass = KEV_DL_SUBCLASS;
7253 	header->event_code   = event_code;
7254 	data->ifnet_family   = ifnet_family(ifp);
7255 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7256 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7257 	ifnet_event(ifp, header);
7258 }
7259 
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter drop: increments the `reason` counter in
 *	bridge_hostfilter_stats, logs the drop with the given function
 *	name and line, and sets `error` to EINVAL.
 *
 *	The macro assigns to a variable named `error` that must be in
 *	scope at the point of use. It expands to a brace-enclosed block,
 *	not a do { } while (0) statement.
 */
#define BRIDGE_HF_DROP(reason, func, line) {                            \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d" #reason, func, line);                \
	        error = EINVAL;                                         \
	}
7266 
7267 static int
7268 bridge_host_filter_arp(struct bridge_iflist *bif, mbuf_t *data)
7269 {
7270 	struct ether_arp *ea;
7271 	struct ether_header *eh;
7272 	int error = EINVAL;
7273 	mbuf_t m = *data;
7274 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7275 
7276 	/*
7277 	 * Make the Ethernet and ARP headers contiguous
7278 	 */
7279 	if (mbuf_pkthdr_len(m) < minlen) {
7280 		BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7281 		goto done;
7282 	}
7283 	if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7284 		BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7285 		    __func__, __LINE__);
7286 		goto done;
7287 	}
7288 	m = *data;
7289 
7290 	/*
7291 	 * Restrict Ethernet protocols to ARP and IP/IPv6
7292 	 */
7293 	eh = mtod(m, struct ether_header *);
7294 	ea = (struct ether_arp *)(eh + 1);
7295 	if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7296 		BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7297 		    __func__, __LINE__);
7298 		goto done;
7299 	}
7300 	if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7301 		BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7302 		    __func__, __LINE__);
7303 		goto done;
7304 	}
7305 	/*
7306 	 * Verify the address lengths are correct
7307 	 */
7308 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7309 		BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7310 		goto done;
7311 	}
7312 	if (ea->arp_pln != sizeof(struct in_addr)) {
7313 		BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7314 		    __func__, __LINE__);
7315 		goto done;
7316 	}
7317 	/*
7318 	 * Allow only ARP request or ARP reply
7319 	 */
7320 	if (ea->arp_op != HTONS_ARPOP_REQUEST &&
7321 	    ea->arp_op != HTONS_ARPOP_REPLY) {
7322 		BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7323 		goto done;
7324 	}
7325 	if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7326 		/*
7327 		 * Verify source hardware address matches
7328 		 */
7329 		if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7330 		    ETHER_ADDR_LEN) != 0) {
7331 			BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7332 			goto done;
7333 		}
7334 	}
7335 	if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7336 		/*
7337 		 * Verify source protocol address:
7338 		 * May be null for an ARP probe
7339 		 */
7340 		if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7341 		    sizeof(struct in_addr)) != 0 &&
7342 		    bcmp(ea->arp_spa, &inaddr_any,
7343 		    sizeof(struct in_addr)) != 0) {
7344 			BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7345 			goto done;
7346 		}
7347 	}
7348 	bridge_hostfilter_stats.brhf_arp_ok += 1;
7349 	error = 0;
7350 done:
7351 	return error;
7352 }
7353 
7354 /*
7355  * MAC NAT
7356  */
7357 
7358 static errno_t
7359 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7360 {
7361 	errno_t         error = 0;
7362 
7363 	BRIDGE_LOCK_ASSERT_HELD(sc);
7364 
7365 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7366 		error = EINVAL;
7367 		goto done;
7368 	}
7369 	if (sc->sc_mac_nat_bif != NULL) {
7370 		if (sc->sc_mac_nat_bif != bif) {
7371 			error = EBUSY;
7372 		}
7373 		goto done;
7374 	}
7375 	sc->sc_mac_nat_bif = bif;
7376 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7377 	bridge_mac_nat_populate_entries(sc);
7378 
7379 done:
7380 	return error;
7381 }
7382 
7383 static void
7384 bridge_mac_nat_disable(struct bridge_softc *sc)
7385 {
7386 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7387 
7388 	assert(mac_nat_bif != NULL);
7389 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7390 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7391 	sc->sc_mac_nat_bif = NULL;
7392 	return;
7393 }
7394 
7395 static void
7396 mac_nat_entry_print2(struct mac_nat_entry *mne,
7397     const char ifname[IFNAMSIZ], const char *msg1, const char *msg2)
7398 {
7399 	int             af;
7400 	char            etopbuf[24];
7401 	char            ntopbuf[MAX_IPv6_STR_LEN];
7402 	const char      *space;
7403 
7404 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7405 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7406 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7407 	if (msg2 == NULL) {
7408 		msg2 = "";
7409 		space = "";
7410 	} else {
7411 		space = " ";
7412 	}
7413 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7414 	    "%.*s %s%s%s %p (%s, %s, %s)", IFNAMSIZ, ifname, msg1, space, msg2, mne,
7415 	    mne->mne_bif->bif_ifp->if_xname, ntopbuf, etopbuf);
7416 }
7417 
7418 static void
7419 mac_nat_entry_print(struct mac_nat_entry *mne,
7420     const char ifname[IFNAMSIZ], const char *msg)
7421 {
7422 	mac_nat_entry_print2(mne, ifname, msg, NULL);
7423 }
7424 
7425 static struct mac_nat_entry *
7426 bridge_lookup_mac_nat_entry_ipv4(const struct bridge_softc *sc, const struct in_addr *ip)
7427 {
7428 	struct mac_nat_entry    *mne;
7429 	struct mac_nat_entry    *ret_mne = NULL;
7430 
7431 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7432 		if (mne->mne_ip.s_addr == ip->s_addr) {
7433 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7434 				mac_nat_entry_print(mne, sc->sc_if_xname,
7435 				    "found");
7436 			}
7437 			ret_mne = mne;
7438 			break;
7439 		}
7440 	}
7441 
7442 	return ret_mne;
7443 }
7444 
7445 static struct mac_nat_entry *
7446 bridge_lookup_mac_nat_entry_ipv6(const struct bridge_softc *sc, const struct in6_addr *ip6)
7447 {
7448 	struct mac_nat_entry    *mne;
7449 	struct mac_nat_entry    *ret_mne = NULL;
7450 
7451 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7452 		if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7453 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7454 				mac_nat_entry_print(mne, sc->sc_if_xname,
7455 				    "found");
7456 			}
7457 			ret_mne = mne;
7458 			break;
7459 		}
7460 	}
7461 
7462 	return ret_mne;
7463 }
7464 
7465 static void
7466 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7467     struct mac_nat_entry *mne, const char *reason)
7468 {
7469 	LIST_REMOVE(mne, mne_list);
7470 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7471 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7472 	}
7473 	zfree(bridge_mne_pool, mne);
7474 	sc->sc_mne_count--;
7475 }
7476 
7477 static struct mac_nat_entry *
7478 bridge_create_mac_nat_entry_common(struct bridge_softc *sc,
7479     struct bridge_iflist *bif, const char eaddr[ETHER_ADDR_LEN])
7480 {
7481 	struct mac_nat_entry *mne;
7482 
7483 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7484 		sc->sc_mne_allocation_failures++;
7485 		return NULL;
7486 	}
7487 
7488 	mne = zalloc_noblock(bridge_mne_pool);
7489 	if (mne == NULL) {
7490 		sc->sc_mne_allocation_failures++;
7491 		return NULL;
7492 	}
7493 
7494 	sc->sc_mne_count++;
7495 	bzero(mne, sizeof(*mne));
7496 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7497 
7498 	mne->mne_bif = bif;
7499 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7500 
7501 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7502 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7503 	}
7504 
7505 	return mne;
7506 }
7507 
7508 static struct mac_nat_entry *
7509 bridge_create_mac_nat_entry_ipv4(struct bridge_softc *sc,
7510     struct bridge_iflist *bif, const struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7511 {
7512 	struct mac_nat_entry *mne;
7513 
7514 	mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7515 	if (mne == NULL) {
7516 		return NULL;
7517 	}
7518 
7519 	bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7520 	LIST_INSERT_HEAD(&sc->sc_mne_list, mne, mne_list);
7521 
7522 	return mne;
7523 }
7524 
7525 static struct mac_nat_entry *
7526 bridge_create_mac_nat_entry_ipv6(struct bridge_softc *sc,
7527     struct bridge_iflist *bif, const struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7528 {
7529 	struct mac_nat_entry *mne;
7530 
7531 	mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7532 	if (mne == NULL) {
7533 		return NULL;
7534 	}
7535 
7536 	bcopy(ip6, &mne->mne_ip6, sizeof(mne->mne_ip6));
7537 	mne->mne_flags |= MNE_FLAGS_IPV6;
7538 	LIST_INSERT_HEAD(&sc->sc_mne_list_v6, mne, mne_list);
7539 
7540 	return mne;
7541 }
7542 
7543 static struct mac_nat_entry *
7544 bridge_update_mac_nat_entry_common(struct bridge_softc *sc, struct bridge_iflist *bif,
7545     struct mac_nat_entry *mne, const char eaddr[ETHER_ADDR_LEN])
7546 {
7547 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7548 
7549 	if (mne->mne_bif == mac_nat_bif) {
7550 		/* the MAC NAT interface takes precedence */
7551 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7552 			if (mne->mne_bif != bif) {
7553 				mac_nat_entry_print2(mne,
7554 				    sc->sc_if_xname, "reject",
7555 				    bif->bif_ifp->if_xname);
7556 			}
7557 		}
7558 	} else if (mne->mne_bif != bif) {
7559 		const char *__null_terminated old_if = mne->mne_bif->bif_ifp->if_xname;
7560 
7561 		mne->mne_bif = bif;
7562 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7563 			mac_nat_entry_print2(mne,
7564 			    sc->sc_if_xname, "replaced",
7565 			    old_if);
7566 		}
7567 		bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7568 	}
7569 
7570 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7571 
7572 	return mne;
7573 }
7574 
7575 static struct mac_nat_entry *
7576 bridge_update_mac_nat_entry_ipv4(struct bridge_softc *sc,
7577     struct bridge_iflist *bif, struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7578 {
7579 	struct mac_nat_entry *mne;
7580 
7581 	mne = bridge_lookup_mac_nat_entry_ipv4(sc, ip);
7582 	if (mne != NULL) {
7583 		return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7584 	}
7585 
7586 	mne = bridge_create_mac_nat_entry_ipv4(sc, bif, ip, eaddr);
7587 	return mne;
7588 }
7589 
7590 static struct mac_nat_entry *
7591 bridge_update_mac_nat_entry_ipv6(struct bridge_softc *sc,
7592     struct bridge_iflist *bif, struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7593 {
7594 	struct mac_nat_entry *mne;
7595 
7596 	mne = bridge_lookup_mac_nat_entry_ipv6(sc, ip6);
7597 	if (mne != NULL) {
7598 		return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7599 	}
7600 
7601 	mne = bridge_create_mac_nat_entry_ipv6(sc, bif, ip6, eaddr);
7602 	return mne;
7603 }
7604 
7605 static void
7606 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7607     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7608 {
7609 	struct mac_nat_entry *mne;
7610 	struct mac_nat_entry *tmne;
7611 
7612 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7613 		if (bif != NULL && mne->mne_bif != bif) {
7614 			continue;
7615 		}
7616 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7617 	}
7618 }
7619 
7620 /*
7621  * bridge_mac_nat_flush_entries:
7622  *
7623  * Flush MAC NAT entries for the specified member. Flush all entries if
7624  * the member is the one that requires MAC NAT, otherwise just flush the
7625  * ones for the specified member.
7626  */
7627 static void
7628 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7629 {
7630 	struct bridge_iflist *flush_bif;
7631 
7632 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7633 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7634 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7635 }
7636 
7637 static void
7638 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7639 {
7640 	errno_t                 error;
7641 	ifnet_t                 ifp;
7642 	uint16_t                addresses_count = 0;
7643 	ifaddr_t                * __counted_by(addresses_count) list;
7644 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
7645 
7646 	assert(mac_nat_bif != NULL);
7647 	ifp = mac_nat_bif->bif_ifp;
7648 	error = ifnet_get_address_list_family_with_count(ifp, &list, &addresses_count, 0);
7649 	if (error != 0) {
7650 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7651 		    "ifnet_get_address_list(%s) failed %d",
7652 		    ifp->if_xname, error);
7653 		return;
7654 	}
7655 
7656 	for (uint16_t i = 0; i < addresses_count; ++i) {
7657 		sa_family_t af;
7658 
7659 		af = ifaddr_address_family(list[i]);
7660 		switch (af) {
7661 		case AF_INET: {
7662 			struct sockaddr_in sin;
7663 
7664 			error = ifaddr_address(list[i], (struct sockaddr *)&sin, sizeof(sin));
7665 			if (error != 0) {
7666 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7667 				    "ifaddr_address failed %d",
7668 				    error);
7669 				break;
7670 			}
7671 
7672 			bridge_create_mac_nat_entry_ipv4(sc, mac_nat_bif, &sin.sin_addr, IF_LLADDR(ifp));
7673 			break;
7674 		}
7675 
7676 		case AF_INET6: {
7677 			struct sockaddr_in6 sin6;
7678 
7679 			error = ifaddr_address(list[i], (struct sockaddr *)&sin6, sizeof(sin6));
7680 			if (error != 0) {
7681 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7682 				    "ifaddr_address failed %d",
7683 				    error);
7684 				break;
7685 			}
7686 
7687 			if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
7688 				/* remove scope ID */
7689 				sin6.sin6_addr.s6_addr16[1] = 0;
7690 			}
7691 
7692 			bridge_create_mac_nat_entry_ipv6(sc, mac_nat_bif, &sin6.sin6_addr, IF_LLADDR(ifp));
7693 			break;
7694 		}
7695 
7696 		default:
7697 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7698 			    "ifaddr_address_family unknown %d",
7699 			    af);
7700 			break;
7701 		}
7702 	}
7703 
7704 	ifnet_address_list_free_counted_by(list, addresses_count);
7705 	return;
7706 }
7707 
7708 static void
7709 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7710     struct mac_nat_entry_list *list, unsigned long now)
7711 {
7712 	struct mac_nat_entry *mne;
7713 	struct mac_nat_entry *tmne;
7714 
7715 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7716 		if (now >= mne->mne_expire) {
7717 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7718 		}
7719 	}
7720 }
7721 
7722 static void
7723 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7724 {
7725 	if (sc->sc_mac_nat_bif == NULL) {
7726 		return;
7727 	}
7728 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7729 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7730 }
7731 
7732 static const char *
7733 get_in_out_string(boolean_t is_output)
7734 {
7735 	return (const char * __null_terminated)(is_output ? "OUT" : "IN");
7736 }
7737 
7738 /*
7739  * is_valid_arp_packet:
7740  *	Verify that this is a valid ARP packet.
7741  *
7742  *	Returns TRUE if the packet is valid, FALSE otherwise.
7743  */
7744 static boolean_t
7745 is_valid_arp_packet(mbuf_t *data, bool is_output,
7746     struct ether_header **eh_p, struct ether_arp **ea_p)
7747 {
7748 	struct ether_arp *ea;
7749 	struct ether_header *eh;
7750 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7751 	boolean_t is_valid = FALSE;
7752 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7753 
7754 	if (mbuf_pkthdr_len(*data) < minlen) {
7755 		BRIDGE_LOG(LOG_DEBUG, flags,
7756 		    "ARP %s short frame %lu < %lu",
7757 		    get_in_out_string(is_output),
7758 		    mbuf_pkthdr_len(*data), minlen);
7759 		goto done;
7760 	}
7761 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7762 		BRIDGE_LOG(LOG_DEBUG, flags,
7763 		    "ARP %s size %lu mbuf_pullup fail",
7764 		    get_in_out_string(is_output),
7765 		    minlen);
7766 		*data = NULL;
7767 		goto done;
7768 	}
7769 
7770 	/* validate ARP packet */
7771 	eh = mtod(*data, struct ether_header *);
7772 	ea = (struct ether_arp *)(eh + 1);
7773 	if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7774 		BRIDGE_LOG(LOG_DEBUG, flags,
7775 		    "ARP %s htype not ethernet",
7776 		    get_in_out_string(is_output));
7777 		goto done;
7778 	}
7779 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7780 		BRIDGE_LOG(LOG_DEBUG, flags,
7781 		    "ARP %s hlen not ethernet",
7782 		    get_in_out_string(is_output));
7783 		goto done;
7784 	}
7785 	if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7786 		BRIDGE_LOG(LOG_DEBUG, flags,
7787 		    "ARP %s ptype not IP",
7788 		    get_in_out_string(is_output));
7789 		goto done;
7790 	}
7791 	if (ea->arp_pln != sizeof(struct in_addr)) {
7792 		BRIDGE_LOG(LOG_DEBUG, flags,
7793 		    "ARP %s plen not IP",
7794 		    get_in_out_string(is_output));
7795 		goto done;
7796 	}
7797 	is_valid = TRUE;
7798 	*ea_p = ea;
7799 	*eh_p = eh;
7800 done:
7801 	return is_valid;
7802 }
7803 
7804 static struct mac_nat_entry *
7805 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7806 {
7807 	struct ether_arp        * __single ea;
7808 	struct ether_header     * __single eh;
7809 	struct mac_nat_entry    *mne = NULL;
7810 	u_short                 op;
7811 	struct in_addr          tpa;
7812 
7813 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7814 		goto done;
7815 	}
7816 	op = ea->arp_op;
7817 	switch (op) {
7818 	case HTONS_ARPOP_REQUEST:
7819 	case HTONS_ARPOP_REPLY:
7820 		/* only care about REQUEST and REPLY */
7821 		break;
7822 	default:
7823 		goto done;
7824 	}
7825 
7826 	/* check the target IP address for a NAT entry */
7827 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7828 	if (tpa.s_addr != 0) {
7829 		mne = bridge_lookup_mac_nat_entry_ipv4(sc, &tpa);
7830 	}
7831 	if (mne != NULL) {
7832 		if (op == HTONS_ARPOP_REPLY) {
7833 			/* translate the MAC address */
7834 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7835 				char    mac_src[24];
7836 				char    mac_dst[24];
7837 
7838 				ether_ntop(mac_src, sizeof(mac_src),
7839 				    ea->arp_tha);
7840 				ether_ntop(mac_dst, sizeof(mac_dst),
7841 				    mne->mne_mac);
7842 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7843 				    "%s %s ARP %s -> %s",
7844 				    sc->sc_if_xname,
7845 				    mne->mne_bif->bif_ifp->if_xname,
7846 				    mac_src, mac_dst);
7847 			}
7848 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7849 		}
7850 	} else {
7851 		/* handle conflicting ARP (sender matches mne) */
7852 		struct in_addr spa;
7853 
7854 		bcopy(ea->arp_spa, &spa, sizeof(spa));
7855 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7856 			/* check the source IP for a NAT entry */
7857 			mne = bridge_lookup_mac_nat_entry_ipv4(sc, &spa);
7858 		}
7859 	}
7860 
7861 done:
7862 	return mne;
7863 }
7864 
7865 static boolean_t
7866 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7867     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7868 {
7869 	struct ether_arp        * __single ea;
7870 	struct ether_header     * __single eh;
7871 	struct in_addr          ip;
7872 	struct mac_nat_entry    *mne = NULL;
7873 	u_short                 op;
7874 	boolean_t               translate = FALSE;
7875 
7876 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7877 		goto done;
7878 	}
7879 	op = ea->arp_op;
7880 	switch (op) {
7881 	case HTONS_ARPOP_REQUEST:
7882 	case HTONS_ARPOP_REPLY:
7883 		/* only care about REQUEST and REPLY */
7884 		break;
7885 	default:
7886 		goto done;
7887 	}
7888 
7889 	bcopy(ea->arp_spa, &ip, sizeof(ip));
7890 	if (ip.s_addr == 0) {
7891 		goto done;
7892 	}
7893 	/* XXX validate IP address: no multicast/broadcast */
7894 	mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
7895 	    (const char *)ea->arp_sha);
7896 	if (mnr != NULL && mne != NULL) {
7897 		/* record the offset to do the replacement */
7898 		translate = TRUE;
7899 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
7900 	}
7901 
7902 done:
7903 	return translate;
7904 }
7905 
7906 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
7907 	                         + sizeof(struct ip))
7908 static uint8_t * __indexable
7909 get_ether_ip_header_ptr(mbuf_t *data, boolean_t is_output)
7910 {
7911 	uint8_t         *header = NULL;
7912 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7913 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
7914 
7915 	if (mbuf_pkthdr_len(*data) < minlen) {
7916 		BRIDGE_LOG(LOG_DEBUG, flags,
7917 		    "IP %s short frame %lu < %lu",
7918 		    get_in_out_string(is_output),
7919 		    mbuf_pkthdr_len(*data), minlen);
7920 		goto done;
7921 	}
7922 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7923 		BRIDGE_LOG(LOG_DEBUG, flags,
7924 		    "IP %s size %lu mbuf_pullup fail",
7925 		    get_in_out_string(is_output),
7926 		    minlen);
7927 		*data = NULL;
7928 		goto done;
7929 	}
7930 	header = mtod(*data, uint8_t *);
7931 done:
7932 	return header;
7933 }
7934 
7935 static struct mac_nat_entry *
7936 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
7937 {
7938 	struct in_addr          dst;
7939 	uint8_t                 *header;
7940 	struct ip               *iphdr;
7941 	struct mac_nat_entry    *mne = NULL;
7942 
7943 	header = get_ether_ip_header_ptr(data, FALSE);
7944 	if (header == NULL) {
7945 		goto done;
7946 	}
7947 	iphdr = (struct ip *)(void *)(header + sizeof(struct ether_header));
7948 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
7949 	/* XXX validate IP address */
7950 	if (dst.s_addr == 0) {
7951 		goto done;
7952 	}
7953 	mne = bridge_lookup_mac_nat_entry_ipv4(sc, &dst);
7954 done:
7955 	return mne;
7956 }
7957 
7958 static void
7959 bridge_mac_nat_udp_output(struct bridge_softc *sc,
7960     struct bridge_iflist *bif, mbuf_t m,
7961     uint8_t ip_header_len, struct mac_nat_record *mnr)
7962 {
7963 	uint16_t        dp_flags;
7964 	errno_t         error;
7965 	size_t          offset;
7966 	struct udphdr   udphdr;
7967 
7968 	/* copy the UDP header */
7969 	offset = sizeof(struct ether_header) + ip_header_len;
7970 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
7971 	if (error != 0) {
7972 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7973 		    "mbuf_copydata udphdr failed %d",
7974 		    error);
7975 		return;
7976 	}
7977 	if (udphdr.uh_sport != HTONS_IPPORT_BOOTPC ||
7978 	    udphdr.uh_dport != HTONS_IPPORT_BOOTPS) {
7979 		/* not a BOOTP/DHCP packet */
7980 		return;
7981 	}
7982 	/* check whether the broadcast bit is already set */
7983 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
7984 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
7985 	if (error != 0) {
7986 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7987 		    "mbuf_copydata dp_flags failed %d",
7988 		    error);
7989 		return;
7990 	}
7991 	if ((dp_flags & HTONS_DHCP_FLAGS_BROADCAST) != 0) {
7992 		/* it's already set, nothing to do */
7993 		return;
7994 	}
7995 	/* broadcast bit needs to be set */
7996 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
7997 	mnr->mnr_ip_header_len = ip_header_len;
7998 	if (udphdr.uh_sum != 0) {
7999 		uint16_t        delta;
8000 
8001 		/* adjust checksum to take modified dp_flags into account */
8002 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8003 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8004 	}
8005 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8006 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8007 	    sc->sc_if_xname,
8008 	    bif->bif_ifp->if_xname,
8009 	    ntohs(mnr->mnr_ip_dhcp_flags),
8010 	    ntohs(mnr->mnr_ip_udp_csum));
8011 	return;
8012 }
8013 
8014 static boolean_t
8015 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8016     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8017 {
8018 #pragma unused(mnr)
8019 	uint8_t                 *header;
8020 	struct ether_header     *eh;
8021 	struct in_addr          ip;
8022 	struct ip               *iphdr;
8023 	uint8_t                 ip_header_len;
8024 	struct mac_nat_entry    *mne = NULL;
8025 	boolean_t               translate = FALSE;
8026 
8027 	header = get_ether_ip_header_ptr(data, TRUE);
8028 	if (header == NULL) {
8029 		goto done;
8030 	}
8031 
8032 	eh = (struct ether_header *)header;
8033 	iphdr = (struct ip *)(header + sizeof(*eh));
8034 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8035 	if (ip_header_len < sizeof(ip)) {
8036 		/* bogus IP header */
8037 		goto done;
8038 	}
8039 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8040 	/* XXX validate the source address */
8041 	if (ip.s_addr != 0) {
8042 		mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8043 		    (const char *)eh->ether_shost);
8044 	}
8045 	if (mnr != NULL) {
8046 		if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8047 			/* handle DHCP must broadcast */
8048 			bridge_mac_nat_udp_output(sc, bif, *data,
8049 			    ip_header_len, mnr);
8050 		}
8051 		translate = TRUE;
8052 	}
8053 done:
8054 	return translate;
8055 }
8056 
8057 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8058 	                         + sizeof(struct ip6_hdr))
8059 static uint8_t * __indexable
8060 get_ether_ipv6_header_ptr(mbuf_t *data, size_t plen, boolean_t is_output)
8061 {
8062 	uint8_t         *header = NULL;
8063 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8064 	size_t          minlen = ETHER_IPV6_HEADER_LEN + plen;
8065 
8066 	if (mbuf_pkthdr_len(*data) < minlen) {
8067 		BRIDGE_LOG(LOG_DEBUG, flags,
8068 		    "IP %s short frame %lu < %lu",
8069 		    get_in_out_string(is_output),
8070 		    mbuf_pkthdr_len(*data), minlen);
8071 		goto done;
8072 	}
8073 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8074 		BRIDGE_LOG(LOG_DEBUG, flags,
8075 		    "IP %s size %lu mbuf_pullup fail",
8076 		    get_in_out_string(is_output),
8077 		    minlen);
8078 		*data = NULL;
8079 		goto done;
8080 	}
8081 	header = mtod(*data, uint8_t *);
8082 done:
8083 	return header;
8084 }
8085 
8086 #include <netinet/icmp6.h>
8087 #include <netinet6/nd6.h>
8088 
8089 #define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8090 
8091 static void
8092 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
8093     struct bridge_iflist *bif,
8094     mbuf_t *data, struct ip6_hdr *ip6h,
8095     struct in6_addr *saddrp,
8096     struct mac_nat_record *mnr)
8097 {
8098 	uint8_t *header;
8099 	struct ether_header *eh;
8100 	struct icmp6_hdr *icmp6;
8101 	uint8_t         icmp6_type;
8102 	uint32_t        icmp6len;
8103 	int             lladdrlen = 0;
8104 	char            *lladdr = NULL;
8105 	unsigned int    off = sizeof(*ip6h);
8106 
8107 	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
8108 	if (icmp6len < sizeof(*icmp6)) {
8109 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8110 		    "short IPv6 payload length %d < %lu",
8111 		    icmp6len, sizeof(*icmp6));
8112 		return;
8113 	}
8114 
8115 	/* pullup IP6 header + ICMPv6 header */
8116 	header = get_ether_ipv6_header_ptr(data, sizeof(*icmp6), TRUE);
8117 	if (header == NULL) {
8118 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8119 		    "failed to pullup icmp6 header");
8120 		return;
8121 	}
8122 	eh = (struct ether_header *)header;
8123 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8124 	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8125 	icmp6_type = icmp6->icmp6_type;
8126 	switch (icmp6_type) {
8127 	case ND_NEIGHBOR_SOLICIT:
8128 	case ND_NEIGHBOR_ADVERT:
8129 	case ND_ROUTER_ADVERT:
8130 	case ND_ROUTER_SOLICIT:
8131 		break;
8132 	default:
8133 		return;
8134 	}
8135 
8136 	/* pullup IP6 header + payload */
8137 	header = get_ether_ipv6_header_ptr(data, icmp6len, TRUE);
8138 	if (header == NULL) {
8139 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8140 		    "failed to pullup icmp6 + payload");
8141 		return;
8142 	}
8143 	eh = (struct ether_header *)header;
8144 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8145 	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
8146 
8147 	switch (icmp6_type) {
8148 	case ND_NEIGHBOR_SOLICIT: {
8149 		struct nd_neighbor_solicit *nd_ns;
8150 		union nd_opts ndopts;
8151 		boolean_t is_dad_probe;
8152 		struct in6_addr taddr;
8153 
8154 		if (icmp6len < sizeof(*nd_ns)) {
8155 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8156 			    "short nd_ns %d < %lu",
8157 			    icmp6len, sizeof(*nd_ns));
8158 			return;
8159 		}
8160 
8161 		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8162 		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8163 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8164 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8165 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8166 			    "invalid target ignored");
8167 			return;
8168 		}
8169 
8170 		/* parse options */
8171 		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8172 		if (nd6_options(&ndopts) < 0) {
8173 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8174 			    "invalid ND6 NS option");
8175 			return;
8176 		}
8177 		if (ndopts.nd_opts_src_lladdr != NULL) {
8178 			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len,
8179 			    lladdr, lladdrlen);
8180 		}
8181 		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8182 		if (lladdr != NULL) {
8183 			if (is_dad_probe) {
8184 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8185 				    "bad ND6 DAD packet");
8186 				return;
8187 			}
8188 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8189 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8190 				    "source lladdrlen %d != %lu",
8191 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8192 				return;
8193 			}
8194 		}
8195 		if (is_dad_probe) {
8196 			/* node is trying use taddr, create an mne for taddr */
8197 			*saddrp = taddr;
8198 		}
8199 		break;
8200 	}
8201 	case ND_NEIGHBOR_ADVERT: {
8202 		struct nd_neighbor_advert *nd_na;
8203 		union nd_opts ndopts;
8204 		struct in6_addr taddr;
8205 
8206 
8207 		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8208 
8209 		if (icmp6len < sizeof(*nd_na)) {
8210 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8211 			    "short nd_na %d < %lu",
8212 			    icmp6len, sizeof(*nd_na));
8213 			return;
8214 		}
8215 
8216 		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8217 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8218 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8219 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8220 			    "invalid target ignored");
8221 			return;
8222 		}
8223 
8224 		/* parse options */
8225 		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8226 		if (nd6_options(&ndopts) < 0) {
8227 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8228 			    "invalid ND6 NA option");
8229 			return;
8230 		}
8231 		if (ndopts.nd_opts_tgt_lladdr == NULL) {
8232 			/* target linklayer, nothing to do */
8233 			return;
8234 		}
8235 
8236 		ND_OPT_LLADDR(ndopts.nd_opts_tgt_lladdr, nd_opt_len, lladdr, lladdrlen);
8237 		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8238 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8239 			    "target lladdrlen %d != %lu",
8240 			    lladdrlen, ETHER_ND_LLADDR_LEN);
8241 			return;
8242 		}
8243 		break;
8244 	}
8245 	case ND_ROUTER_ADVERT:
8246 	case ND_ROUTER_SOLICIT: {
8247 		union nd_opts ndopts;
8248 		uint32_t type_length;
8249 		const char *description;
8250 
8251 		if (icmp6_type == ND_ROUTER_ADVERT) {
8252 			type_length = sizeof(struct nd_router_advert);
8253 			description = "RA";
8254 		} else {
8255 			type_length = sizeof(struct nd_router_solicit);
8256 			description = "RS";
8257 		}
8258 		if (icmp6len < type_length) {
8259 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8260 			    "short ND6 %s %d < %d",
8261 			    description, icmp6len, type_length);
8262 			return;
8263 		}
8264 
8265 		/* parse options */
8266 		nd6_option_init(((uint8_t *)icmp6) + type_length,
8267 		    icmp6len - type_length, &ndopts);
8268 		if (nd6_options(&ndopts) < 0) {
8269 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8270 			    "invalid ND6 %s option", description);
8271 			return;
8272 		}
8273 		if (ndopts.nd_opts_src_lladdr != NULL) {
8274 			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len, lladdr, lladdrlen);
8275 
8276 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8277 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8278 				    "source lladdrlen %d != %lu",
8279 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8280 				return;
8281 			}
8282 		}
8283 		break;
8284 	}
8285 	default:
8286 		break;
8287 	}
8288 
8289 	if (lladdr != NULL) {
8290 		mnr->mnr_ip6_lladdr_offset = (uint16_t)
8291 		    ((uintptr_t)lladdr - (uintptr_t)eh);
8292 		mnr->mnr_ip6_icmp6_len = icmp6len;
8293 		mnr->mnr_ip6_icmp6_type = icmp6_type;
8294 		mnr->mnr_ip6_header_len = off;
8295 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8296 			const char *str;
8297 
8298 			switch (mnr->mnr_ip6_icmp6_type) {
8299 			case ND_ROUTER_ADVERT:
8300 				str = "ROUTER ADVERT";
8301 				break;
8302 			case ND_ROUTER_SOLICIT:
8303 				str = "ROUTER SOLICIT";
8304 				break;
8305 			case ND_NEIGHBOR_ADVERT:
8306 				str = "NEIGHBOR ADVERT";
8307 				break;
8308 			case ND_NEIGHBOR_SOLICIT:
8309 				str = "NEIGHBOR SOLICIT";
8310 				break;
8311 			default:
8312 				str = "";
8313 				break;
8314 			}
8315 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8316 			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8317 			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8318 			    mnr->mnr_ip6_header_len,
8319 			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8320 		}
8321 	}
8322 }
8323 
8324 static struct mac_nat_entry *
8325 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8326 {
8327 	struct in6_addr         dst;
8328 	uint8_t                 *header;
8329 	struct ether_header     *eh;
8330 	struct ip6_hdr          *ip6h;
8331 	struct mac_nat_entry    *mne = NULL;
8332 
8333 	header = get_ether_ipv6_header_ptr(data, 0, FALSE);
8334 	if (header == NULL) {
8335 		goto done;
8336 	}
8337 	eh = (struct ether_header *)header;
8338 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8339 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8340 	/* XXX validate IPv6 address */
8341 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8342 		goto done;
8343 	}
8344 	mne = bridge_lookup_mac_nat_entry_ipv6(sc, &dst);
8345 
8346 done:
8347 	return mne;
8348 }
8349 
8350 static boolean_t
8351 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8352     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8353 {
8354 	uint8_t                 *header;
8355 	struct ether_header     *eh;
8356 	ether_addr_t            ether_shost;
8357 	struct ip6_hdr          *ip6h;
8358 	struct in6_addr         saddr;
8359 	boolean_t               translate;
8360 
8361 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8362 	header = get_ether_ipv6_header_ptr(data, 0, TRUE);
8363 	if (header == NULL) {
8364 		translate = FALSE;
8365 		goto done;
8366 	}
8367 	eh = (struct ether_header *)header;
8368 	bcopy(eh->ether_shost, &ether_shost, sizeof(ether_shost));
8369 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8370 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8371 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8372 		bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8373 	}
8374 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8375 		goto done;
8376 	}
8377 	(void)bridge_update_mac_nat_entry_ipv6(sc, bif, &saddr,
8378 	    (const char *)ether_shost.octet);
8379 
8380 done:
8381 	return translate;
8382 }
8383 
8384 /*
8385  * Function: bridge_mac_nat_input:
8386  *
8387  * Purpose:
8388  *   Process a unicast packet arriving on the external interface `external_ifp`.
8389  *
8390  *   If the packet is ARP, IPv4, or IPv6, lookup the address from the packet in
8391  *   the mac_nat_entry table. If an entry is found, and the interface is
8392  *   not `external_ifp`, replace the destination MAC address in the
8393  *   ethernet header with the corresponding internal MAC address, and return
8394  *   the interface via `*dst_if`.
8395  *
8396  * Returns:
8397  *   NULL if the packet was deallocated during processing.
8398  *
8399  *   Otherwise, returns non-NULL packet that should:
8400  *   1) if `*dst_if` is NULL, continue on as an input packet
8401  *      over `external_ifp`, OR
8402  *   2) if `*dst_if` is not NULL, be delivered as an output packet
8403  *      over `*dst_if`.
8404  */
8405 static mbuf_t
8406 bridge_mac_nat_input(struct bridge_softc *sc, ifnet_t external_ifp,
8407     mbuf_t m, ifnet_t * dst_if)
8408 {
8409 	struct ether_header     *eh;
8410 	mbuf_t                  m0 = m;
8411 	struct mac_nat_entry    *mne = NULL;
8412 
8413 	BRIDGE_LOCK_ASSERT_HELD(sc);
8414 	*dst_if = NULL;
8415 	eh = mtod(m, struct ether_header *);
8416 	switch (eh->ether_type) {
8417 	case HTONS_ETHERTYPE_ARP:
8418 		mne = bridge_mac_nat_arp_input(sc, &m);
8419 		break;
8420 	case HTONS_ETHERTYPE_IP:
8421 		mne = bridge_mac_nat_ip_input(sc, &m);
8422 		break;
8423 	case HTONS_ETHERTYPE_IPV6:
8424 		mne = bridge_mac_nat_ipv6_input(sc, &m);
8425 		break;
8426 	default:
8427 		break;
8428 	}
8429 	if (m != NULL & mne != NULL) {
8430 		*dst_if = mne->mne_bif->bif_ifp;
8431 		if (*dst_if == external_ifp) {
8432 			/* receive packet for ifp */
8433 			*dst_if = NULL;
8434 		} else {
8435 			/* replace the destination MAC with internal one */
8436 			if (m != m0) {
8437 				/* it may have changed */
8438 				eh = mtod(m, struct ether_header *);
8439 			}
8440 			bcopy(mne->mne_mac, eh->ether_dhost,
8441 			    sizeof(eh->ether_dhost));
8442 		}
8443 	}
8444 	return m;
8445 }
8446 
8447 
8448 static mblist
8449 bridge_mac_nat_input_list(struct bridge_softc *sc, ifnet_t external_ifp,
8450     mbuf_t m, mbuf_t * forward_head)
8451 {
8452 	mblist          forward;
8453 	mbuf_t          next_packet;
8454 	mblist          ret;
8455 
8456 	mblist_init(&ret);
8457 	mblist_init(&forward);
8458 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8459 		ifnet_ref_t     dst_if;
8460 
8461 		/* take packet out of the list */
8462 		next_packet = scan->m_nextpkt;
8463 		scan->m_nextpkt = NULL;
8464 
8465 		scan = bridge_mac_nat_input(sc, external_ifp, scan, &dst_if);
8466 		if (scan != NULL) {
8467 			if (dst_if != NULL) {
8468 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8469 				    "%s MAC-NAT input translate to %s",
8470 				    sc->sc_if_xname, dst_if->if_xname);
8471 				/* use rcvif to store the egress interface */
8472 				mbuf_pkthdr_setrcvif(scan, dst_if);
8473 				/* add it to the forwarding list */
8474 				mblist_append(&forward, scan);
8475 			} else {
8476 				/* add it to the "continue on as input" list */
8477 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8478 				    "%s MAC-NAT input for %s",
8479 				    sc->sc_if_xname,
8480 				    external_ifp->if_xname);
8481 				mblist_append(&ret, scan);
8482 			}
8483 		}
8484 	}
8485 	*forward_head = forward.head;
8486 	return ret;
8487 }
8488 
8489 /*
8490  * bridge_mac_nat_translate_list:
8491  * Process a list of packets destined to the MAC-NAT interface `dst_if`
8492  * from the bridge member `sbif`.
8493  *
8494  * For each packet in the list, update the MAC-NAT record, and if
8495  * translation is required, translate it.
8496  *
8497  * Returns the list of packets that should be delivered to the MAC-NAT
8498  * interface.
8499  */
8500 static mbuf_t
8501 bridge_mac_nat_translate_list(struct bridge_softc * sc,
8502     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8503 {
8504 	mbuf_t          next_packet;
8505 	mblist          ret;
8506 
8507 	mblist_init(&ret);
8508 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
8509 		struct mac_nat_record   mnr;
8510 		bool                    translate_mac;
8511 
8512 		/* take packet out of the list */
8513 		next_packet = scan->m_nextpkt;
8514 		scan->m_nextpkt = NULL;
8515 		translate_mac = bridge_mac_nat_output(sc, sbif, &scan, &mnr);
8516 		if (scan != NULL) {
8517 			if (translate_mac) {
8518 				bridge_mac_nat_translate(&scan, &mnr,
8519 				    IF_LLADDR(dst_if));
8520 			}
8521 			if (scan != NULL) {
8522 				/* add it back to the list */
8523 				mblist_append(&ret, scan);
8524 			}
8525 		}
8526 	}
8527 	return ret.head;
8528 }
8529 
8530 /*
8531  * bridge_mac_nat_copy_and_translate_list:
8532  * Same as bridge_mac_nat_translate_list() except that a copy of the
8533  * packet list is returned instead.
8534  *
8535  * The packet list `m` is left unaltered.
8536  */
8537 static mbuf_t
8538 bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
8539     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8540 {
8541 	mbuf_t          next_packet;
8542 	mblist          ret;
8543 
8544 	mblist_init(&ret);
8545 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8546 		mbuf_ref_t              mc = NULL;
8547 		struct mac_nat_record   mnr;
8548 		bool                    translate_mac;
8549 
8550 		/* take packet out of the list, make a copy, put it back */
8551 		next_packet = scan->m_nextpkt;
8552 		scan->m_nextpkt = NULL;
8553 		mc = m_dup(scan, M_DONTWAIT);
8554 		scan->m_nextpkt = next_packet;
8555 		if (mc == NULL) {
8556 			continue;
8557 		}
8558 		translate_mac = bridge_mac_nat_output(sc, sbif, &mc, &mnr);
8559 		if (mc != NULL) {
8560 			if (translate_mac) {
8561 				bridge_mac_nat_translate(&mc, &mnr,
8562 				    IF_LLADDR(dst_if));
8563 			}
8564 			if (mc != NULL) {
8565 				/* add it to the new list */
8566 				mblist_append(&ret, mc);
8567 			}
8568 		}
8569 	}
8570 	return ret.head;
8571 }
8572 
/*
 * bridge_mac_nat_forward_list:
 * Deliver the packet list `m` to the egress interfaces stored in each
 * packet's rcvif field.
 *
 * The list is walked once and split into runs of consecutive packets that
 * share the same egress interface. Each run is handed to bridge_enqueue()
 * as a NULL-terminated sublist. The rcvif field of every packet is reset
 * to NULL as it is examined.
 */
static void
bridge_mac_nat_forward_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
    mbuf_t m)
{
	int             count = 0;
	ifnet_t         dst_if;
	mblist          list;
	int             n_lists = 0;
	mbuf_t          next_packet;

	mblist_init(&list);
	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
		ifnet_t         this_if;

		next_packet = scan->m_nextpkt;
		/* fetch the egress interface, then clear the field */
		this_if = mbuf_pkthdr_rcvif(scan);
		mbuf_pkthdr_setrcvif(scan, NULL);
		if (list.head == NULL) {
			/* start a new list */
			list.head = list.tail = scan;
			count = 1;
			dst_if = this_if;
		} else if (dst_if != this_if) {
			/* send up the previous chain */
			if (list.tail != NULL) {
				/* terminate the list */
				list.tail->m_nextpkt = NULL;
			}
			n_lists++;
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "(%s): sublist %u pkts %u",
			    dst_if->if_xname, n_lists, count);
			bridge_enqueue(bridge_ifp, NULL,
			    dst_if, etypef, list.head,
			    CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);

			/* start new list */
			list.head = list.tail = scan;
			count = 1;
			dst_if = this_if;
		} else {
			/* same egress interface: extend the current run */
			count++;
			list.tail = scan;
		}
		if (next_packet == NULL) {
			/* last list */
			n_lists++;
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "(%s): sublist %u pkts %u",
			    dst_if->if_xname, n_lists, count);
			bridge_enqueue(bridge_ifp, NULL,
			    dst_if, etypef, list.head,
			    CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
		}
	}
	return;
}
8630 
8631 /*
8632  * bridge_mac_nat_output:
8633  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8634  * from the interface 'bif'.
8635  *
8636  * Create a mac_nat_entry containing the source IP address and MAC address
8637  * from the packet. Populate a mac_nat_record with information detailing
8638  * how to translate the packet. Translation takes place later by calling
8639  * `bridge_mac_nat_translate()`.
8640  *
8641  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8642  * interface is generating an output packet. No translation is required in this
8643  * case, we just record the IP address used to prevent another bif from
8644  * claiming our IP address.
8645  *
8646  * Returns:
8647  * TRUE if the packet should be translated (*mnr updated as well),
8648  * FALSE otherwise.
8649  *
8650  * *data may be updated to point at a different mbuf chain or NULL if
8651  * the chain was deallocated during processing.
8652  */
8653 
8654 static boolean_t
8655 bridge_mac_nat_output(struct bridge_softc *sc,
8656     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8657 {
8658 	struct ether_header     *eh;
8659 	boolean_t               translate = FALSE;
8660 
8661 	BRIDGE_LOCK_ASSERT_HELD(sc);
8662 	assert(sc->sc_mac_nat_bif != NULL);
8663 
8664 	eh = mtod(*data, struct ether_header *);
8665 	if (mnr != NULL) {
8666 		bzero(mnr, sizeof(*mnr));
8667 		mnr->mnr_ether_type = eh->ether_type;
8668 	}
8669 	switch (eh->ether_type) {
8670 	case HTONS_ETHERTYPE_ARP:
8671 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8672 		break;
8673 	case HTONS_ETHERTYPE_IP:
8674 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8675 		break;
8676 	case HTONS_ETHERTYPE_IPV6:
8677 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8678 		break;
8679 	default:
8680 		break;
8681 	}
8682 	return translate;
8683 }
8684 
8685 static void
8686 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8687     const char eaddr[ETHER_ADDR_LEN])
8688 {
8689 	errno_t                 error;
8690 
8691 	if (mnr->mnr_arp_offset == 0) {
8692 		return;
8693 	}
8694 	/* replace the source hardware address */
8695 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8696 	    ETHER_ADDR_LEN, eaddr,
8697 	    MBUF_DONTWAIT);
8698 	if (error != 0) {
8699 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8700 		    "mbuf_copyback failed");
8701 		m_freem(*data);
8702 		*data = NULL;
8703 	}
8704 	return;
8705 }
8706 
8707 static void
8708 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8709 {
8710 	errno_t         error;
8711 	size_t          offset;
8712 
8713 	if (mnr->mnr_ip_header_len == 0) {
8714 		return;
8715 	}
8716 	/* update the UDP checksum */
8717 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8718 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8719 	    sizeof(mnr->mnr_ip_udp_csum),
8720 	    &mnr->mnr_ip_udp_csum,
8721 	    MBUF_DONTWAIT);
8722 	if (error != 0) {
8723 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8724 		    "mbuf_copyback uh_sum failed");
8725 		m_freem(*data);
8726 		*data = NULL;
8727 	}
8728 	/* update the DHCP must broadcast flag */
8729 	offset += sizeof(struct udphdr);
8730 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8731 	    sizeof(mnr->mnr_ip_dhcp_flags),
8732 	    &mnr->mnr_ip_dhcp_flags,
8733 	    MBUF_DONTWAIT);
8734 	if (error != 0) {
8735 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8736 		    "mbuf_copyback dp_flags failed");
8737 		m_freem(*data);
8738 		*data = NULL;
8739 	}
8740 }
8741 
/*
 * bridge_mac_nat_ipv6_translate:
 * Apply the IPv6 translation described by `mnr` to the packet `*data`.
 *
 * Only the four neighbor discovery message types handled in the switch
 * below are rewritten, and only when a link-layer address offset was
 * recorded. The link-layer address at that offset is replaced with
 * `eaddr`, and the ICMPv6 checksum is then recomputed and stored.
 *
 * On any mbuf_copyback() failure the packet is freed and `*data` is set
 * to NULL.
 */
static void
bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
    const char eaddr[ETHER_ADDR_LEN])
{
	uint16_t        cksum;
	errno_t         error;
	mbuf_t          m = *data;

	if (mnr->mnr_ip6_header_len == 0) {
		/* no IPv6 information was recorded for this packet */
		return;
	}
	switch (mnr->mnr_ip6_icmp6_type) {
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT:
	case ND_NEIGHBOR_SOLICIT:
	case ND_NEIGHBOR_ADVERT:
		if (mnr->mnr_ip6_lladdr_offset == 0) {
			/* nothing to do */
			return;
		}
		break;
	default:
		/* any other ICMPv6 type is left untouched */
		return;
	}

	/*
	 * replace the lladdr
	 */
	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
	    ETHER_ADDR_LEN, eaddr,
	    MBUF_DONTWAIT);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "mbuf_copyback lladdr failed");
		m_freem(m);
		*data = NULL;
		return;
	}

	/*
	 * recompute the icmp6 checksum
	 */

	/*
	 * skip past the ethernet header
	 * NOTE(review): presumably so that the offsets passed to
	 * mbuf_copyback() and in6_cksum() below are relative to the start of
	 * the IPv6 header -- confirm against in6_cksum().
	 */
	_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);

#define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
	/* set the checksum to zero */
	cksum = 0;
	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "mbuf_copyback cksum=0 failed");
		m_freem(m);
		*data = NULL;
		return;
	}
	/* compute and set the new checksum */
	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
	    mnr->mnr_ip6_icmp6_len);
	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
	if (error != 0) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "mbuf_copyback cksum failed");
		m_freem(m);
		*data = NULL;
		return;
	}
	/* restore the ethernet header */
	_mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
	return;
}
8816 
8817 static void
8818 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8819     const char eaddr[ETHER_ADDR_LEN])
8820 {
8821 	struct ether_header     *eh;
8822 
8823 	/* replace the source ethernet address with the single MAC */
8824 	eh = mtod(*data, struct ether_header *);
8825 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8826 	switch (mnr->mnr_ether_type) {
8827 	case HTONS_ETHERTYPE_ARP:
8828 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
8829 		break;
8830 
8831 	case HTONS_ETHERTYPE_IP:
8832 		bridge_mac_nat_ip_translate(data, mnr);
8833 		break;
8834 
8835 	case HTONS_ETHERTYPE_IPV6:
8836 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8837 		break;
8838 
8839 	default:
8840 		break;
8841 	}
8842 	return;
8843 }
8844 
8845 /*
8846  * bridge packet filtering
8847  */
8848 
/*
 * Perform basic checks on header size since
 * pfil assumes ip_input has already processed
 * it for it.  Cut-and-pasted from ip_input.c.
 * Given how simple the IPv6 version is,
 * does the IPv4 version really need to be
 * this complicated?
 *
 * XXX Should we update ipstat here, or not?
 * XXX Right now we update ipstat but not
 * XXX csum_counter.
 *
 * `*mp` is expected to start at the IPv4 header. On return `*mp` holds the
 * (possibly replaced) mbuf chain, or NULL if a pullup/copyup failed.
 *
 * Returns 0 if the header passes all checks, -1 otherwise. The packet is
 * not freed here on failure.
 */
static int
bridge_ip_checkbasic(struct mbuf **mp)
{
	struct mbuf *m = *mp;
	struct ip *ip;
	int len, hlen;
	u_short sum;

	if (*mp == NULL) {
		return -1;
	}

	/* make the base IP header contiguous and suitably aligned */
	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
		/* max_linkhdr is already rounded up to nearest 4-byte */
		if ((m = m_copyup(m, sizeof(struct ip),
		    max_linkhdr)) == NULL) {
			/* XXXJRT new stat, please */
			ipstat.ips_toosmall++;
			goto bad;
		}
	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
			ipstat.ips_toosmall++;
			goto bad;
		}
	}
	ip = mtod(m, struct ip *);
	if (ip == NULL) {
		goto bad;
	}

	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
		ipstat.ips_badvers++;
		goto bad;
	}
	/* header length field counts 32-bit words */
	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
		ipstat.ips_badhlen++;
		goto bad;
	}
	if (hlen > m->m_len) {
		/* header with options spills past the first mbuf */
		if ((m = m_pullup(m, hlen)) == 0) {
			ipstat.ips_badhlen++;
			goto bad;
		}
		ip = mtod(m, struct ip *);
		if (ip == NULL) {
			goto bad;
		}
	}

	/* use the recorded checksum result if present, else verify now */
	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
	} else {
		if (hlen == sizeof(struct ip)) {
			sum = in_cksum_hdr(ip);
		} else {
			sum = in_cksum(m, hlen);
		}
	}
	if (sum) {
		ipstat.ips_badsum++;
		goto bad;
	}

	/* Retrieve the packet length. */
	len = ntohs(ip->ip_len);

	/*
	 * Check for additional length bogosity
	 */
	if (len < hlen) {
		ipstat.ips_badlen++;
		goto bad;
	}

	/*
	 * Check that the amount of data in the buffers
	 * is at least as much as the IP header would have us expect.
	 * Drop packet if shorter than we expect.
	 */
	if (m->m_pkthdr.len < len) {
		ipstat.ips_tooshort++;
		goto bad;
	}

	/* Checks out, proceed */
	*mp = m;
	return 0;

bad:
	/* hand the current chain (possibly NULL) back to the caller */
	*mp = m;
	return -1;
}
8955 
8956 /*
8957  * Same as above, but for IPv6.
8958  * Cut-and-pasted from ip6_input.c.
8959  * XXX Should we update ip6stat, or not?
8960  */
8961 static int
8962 bridge_ip6_checkbasic(struct mbuf **mp)
8963 {
8964 	struct mbuf *m = *mp;
8965 	struct ip6_hdr *ip6;
8966 
8967 	/*
8968 	 * If the IPv6 header is not aligned, slurp it up into a new
8969 	 * mbuf with space for link headers, in the event we forward
8970 	 * it.  Otherwise, if it is aligned, make sure the entire base
8971 	 * IPv6 header is in the first mbuf of the chain.
8972 	 */
8973 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8974 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8975 		/* max_linkhdr is already rounded up to nearest 4-byte */
8976 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8977 		    max_linkhdr)) == NULL) {
8978 			/* XXXJRT new stat, please */
8979 			ip6stat.ip6s_toosmall++;
8980 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8981 			goto bad;
8982 		}
8983 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8984 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8985 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8986 			ip6stat.ip6s_toosmall++;
8987 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8988 			goto bad;
8989 		}
8990 	}
8991 
8992 	ip6 = mtod(m, struct ip6_hdr *);
8993 
8994 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8995 		ip6stat.ip6s_badvers++;
8996 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8997 		goto bad;
8998 	}
8999 
9000 	/* Checks out, proceed */
9001 	*mp = m;
9002 	return 0;
9003 
9004 bad:
9005 	*mp = m;
9006 	return -1;
9007 }
9008 
/*
 * the PF routines expect to be called from ip_input, so we
 * need to do and undo here some of the same processing.
 *
 * XXX : this is heavily inspired on bridge_pfil()
 *
 * Parameters:
 *   mp               in/out packet, starting at the ethernet header
 *   ifp              interface handed to pf_af_hook(); when NULL the hook
 *                    is not called
 *   sc_filter_flags  IFBF_FILT_* flags of the bridge
 *   input            direction flag passed through to pf_af_hook()
 *
 * Returns 0 if the packet may proceed (`*mp` valid, with its ethernet and
 * optional SNAP/LLC header restored). Returns non-zero otherwise, in
 * which case `*mp` is NULL: either the packet was freed here, consumed by
 * the filter, or lost in a failed m_pullup()/M_PREPEND().
 */
static int
bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
    bool input)
{
	/*
	 * XXX : mpetit : heavily inspired by bridge_pfil()
	 */

	int snap, error, i, hlen;
	struct ether_header *eh1, eh2;
	struct ip *ip;
	struct llc llc1;
	u_int16_t ether_type;

	snap = 0;
	error = -1;     /* Default error if not error == 0 */

	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
		return 0; /* filtering is disabled */
	}
	/* make up to max_protohdr bytes of headers contiguous */
	i = min((*mp)->m_pkthdr.len, max_protohdr);
	if ((*mp)->m_len < i) {
		*mp = m_pullup(*mp, i);
		if (*mp == NULL) {
			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
			return -1;
		}
	}

	eh1 = mtod(*mp, struct ether_header *);
	ether_type = ntohs(eh1->ether_type);

	/*
	 * Check for SNAP/LLC.
	 */
	if (ether_type < ETHERMTU) {
		struct llc *llc2 = (struct llc *)(eh1 + 1);

		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
		    llc2->llc_dsap == LLC_SNAP_LSAP &&
		    llc2->llc_ssap == LLC_SNAP_LSAP &&
		    llc2->llc_control == LLC_UI) {
			/* the real ether type lives in the SNAP header */
			ether_type = htons(llc2->llc_un.type_snap.ether_type);
			snap = 1;
		}
	}

	/*
	 * If we're trying to filter bridge traffic, don't look at anything
	 * other than IP and ARP traffic.  If the filter doesn't understand
	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
	 * but of course we don't have an AppleTalk filter to begin with.
	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
	 * ARP traffic.)
	 */
	switch (ether_type) {
	case ETHERTYPE_ARP:
	case ETHERTYPE_REVARP:
		return 0;         /* Automatically pass */

	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		break;
	default:
		/*
		 * Check to see if the user wants to pass non-ip
		 * packets, these will not be checked by pf and
		 * passed unconditionally so the default is to drop.
		 */
		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
			goto bad;
		}
		break;
	}

	/* Strip off the Ethernet header and keep a copy. */
	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
	m_adj(*mp, ETHER_HDR_LEN);

	/* Strip off snap header, if present */
	if (snap) {
		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
		m_adj(*mp, sizeof(struct llc));
	}

	/*
	 * Check the IP header for alignment and errors
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		error = bridge_ip_checkbasic(mp);
		break;
	case ETHERTYPE_IPV6:
		error = bridge_ip6_checkbasic(mp);
		break;
	default:
		error = 0;
		break;
	}
	if (error) {
		goto bad;
	}

	error = 0;

	/*
	 * Run the packet through pf rules
	 */
	switch (ether_type) {
	case ETHERTYPE_IP:
		/*
		 * before calling the firewall, swap fields the same as
		 * IP does. here we assume the header is contiguous
		 */
		ip = mtod(*mp, struct ip *);

		ip->ip_len = ntohs(ip->ip_len);
		ip->ip_off = ntohs(ip->ip_off);

		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}

		/* Recalculate the ip checksum and restore byte ordering */
		ip = mtod(*mp, struct ip *);
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
		if (hlen < (int)sizeof(struct ip)) {
			goto bad;
		}
		if (hlen > (*mp)->m_len) {
			if ((*mp = m_pullup(*mp, hlen)) == 0) {
				goto bad;
			}
			ip = mtod(*mp, struct ip *);
			if (ip == NULL) {
				goto bad;
			}
		}
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);
		ip->ip_sum = 0;
		if (hlen == sizeof(struct ip)) {
			ip->ip_sum = in_cksum_hdr(ip);
		} else {
			ip->ip_sum = in_cksum(*mp, hlen);
		}
		break;

	case ETHERTYPE_IPV6:
		if (ifp != NULL) {
			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
		}

		if (*mp == NULL || error != 0) { /* filter may consume */
			break;
		}
		break;
	default:
		error = 0;
		break;
	}

	if (*mp == NULL) {
		/* the filter took the packet; report its verdict */
		return error;
	}
	if (error != 0) {
		goto bad;
	}

	/* from here on, a failed prepend is reported as -1 */
	error = -1;

	/*
	 * Finally, put everything back the way it was and return
	 */
	if (snap) {
		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
		if (*mp == NULL) {
			return error;
		}
		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
	}

	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
	if (*mp == NULL) {
		return error;
	}
	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);

	return 0;

bad:
	/* drop the packet and report the current error value */
	m_freem(*mp);
	*mp = NULL;
	return error;
}
9215 
9216 #if BRIDGESTP
9217 static void
9218 bridge_bstp_input_list(struct bstp_port *bp, struct mbuf *head)
9219 {
9220 	mbuf_t  next_packet = NULL;
9221 
9222 	for (mbuf_t scan = head; scan != NULL; scan = next_packet) {
9223 		next_packet = scan->m_nextpkt;
9224 		scan->m_nextpkt = NULL;
9225 		bstp_input(bp, scan);
9226 	}
9227 }
9228 #endif /* BRIDGESTP */
9229 
9230 static mblist
9231 bridge_filter_arp_list(struct bridge_iflist * bif, mbuf_t m)
9232 {
9233 	mbuf_t          next_packet = NULL;
9234 	mblist          ret;
9235 
9236 	mblist_init(&ret);
9237 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9238 		errno_t                 error;
9239 
9240 		/* take packet out of the list */
9241 		next_packet = scan->m_nextpkt;
9242 		scan->m_nextpkt = NULL;
9243 		/* filter the ARP packet */
9244 		error = bridge_host_filter_arp(bif, &scan);
9245 		if (error != 0 && scan != NULL) {
9246 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9247 				brlog_mbuf_data(scan, 0,
9248 				    sizeof(struct ether_header) +
9249 				    sizeof(struct ip));
9250 			}
9251 			m_freem(scan);
9252 			scan = NULL;
9253 		}
9254 		if (scan != NULL) {
9255 			/* add it to the list */
9256 			mblist_append(&ret, scan);
9257 		}
9258 	}
9259 	return ret;
9260 }
9261 
9262 static mbuf_t
9263 bridge_filter_checksum(ifnet_t bridge_ifp, struct bridge_iflist * bif, mbuf_t m,
9264     bool is_ipv4, bool host_filter, bool checksum)
9265 {
9266 	uint32_t                dbgf = 0;
9267 	errno_t                 error;
9268 	ip_packet_info          info;
9269 	u_int                   mac_hlen = sizeof(struct ether_header);
9270 
9271 	if (host_filter) {
9272 		dbgf |= BR_DBGF_HOSTFILTER;
9273 	}
9274 	if (checksum) {
9275 		dbgf |= BR_DBGF_CHECKSUM;
9276 	}
9277 	/* get the IP protocol header */
9278 	error = bridge_get_ip_proto(&m, mac_hlen, is_ipv4, &info,
9279 	    &bif->bif_stats.brms_in_ip);
9280 	if (error != 0) {
9281 		BRIDGE_LOG(LOG_NOTICE, dbgf,
9282 		    "%s(%s) bridge_get_ip_proto failed %d",
9283 		    bridge_ifp->if_xname,
9284 		    bif->bif_ifp->if_xname, error);
9285 		goto drop;
9286 	}
9287 	if (host_filter) {
9288 		bool            drop = true;
9289 
9290 		/* restrict IP protocols */
9291 		switch (info.ip_proto) {
9292 		case IPPROTO_ICMP:
9293 		case IPPROTO_IGMP:
9294 			drop = !is_ipv4;
9295 			break;
9296 		case IPPROTO_TCP:
9297 		case IPPROTO_UDP:
9298 			drop = false;
9299 			break;
9300 		case IPPROTO_ICMPV6:
9301 			drop = is_ipv4;
9302 			break;
9303 		default:
9304 			break;
9305 		}
9306 		if (drop) {
9307 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
9308 			goto drop;
9309 		}
9310 		bridge_hostfilter_stats.brhf_ip_ok += 1;
9311 	}
9312 	if (checksum) {
9313 		/* need to compute IP/UDP/TCP/checksums */
9314 		error = bridge_offload_checksum(&m, &info, &bif->bif_stats);
9315 		if (error != 0) {
9316 			BRIDGE_LOG(LOG_NOTICE, dbgf,
9317 			    "%s(%s) bridge_offload_checksum failed %d",
9318 			    bridge_ifp->if_xname,
9319 			    bif->bif_ifp->if_xname, error);
9320 			goto drop;
9321 		}
9322 	}
9323 	return m;
9324 
9325 drop:
9326 	/* toss the packet */
9327 	if (m != NULL) {
9328 		if (host_filter &&
9329 		    BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9330 			brlog_mbuf_data(m, 0,
9331 			    sizeof(struct ether_header) +
9332 			    sizeof(struct ip));
9333 		}
9334 		m_freem(m);
9335 		m = NULL;
9336 	}
9337 	return NULL;
9338 }
9339 
9340 static mblist
9341 bridge_filter_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9342     mbuf_t in_list, ether_type_flag_t etypef, bool host_filter, bool checksum)
9343 {
9344 	bool                    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
9345 	mbuf_t                  next_packet = NULL;
9346 	mblist                  ret;
9347 
9348 	mblist_init(&ret);
9349 	for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9350 		/* take packet out of the list */
9351 		next_packet = scan->m_nextpkt;
9352 		scan->m_nextpkt = NULL;
9353 		scan = bridge_filter_checksum(bridge_ifp, bif,
9354 		    scan, is_ipv4, host_filter, checksum);
9355 		if (scan != NULL) {
9356 			/* add packet to the list */
9357 			mblist_append(&ret, scan);
9358 		}
9359 	}
9360 	return ret;
9361 }
9362 
9363 static mbuf_t
9364 bridge_checksum_offload_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9365     mbuf_t m, bool is_ipv4)
9366 {
9367 	mblist          ret;
9368 	mbuf_t          next_packet;
9369 
9370 	mblist_init(&ret);
9371 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
9372 		uint32_t        csum_flags;
9373 
9374 		/* take it out of the list */
9375 		next_packet = scan->m_nextpkt;
9376 		scan->m_nextpkt = NULL;
9377 
9378 		csum_flags = scan->m_pkthdr.csum_flags;
9379 		if ((csum_flags & checksum_request_flags) != 0) {
9380 			/* compute the checksum now */
9381 			scan = bridge_filter_checksum(bridge_ifp, bif, scan,
9382 			    is_ipv4, false, true);
9383 			if (scan != NULL) {
9384 				/* clear offload now */
9385 				scan->m_pkthdr.csum_flags &= csum_flags;
9386 			}
9387 		}
9388 		if (scan != NULL) {
9389 			mblist_append(&ret, scan);
9390 		}
9391 	}
9392 	return ret.head;
9393 }
9394 
9395 static mbuf_t
9396 copy_broadcast_packet(mbuf_t m)
9397 {
9398 	mbuf_t  mc;
9399 
9400 	/* make a copy of the packet */
9401 	mc = m_dup(m, M_DONTWAIT);
9402 	if (mc != NULL) {
9403 		struct ether_header *eh;
9404 
9405 		/* make copy look like it is broadcast */
9406 		mc->m_flags |= M_BCAST;
9407 		eh = mtod(mc, struct ether_header *);
9408 		bcopy(etherbroadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN);
9409 	}
9410 	return mc;
9411 }
9412 
9413 static mblist
9414 bridge_find_broadcast_ipv4(mbuf_t in_list, mbuf_t * ip_bcast_head)
9415 {
9416 	mblist          ip_bcast;
9417 	mbuf_t          next_packet = NULL;
9418 	mblist          ret;
9419 
9420 	mblist_init(&ret);
9421 	mblist_init(&ip_bcast);
9422 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
9423 		mbuf_t  bcast_pkt = NULL;
9424 		uint8_t *header;
9425 
9426 		/* take packet out of the list */
9427 		next_packet = scan->m_nextpkt;
9428 		scan->m_nextpkt = NULL;
9429 
9430 		header = get_ether_ip_header_ptr(&scan, FALSE);
9431 		if (header != NULL) {
9432 			struct in_addr  dst;
9433 			struct ip       *iphdr;
9434 
9435 			iphdr = (struct ip *)(header + sizeof(struct ether_header));
9436 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
9437 			if (dst.s_addr == INADDR_BROADCAST) {
9438 				bcast_pkt = copy_broadcast_packet(scan);
9439 			}
9440 		}
9441 		if (bcast_pkt != NULL) {
9442 			/* add packet to broadcast list */
9443 			mblist_append(&ip_bcast, bcast_pkt);
9444 		}
9445 		if (scan != NULL) {
9446 			/* add packet back into the list */
9447 			mblist_append(&ret, scan);
9448 		}
9449 	}
9450 	*ip_bcast_head = ip_bcast.head;
9451 	return ret;
9452 }
9453 
9454 static ifnet_t
9455 bridge_find_member(struct bridge_softc * sc, uint8_t * lladdr,
9456     struct bridge_iflist * sbif)
9457 {
9458 	struct bridge_iflist * bif;
9459 
9460 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
9461 		if (bif == sbif) {
9462 			/* skip the input member */
9463 			continue;
9464 		}
9465 		if (_ether_cmp(IF_LLADDR(bif->bif_ifp), lladdr) == 0) {
9466 			return bif->bif_ifp;
9467 		}
9468 	}
9469 	return NULL;
9470 }
9471 
9472 
9473 /*
9474  * Function: bridge_input_list
9475  *
9476  * Purpose:
9477  *   Process a list of input packets through the bridge.
9478  *   The caller ensures that all of the packets in the list
9479  *  `list_head` .. `list_tail` have the same ethernet header.
9480  *
9481  * Returns:
9482  *    Non-NULL head of the chain of packets that were not consumed/freed,
9483  *    *tail_p set to the tail of that chain.
9484  *
9485  *    NULL if all of the packets were consumed.
9486  */
9487 static mblist
9488 bridge_input_list(struct bridge_softc * sc, ifnet_t ifp,
9489     struct ether_header * eh_in_p, mblist list, bool is_promisc)
9490 {
9491 	struct bridge_iflist *  bif;
9492 	ifnet_t                 bridge_ifp;
9493 	bool                    checksum_offload;
9494 	uint8_t *               dhost;
9495 #if BRIDGESTP
9496 	bool                    discarding = false;
9497 #endif /* BRIDGESTP */
9498 	ifnet_t                 dst_if = NULL;
9499 	errno_t                 error;
9500 	ether_type_flag_t       etypef;
9501 	bool                    host_filter;
9502 	bool                    host_filter_drop = false;
9503 	mbuf_ref_t              ip_bcast = NULL;
9504 	bool                    is_bridge_mac = false;
9505 	bool                    is_broadcast;
9506 	bool                    is_ifp_mac;
9507 	ifnet_t                 member_input = NULL;
9508 	uint8_t *               shost;
9509 	bool                    uses_virtio = false;
9510 	uint16_t                vlan;
9511 
9512 	if (ifp->if_bridge == NULL) {
9513 		/* no longer part of bridge */
9514 		goto done;
9515 	}
9516 	bridge_ifp = sc->sc_ifp;
9517 	is_broadcast = IS_BCAST_MCAST(list.head);
9518 	is_ifp_mac = (!is_broadcast && !is_promisc);
9519 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9520 	    "%s from %s count %d head 0x%llx.0x%llx tail 0x%llx.0x%llx",
9521 	    bridge_ifp->if_xname, ifp->if_xname, list.count,
9522 	    (uint64_t)VM_KERNEL_ADDRPERM(list.head),
9523 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.head, void *)),
9524 	    (uint64_t)VM_KERNEL_ADDRPERM(list.tail),
9525 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.tail, void *)));
9526 
9527 	/* assume we'll return all packets */
9528 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
9529 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9530 		    "%s not running passing along",
9531 		    bridge_ifp->if_xname);
9532 		goto done;
9533 	}
9534 
9535 	vlan = VLANTAGOF(m);
9536 
9537 	/* lookup the bridge member */
9538 	BRIDGE_LOCK(sc);
9539 	bif = bridge_lookup_member_if(sc, ifp);
9540 	if (bif == NULL) {
9541 		BRIDGE_UNLOCK(sc);
9542 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9543 		    "%s bridge_lookup_member_if failed",
9544 		    bridge_ifp->if_xname);
9545 		goto done;
9546 	}
9547 
9548 	uses_virtio = bif_uses_virtio(bif);
9549 
9550 	/*
9551 	 * host filter drops packets that:
9552 	 * - are not ARP, IPv4, or IPv6
9553 	 * - have incorrect source MAC address
9554 	 */
9555 	host_filter = (bif->bif_flags & BIFF_HOST_FILTER) != 0;
9556 	etypef = ether_type_flag_get(eh_in_p->ether_type);
9557 	if (host_filter
9558 	    && (etypef & ETHER_TYPE_FLAG_IP_ARP) == 0) {
9559 		/* ether type not one of ARP, IPv4, or IPv6 */
9560 		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
9561 		host_filter_drop = true;
9562 	} else if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
9563 	    bcmp(eh_in_p->ether_shost, bif->bif_hf_hwsrc, ETHER_ADDR_LEN)
9564 	    != 0) {
9565 		/* only allow the single source MAC address */
9566 		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr,
9567 		    __func__, __LINE__);
9568 		host_filter_drop = true;
9569 	}
9570 	if (host_filter_drop) {
9571 		BRIDGE_UNLOCK(sc);
9572 		m_freem_list(list.head);
9573 		list.head = list.tail = NULL;
9574 		goto done;
9575 	}
9576 
9577 #if BRIDGESTP
9578 	discarding = (bif->bif_ifflags & IFBIF_STP) != 0 &&
9579 	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING;
9580 #endif /* BRIDGESTP */
9581 
9582 	dhost = eh_in_p->ether_dhost;
9583 	shost = eh_in_p->ether_shost;
9584 	/*
9585 	 * Reserved multicast address listed in 802.1D section 7.12.6
9586 	 * must not be forwarded by the bridge.
9587 	 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
9588 	 */
9589 	if (is_broadcast) {
9590 		if (IS_MCAST(list.head)) {
9591 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
9592 			    " multicast: "
9593 			    "%02x:%02x:%02x:%02x:%02x:%02x",
9594 			    dhost[0], dhost[1],
9595 			    dhost[2], dhost[3],
9596 			    dhost[4], dhost[5]);
9597 		}
9598 		if (bcmp(dhost, bstp_etheraddr, (ETHER_ADDR_LEN - 1)) == 0) {
9599 			if (dhost[5] == BSTP_ETHERADDR_RANGE_FIRST) {
9600 				/* multicast for spanning tree */
9601 #if BRIDGESTP
9602 				bridge_bstp_input_list(&bif->bif_stp, list.head);
9603 #else /* BRIDGESTP */
9604 				m_freem_list(list.head);
9605 #endif /* BRIDGESTP */
9606 				list.head = list.tail = NULL;
9607 				BRIDGE_UNLOCK(sc);
9608 				goto done;
9609 			}
9610 			if (dhost[5] <= BSTP_ETHERADDR_RANGE_LAST) {
9611 				/* allow packet to continue up the stack */
9612 				BRIDGE_UNLOCK(sc);
9613 				goto done;
9614 			}
9615 		}
9616 		/* broadcast to all members */
9617 		os_atomic_add(&bridge_ifp->if_imcasts, list.count, relaxed);
9618 	}
9619 
9620 #if BRIDGESTP
9621 	if (discarding) {
9622 		BRIDGE_UNLOCK(sc);
9623 		goto done;
9624 	}
9625 #endif /* BRIDGESTP */
9626 
9627 	/* If the interface is learning, record the address. */
9628 	if ((bif->bif_ifflags & IFBIF_LEARNING) != 0) {
9629 		error = bridge_rtupdate(sc, shost, vlan, bif, 0, IFBAF_DYNAMIC);
9630 		/*
9631 		 * If the interface has addresses limits then deny any source
9632 		 * that is not in the cache.
9633 		 */
9634 		if (error != 0 && bif->bif_addrmax) {
9635 			BRIDGE_UNLOCK(sc);
9636 			goto done;
9637 		}
9638 	}
9639 #if BRIDGESTP
9640 	if ((bif->bif_ifflags & IFBIF_STP) != 0 &&
9641 	    bif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
9642 		BRIDGE_UNLOCK(sc);
9643 		goto done;
9644 	}
9645 #endif /* BRIDGESTP */
9646 
9647 	/*
9648 	 * If the packet is not IP, let the host filter drop ARP packets.
9649 	 * Otherwise, if the host filter is enabled or we need to compute
9650 	 * checksums, do that.
9651 	 * Otherwise, if MAC-NAT is enabled and this is an IPv4 packet,
9652 	 * check for IPv4 broadcast packets. Accumulate those in a separate
9653 	 * list `ip_bcast`.
9654 	 */
9655 	checksum_offload = bif_has_checksum_offload(bif);
9656 	if (!ether_type_flag_is_ip(etypef)) {
9657 		/* host filter process ARP */
9658 		if (host_filter) {
9659 			/* host filter check earlier means this must be ARP */
9660 			VERIFY(etypef == ETHER_TYPE_FLAG_ARP);
9661 			list = bridge_filter_arp_list(bif, list.head);
9662 			if (list.head == NULL) {
9663 				VERIFY(list.tail == NULL);
9664 				BRIDGE_UNLOCK(sc);
9665 				goto done;
9666 			}
9667 		}
9668 	} else if (host_filter || checksum_offload) {
9669 		/* host filter and/or checksum */
9670 		list = bridge_filter_checksum_list(bridge_ifp, bif,
9671 		    list.head, etypef, host_filter, checksum_offload);
9672 		if (list.head == NULL) {
9673 			VERIFY(list.tail == NULL);
9674 			BRIDGE_UNLOCK(sc);
9675 			goto done;
9676 		}
9677 	} else if (is_ifp_mac && bif == sc->sc_mac_nat_bif &&
9678 	    etypef == ETHER_TYPE_FLAG_IPV4) {
9679 		/* look for broadcast IPv4 packet */
9680 		list = bridge_find_broadcast_ipv4(list.head, &ip_bcast);
9681 		if (list.head == NULL && ip_bcast == NULL) {
9682 			/* all packets were consumed */
9683 			BRIDGE_UNLOCK(sc);
9684 			goto done;
9685 		}
9686 	}
9687 
9688 	/*
9689 	 * If the bridge has an address assigned, and the destination MAC
9690 	 * matches the bridge interface, claim the packets for the bridge
9691 	 * interface.
9692 	 */
9693 	if ((sc->sc_flags & SCF_ADDRESS_ASSIGNED) != 0 &&
9694 	    !is_broadcast && _ether_cmp(dhost, IF_LLADDR(bridge_ifp)) == 0) {
9695 		is_bridge_mac = true;
9696 	}
9697 	if (is_ifp_mac) {
9698 		/* unicast to the interface */
9699 		if (sc->sc_mac_nat_bif == bif) {
9700 			mbuf_ref_t  forward = NULL;
9701 
9702 			if (list.head != NULL) {
9703 				/* handle MAC-NAT if enabled */
9704 				list = bridge_mac_nat_input_list(sc, ifp,
9705 				    list.head, &forward);
9706 			}
9707 			if (ip_bcast != NULL) {
9708 				/* forward to all members except this one */
9709 				/* bridge_broadcast_list unlocks */
9710 				bridge_broadcast_list(sc, bif, etypef,
9711 				    ip_bcast, pkt_direction_RX);
9712 			} else {
9713 				BRIDGE_UNLOCK(sc);
9714 			}
9715 			if (forward != NULL) {
9716 				bridge_mac_nat_forward_list(bridge_ifp, etypef,
9717 				    forward);
9718 			}
9719 		} else {
9720 			BRIDGE_UNLOCK(sc);
9721 		}
9722 		/* unicast packets for this interface do not get forwarded */
9723 		goto done;
9724 	}
9725 	if (is_bridge_mac || list.head == NULL) {
9726 		BRIDGE_UNLOCK(sc);
9727 		goto done;
9728 	}
9729 	if (!is_broadcast) {
9730 		/* find where to send the packet */
9731 		dst_if = bridge_rtlookup(sc, dhost, vlan);
9732 		if (ifp == dst_if) {
9733 			/* nothing to forward */
9734 			BRIDGE_UNLOCK(sc);
9735 			goto done;
9736 		}
9737 		if (dst_if == NULL) {
9738 			/* if a member is the dhost, deliver as input */
9739 			member_input = bridge_find_member(sc, dhost, bif);
9740 			if (member_input != NULL) {
9741 				/* grab packets destined to member */
9742 				BRIDGE_UNLOCK(sc);
9743 				goto done;
9744 			}
9745 			/* if a member is shost, there's a loop, drop it */
9746 			if (bridge_find_member(sc, shost, bif) != NULL) {
9747 				BRIDGE_UNLOCK(sc);
9748 				m_freem_list(list.head);
9749 				list.head = list.tail = NULL;
9750 				goto done;
9751 			}
9752 		}
9753 	}
9754 	if (dst_if == NULL) {
9755 		mbuf_t  m;
9756 
9757 		m = copy_packet_list(list.head);
9758 		if (m != NULL) {
9759 			/* bridge_broadcast_list unlocks */
9760 			bridge_broadcast_list(sc, bif, etypef, m,
9761 			    pkt_direction_RX);
9762 		} else {
9763 			BRIDGE_UNLOCK(sc);
9764 		}
9765 	} else {
9766 		/* bridge_forward_list() consumes list and unlocks */
9767 		bridge_forward_list(sc, bif, dst_if, etypef, list.head);
9768 		list.head = list.tail = NULL;
9769 	}
9770 
9771 done:
9772 	if (list.head != NULL) {
9773 		if (member_input != NULL) {
9774 			/* member gets the packets */
9775 			inject_input_packet_list(member_input, list.head, true);
9776 			list.head = list.tail = NULL;
9777 		} else if (is_bridge_mac) {
9778 			/* bridge consumes all the unicast packets */
9779 			bridge_interface_input_list(bridge_ifp, etypef, list,
9780 			    uses_virtio);
9781 			list.head = list.tail = NULL;
9782 		} else {
9783 			adjust_input_packet_list(list.head);
9784 		}
9785 	}
9786 	return list;
9787 }
9788 
9789 static inline void
9790 update_mbuf_flags(struct ifnet * ifp, mbuf_t m, struct ether_header * eh)
9791 {
9792 	/* duplicate some of the work done in ether_demux */
9793 	if ((eh->ether_dhost[0] & 1) == 0) {
9794 		if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp)) != 0) {
9795 			m->m_flags |= M_PROMISC;
9796 		}
9797 	} else {
9798 		/* Check for broadcast */
9799 		if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
9800 			m->m_flags |= M_BCAST;
9801 		} else {
9802 			m->m_flags |= M_MCAST;
9803 		}
9804 	}
9805 	if (m->m_flags & M_HASFCS) {
9806 		/*
9807 		 * If the M_HASFCS is set by the driver we want to make sure
9808 		 * that we strip off the trailing FCS data before handing it
9809 		 * up the stack.
9810 		 */
9811 		m_adj(m, -ETHER_CRC_LEN);
9812 		m->m_flags &= ~M_HASFCS;
9813 	}
9814 	return;
9815 }
9816 
9817 static mbuf_t
9818 bridge_pf_list(mbuf_t m, ifnet_t ifp, uint32_t sc_filter_flags, bool input)
9819 {
9820 	mbuf_t  next_packet = NULL;
9821 	mblist  ret;
9822 
9823 	mblist_init(&ret);
9824 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9825 		next_packet = scan->m_nextpkt;
9826 
9827 		/* remove packet from list, and pass through PF */
9828 		scan->m_nextpkt = NULL;
9829 		MBUF_INPUT_CHECK(scan, ifp);
9830 		bridge_pf(&scan, ifp, sc_filter_flags, input);
9831 		if (scan != NULL) {
9832 			/* add packet back to the list */
9833 			mblist_append(&ret, scan);
9834 		}
9835 	}
9836 	return ret.head;
9837 }
9838 
9839 static inline bool
9840 bridge_check_frame_header(struct bridge_softc * sc, ifnet_t ifp, mbuf_t m)
9841 {
9842 	bool                    included = false;
9843 	char * __single         header;
9844 	size_t                  header_length = 0;
9845 
9846 	header = m->m_pkthdr.pkt_hdr;
9847 	if (header >= (char *)mbuf_datastart(m) &&
9848 	    header <= mtod(m, char *)) {
9849 		header_length = mtod(m, char *) - header;
9850 		if (header_length >= ETHER_HDR_LEN) {
9851 			included = true;
9852 		}
9853 	}
9854 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9855 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
9856 	    "header length %lu", sc->sc_ifp->if_xname,
9857 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
9858 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
9859 	    (uint64_t)VM_KERNEL_ADDRPERM(header),
9860 	    included ? "inside" : "outside", header_length);
9861 	if (!included) {
9862 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9863 		    "%s: frame_header outside mbuf", ifp->if_xname);
9864 	}
9865 	return included;
9866 }
9867 
9868 
9869 mbuf_t
9870 bridge_early_input(struct ifnet *ifp, mbuf_t in_list, u_int32_t cnt)
9871 {
9872 	struct ether_header eh;
9873 	mblist          list;
9874 	volatile bool   list_is_promisc;
9875 	int             n_lists = 0;
9876 	mbuf_t          next_packet = NULL;
9877 	mblist          ret;
9878 	struct bridge_softc * __single sc = ifp->if_bridge;
9879 	uint32_t        sc_filter_flags;
9880 
9881 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9882 	    "(%s): count %u", ifp->if_xname, cnt);
9883 
9884 	/* run packet list through PF first */
9885 	sc_filter_flags = sc->sc_filter_flags;
9886 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
9887 		in_list = bridge_pf_list(in_list, ifp, sc_filter_flags, true);
9888 	}
9889 
9890 	/* form sublists with the same ethernet header */
9891 	mblist_init(&list);
9892 	mblist_init(&ret);
9893 	for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9894 		struct ether_header *   eh_p;
9895 		volatile bool           is_promisc;
9896 		mblist                  resid;
9897 
9898 		/* take it out of the list */
9899 		next_packet = scan->m_nextpkt;
9900 		scan->m_nextpkt = NULL;
9901 
9902 		/* don't loop the packet */
9903 		if ((scan->m_flags & M_PROTO1) != 0) {
9904 			mblist_append(&ret, scan);
9905 			continue;
9906 		}
9907 		/* Check if this mbuf looks valid */
9908 		MBUF_INPUT_CHECK(scan, ifp);
9909 
9910 		/* if the frame header isn't in the first mbuf, ignore */
9911 		if (!bridge_check_frame_header(sc, ifp, scan)) {
9912 			mblist_append(&ret, scan);
9913 			continue;
9914 		}
9915 		eh_p = __unsafe_forge_single(struct ether_header *,
9916 		    scan->m_pkthdr.pkt_hdr);
9917 		update_mbuf_flags(ifp, scan, eh_p);
9918 
9919 		/* set start back to include ether header */
9920 		_mbuf_adjust_pkthdr_and_data(scan, -ETHER_HDR_LEN);
9921 
9922 		is_promisc = get_and_clear_promisc(scan);
9923 		if (list.head == NULL) {
9924 			/* start a new list */
9925 			mblist_append(&list, scan);
9926 			bcopy(eh_p, &eh, sizeof(eh));
9927 			list_is_promisc = is_promisc;
9928 		} else if (bcmp(eh_p, &eh, sizeof(eh)) != 0) {
9929 			n_lists++;
9930 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9931 			    "(%s): sublist %u pkts %u",
9932 			    ifp->if_xname, n_lists, list.count);
9933 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
9934 				brlog_ether_header(&eh);
9935 			}
9936 			resid = bridge_input_list(sc, ifp, &eh, list,
9937 			    list_is_promisc);
9938 			if (resid.head != NULL) {
9939 				/* add to the packets to be returned */
9940 				mblist_append_list(&ret, resid);
9941 			}
9942 			/* start new list */
9943 			mblist_init(&list);
9944 			mblist_append(&list, scan);
9945 			list_is_promisc = is_promisc;
9946 			bcopy(eh_p, &eh, sizeof(eh));
9947 		} else {
9948 			mblist_append(&list, scan);
9949 			VERIFY(is_promisc == list_is_promisc);
9950 		}
9951 		if (next_packet == NULL) {
9952 			/* last list */
9953 			n_lists++;
9954 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9955 			    "(%s): sublist %u pkts %u",
9956 			    ifp->if_xname, n_lists, list.count);
9957 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
9958 				brlog_ether_header(&eh);
9959 			}
9960 			resid = bridge_input_list(sc, ifp, &eh, list,
9961 			    list_is_promisc);
9962 			if (resid.head != NULL) {
9963 				/* add to the packets to be returned */
9964 				mblist_append_list(&ret, resid);
9965 			}
9966 		}
9967 	}
9968 	return ret.head;
9969 }
9970 
9971 /*
9972  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9973  * All rights reserved.
9974  *
9975  * Redistribution and use in source and binary forms, with or without
9976  * modification, are permitted provided that the following conditions
9977  * are met:
9978  *   1. Redistributions of source code must retain the above copyright
9979  *      notice, this list of conditions and the following disclaimer.
9980  *   2. Redistributions in binary form must reproduce the above copyright
9981  *      notice, this list of conditions and the following disclaimer in the
9982  *      documentation and/or other materials provided with the distribution.
9983  *
9984  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9985  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9986  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9987  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9988  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9989  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9990  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9991  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9992  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9993  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9994  * SUCH DAMAGE.
9995  */
9996 
9997 /*
9998  * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9999  *
10000  * Create a queue of packets/segments which fit the given mss + hdr_len.
10001  * m0 points to mbuf chain to be segmented.
10002  * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
10003  * into segments of length MSS bytes and then copy the first hdr_len bytes
10004  * from m0 at the top of each segment.
10005  * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
10006  * in each segment after the first hdr_len bytes
10007  *
10008  * Return the new queue with the segments on success, NULL on failure.
10009  * (the mbuf queue is freed in this case).
10010  */
10011 
10012 static mblist
10013 m_seg(struct mbuf *m0, int hdr_len, int mss, char * hdr2_buf __sized_by_or_null(hdr2_len), int hdr2_len)
10014 {
10015 	int off = 0, n, firstlen;
10016 	struct mbuf *mseg;
10017 	int total_len = m0->m_pkthdr.len;
10018 	mblist ret;
10019 
10020 	mblist_init(&ret);
10021 	mblist_append(&ret, m0);
10022 
10023 	/*
10024 	 * Segmentation useless
10025 	 */
10026 	if (total_len <= hdr_len + mss) {
10027 		n = 1;
10028 		goto done;
10029 	}
10030 	if (hdr2_buf == NULL || hdr2_len <= 0) {
10031 		hdr2_buf = NULL;
10032 		hdr2_len = 0;
10033 	}
10034 
10035 	off = hdr_len + mss;
10036 	firstlen = mss; /* first segment stored in the original mbuf */
10037 	ret.bytes = off;
10038 	for (n = 1; off < total_len; off += mss, n++) {
10039 		struct mbuf *m;
10040 		/*
10041 		 * Copy the header from the original packet
10042 		 * and create a new mbuf chain
10043 		 */
10044 		if (MHLEN < hdr_len) {
10045 			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
10046 		} else {
10047 			m = m_gethdr(M_NOWAIT, MT_DATA);
10048 		}
10049 
10050 		if (m == NULL) {
10051 #ifdef GSO_DEBUG
10052 			D("MGETHDR error\n");
10053 #endif
10054 			goto err;
10055 		}
10056 
10057 		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
10058 
10059 		m->m_len = hdr_len;
10060 		/*
10061 		 * if the optional header is present, copy it
10062 		 */
10063 		if (hdr2_buf != NULL) {
10064 			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
10065 		}
10066 
10067 		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
10068 		if (off + mss >= total_len) {           /* last segment */
10069 			mss = total_len - off;
10070 		}
10071 		/*
10072 		 * Copy the payload from original packet
10073 		 */
10074 		mseg = m_copym(m0, off, mss, M_NOWAIT);
10075 		if (mseg == NULL) {
10076 			m_freem(m);
10077 #ifdef GSO_DEBUG
10078 			D("m_copym error\n");
10079 #endif
10080 			goto err;
10081 		}
10082 		m_cat(m, mseg);
10083 
10084 		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
10085 		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
10086 		/*
10087 		 * Copy the checksum flags and data (in_cksum() need this)
10088 		 */
10089 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
10090 		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
10091 		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
10092 
10093 		mblist_append(&ret, m);
10094 	}
10095 
10096 	/*
10097 	 * Update first segment.
10098 	 * If the optional header is present, is necessary
10099 	 * to insert it into the first segment.
10100 	 */
10101 	if (hdr2_buf == NULL) {
10102 		m_adj(m0, hdr_len + firstlen - total_len);
10103 		m0->m_pkthdr.len = hdr_len + firstlen;
10104 	} else {
10105 		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
10106 		if (mseg == NULL) {
10107 #ifdef GSO_DEBUG
10108 			D("m_copym error\n");
10109 #endif
10110 			goto err;
10111 		}
10112 		m_adj(m0, hdr_len - total_len);
10113 		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
10114 		m_cat(m0, mseg);
10115 		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
10116 	}
10117 
10118 done:
10119 	return ret;
10120 
10121 err:
10122 	if (ret.head != NULL) {
10123 		m_freem_list(ret.head);
10124 		mblist_init(&ret);
10125 	}
10126 	return ret;
10127 }
10128 
10129 /*
10130  * Wrappers of IPv4 checksum functions
10131  */
10132 static inline void
10133 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
10134 {
10135 	m->m_data += mac_hlen;
10136 	m->m_len -= mac_hlen;
10137 	m->m_pkthdr.len -= mac_hlen;
10138 #if __FreeBSD_version < 1000000
10139 	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
10140 #endif
10141 
10142 	in_delayed_cksum(m);
10143 
10144 #if __FreeBSD_version < 1000000
10145 	ip->ip_len = htons(ip->ip_len);
10146 #endif
10147 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
10148 	m->m_len += mac_hlen;
10149 	m->m_pkthdr.len += mac_hlen;
10150 	m->m_data -= mac_hlen;
10151 }
10152 
10153 static inline void
10154 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
10155 {
10156 	m->m_data += mac_hlen;
10157 
10158 	ip->ip_sum = in_cksum(m, ip_hlen);
10159 
10160 	m->m_pkthdr.csum_flags &= ~CSUM_IP;
10161 	m->m_data -= mac_hlen;
10162 }
10163 
10164 /*
10165  * Structure that contains the state during the TCP segmentation
10166  */
10167 struct gso_ip_tcp_state {
10168 	void    (*update)
10169 	(struct gso_ip_tcp_state*, struct mbuf*);
10170 	void    (*internal)
10171 	(struct gso_ip_tcp_state*, struct mbuf*);
10172 	u_int ip_m0_len;
10173 	uint8_t * __counted_by(ip_m0_len) hdr;
10174 	struct tcphdr *tcp;
10175 	int mac_hlen;
10176 	int ip_hlen;
10177 	int tcp_hlen;
10178 	int hlen;
10179 	int pay_len;
10180 	int sw_csum;
10181 	uint32_t tcp_seq;
10182 	uint16_t ip_id;
10183 	boolean_t is_tx;
10184 };
10185 
10186 /*
10187  * Update the pointers to TCP and IPv4 headers
10188  */
10189 static inline void
10190 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10191 {
10192 	state->hdr = mtodo(m, state->mac_hlen);
10193 	state->ip_m0_len = m->m_len - state->mac_hlen;
10194 	state->ip_hlen = state->ip_hlen;
10195 	state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10196 	state->pay_len = m->m_pkthdr.len - state->hlen;
10197 }
10198 
10199 /*
10200  * Set properly the TCP and IPv4 headers
10201  */
10202 static inline void
10203 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10204 {
10205 	struct ip *ip;
10206 	/*
10207 	 * Update IP header
10208 	 */
10209 	ip = (struct ip *)state->hdr;
10210 	ip->ip_id = htons((state->ip_id)++);
10211 	ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
10212 	/*
10213 	 * TCP Checksum
10214 	 */
10215 	state->tcp->th_sum = 0;
10216 	state->tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
10217 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
10218 	/*
10219 	 * Checksum HW not supported (TCP)
10220 	 */
10221 	if (state->sw_csum & CSUM_DELAY_DATA) {
10222 		gso_ipv4_data_cksum(m, ip, state->mac_hlen);
10223 	}
10224 
10225 	state->tcp_seq += state->pay_len;
10226 	/*
10227 	 * IP Checksum
10228 	 */
10229 	ip->ip_sum = 0;
10230 	/*
10231 	 * Checksum HW not supported (IP)
10232 	 */
10233 	if (state->sw_csum & CSUM_IP) {
10234 		gso_ipv4_hdr_cksum(m, ip, state->mac_hlen, state->ip_hlen);
10235 	}
10236 }
10237 
10238 
10239 /*
10240  * Updates the pointers to TCP and IPv6 headers
10241  */
10242 static inline void
10243 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10244 {
10245 	state->hdr = mtodo(m, state->mac_hlen);
10246 	state->ip_m0_len = m->m_len - state->mac_hlen;
10247 	state->ip_hlen = state->ip_hlen;
10248 	state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10249 	state->pay_len = m->m_pkthdr.len - state->hlen;
10250 }
10251 
10252 /*
10253  * Sets properly the TCP and IPv6 headers
10254  */
10255 static inline void
10256 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10257 {
10258 	struct ip6_hdr *ip6;
10259 
10260 	ip6 = (struct ip6_hdr *)state->hdr;
10261 	ip6->ip6_plen = htons(m->m_pkthdr.len - state->mac_hlen - state->ip_hlen);
10262 	/*
10263 	 * TCP Checksum
10264 	 */
10265 	state->tcp->th_sum = 0;
10266 	state->tcp->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst,
10267 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
10268 	/*
10269 	 * Checksum HW not supported (TCP)
10270 	 */
10271 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
10272 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
10273 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
10274 	}
10275 	state->tcp_seq += state->pay_len;
10276 }
10277 
10278 /*
10279  * Init the state during the TCP segmentation
10280  */
10281 static void
10282 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
10283     bool is_ipv4, int mac_hlen, int ip_hlen,
10284     uint8_t *__counted_by(ip_m0_len) ip_hdr, u_int ip_m0_len,
10285     struct tcphdr * tcp_hdr)
10286 {
10287 #pragma unused(ifp)
10288 
10289 	state->hdr = ip_hdr;
10290 	state->ip_m0_len = ip_m0_len;
10291 	state->ip_hlen = ip_hlen;
10292 	state->tcp = tcp_hdr;
10293 	if (is_ipv4) {
10294 		state->ip_id = ntohs(((struct ip *)state->hdr)->ip_id);
10295 		state->update = gso_ipv4_tcp_update;
10296 		state->internal = gso_ipv4_tcp_internal;
10297 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
10298 	} else {
10299 		state->update = gso_ipv6_tcp_update;
10300 		state->internal = gso_ipv6_tcp_internal;
10301 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
10302 	}
10303 	state->mac_hlen = mac_hlen;
10304 	state->tcp_hlen = state->tcp->th_off << 2;
10305 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
10306 	state->tcp_seq = ntohl(state->tcp->th_seq);
10307 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
10308 	return;
10309 }
10310 
10311 /*
10312  * GSO on TCP/IP (v4 or v6)
10313  *
10314  * Segment the given mbuf and return the list of packets.
10315  *
10316  */
10317 static mblist
10318 gso_ip_tcp(ifnet_t ifp, mbuf_t m0, struct gso_ip_tcp_state *state, bool is_tx)
10319 {
10320 	struct mbuf *m;
10321 	int orig_mss;
10322 	int mss = 0;
10323 #ifdef GSO_STATS
10324 	int total_len = m0->m_pkthdr.len;
10325 #endif /* GSO_STATS */
10326 	mblist  seg;
10327 	bool tso_with_gso = false;
10328 
10329 	orig_mss = mss = _mbuf_get_tso_mss(m0);
10330 	if (mss == 0 && !is_tx) {
10331 		uint8_t seg_cnt = m0->m_pkthdr.rx_seg_cnt;
10332 
10333 		if (seg_cnt != 0) {
10334 			uint32_t        hdr_len;
10335 			uint32_t        len;
10336 
10337 			/* approximate the MSS using LRO seg cnt */
10338 			hdr_len = state->ip_hlen + state->tcp_hlen;
10339 			len = mbuf_pkthdr_len(m0) - hdr_len - ETHER_HDR_LEN;
10340 			mss = len / seg_cnt;
10341 			m0->m_pkthdr.rx_seg_cnt = 0;
10342 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10343 			    "%s: mss %d = len %d / seg cnt %d",
10344 			    ifp->if_xname, mss, len, seg_cnt);
10345 		}
10346 	}
10347 	if (mss == 0) {
10348 		/* hack: we don't have the actual MSS */
10349 		u_int reduce_mss;
10350 
10351 		reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
10352 		    : if_bridge_tso_reduce_mss_forwarding;
10353 		mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen -
10354 		    reduce_mss;
10355 		assert(mss > 0);
10356 	} else if (is_tx) {
10357 		bool    is_ipv4;
10358 		bool    do_tso = true;
10359 
10360 		if (TSO_IPV4_OK(ifp, m0)) {
10361 			is_ipv4 = true;
10362 		} else if (TSO_IPV6_OK(ifp, m0)) {
10363 			is_ipv4 = false;
10364 		} else {
10365 			do_tso = false;
10366 		}
10367 		if (do_tso) { /* TSO with GSO */
10368 			uint32_t        if_tso_max;
10369 
10370 			if_tso_max = get_if_tso_mtu(ifp, is_ipv4);
10371 			mss = if_tso_max - state->ip_hlen - state->tcp_hlen
10372 			    - ETHER_HDR_LEN;
10373 			tso_with_gso = true;
10374 		}
10375 	}
10376 	if (!tso_with_gso) {
10377 		/* clear TSO flags */
10378 		m0->m_pkthdr.csum_flags &= ~_TSO_CSUM;
10379 	}
10380 	seg = m_seg(m0, state->hlen, mss, 0, 0);
10381 	if (seg.head == NULL || seg.head->m_nextpkt == NULL) {
10382 		return seg;
10383 	}
10384 	if (tso_with_gso) {
10385 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10386 		    "%s TX gso size %d mss %d nsegs %d",
10387 		    ifp->if_xname,
10388 		    mss, orig_mss, seg.count);
10389 	} else {
10390 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10391 		    "%s %s mss %d nsegs %d",
10392 		    ifp->if_xname,
10393 		    is_tx ? "TX" : "RX",
10394 		    mss, seg.count);
10395 	}
10396 #ifdef GSO_STATS
10397 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
10398 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
10399 	GSOSTAT_ADD(tcp.gsos_osegments, seg.count);
10400 #endif /* GSO_STATS */
10401 
10402 	/* first pkt */
10403 	VERIFY(seg.head == m0);
10404 	m = m0;
10405 
10406 	state->update(state, m);
10407 
10408 	do {
10409 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
10410 
10411 		state->internal(state, m);
10412 		m = m->m_nextpkt;
10413 		state->update(state, m);
10414 		state->tcp->th_flags &= ~TH_CWR;
10415 		state->tcp->th_seq = htonl(state->tcp_seq);
10416 	} while (m->m_nextpkt);
10417 
10418 	/* last pkt */
10419 	state->internal(state, m);
10420 
10421 #ifdef GSO_STATS
10422 	if (!error) {
10423 		GSOSTAT_INC(tcp.gsos_segmented);
10424 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
10425 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
10426 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
10427 	}
10428 #endif /* GSO_STATS */
10429 	return seg;
10430 }
10431 
10432 /*
10433  * GSO for TCP/IPv[46]
10434  */
10435 static mblist
10436 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
10437     u_int mac_hlen, bool is_ipv4, bool is_tx)
10438 {
10439 	uint32_t csum_flags;
10440 	struct gso_ip_tcp_state state;
10441 	struct tcphdr *tcp;
10442 
10443 	assert(info_p->ip_proto_hdr != NULL);
10444 	tcp = (struct tcphdr *)(void *)info_p->ip_proto_hdr;
10445 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
10446 	    info_p->ip_hlen + info_p->ip_opt_len,
10447 	    info_p->ip_hdr, info_p->ip_m0_len, tcp);
10448 	csum_flags = is_ipv4 ? CSUM_DELAY_DATA : CSUM_DELAY_IPV6_DATA; /* XXX */
10449 	m->m_pkthdr.csum_flags |= csum_flags;
10450 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
10451 	return gso_ip_tcp(ifp, m, &state, is_tx);
10452 }
10453 
10454 static mblist
10455 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx)
10456 {
10457 	int error;
10458 	ip_packet_info info;
10459 	struct bripstats stats; /* XXX ignored */
10460 	mblist ret;
10461 
10462 	error = bridge_get_tcp_header(&m, mac_hlen, is_ipv4, &info, &stats);
10463 	if (error != 0) {
10464 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10465 		    "%s bridge_get_tcp_header failed %d (%s)",
10466 		    ifp->if_xname, error,
10467 		    is_tx ? "TX" : "RX");
10468 		if (m != NULL) {
10469 			m_freem(m);
10470 			m = NULL;
10471 		}
10472 		goto no_segment;
10473 	}
10474 	if (info.ip_proto_hdr == NULL) {
10475 		/* not actually a TCP packet, no segmentation */
10476 		goto no_segment;
10477 	}
10478 	if (!is_tx && ip_packet_info_dst_is_our_ip(&info, ifp->if_index)) {
10479 		goto no_segment;
10480 	}
10481 	return gso_tcp_with_info(ifp, m, &info, mac_hlen, is_ipv4, is_tx);
10482 
10483 no_segment:
10484 	mblist_init(&ret);
10485 	if (m != NULL) {
10486 		mblist_append(&ret, m);
10487 	}
10488 	return ret;
10489 }
10490