xref: /xnu-11417.101.15/bsd/net/if_bridge.c (revision e3723e1f17661b24996789d8afc084c0c3303b26)
1 /*
2  * Copyright (c) 2004-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
66  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
101  *	  consider heterogeneous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/zalloc.h>
131 
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 
142 #include <netinet/in.h> /* for struct arpcom */
143 #include <netinet/tcp.h> /* for struct tcphdr */
144 #include <netinet/in_systm.h>
145 #include <netinet/in_var.h>
146 #define _IP_VHL
147 #include <netinet/ip.h>
148 #include <netinet/ip_var.h>
149 #include <netinet/ip6.h>
150 #include <netinet6/ip6_var.h>
151 #include <netinet/if_ether.h> /* for struct arpcom */
152 #include <net/bridgestp.h>
153 #include <net/if_bridgevar.h>
154 #include <net/if_llc.h>
155 #if NVLAN > 0
156 #include <net/if_vlan_var.h>
157 #endif /* NVLAN > 0 */
158 
159 #include <net/if_ether.h>
160 #include <net/dlil.h>
161 #include <net/kpi_interfacefilter.h>
162 #include <net/pfvar.h>
163 
164 #include <net/route.h>
165 #include <dev/random/randomdev.h>
166 
167 #include <netinet/bootp.h>
168 #include <netinet/dhcp.h>
169 
170 #if SKYWALK
171 #include <skywalk/nexus/netif/nx_netif.h>
172 #endif /* SKYWALK */
173 
174 #include <net/sockaddr_utils.h>
175 #include <net/mblist.h>
176 
177 #include <os/log.h>
178 
/* File-local struct in_addr preset to the IPv4 wildcard address. */
static struct in_addr inaddr_any = {
	.s_addr = INADDR_ANY,
};
180 
181 
/*
 * mbuf flag predicates
 * - __M_FLAGS_ARE_SET() is true when at least one bit of 'flags' is
 *   set in (m)->m_flags
 * - IS_BCAST(), IS_MCAST() and IS_BCAST_MCAST() apply it to M_BCAST,
 *   M_MCAST, and the union of the two
 */
#define __M_FLAGS_ARE_SET(m, flags)     (((m)->m_flags & (flags)) != 0)
#define IS_BCAST(m)                     __M_FLAGS_ARE_SET(m, M_BCAST)
#define IS_MCAST(m)                     __M_FLAGS_ARE_SET(m, M_MCAST)
#define IS_BCAST_MCAST(m)               __M_FLAGS_ARE_SET(m, M_BCAST | M_MCAST)
186 
/*
 * Frequently compared protocol constants, passed through htons() so
 * that the result is in network byte order.
 */
/* ethernet types */
#define HTONS_ETHERTYPE_ARP             htons(ETHERTYPE_ARP)
#define HTONS_ETHERTYPE_IP              htons(ETHERTYPE_IP)
#define HTONS_ETHERTYPE_IPV6            htons(ETHERTYPE_IPV6)
/* ARP hardware type and opcodes */
#define HTONS_ARPHRD_ETHER              htons(ARPHRD_ETHER)
#define HTONS_ARPOP_REQUEST             htons(ARPOP_REQUEST)
#define HTONS_ARPOP_REPLY               htons(ARPOP_REPLY)
/* BOOTP/DHCP ports and flags */
#define HTONS_IPPORT_BOOTPC             htons(IPPORT_BOOTPC)
#define HTONS_IPPORT_BOOTPS             htons(IPPORT_BOOTPS)
#define HTONS_DHCP_FLAGS_BROADCAST      htons(DHCP_FLAGS_BROADCAST)
196 
/*
 * if_bridge_debug, BR_DBGF_*
 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
 *   to enable additional logs for the corresponding bridge function
 * - "sysctl net.link.bridge.debug" controls the value of
 *   'if_bridge_debug'
 */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400
#define BR_DBGF_INPUT_LIST      0x0800

/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * BRIDGE_DBGF_ENABLED
 * - true if at least one of the BR_DBGF_* bits in '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole; without the
 *   parentheses '&' would bind to the first operand only and the
 *   result would always be true
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
231 
/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted when '__level' is at or below
 *   'if_bridge_log_level', or when any bit of '__dbgf' is enabled in
 *   'if_bridge_debug'
 * - BRIDGE_LOG prefixes the message with the calling function's name;
 *   BRIDGE_LOG_SIMPLE does not include the function name in the log
 * - '__level' and '__dbgf' are parenthesized so that compound
 *   expressions (a conditional level, an OR of several BR_DBGF_* bits)
 *   are evaluated as a unit
 * - '__string' must be a string literal
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
	                       __func__, ## __VA_ARGS__);       \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                    \
	        if ((__level) <= if_bridge_log_level ||         \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
253 
/*
 * Raw operations and assertions on the per-bridge mutex (sc_mtx).
 */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)

/*
 * BRIDGE_LOCK_DEBUG
 * - when non-zero, the locking primitives are routed to the bridge_*()
 *   functions; otherwise they expand to the inline macros below
 */
#define BRIDGE_LOCK_DEBUG      1
#if BRIDGE_LOCK_DEBUG

/* number of slots in the lock/unlock calling history arrays */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)

/*
 * BRIDGE_LOCK2REF
 * - called with the lock held; always drops the lock
 * - sets '_err' to EBUSY if an exclusive request is pending, otherwise
 *   takes a reference on the member list and sets '_err' to 0
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0) {                                \
	        (_err) = EBUSY;                                         \
	} else {                                                        \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)

/*
 * BRIDGE_UNREF
 * - releases a member list reference; if that was the last one and an
 *   exclusive request is pending, wakes the sleeper on sc_cv after
 *   dropping the lock
 */
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else {                                                        \
	        _BRIDGE_UNLOCK(_sc);                                    \
	}                                                               \
} while (0)

/*
 * BRIDGE_XLOCK
 * - called with the lock held; registers an exclusive request and
 *   sleeps on sc_cv until all member list references are gone
 */
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0) {                              \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
	}                                                               \
} while (0)

/*
 * BRIDGE_XDROP
 * - called with the lock held; withdraws an exclusive request
 */
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
309 
/*
 * BRIDGE_BPF_TAP_IN, BRIDGE_BPF_TAP_OUT
 * - hand packet 'm' to bpf_tap_in()/bpf_tap_out() as DLT_EN10MB, but
 *   only when 'ifp' has a non-NULL if_bpf
 * - the arguments are parenthesized so that expression arguments
 *   (e.g. pointer arithmetic) expand correctly
 * - 'ifp' is evaluated more than once; do not pass an expression with
 *   side effects
 */
#define BRIDGE_BPF_TAP_IN(ifp, m)                                       \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_in((ifp), DLT_EN10MB, (m), NULL, 0);    \
	        }                                                       \
	} while (0)

#define BRIDGE_BPF_TAP_OUT(ifp, m)                                      \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_out((ifp), DLT_EN10MB, (m), NULL, 0);   \
	        }                                                       \
	} while (0)
323 
324 
/*
 * Route hash table sizing.
 * - BRIDGE_RTHASH_SIZE is the initial size and must be a power of two
 *   (BRIDGE_RTHASH_MASK relies on it)
 * - BRIDGE_RTHASH_SIZE_MAX is the maximum size of the table
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

#define BRIDGE_RTHASH_SIZE_MAX          2048

#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)

/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif

/*
 * Lifetime, in seconds, of a dynamically learned entry.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Interval, in seconds, between walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/*
 * Capabilities that may be masked on a member interface.
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
/*
 * Capabilities that are disabled on a member interface.
 */
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
377 
378 /*
379  * Bridge interface list entry.
380  */
381 struct bridge_iflist {
382 	TAILQ_ENTRY(bridge_iflist) bif_next;
383 	struct ifnet            *bif_ifp;       /* member if */
384 	struct bstp_port        bif_stp;        /* STP state */
385 	uint32_t                bif_ifflags;    /* member if flags */
386 	int                     bif_savedcaps;  /* saved capabilities */
387 	uint32_t                bif_addrmax;    /* max # of addresses */
388 	uint32_t                bif_addrcnt;    /* cur. # of addresses */
389 	uint32_t                bif_addrexceeded; /* # of address violations */
390 
391 	interface_filter_t      bif_iff_ref;
392 	struct bridge_softc     *bif_sc;
393 	uint32_t                bif_flags;
394 
395 	/* host filter */
396 	struct in_addr          bif_hf_ipsrc;
397 	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN];
398 
399 	struct ifbrmstats       bif_stats;
400 };
401 
402 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)403 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
404 {
405 	return (bif->bif_ifflags & flags) != 0;
406 }
407 
408 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)409 bif_has_checksum_offload(struct bridge_iflist * bif)
410 {
411 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
412 }
413 
414 static inline bool
bif_has_mac_nat(struct bridge_iflist * bif)415 bif_has_mac_nat(struct bridge_iflist * bif)
416 {
417 	return bif_ifflags_are_set(bif, IFBIF_MAC_NAT);
418 }
419 
420 static inline bool
bif_uses_virtio(struct bridge_iflist * bif)421 bif_uses_virtio(struct bridge_iflist * bif)
422 {
423 	return bif_ifflags_are_set(bif, IFBIF_USES_VIRTIO);
424 }
425 
/*
 * Fake errors to make the code clearer.
 * - every name below is an alias for EJUSTRETURN; the distinct names
 *   only document which check failed
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN
434 
/*
 * BIFF_* flag bits (NOTE(review): presumably stored in the member's
 * bif_flags field - confirm against the users of these bits).
 */
#define BIFF_PROMISC            0x0001  /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x0002  /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x0004  /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x0008  /* interface media active */
#define BIFF_HOST_FILTER        0x0010  /* host filter enabled */
#define BIFF_HF_HWSRC           0x0020  /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x0040  /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x0080  /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x0100  /* added to the member list */
#define BIFF_WIFI_INFRA         0x0200  /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x0400  /* allmulti set */
#define BIFF_LRO_DISABLED       0x0800  /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
#endif /* SKYWALK */
451 
452 /*
453  * mac_nat_entry
454  * - translates between an IP address and MAC address on a specific
455  *   bridge interface member
456  */
457 struct mac_nat_entry {
458 	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
459 	struct bridge_iflist    *mne_bif;       /* originating interface */
460 	unsigned long           mne_expire;     /* expiration time */
461 	union {
462 		struct in_addr  mneu_ip;        /* originating IPv4 address */
463 		struct in6_addr mneu_ip6;       /* originating IPv6 address */
464 	} mne_u;
465 	uint8_t                 mne_mac[ETHER_ADDR_LEN];
466 	uint8_t                 mne_flags;
467 	uint8_t                 mne_reserved;
468 };
469 #define mne_ip  mne_u.mneu_ip
470 #define mne_ip6 mne_u.mneu_ip6
471 
472 #define MNE_FLAGS_IPV6          0x01    /* IPv6 address */
473 
474 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
475 
/*
 * mac_nat_record
 * - used by bridge_mac_nat_output() to convey the translation that needs
 *   to take place in bridge_mac_nat_translate
 * - holds enough information so that the translation can be done later
 *   when the destination interface is the MAC-NAT interface
 * - mnr_u holds one of three variants (ARP, IP, IPv6)
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		/* ARP */
		uint16_t        mnru_arp_offset;
		/* IP */
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t  mnruip_header_len;
		} mnru_ip;
		/* IPv6 */
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand for the ARP variant */
#define mnr_arp_offset  mnr_u.mnru_arp_offset

/* shorthand for the IP variant */
#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

/* shorthand for the IPv6 variant */
#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
511 
512 /*
513  * Bridge route node.
514  */
515 struct bridge_rtnode {
516 	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
517 	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
518 	struct bridge_iflist    *brt_dst;       /* destination if */
519 	unsigned long           brt_expire;     /* expiration time */
520 	uint8_t                 brt_flags;      /* address flags */
521 	uint8_t                 brt_addr[ETHER_ADDR_LEN];
522 	uint16_t                brt_vlan;       /* vlan id */
523 };
524 
525 #define brt_ifp                 brt_dst->bif_ifp
526 
527 /*
528  * Bridge delayed function call context
529  */
530 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
531 
532 struct bridge_delayed_call {
533 	struct bridge_softc     *bdc_sc;
534 	bridge_delayed_func_t   bdc_func; /* Function to call */
535 	struct timespec         bdc_ts; /* Time to call */
536 	u_int32_t               bdc_flags;
537 	thread_call_t           bdc_thread_call;
538 };
539 
540 #define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
541 #define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
542 
543 /*
544  * Software state for each bridge.
545  */
546 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
547 
548 struct bridge_softc {
549 	struct ifnet            *sc_ifp;        /* make this an interface */
550 	uint32_t                sc_flags;
551 	LIST_ENTRY(bridge_softc) sc_list;
552 	decl_lck_mtx_data(, sc_mtx);
553 	struct _bridge_rtnode_list * __counted_by(sc_rthash_size) sc_rthash;  /* our forwarding table */
554 	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
555 	uint32_t                sc_rthash_key;  /* key for hash */
556 	uint32_t                sc_rthash_size; /* size of the hash table */
557 	struct bridge_delayed_call sc_aging_timer;
558 	struct bridge_delayed_call sc_resize_call;
559 	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
560 	struct bstp_state       sc_stp;         /* STP state */
561 	void                    *sc_cv;
562 	uint32_t                sc_brtmax;      /* max # of addresses */
563 	uint32_t                sc_brtcnt;      /* cur. # of addresses */
564 	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
565 	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
566 	uint32_t                sc_iflist_xcnt; /* refcount for sc_iflist */
567 	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
568 	uint32_t                sc_brtexceeded; /* # of cache drops */
569 	uint32_t                sc_filter_flags; /* ipf and flags */
570 	struct ifnet            *sc_ifaddr;     /* member mac copied from */
571 	u_char                  sc_defaddr[6];  /* Default MAC address */
572 	char                    sc_if_xname[IFNAMSIZ];
573 
574 	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
575 	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
576 	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
577 	uint32_t                sc_mne_max;      /* max # of entries */
578 	uint32_t                sc_mne_count;    /* cur. # of entries */
579 	uint32_t                sc_mne_allocation_failures;
580 #if BRIDGE_LOCK_DEBUG
581 	/*
582 	 * Locking and unlocking calling history
583 	 */
584 	void                    *lock_lr[BR_LCKDBG_MAX];
585 	int                     next_lock_lr;
586 	void                    *unlock_lr[BR_LCKDBG_MAX];
587 	int                     next_unlock_lr;
588 #endif /* BRIDGE_LOCK_DEBUG */
589 };
590 
591 #define SCF_DETACHING            0x01
592 #define SCF_RESIZING             0x02
593 #define SCF_MEDIA_ACTIVE         0x04
594 #define SCF_ADDRESS_ASSIGNED     0x08
595 
/*
 * ChecksumOperation
 * - selects the checksum handling requested for a packet
 *   (passed, for instance, to bridge_enqueue())
 */
typedef enum {
	CHECKSUM_OPERATION_NONE          = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE      = 2,
	CHECKSUM_OPERATION_COMPUTE       = 3,
} ChecksumOperation;
602 
603 typedef struct {
604 	u_int           ip_hlen;        /* IP header length */
605 	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
606 	u_int           ip_m0_len;      /* bytes available at ip_hdr (without jumping mbufs) */
607 	u_int           ip_opt_len;     /* IPv6 options headers length */
608 	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
609 	bool            ip_is_ipv4;
610 	bool            ip_is_fragmented;
611 	uint8_t         *__sized_by(ip_m0_len) ip_hdr;   /* pointer to IP header */
612 	uint8_t         *__indexable ip_proto_hdr;   /* ptr to protocol header (TCP) */
613 } ip_packet_info, *ip_packet_info_t;
614 
615 struct bridge_hostfilter_stats bridge_hostfilter_stats;
616 
617 typedef uint8_t ether_type_flag_t;
618 
619 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
620 #if BRIDGE_LOCK_DEBUG
621 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
622 #else
623 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
624 #endif
625 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
626 
627 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
628 
629 static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
630 static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
631 
632 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
633 static int      bridge_clone_destroy(struct ifnet *);
634 
635 static errno_t  bridge_ioctl(struct ifnet *, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)));
636 #if HAS_IF_CAP
637 static void     bridge_mutecaps(struct bridge_softc *);
638 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
639     int);
640 #endif
641 static errno_t bridge_set_tso(struct bridge_softc *);
642 static void     bridge_proto_attach_changed(struct ifnet *);
643 static int      bridge_init(struct ifnet *);
644 static void     bridge_ifstop(struct ifnet *, int);
645 static int      bridge_output(struct ifnet *, struct mbuf *);
646 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
647 static void     bridge_start(struct ifnet *);
648 static mblist   bridge_input_list(struct bridge_softc *, ifnet_t,
649     struct ether_header *, mblist, bool);
650 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
651     mbuf_t *, char **);
652 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
653     mbuf_t *);
654 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
655     mbuf_t *m);
656 static int      bridge_enqueue(ifnet_t, ifnet_t, ifnet_t,
657     ether_type_flag_t, mbuf_t, ChecksumOperation);
658 static mbuf_t   bridge_checksum_offload_list(ifnet_t, struct bridge_iflist *,
659     mbuf_t, bool);
660 static mbuf_t   bridge_filter_checksum(ifnet_t, struct bridge_iflist * bif,
661     mbuf_t m, bool, bool, bool);
662 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
663 
664 static void     bridge_aging_timer(struct bridge_softc *sc);
665 
666 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
667     ether_type_flag_t, mbuf_t);
668 static void     bridge_span(struct bridge_softc *, ether_type_flag_t, struct mbuf *);
669 
670 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
671     uint16_t, struct bridge_iflist *, int, uint8_t);
672 static struct bridge_iflist * bridge_rtlookup_bif(struct bridge_softc *,
673     const uint8_t[ETHER_ADDR_LEN], uint16_t);
674 static void     bridge_rttrim(struct bridge_softc *);
675 static void     bridge_rtage(struct bridge_softc *);
676 static void     bridge_rtflush(struct bridge_softc *, int);
677 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
678     uint16_t);
679 
680 static int      bridge_rtable_init(struct bridge_softc *);
681 static void     bridge_rtable_fini(struct bridge_softc *);
682 
683 static void     bridge_rthash_resize(struct bridge_softc *);
684 
685 static int      bridge_rtnode_addr_cmp(const uint8_t[ETHER_ADDR_LEN], const uint8_t[ETHER_ADDR_LEN]);
686 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
687     const uint8_t[ETHER_ADDR_LEN], uint16_t);
688 static int      bridge_rtnode_hash(struct bridge_softc *,
689     struct bridge_rtnode *);
690 static int      bridge_rtnode_insert(struct bridge_softc *,
691     struct bridge_rtnode *);
692 static void     bridge_rtnode_destroy(struct bridge_softc *,
693     struct bridge_rtnode *);
694 #if BRIDGESTP
695 static void     bridge_rtable_expire(struct ifnet *, int);
696 static void     bridge_state_change(struct ifnet *, int);
697 #endif /* BRIDGESTP */
698 
699 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
700     char * __sized_by(IFNAMSIZ) name);
701 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
702     struct ifnet *ifp);
703 static void     bridge_delete_member(struct bridge_softc *,
704     struct bridge_iflist *);
705 static void     bridge_delete_span(struct bridge_softc *,
706     struct bridge_iflist *);
707 
708 static int      bridge_ioctl_add(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
709 static int      bridge_ioctl_del(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
710 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
711 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
712 static int      bridge_ioctl_scache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
713 static int      bridge_ioctl_gcache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
714 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
715 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
716 static int      bridge_ioctl_rts32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
717 static int      bridge_ioctl_rts64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
718 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
719 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
720 static int      bridge_ioctl_sto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
721 static int      bridge_ioctl_gto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
722 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
723 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
724 static int      bridge_ioctl_flush(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
725 static int      bridge_ioctl_gpri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
726 static int      bridge_ioctl_spri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
727 static int      bridge_ioctl_ght(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
728 static int      bridge_ioctl_sht(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
729 static int      bridge_ioctl_gfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
730 static int      bridge_ioctl_sfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
731 static int      bridge_ioctl_gma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
732 static int      bridge_ioctl_sma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
733 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
734 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
735 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
736 static int      bridge_ioctl_addspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
737 static int      bridge_ioctl_delspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
738 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
739 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
740 static int      bridge_ioctl_grte(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
741 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
742 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
743 static int      bridge_ioctl_sproto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
744 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
745 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len);
746 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
747 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
748 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
749 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
750 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
751 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
752 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
753 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
754 
755 static int      bridge_pf(struct mbuf **, struct ifnet *,
756     uint32_t sc_filter_flags, bool input);
757 static int bridge_ip_checkbasic(struct mbuf **);
758 static int bridge_ip6_checkbasic(struct mbuf **);
759 
760 static void bridge_detach(ifnet_t);
761 static void bridge_link_event(struct ifnet *, u_int32_t);
762 static void bridge_iflinkevent(struct ifnet *);
763 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
764 static int interface_media_active(struct ifnet *);
765 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
766 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
767 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
768 
769 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
770     struct bridge_iflist *);
771 static void bridge_mac_nat_disable(struct bridge_softc *sc);
772 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
773 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
774 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
775     struct bridge_iflist *);
776 static mbuf_t bridge_mac_nat_input(struct bridge_softc *, ifnet_t, mbuf_t,
777     ifnet_t * dst_if);
778 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
779     struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
780 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
781     const char[ETHER_ADDR_LEN]);
782 
783 static mblist bridge_mac_nat_input_list(struct bridge_softc *sc,
784     ifnet_t external_ifp, mbuf_t m, mbuf_t * forward_head);
785 static mbuf_t bridge_mac_nat_translate_list(struct bridge_softc * sc,
786     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
787 static mbuf_t bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
788     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
789 
790 static mbuf_t   bridge_pf_list(mbuf_t m, ifnet_t ifp,
791     uint32_t sc_filter_flags, bool input);
792 
793 static inline ifnet_t
bridge_rtlookup(struct bridge_softc * sc,const uint8_t addr[ETHER_ADDR_LEN],uint16_t vlan)794 bridge_rtlookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
795     uint16_t vlan)
796 {
797 	struct bridge_iflist *  bif;
798 	ifnet_t                 ifp = NULL;
799 
800 	bif = bridge_rtlookup_bif(sc, addr, vlan);
801 	if (bif != NULL) {
802 		ifp = bif->bif_ifp;
803 	}
804 	return ifp;
805 }
806 
807 static bool in_addr_is_ours(const struct in_addr);
808 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
809 
810 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
811 
812 static mblist
813 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx);
814 
815 static mblist
816 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
817     u_int mac_hlen, bool is_ipv4, bool is_tx);
818 
819 static inline mblist
gso_tcp_transmit(ifnet_t ifp,mbuf_t m,u_int mac_hlen,bool is_ipv4)820 gso_tcp_transmit(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4)
821 {
822 	return gso_tcp(ifp, m, mac_hlen, is_ipv4, true);
823 }
824 
/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2).
 * Note that VLANTAGOF() below ignores its argument and evaluates to 0
 * for every packet.
 */
826 #define VLANTAGOF(_m)   0
827 
828 #define BSTP_ETHERADDR_RANGE_FIRST      0x00
829 #define BSTP_ETHERADDR_RANGE_LAST       0x0f
830 
831 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
832 { 0x01, 0x80, 0xc2, 0x00, 0x00, BSTP_ETHERADDR_RANGE_FIRST };
833 
834 
835 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
836 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
837 
838 #if BRIDGESTP
839 static struct bstp_cb_ops bridge_ops = {
840 	.bcb_state = bridge_state_change,
841 	.bcb_rtage = bridge_rtable_expire
842 };
843 #endif /* BRIDGESTP */
844 
845 SYSCTL_DECL(_net_link);
846 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
847     "Bridge");
848 
849 static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
850 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
851     CTLFLAG_RW | CTLFLAG_LOCKED,
852     &bridge_inherit_mac, 0,
853     "Inherit MAC address from the first bridge member");
854 
855 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
856     CTLFLAG_RW | CTLFLAG_LOCKED,
857     &bridge_rtable_prune_period, 0,
858     "Interval between pruning of routing table");
859 
860 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
861 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
862     CTLFLAG_RW | CTLFLAG_LOCKED,
863     &bridge_rtable_hash_size_max, 0,
864     "Maximum size of the routing hash table");
865 
866 #if BRIDGE_DELAYED_CALLBACK_DEBUG
867 static int bridge_delayed_callback_delay = 0;
868 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
869     CTLFLAG_RW | CTLFLAG_LOCKED,
870     &bridge_delayed_callback_delay, 0,
871     "Delay before calling delayed function");
872 #endif
873 
874 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
875     hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
876     &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
877 
#if BRIDGESTP
/*
 * net.link.bridge.log_stp
 * - when non-zero, STP state changes are logged
 * - registered with CTLFLAG_LOCKED like the other integer sysctls in this
 *   file
 */
static int log_stp   = 0;   /* log STP state changes */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
883 
884 struct bridge_control {
885 	int             (*bc_func)(struct bridge_softc *, void *__sized_by(arg_len) args, size_t arg_len);
886 	unsigned int    bc_argsize;
887 	unsigned int    bc_flags;
888 };
889 
890 #define BC_F_COPYIN             0x01    /* copy arguments in */
891 #define BC_F_COPYOUT            0x02    /* copy arguments out */
892 #define BC_F_SUSER              0x04    /* do super-user check */
893 
894 static const struct bridge_control bridge_control_table32[] = {
895 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
896 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
897 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
898 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
899 
900 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
901 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
902 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
903 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
904 
905 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
906 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
907 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
908 	  .bc_flags = BC_F_COPYOUT },
909 
910 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
911 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
912 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
913 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
914 
915 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
916 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
917 
918 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
919 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
920 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
921 	  .bc_flags = BC_F_COPYOUT },
922 
923 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
924 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
925 
926 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
927 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
928 
929 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
930 	  .bc_flags = BC_F_COPYOUT },
931 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
932 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
933 
934 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
935 	  .bc_flags = BC_F_COPYOUT },
936 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
937 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
938 
939 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
940 	  .bc_flags = BC_F_COPYOUT },
941 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
942 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
943 
944 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
945 	  .bc_flags = BC_F_COPYOUT },
946 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
947 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
948 
949 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
950 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
951 
952 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
953 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
954 
955 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
956 	  .bc_flags = BC_F_COPYOUT },
957 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
958 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
959 
960 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
961 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
962 
963 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
964 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
965 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
966 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
967 
968 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
969 	  .bc_flags = BC_F_COPYOUT },
970 
971 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
972 	  .bc_flags = BC_F_COPYOUT },
973 
974 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
975 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
976 
977 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
978 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
979 
980 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
981 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
982 
983 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
984 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
985 
986 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
987 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
988 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
989 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
990 
991 	{ .bc_func = bridge_ioctl_gmnelist32,
992 	  .bc_argsize = sizeof(struct ifbrmnelist32),
993 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
994 	{ .bc_func = bridge_ioctl_gifstats32,
995 	  .bc_argsize = sizeof(struct ifbrmreq32),
996 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
997 };
998 
999 static const struct bridge_control bridge_control_table64[] = {
1000 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
1001 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1002 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
1003 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1004 
1005 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
1006 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1007 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
1008 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1009 
1010 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
1011 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1012 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
1013 	  .bc_flags = BC_F_COPYOUT },
1014 
1015 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
1016 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1017 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
1018 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1019 
1020 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
1021 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1022 
1023 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
1024 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1025 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
1026 	  .bc_flags = BC_F_COPYOUT },
1027 
1028 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1029 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1030 
1031 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1032 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1033 
1034 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1035 	  .bc_flags = BC_F_COPYOUT },
1036 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1037 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1038 
1039 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1040 	  .bc_flags = BC_F_COPYOUT },
1041 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1042 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1043 
1044 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1045 	  .bc_flags = BC_F_COPYOUT },
1046 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1047 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1048 
1049 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1050 	  .bc_flags = BC_F_COPYOUT },
1051 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
1052 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1053 
1054 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1055 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1056 
1057 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1058 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1059 
1060 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1061 	  .bc_flags = BC_F_COPYOUT },
1062 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1063 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1064 
1065 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1066 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1067 
1068 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1069 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1070 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1071 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1072 
1073 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1074 	  .bc_flags = BC_F_COPYOUT },
1075 
1076 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1077 	  .bc_flags = BC_F_COPYOUT },
1078 
1079 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1080 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1081 
1082 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1083 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1084 
1085 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1086 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1087 
1088 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1089 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1090 
1091 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1092 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1093 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1094 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1095 
1096 	{ .bc_func = bridge_ioctl_gmnelist64,
1097 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1098 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1099 	{ .bc_func = bridge_ioctl_gifstats64,
1100 	  .bc_argsize = sizeof(struct ifbrmreq64),
1101 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1102 };
1103 
1104 static const unsigned int bridge_control_table_size =
1105     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1106 
1107 static LIST_HEAD(, bridge_softc) bridge_list =
1108     LIST_HEAD_INITIALIZER(bridge_list);
1109 
1110 #define BRIDGENAME      "bridge"
1111 #define BRIDGES_MAX     IF_MAXUNIT
1112 #define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)
1113 
1114 static struct if_clone bridge_cloner =
1115     IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
1116     0, BRIDGES_MAX);
1117 
1118 static int if_bridge_txstart = 0;
1119 SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
1120     &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
1121 
1122 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1123     &if_bridge_debug, 0, "Bridge debug flags");
1124 
1125 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
1126     CTLFLAG_RW | CTLFLAG_LOCKED,
1127     &if_bridge_log_level, 0, "Bridge log level");
1128 
1129 static int if_bridge_output_skip_filters = 1;
1130 SYSCTL_INT(_net_link_bridge, OID_AUTO, output_skip_filters,
1131     CTLFLAG_RW | CTLFLAG_LOCKED,
1132     &if_bridge_output_skip_filters, 0, "Bridge skip output filters");
1133 
1134 int bridge_enable_early_input = 1;   /* DLIL early input */
1135 SYSCTL_INT(_net_link_bridge, OID_AUTO, enable_early_input,
1136     CTLFLAG_RW | CTLFLAG_LOCKED,
1137     &bridge_enable_early_input, 0,
1138     "Bridge enable early input");
1139 
1140 int bridge_allow_lro_num_seg = 1;   /* allow LRO_NUM_SEG to keep LRO enabled */
1141 SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_lro_num_seg,
1142     CTLFLAG_RW | CTLFLAG_LOCKED,
1143     &bridge_allow_lro_num_seg, 0,
1144     "Bridge allow LRO_NUM_SEG to keep LRO enabled");
1145 
1146 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
1147 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
1148 #define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
1149 #define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0
1150 
1151 static u_int if_bridge_tso_reduce_mss_forwarding
1152         = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
1153 static u_int if_bridge_tso_reduce_mss_tx
1154         = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1155 
1156 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1157 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1158 {
1159 	int     changed;
1160 	int     error;
1161 	u_int   new_value;
1162 
1163 	error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1164 	    &changed);
1165 	if (error == 0 && changed != 0) {
1166 		if (new_value > val_max) {
1167 			return EINVAL;
1168 		}
1169 		*val = new_value;
1170 	}
1171 	return error;
1172 }
1173 
1174 static int
1175 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1176 {
1177 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1178     BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1179 }
1180 
1181 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1182     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1183     0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1184     "Bridge tso reduce mss when forwarding");
1185 
1186 static int
1187 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1188 {
1189 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1190     BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1191 }
1192 
1193 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1194     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1195     0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1196     "Bridge tso reduce mss on transmit");
1197 
1198 #if DEBUG || DEVELOPMENT
1199 /*
1200  * net.link.bridge.reduce_tso_mtu
1201  * - when non-zero, the bridge overrides the interface TSO MTU to a lower
1202  *   value (i.e. 16K) to enable testing the "use GSO instead" path
1203  */
1204 static int if_bridge_reduce_tso_mtu = 0;
1205 SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
1206     CTLFLAG_RW | CTLFLAG_LOCKED,
1207     &if_bridge_reduce_tso_mtu, 0, "Bridge interface reduce TSO MTU");
1208 
1209 #endif /* DEBUG || DEVELOPMENT */
1210 
1211 static void brlog_ether_header(struct ether_header *);
1212 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1213 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1214 static void brlog_mbuf(mbuf_t, const char *, const char *);
1215 static void brlog_link(struct bridge_softc * sc);
1216 
1217 #if BRIDGE_LOCK_DEBUG
1218 static void bridge_lock(struct bridge_softc *);
1219 static void bridge_unlock(struct bridge_softc *);
1220 static int bridge_lock2ref(struct bridge_softc *);
1221 static void bridge_unref(struct bridge_softc *);
1222 static void bridge_xlock(struct bridge_softc *);
1223 static void bridge_xdrop(struct bridge_softc *);
1224 
1225 #define DECL_RETURN_ADDR(v) void * __single v = __unsafe_forge_single(void *, __builtin_return_address(0))
1226 
1227 static void
bridge_lock(struct bridge_softc * sc)1228 bridge_lock(struct bridge_softc *sc)
1229 {
1230 	DECL_RETURN_ADDR(lr_saved);
1231 
1232 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1233 
1234 	_BRIDGE_LOCK(sc);
1235 
1236 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1237 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1238 }
1239 
1240 static void
bridge_unlock(struct bridge_softc * sc)1241 bridge_unlock(struct bridge_softc *sc)
1242 {
1243 	DECL_RETURN_ADDR(lr_saved);
1244 
1245 	BRIDGE_LOCK_ASSERT_HELD(sc);
1246 
1247 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1248 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1249 
1250 	_BRIDGE_UNLOCK(sc);
1251 }
1252 
1253 static int
bridge_lock2ref(struct bridge_softc * sc)1254 bridge_lock2ref(struct bridge_softc *sc)
1255 {
1256 	int error = 0;
1257 	DECL_RETURN_ADDR(lr_saved);
1258 
1259 	BRIDGE_LOCK_ASSERT_HELD(sc);
1260 
1261 	if (sc->sc_iflist_xcnt > 0) {
1262 		error = EBUSY;
1263 	} else {
1264 		sc->sc_iflist_ref++;
1265 	}
1266 
1267 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1268 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1269 
1270 	_BRIDGE_UNLOCK(sc);
1271 
1272 	return error;
1273 }
1274 
1275 static void
bridge_unref(struct bridge_softc * sc)1276 bridge_unref(struct bridge_softc *sc)
1277 {
1278 	DECL_RETURN_ADDR(lr_saved);
1279 
1280 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1281 
1282 	_BRIDGE_LOCK(sc);
1283 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1284 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1285 
1286 	sc->sc_iflist_ref--;
1287 
1288 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1289 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1290 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1291 		_BRIDGE_UNLOCK(sc);
1292 		wakeup(&sc->sc_cv);
1293 	} else {
1294 		_BRIDGE_UNLOCK(sc);
1295 	}
1296 }
1297 
1298 static void
bridge_xlock(struct bridge_softc * sc)1299 bridge_xlock(struct bridge_softc *sc)
1300 {
1301 	DECL_RETURN_ADDR(lr_saved);
1302 
1303 	BRIDGE_LOCK_ASSERT_HELD(sc);
1304 
1305 	sc->sc_iflist_xcnt++;
1306 	while (sc->sc_iflist_ref > 0) {
1307 		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1308 		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1309 
1310 		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1311 
1312 		sc->lock_lr[sc->next_lock_lr] = lr_saved;
1313 		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1314 	}
1315 }
1316 
1317 #undef DECL_RETURN_ADDR
1318 
1319 static void
bridge_xdrop(struct bridge_softc * sc)1320 bridge_xdrop(struct bridge_softc *sc)
1321 {
1322 	BRIDGE_LOCK_ASSERT_HELD(sc);
1323 
1324 	sc->sc_iflist_xcnt--;
1325 }
1326 
1327 #endif /* BRIDGE_LOCK_DEBUG */
1328 
1329 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1330 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1331 {
1332 	if (m) {
1333 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1334 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1335 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1336 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1337 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1338 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1339 		    suffix ? suffix : "");
1340 	} else {
1341 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1342 	}
1343 }
1344 
1345 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1346 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1347 {
1348 	if (m) {
1349 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1350 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1351 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1352 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1353 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1354 		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
1355 		    (unsigned int)mbuf_maxlen(m),
1356 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1357 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1358 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1359 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1360 			brlog_mbuf_pkthdr(m, "", suffix);
1361 		}
1362 	} else {
1363 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1364 	}
1365 }
1366 
1367 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1368 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1369 {
1370 	mbuf_t                  n;
1371 	size_t                  i, j;
1372 	size_t                  pktlen, mlen, maxlen;
1373 	unsigned char   *ptr;
1374 
1375 	pktlen = mbuf_pkthdr_len(m);
1376 
1377 	if (offset > pktlen) {
1378 		return;
1379 	}
1380 
1381 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1382 	n = m;
1383 	mlen = mbuf_len(n);
1384 	ptr = mtod(n, unsigned char *);
1385 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1386 		if (j >= mlen) {
1387 			n = mbuf_next(n);
1388 			if (n == 0) {
1389 				break;
1390 			}
1391 			ptr = mtod(n, unsigned char *);
1392 			mlen = mbuf_len(n);
1393 			j = 0;
1394 		}
1395 		if (i >= offset) {
1396 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1397 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1398 		}
1399 	}
1400 }
1401 
1402 static void
brlog_ether_header(struct ether_header * eh)1403 brlog_ether_header(struct ether_header *eh)
1404 {
1405 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1406 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1407 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1408 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1409 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1410 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1411 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1412 	    ntohs(eh->ether_type));
1413 }
1414 
1415 static char *
ether_ntop(char * __sized_by (len)buf,size_t len,const u_char ap[ETHER_ADDR_LEN])1416 ether_ntop(char * __sized_by(len) buf, size_t len, const u_char ap[ETHER_ADDR_LEN])
1417 {
1418 	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
1419 	    ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
1420 
1421 	return buf;
1422 }
1423 
1424 static void
brlog_link(struct bridge_softc * sc)1425 brlog_link(struct bridge_softc * sc)
1426 {
1427 	int i;
1428 	uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1429 	IFNAMSIZ + ETHER_ADDR_LEN)];
1430 	struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1431 	const u_char * lladdr;
1432 	char lladdr_str[48];
1433 
1434 	memset(sdl_buffer, 0, sizeof(sdl_buffer));
1435 	sdl->sdl_family = AF_LINK;
1436 	sdl->sdl_nlen = strbuflen(sc->sc_if_xname);
1437 	sdl->sdl_alen = ETHER_ADDR_LEN;
1438 	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1439 	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1440 	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1441 	lladdr_str[0] = '\0';
1442 	for (i = 0, lladdr = CONST_LLADDR(sdl);
1443 	    i < sdl->sdl_alen;
1444 	    i++, lladdr++) {
1445 		char    byte_str[4];
1446 
1447 		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1448 		    *lladdr);
1449 		strbufcat(lladdr_str, byte_str);
1450 	}
1451 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1452 	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1453 	    " slen %d addr %s", sc->sc_if_xname,
1454 	    sdl->sdl_len, sdl->sdl_index,
1455 	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1456 	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1457 }
1458 
1459 static int
_mbuf_get_tso_mss(mbuf_t m)1460 _mbuf_get_tso_mss(mbuf_t m)
1461 {
1462 	int     mss = 0;
1463 
1464 #define _TSO_CSUM       (CSUM_TSO_IPV4 | CSUM_TSO_IPV6)
1465 	if ((m->m_pkthdr.csum_flags & _TSO_CSUM) != 0) {
1466 		mss = m->m_pkthdr.tso_segsz;
1467 	}
1468 	return mss;
1469 }
1470 
1471 /*
1472  * bridgeattach:
1473  *
1474  *	Pseudo-device attach routine.
1475  */
1476 __private_extern__ int
bridgeattach(int n)1477 bridgeattach(int n)
1478 {
1479 #pragma unused(n)
1480 	int error;
1481 
1482 	LIST_INIT(&bridge_list);
1483 
1484 #if BRIDGESTP
1485 	bstp_sys_init();
1486 #endif /* BRIDGESTP */
1487 
1488 	error = if_clone_attach(&bridge_cloner);
1489 	if (error != 0) {
1490 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1491 	}
1492 	return error;
1493 }
1494 
1495 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1496 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1497 {
1498 	mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1499 	mbuf_pkthdr_adjustlen(m, -len);
1500 }
1501 
1502 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1503 bridge_ifnet_set_attrs(struct ifnet * ifp)
1504 {
1505 	errno_t         error;
1506 
1507 	error = ifnet_set_mtu(ifp, ETHERMTU);
1508 	if (error != 0) {
1509 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1510 		goto done;
1511 	}
1512 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1513 	if (error != 0) {
1514 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1515 		goto done;
1516 	}
1517 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1518 	if (error != 0) {
1519 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1520 		goto done;
1521 	}
1522 	error = ifnet_set_flags(ifp,
1523 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1524 	    0xffff);
1525 
1526 	if (error != 0) {
1527 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1528 		goto done;
1529 	}
1530 done:
1531 	return error;
1532 }
1533 
1534 /*
1535  * bridge_clone_create:
1536  *
1537  *	Create a new bridge instance.
1538  */
1539 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1540 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1541 {
1542 #pragma unused(params)
1543 	ifnet_ref_t ifp = NULL;
1544 	struct bridge_softc *sc = NULL;
1545 	struct bridge_softc *sc2 = NULL;
1546 	struct ifnet_init_eparams init_params;
1547 	errno_t error = 0;
1548 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1549 	int fb, retry, has_hostid;
1550 
1551 	sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1552 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1553 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1554 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1555 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1556 	sc->sc_filter_flags = 0;
1557 
1558 	TAILQ_INIT(&sc->sc_iflist);
1559 
1560 	/* use the interface name as the unique id for ifp recycle */
1561 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1562 	    ifc->ifc_name, unit);
1563 	bzero(&init_params, sizeof(init_params));
1564 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1565 	init_params.len                 = sizeof(init_params);
1566 	/* Initialize our routing table. */
1567 	error = bridge_rtable_init(sc);
1568 	if (error != 0) {
1569 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1570 		goto done;
1571 	}
1572 	TAILQ_INIT(&sc->sc_spanlist);
1573 	if (if_bridge_txstart) {
1574 		init_params.start = bridge_start;
1575 	} else {
1576 		init_params.flags = IFNET_INIT_LEGACY;
1577 		init_params.output = bridge_output;
1578 	}
1579 	init_params.uniqueid_len        = strbuflen(sc->sc_if_xname);
1580 	init_params.uniqueid            = sc->sc_if_xname;
1581 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1582 	init_params.name                = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1583 	init_params.unit                = unit;
1584 	init_params.family              = IFNET_FAMILY_ETHERNET;
1585 	init_params.type                = IFT_BRIDGE;
1586 	init_params.demux               = ether_demux;
1587 	init_params.add_proto           = ether_add_proto;
1588 	init_params.del_proto           = ether_del_proto;
1589 	init_params.check_multi         = ether_check_multi;
1590 	init_params.framer_extended     = ether_frameout_extended;
1591 	init_params.softc               = sc;
1592 	init_params.ioctl               = bridge_ioctl;
1593 	init_params.detach              = bridge_detach;
1594 	init_params.broadcast_addr      = etherbroadcastaddr;
1595 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1596 
1597 	error = ifnet_allocate_extended(&init_params, &ifp);
1598 	if (error != 0) {
1599 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1600 		goto done;
1601 	}
1602 	LIST_INIT(&sc->sc_mne_list);
1603 	LIST_INIT(&sc->sc_mne_list_v6);
1604 	sc->sc_ifp = ifp;
1605 	error = bridge_ifnet_set_attrs(ifp);
1606 	if (error != 0) {
1607 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1608 		    error);
1609 		goto done;
1610 	}
1611 	/*
1612 	 * Generate an ethernet address with a locally administered address.
1613 	 *
1614 	 * Since we are using random ethernet addresses for the bridge, it is
1615 	 * possible that we might have address collisions, so make sure that
1616 	 * this hardware address isn't already in use on another bridge.
1617 	 * The first try uses the "hostid" and falls back to read_frandom();
1618 	 * for "hostid", we use the MAC address of the first-encountered
1619 	 * Ethernet-type interface that is currently configured.
1620 	 */
1621 	fb = 0;
1622 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1623 	for (retry = 1; retry != 0;) {
1624 		if (fb || has_hostid == 0) {
1625 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1626 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1627 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1628 		} else {
1629 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1630 			    ETHER_ADDR_LEN);
1631 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1632 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1633 			sc->sc_defaddr[3] =     /* stir it up a bit */
1634 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1635 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1636 			/*
1637 			 * Mix in the LSB as it's actually pretty significant,
1638 			 * see rdar://14076061
1639 			 */
1640 			sc->sc_defaddr[4] =
1641 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1642 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1643 			    sc->sc_defaddr[5];
1644 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1645 		}
1646 
1647 		fb = 1;
1648 		retry = 0;
1649 		lck_mtx_lock(&bridge_list_mtx);
1650 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1651 			if (_ether_cmp(sc->sc_defaddr,
1652 			    IF_LLADDR(sc2->sc_ifp)) == 0) {
1653 				retry = 1;
1654 			}
1655 		}
1656 		lck_mtx_unlock(&bridge_list_mtx);
1657 	}
1658 
1659 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1660 
1661 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1662 		brlog_link(sc);
1663 	}
1664 	error = ifnet_attach(ifp, NULL);
1665 	if (error != 0) {
1666 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1667 		goto done;
1668 	}
1669 
1670 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1671 	    IFT_ETHER);
1672 	if (error != 0) {
1673 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1674 		    error);
1675 		goto done;
1676 	}
1677 
1678 	ifnet_set_offload(ifp,
1679 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1680 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1681 	error = bridge_set_tso(sc);
1682 	if (error != 0) {
1683 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1684 		goto done;
1685 	}
1686 #if BRIDGESTP
1687 	bstp_attach(&sc->sc_stp, &bridge_ops);
1688 #endif /* BRIDGESTP */
1689 
1690 	lck_mtx_lock(&bridge_list_mtx);
1691 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1692 	lck_mtx_unlock(&bridge_list_mtx);
1693 
1694 	/* attach as ethernet */
1695 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1696 	    NULL, NULL);
1697 
1698 done:
1699 	if (error != 0) {
1700 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1701 		/* TBD: Clean up: sc, sc_rthash etc */
1702 	}
1703 
1704 	return error;
1705 }
1706 
1707 /*
1708  * bridge_clone_destroy:
1709  *
1710  *	Destroy a bridge instance.
1711  */
1712 static int
bridge_clone_destroy(struct ifnet * ifp)1713 bridge_clone_destroy(struct ifnet *ifp)
1714 {
1715 	struct bridge_softc * __single sc = ifp->if_softc;
1716 	struct bridge_iflist *bif;
1717 	errno_t error;
1718 
1719 	BRIDGE_LOCK(sc);
1720 	if ((sc->sc_flags & SCF_DETACHING)) {
1721 		BRIDGE_UNLOCK(sc);
1722 		return 0;
1723 	}
1724 	sc->sc_flags |= SCF_DETACHING;
1725 
1726 	bridge_ifstop(ifp, 1);
1727 
1728 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1729 
1730 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1731 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1732 
1733 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1734 	if (error != 0) {
1735 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1736 	}
1737 
1738 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1739 		bridge_delete_member(sc, bif);
1740 	}
1741 
1742 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1743 		bridge_delete_span(sc, bif);
1744 	}
1745 	BRIDGE_UNLOCK(sc);
1746 
1747 	error = ifnet_detach(ifp);
1748 	if (error != 0) {
1749 		panic("%s (%d): ifnet_detach(%p) failed %d",
1750 		    __func__, __LINE__, ifp, error);
1751 	}
1752 	return 0;
1753 }
1754 
/*
 * DRVSPEC:
 *
 *	Shared body of the SIOC[SG]DRVSPEC{32,64} cases of bridge_ioctl().
 *	It must be expanded in a scope that provides `cmd', `sc', `error',
 *	`ifd', `args', `bc' and `bridge_control_table'.
 *
 *	Steps, each of which leaves the do/while with `error' set on failure:
 *	  1. bounds-check ifd_cmd and select the control table entry;
 *	  2. reject a "get" on an entry without BC_F_COPYOUT and a "set" on an
 *	     entry with it;
 *	  3. require super-user credentials for BC_F_SUSER entries;
 *	  4. require ifd_len to equal the entry's argument size and to fit in
 *	     `args';
 *	  5. zero `args', copy the argument in for BC_F_COPYIN entries;
 *	  6. run the handler with the bridge lock held;
 *	  7. copy `args' back out for BC_F_COPYOUT entries.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
	if ((cmd == SIOCGDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) ||                      \
	    (cmd == SIOCSDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) {                      \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	if ((bc->bc_flags & BC_F_SUSER) != 0) {                         \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bzero(&args, sizeof (args));                                    \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) {                        \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args, sizeof(args));                \
	BRIDGE_UNLOCK(sc);                                              \
	if (error != 0) {                                               \
	        break;                                                  \
	}                                                               \
	if ((bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
	}                                                               \
} while (0)
1801 
1802 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1803 interface_needs_input_broadcast(struct ifnet * ifp)
1804 {
1805 	/*
1806 	 * Selectively enable input broadcast only when necessary.
1807 	 * The bridge interface itself attaches a fake protocol
1808 	 * so checking for at least two protocols means that the
1809 	 * interface is being used for something besides bridging
1810 	 * and needs to see broadcast packets from other members.
1811 	 */
1812 	return if_get_protolist(ifp, NULL, 0) >= 2;
1813 }
1814 
1815 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1816 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1817 {
1818 	boolean_t       old_input_broadcast;
1819 
1820 	old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1821 	if (input_broadcast) {
1822 		bif->bif_flags |= BIFF_INPUT_BROADCAST;
1823 	} else {
1824 		bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1825 	}
1826 	return old_input_broadcast != input_broadcast;
1827 }
1828 
1829 /*
1830  * bridge_ioctl:
1831  *
1832  *	Handle a control request from the operator.
1833  */
1834 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * __sized_by (IOCPARM_LEN (cmd))data)1835 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)) data)
1836 {
1837 	struct bridge_softc * __single sc = ifp->if_softc;
1838 	struct ifreq *ifr = (struct ifreq *)data;
1839 	struct bridge_iflist *bif;
1840 	int error = 0;
1841 
1842 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1843 
1844 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1845 	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1846 	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1847 	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1848 	    (char)IOCGROUP(cmd), cmd & 0xff);
1849 
1850 	switch (cmd) {
1851 	case SIOCAIFADDR_IN6_32:
1852 	case SIOCAIFADDR_IN6_64:
1853 	case SIOCSIFADDR:
1854 	case SIOCAIFADDR:
1855 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1856 		BRIDGE_LOCK(sc);
1857 		sc->sc_flags |= SCF_ADDRESS_ASSIGNED;
1858 		BRIDGE_UNLOCK(sc);
1859 		BRIDGE_LOG(LOG_NOTICE, 0,
1860 		    "ifp %s has address", ifp->if_xname);
1861 		break;
1862 
1863 	case SIOCGIFMEDIA32:
1864 	case SIOCGIFMEDIA64: {
1865 		// cast to 32bit version to work within bounds with 32bit userspace
1866 		struct ifmediareq32 *ifmr = (struct ifmediareq32 *)data;
1867 		user_addr_t user_addr;
1868 
1869 		user_addr = (cmd == SIOCGIFMEDIA64) ?
1870 		    ((struct ifmediareq64 *)data)->ifmu_ulist :
1871 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
1872 
1873 		ifmr->ifm_status = IFM_AVALID;
1874 		ifmr->ifm_mask = 0;
1875 		ifmr->ifm_count = 1;
1876 
1877 		BRIDGE_LOCK(sc);
1878 		if (!(sc->sc_flags & SCF_DETACHING) &&
1879 		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1880 			ifmr->ifm_status |= IFM_ACTIVE;
1881 			ifmr->ifm_active = ifmr->ifm_current =
1882 			    IFM_ETHER | IFM_AUTO;
1883 		} else {
1884 			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1885 		}
1886 		BRIDGE_UNLOCK(sc);
1887 
1888 		if (user_addr != USER_ADDR_NULL) {
1889 			error = copyout(&ifmr->ifm_current, user_addr,
1890 			    sizeof(int));
1891 		}
1892 		break;
1893 	}
1894 
1895 	case SIOCADDMULTI:
1896 	case SIOCDELMULTI:
1897 		break;
1898 
1899 	case SIOCSDRVSPEC32:
1900 	case SIOCGDRVSPEC32: {
1901 		union {
1902 			struct ifbreq ifbreq;
1903 			struct ifbifconf32 ifbifconf;
1904 			struct ifbareq32 ifbareq;
1905 			struct ifbaconf32 ifbaconf;
1906 			struct ifbrparam ifbrparam;
1907 			struct ifbropreq32 ifbropreq;
1908 		} args;
1909 		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1910 		const struct bridge_control *bridge_control_table =
1911 		    bridge_control_table32, *bc;
1912 
1913 		DRVSPEC;
1914 
1915 		break;
1916 	}
1917 	case SIOCSDRVSPEC64:
1918 	case SIOCGDRVSPEC64: {
1919 		union {
1920 			struct ifbreq ifbreq;
1921 			struct ifbifconf64 ifbifconf;
1922 			struct ifbareq64 ifbareq;
1923 			struct ifbaconf64 ifbaconf;
1924 			struct ifbrparam ifbrparam;
1925 			struct ifbropreq64 ifbropreq;
1926 		} args;
1927 		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1928 		const struct bridge_control *bridge_control_table =
1929 		    bridge_control_table64, *bc;
1930 
1931 		DRVSPEC;
1932 
1933 		break;
1934 	}
1935 
1936 	case SIOCSIFFLAGS:
1937 		if (!(ifp->if_flags & IFF_UP) &&
1938 		    (ifp->if_flags & IFF_RUNNING)) {
1939 			/*
1940 			 * If interface is marked down and it is running,
1941 			 * then stop and disable it.
1942 			 */
1943 			BRIDGE_LOCK(sc);
1944 			bridge_ifstop(ifp, 1);
1945 			BRIDGE_UNLOCK(sc);
1946 		} else if ((ifp->if_flags & IFF_UP) &&
1947 		    !(ifp->if_flags & IFF_RUNNING)) {
1948 			/*
1949 			 * If interface is marked up and it is stopped, then
1950 			 * start it.
1951 			 */
1952 			BRIDGE_LOCK(sc);
1953 			error = bridge_init(ifp);
1954 			BRIDGE_UNLOCK(sc);
1955 		}
1956 		break;
1957 
1958 	case SIOCSIFLLADDR:
1959 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1960 		    ifr->ifr_addr.sa_len);
1961 		if (error != 0) {
1962 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1963 			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1964 			    error);
1965 		}
1966 		break;
1967 
1968 	case SIOCSIFMTU:
1969 		if (ifr->ifr_mtu < 576) {
1970 			error = EINVAL;
1971 			break;
1972 		}
1973 		BRIDGE_LOCK(sc);
1974 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
1975 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1976 			BRIDGE_UNLOCK(sc);
1977 			break;
1978 		}
1979 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1980 			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1981 				BRIDGE_LOG(LOG_NOTICE, 0,
1982 				    "%s invalid MTU: %u(%s) != %d",
1983 				    sc->sc_ifp->if_xname,
1984 				    bif->bif_ifp->if_mtu,
1985 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
1986 				error = EINVAL;
1987 				break;
1988 			}
1989 		}
1990 		if (!error) {
1991 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1992 		}
1993 		BRIDGE_UNLOCK(sc);
1994 		break;
1995 
1996 	default:
1997 		error = ether_ioctl(ifp, cmd, data);
1998 		if (error != 0 && error != EOPNOTSUPP) {
1999 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2000 			    "ifp %s cmd 0x%08lx "
2001 			    "(%c%c [%lu] %c %lu) failed error: %d",
2002 			    ifp->if_xname, cmd,
2003 			    (cmd & IOC_IN) ? 'I' : ' ',
2004 			    (cmd & IOC_OUT) ? 'O' : ' ',
2005 			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
2006 			    cmd & 0xff, error);
2007 		}
2008 		break;
2009 	}
2010 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2011 
2012 	return error;
2013 }
2014 
2015 #if HAS_IF_CAP
2016 /*
2017  * bridge_mutecaps:
2018  *
2019  *	Clear or restore unwanted capabilities on the member interface
2020  */
2021 static void
bridge_mutecaps(struct bridge_softc * sc)2022 bridge_mutecaps(struct bridge_softc *sc)
2023 {
2024 	struct bridge_iflist *bif;
2025 	int enabled, mask;
2026 
2027 	/* Initial bitmask of capabilities to test */
2028 	mask = BRIDGE_IFCAPS_MASK;
2029 
2030 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2031 		/* Every member must support it or its disabled */
2032 		mask &= bif->bif_savedcaps;
2033 	}
2034 
2035 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2036 		enabled = bif->bif_ifp->if_capenable;
2037 		enabled &= ~BRIDGE_IFCAPS_STRIP;
2038 		/* strip off mask bits and enable them again if allowed */
2039 		enabled &= ~BRIDGE_IFCAPS_MASK;
2040 		enabled |= mask;
2041 
2042 		bridge_set_ifcap(sc, bif, enabled);
2043 	}
2044 }
2045 
2046 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2047 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2048 {
2049 	struct ifnet *ifp = bif->bif_ifp;
2050 	struct ifreq ifr;
2051 	int error;
2052 
2053 	bzero(&ifr, sizeof(ifr));
2054 	ifr.ifr_reqcap = set;
2055 
2056 	if (ifp->if_capenable != set) {
2057 		IFF_LOCKGIANT(ifp);
2058 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2059 		IFF_UNLOCKGIANT(ifp);
2060 		if (error) {
2061 			BRIDGE_LOG(LOG_NOTICE, 0,
2062 			    "%s error setting interface capabilities on %s",
2063 			    sc->sc_ifp->if_xname, ifp->if_xname);
2064 		}
2065 	}
2066 }
2067 #endif /* HAS_IF_CAP */
2068 
2069 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2070 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2071 {
2072 	struct ifreq    ifr;
2073 
2074 	bzero(&ifr, sizeof(ifr));
2075 	ifr.ifr_reqcap = cap_enable;
2076 	return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2077 }
2078 
2079 static const char *
enable_disable_str(boolean_t enable)2080 enable_disable_str(boolean_t enable)
2081 {
2082 	return (const char * __null_terminated)(enable ? "enable" : "disable");
2083 }
2084 
2085 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2086 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2087 {
2088 	uint32_t        cap_enable;
2089 	uint32_t        cap_supported;
2090 	boolean_t       changed = FALSE;
2091 	boolean_t       lro_enabled;
2092 
2093 	cap_supported = ifnet_capabilities_supported(ifp);
2094 	if ((cap_supported & IFCAP_LRO) == 0) {
2095 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2096 		    "%s doesn't support LRO",
2097 		    ifp->if_xname);
2098 		goto done;
2099 	}
2100 	if (bridge_allow_lro_num_seg != 0 &&
2101 	    (cap_supported & IFCAP_LRO_NUM_SEG) != 0) {
2102 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2103 		    "%s supports LRO_NUM_SEG, leaving LRO enabled",
2104 		    ifp->if_xname);
2105 		goto done;
2106 	}
2107 	cap_enable = ifnet_capabilities_enabled(ifp);
2108 	lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2109 	if (lro_enabled != enable) {
2110 		errno_t         error;
2111 
2112 		if (enable) {
2113 			cap_enable |= IFCAP_LRO;
2114 		} else {
2115 			cap_enable &= ~IFCAP_LRO;
2116 		}
2117 		error = siocsifcap(ifp, cap_enable);
2118 		if (error != 0) {
2119 			BRIDGE_LOG(LOG_NOTICE, 0,
2120 			    "%s %s failed (cap 0x%x) %d",
2121 			    ifp->if_xname,
2122 			    enable_disable_str(enable),
2123 			    cap_enable,
2124 			    error);
2125 		} else {
2126 			changed = TRUE;
2127 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2128 			    "%s %s success (cap 0x%x)",
2129 			    ifp->if_xname,
2130 			    enable_disable_str(enable),
2131 			    cap_enable);
2132 		}
2133 	}
2134 done:
2135 	return changed;
2136 }
2137 
2138 static errno_t
bridge_set_tso(struct bridge_softc * sc)2139 bridge_set_tso(struct bridge_softc *sc)
2140 {
2141 	struct bridge_iflist *bif;
2142 	u_int32_t tso_v4_mtu;
2143 	u_int32_t tso_v6_mtu;
2144 	ifnet_offload_t offload;
2145 	errno_t error = 0;
2146 
2147 	/* By default, support TSO */
2148 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2149 	tso_v4_mtu = IP_MAXPACKET;
2150 	tso_v6_mtu = IP_MAXPACKET;
2151 
2152 	/* Use the lowest common denominator of the members */
2153 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2154 		ifnet_t ifp = bif->bif_ifp;
2155 
2156 		if (ifp == NULL) {
2157 			continue;
2158 		}
2159 
2160 		if (offload & IFNET_TSO_IPV4) {
2161 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2162 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2163 					tso_v4_mtu = ifp->if_tso_v4_mtu;
2164 				}
2165 			} else {
2166 				offload &= ~IFNET_TSO_IPV4;
2167 				tso_v4_mtu = 0;
2168 			}
2169 		}
2170 		if (offload & IFNET_TSO_IPV6) {
2171 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2172 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2173 					tso_v6_mtu = ifp->if_tso_v6_mtu;
2174 				}
2175 			} else {
2176 				offload &= ~IFNET_TSO_IPV6;
2177 				tso_v6_mtu = 0;
2178 			}
2179 		}
2180 	}
2181 
2182 	if (offload != sc->sc_ifp->if_hwassist) {
2183 		error = ifnet_set_offload(sc->sc_ifp, offload);
2184 		if (error != 0) {
2185 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2186 			    "ifnet_set_offload(%s, 0x%x) failed %d",
2187 			    sc->sc_ifp->if_xname, offload, error);
2188 			goto done;
2189 		}
2190 		/*
2191 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2192 		 * as large as the interface MTU
2193 		 */
2194 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2195 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2196 				tso_v4_mtu = sc->sc_ifp->if_mtu;
2197 			}
2198 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2199 			    tso_v4_mtu);
2200 			if (error != 0) {
2201 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2202 				    "ifnet_set_tso_mtu(%s, "
2203 				    "AF_INET, %u) failed %d",
2204 				    sc->sc_ifp->if_xname,
2205 				    tso_v4_mtu, error);
2206 				goto done;
2207 			}
2208 		}
2209 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2210 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2211 				tso_v6_mtu = sc->sc_ifp->if_mtu;
2212 			}
2213 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2214 			    tso_v6_mtu);
2215 			if (error != 0) {
2216 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2217 				    "ifnet_set_tso_mtu(%s, "
2218 				    "AF_INET6, %u) failed %d",
2219 				    sc->sc_ifp->if_xname,
2220 				    tso_v6_mtu, error);
2221 				goto done;
2222 			}
2223 		}
2224 	}
2225 done:
2226 	return error;
2227 }
2228 
2229 static const char *
sanitize_ifname(char * __sized_by (IFNAMSIZ)ifname)2230 sanitize_ifname(char * __sized_by(IFNAMSIZ) ifname)
2231 {
2232 	ifname[IFNAMSIZ - 1] = '\0';
2233 	return __unsafe_null_terminated_from_indexable(ifname, &ifname[IFNAMSIZ - 1]);
2234 }
2235 
2236 /*
2237  * bridge_lookup_member:
2238  *
2239  *	Lookup a bridge member interface.
2240  */
2241 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,char * __sized_by (IFNAMSIZ)name_unsanitized)2242 bridge_lookup_member(struct bridge_softc *sc, char * __sized_by(IFNAMSIZ) name_unsanitized)
2243 {
2244 	struct bridge_iflist *bif;
2245 	struct ifnet *ifp;
2246 	const char * __null_terminated name = sanitize_ifname(name_unsanitized);
2247 
2248 	BRIDGE_LOCK_ASSERT_HELD(sc);
2249 
2250 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2251 		ifp = bif->bif_ifp;
2252 		if (strcmp(ifp->if_xname, name) == 0) {
2253 			return bif;
2254 		}
2255 	}
2256 
2257 	return NULL;
2258 }
2259 
2260 /*
2261  * bridge_lookup_member_if:
2262  *
2263  *	Lookup a bridge member interface by ifnet*.
2264  */
2265 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2266 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2267 {
2268 	struct bridge_iflist *bif;
2269 
2270 	BRIDGE_LOCK_ASSERT_HELD(sc);
2271 
2272 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2273 		if (bif->bif_ifp == member_ifp) {
2274 			return bif;
2275 		}
2276 	}
2277 
2278 	return NULL;
2279 }
2280 
2281 static inline bool
get_and_clear_promisc(mbuf_t m)2282 get_and_clear_promisc(mbuf_t m)
2283 {
2284 	bool    is_promisc;
2285 
2286 	/*
2287 	 * Need to clear the promiscuous flag otherwise the packet will be
2288 	 * dropped by DLIL after processing filters
2289 	 */
2290 	is_promisc = (mbuf_flags(m) & MBUF_PROMISC) != 0;
2291 	if (is_promisc) {
2292 		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
2293 	}
2294 	return is_promisc;
2295 }
2296 
2297 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2298 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2299     mbuf_t *data, char **frame_ptr)
2300 {
2301 #pragma unused(protocol)
2302 	errno_t error = 0;
2303 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2304 	struct bridge_softc *sc = bif->bif_sc;
2305 	int included = 0;
2306 	struct ether_header * eh_p;
2307 	size_t frmlen = 0;
2308 	bool is_promisc;
2309 	mblist list;
2310 	mbuf_t m = *data;
2311 
2312 	if ((m->m_flags & M_PROTO1)) {
2313 		goto out;
2314 	}
2315 
2316 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2317 	    *frame_ptr <= mtod(m, char *)) {
2318 		included = 1;
2319 		frmlen = mtod(m, char *) - *frame_ptr;
2320 	}
2321 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2322 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2323 	    "frmlen %lu", sc->sc_ifp->if_xname,
2324 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2325 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
2326 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2327 	    included ? "inside" : "outside", frmlen);
2328 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2329 		brlog_mbuf(m, "bridge_iff_input[", "");
2330 		brlog_ether_header((struct ether_header *)
2331 		    (void *)*frame_ptr);
2332 		brlog_mbuf_data(m, 0, 20);
2333 	}
2334 	if (included == 0) {
2335 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2336 		goto out;
2337 	}
2338 
2339 	/* Move data pointer to start of frame to the link layer header */
2340 	_mbuf_adjust_pkthdr_and_data(m, -frmlen);
2341 
2342 	/* make sure we can access the ethernet header */
2343 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2344 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2345 		    "short frame %lu < %lu",
2346 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2347 		goto out;
2348 	}
2349 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2350 		error = mbuf_pullup(data, sizeof(struct ether_header));
2351 		if (error != 0) {
2352 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2353 			    "mbuf_pullup(%lu) failed %d",
2354 			    sizeof(struct ether_header),
2355 			    error);
2356 			error = EJUSTRETURN;
2357 			goto out;
2358 		}
2359 		if (m != *data) {
2360 			m = *data;
2361 			*frame_ptr = mtod(m, char *);
2362 		}
2363 	}
2364 	mblist_init(&list);
2365 	mblist_append(&list, m);
2366 	is_promisc = get_and_clear_promisc(m);
2367 	eh_p = __unsafe_forge_single(struct ether_header *, *frame_ptr);
2368 	list = bridge_input_list(sc, ifp, eh_p, list, is_promisc);
2369 	m = *data = list.head;
2370 	if (m == NULL) {
2371 		error = EJUSTRETURN;
2372 	}
2373 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2374 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2375 		brlog_mbuf(m, "bridge_iff_input]", "");
2376 	}
2377 
2378 out:
2379 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2380 
2381 	return error;
2382 }
2383 
2384 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2385 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2386     mbuf_t *data)
2387 {
2388 #pragma unused(protocol)
2389 	errno_t error = 0;
2390 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2391 	struct bridge_softc *sc = bif->bif_sc;
2392 	mbuf_t m = *data;
2393 
2394 	if ((m->m_flags & M_PROTO1)) {
2395 		goto out;
2396 	}
2397 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2398 	    "%s from %s m 0x%llx data 0x%llx",
2399 	    sc->sc_ifp->if_xname, ifp->if_xname,
2400 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2401 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)));
2402 
2403 	error = bridge_member_output(sc, ifp, data);
2404 	if (error != 0 && error != EJUSTRETURN) {
2405 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2406 		    "bridge_member_output failed error %d",
2407 		    error);
2408 	}
2409 out:
2410 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2411 
2412 	return error;
2413 }
2414 
2415 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2416 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2417     const struct kev_msg *event_msg)
2418 {
2419 #pragma unused(protocol)
2420 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2421 	struct bridge_softc *sc = bif->bif_sc;
2422 
2423 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2424 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2425 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2426 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2427 		    "%s event_code %u - %s",
2428 		    ifp->if_xname, event_msg->event_code,
2429 		    dlil_kev_dl_code_str(event_msg->event_code));
2430 
2431 		switch (event_msg->event_code) {
2432 		case KEV_DL_LINK_OFF:
2433 		case KEV_DL_LINK_ON: {
2434 			bridge_iflinkevent(ifp);
2435 #if BRIDGESTP
2436 			bstp_linkstate(ifp, event_msg->event_code);
2437 #endif /* BRIDGESTP */
2438 			break;
2439 		}
2440 		case KEV_DL_SIFFLAGS: {
2441 			if ((ifp->if_flags & IFF_UP) == 0) {
2442 				break;
2443 			}
2444 			if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2445 				errno_t error;
2446 
2447 				error = ifnet_set_promiscuous(ifp, 1);
2448 				if (error != 0) {
2449 					BRIDGE_LOG(LOG_NOTICE, 0,
2450 					    "ifnet_set_promiscuous (%s)"
2451 					    " failed %d", ifp->if_xname,
2452 					    error);
2453 				} else {
2454 					bif->bif_flags |= BIFF_PROMISC;
2455 				}
2456 			}
2457 			if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2458 			    (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2459 				errno_t error;
2460 
2461 				error = if_allmulti(ifp, 1);
2462 				if (error != 0) {
2463 					BRIDGE_LOG(LOG_NOTICE, 0,
2464 					    "if_allmulti (%s)"
2465 					    " failed %d", ifp->if_xname,
2466 					    error);
2467 				} else {
2468 					bif->bif_flags |= BIFF_ALL_MULTI;
2469 #ifdef XNU_PLATFORM_AppleTVOS
2470 					ip6_forwarding = 1;
2471 #endif /* XNU_PLATFORM_AppleTVOS */
2472 				}
2473 			}
2474 			break;
2475 		}
2476 		case KEV_DL_IFCAP_CHANGED: {
2477 			BRIDGE_LOCK(sc);
2478 			bridge_set_tso(sc);
2479 			BRIDGE_UNLOCK(sc);
2480 			break;
2481 		}
2482 		case KEV_DL_PROTO_DETACHED:
2483 		case KEV_DL_PROTO_ATTACHED: {
2484 			bridge_proto_attach_changed(ifp);
2485 			break;
2486 		}
2487 		default:
2488 			break;
2489 		}
2490 	}
2491 }
2492 
2493 /*
2494  * bridge_iff_detached:
2495  *
2496  *      Called when our interface filter has been detached from a
2497  *      member interface.
2498  */
2499 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2500 bridge_iff_detached(void *cookie, ifnet_t ifp)
2501 {
2502 #pragma unused(cookie)
2503 	struct bridge_iflist *bif;
2504 	struct bridge_softc * __single sc = ifp->if_bridge;
2505 
2506 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2507 
2508 	/* Check if the interface is a bridge member */
2509 	if (sc != NULL) {
2510 		BRIDGE_LOCK(sc);
2511 		bif = bridge_lookup_member_if(sc, ifp);
2512 		if (bif != NULL) {
2513 			bridge_delete_member(sc, bif);
2514 		}
2515 		BRIDGE_UNLOCK(sc);
2516 		return;
2517 	}
2518 	/* Check if the interface is a span port */
2519 	lck_mtx_lock(&bridge_list_mtx);
2520 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2521 		BRIDGE_LOCK(sc);
2522 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2523 		if (ifp == bif->bif_ifp) {
2524 			bridge_delete_span(sc, bif);
2525 			break;
2526 		}
2527 		BRIDGE_UNLOCK(sc);
2528 	}
2529 	lck_mtx_unlock(&bridge_list_mtx);
2530 }
2531 
2532 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2533 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2534     char *header)
2535 {
2536 #pragma unused(protocol, packet, header)
2537 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2538 	    ifp->if_xname);
2539 	return 0;
2540 }
2541 
2542 static int
bridge_attach_protocol(struct ifnet * ifp)2543 bridge_attach_protocol(struct ifnet *ifp)
2544 {
2545 	int     error;
2546 	struct ifnet_attach_proto_param reg;
2547 
2548 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2549 	bzero(&reg, sizeof(reg));
2550 	reg.input = bridge_proto_input;
2551 
2552 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2553 	if (error) {
2554 		BRIDGE_LOG(LOG_NOTICE, 0,
2555 		    "ifnet_attach_protocol(%s) failed, %d",
2556 		    ifp->if_xname, error);
2557 	}
2558 
2559 	return error;
2560 }
2561 
2562 static int
bridge_detach_protocol(struct ifnet * ifp)2563 bridge_detach_protocol(struct ifnet *ifp)
2564 {
2565 	int     error;
2566 
2567 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2568 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2569 	if (error) {
2570 		BRIDGE_LOG(LOG_NOTICE, 0,
2571 		    "ifnet_detach_protocol(%s) failed, %d",
2572 		    ifp->if_xname, error);
2573 	}
2574 
2575 	return error;
2576 }
2577 
2578 /*
2579  * bridge_delete_member:
2580  *
2581  *	Delete the specified member interface.
2582  */
2583 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2584 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2585 {
2586 #if SKYWALK
2587 	boolean_t add_netagent = FALSE;
2588 #endif /* SKYWALK */
2589 	uint32_t    bif_flags;
2590 	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2591 	int lladdr_changed = 0, error;
2592 	uint8_t eaddr[ETHER_ADDR_LEN];
2593 	u_int32_t event_code = 0;
2594 
2595 	BRIDGE_LOCK_ASSERT_HELD(sc);
2596 	VERIFY(ifs != NULL);
2597 
2598 	/*
2599 	 * Remove the member from the list first so it cannot be found anymore
2600 	 * when we release the bridge lock below
2601 	 */
2602 	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2603 		bif->bif_flags &= ~BIFF_IN_MEMBER_LIST;
2604 		BRIDGE_XLOCK(sc);
2605 		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2606 		BRIDGE_XDROP(sc);
2607 	}
2608 	if (sc->sc_mac_nat_bif != NULL) {
2609 		if (bif == sc->sc_mac_nat_bif) {
2610 			bridge_mac_nat_disable(sc);
2611 		} else {
2612 			bridge_mac_nat_flush_entries(sc, bif);
2613 		}
2614 	}
2615 #if BRIDGESTP
2616 	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2617 		bstp_disable(&bif->bif_stp);
2618 	}
2619 #endif /* BRIDGESTP */
2620 
2621 	/*
2622 	 * If removing the interface that gave the bridge its mac address, set
2623 	 * the mac address of the bridge to the address of the next member, or
2624 	 * to its default address if no members are left.
2625 	 */
2626 	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2627 		ifnet_release(sc->sc_ifaddr);
2628 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2629 			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2630 			sc->sc_ifaddr = NULL;
2631 		} else {
2632 			struct ifnet *fif =
2633 			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2634 			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2635 			sc->sc_ifaddr = fif;
2636 			ifnet_reference(fif);   /* for sc_ifaddr */
2637 		}
2638 		lladdr_changed = 1;
2639 	}
2640 
2641 #if HAS_IF_CAP
2642 	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
2643 #endif /* HAS_IF_CAP */
2644 
2645 	error = bridge_set_tso(sc);
2646 	if (error != 0) {
2647 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2648 	}
2649 
2650 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2651 
2652 	KASSERT(bif->bif_addrcnt == 0,
2653 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2654 
2655 	/*
2656 	 * Update link status of the bridge based on its remaining members
2657 	 */
2658 	event_code = bridge_updatelinkstatus(sc);
2659 	bif_flags = bif->bif_flags;
2660 	BRIDGE_UNLOCK(sc);
2661 
2662 	/* only perform these steps if the interface is still attached */
2663 	if (ifnet_is_attached(ifs, 1)) {
2664 #if SKYWALK
2665 		add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2666 
2667 		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2668 			ifnet_detach_flowswitch_nexus(ifs);
2669 		}
2670 #endif /* SKYWALK */
2671 		/* disable promiscuous mode */
2672 		if ((bif_flags & BIFF_PROMISC) != 0) {
2673 			(void) ifnet_set_promiscuous(ifs, 0);
2674 		}
2675 		/* disable all multi */
2676 		if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2677 			(void)if_allmulti(ifs, 0);
2678 		}
2679 #if HAS_IF_CAP
2680 		/* re-enable any interface capabilities */
2681 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2682 #endif
2683 		/* detach bridge "protocol" */
2684 		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2685 			(void)bridge_detach_protocol(ifs);
2686 		}
2687 		/* detach interface filter */
2688 		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2689 			iflt_detach(bif->bif_iff_ref);
2690 		}
2691 		/* re-enable LRO */
2692 		if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2693 			(void)bridge_set_lro(ifs, TRUE);
2694 		}
2695 		ifnet_decr_iorefcnt(ifs);
2696 	}
2697 
2698 	if (lladdr_changed &&
2699 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2700 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2701 	}
2702 
2703 	if (event_code != 0) {
2704 		bridge_link_event(bifp, event_code);
2705 	}
2706 
2707 #if BRIDGESTP
2708 	bstp_destroy(&bif->bif_stp);    /* prepare to free */
2709 #endif /* BRIDGESTP */
2710 
2711 	kfree_type(struct bridge_iflist, bif);
2712 	ifs->if_bridge = NULL;
2713 #if SKYWALK
2714 	if (add_netagent && ifnet_is_attached(ifs, 1)) {
2715 		(void)ifnet_add_netagent(ifs);
2716 		ifnet_decr_iorefcnt(ifs);
2717 	}
2718 #endif /* SKYWALK */
2719 
2720 	ifnet_release(ifs);
2721 
2722 	BRIDGE_LOCK(sc);
2723 }
2724 
2725 /*
2726  * bridge_delete_span:
2727  *
2728  *	Delete the specified span interface.
2729  */
2730 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2731 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2732 {
2733 	BRIDGE_LOCK_ASSERT_HELD(sc);
2734 
2735 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2736 	    ("%s: not a span interface", __func__));
2737 
2738 	ifnet_release(bif->bif_ifp);
2739 
2740 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2741 	kfree_type(struct bridge_iflist, bif);
2742 }
2743 
2744 static int
bridge_ioctl_add(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)2745 bridge_ioctl_add(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
2746 {
2747 	struct ifbreq * __single req = arg;
2748 	struct bridge_iflist *bif = NULL;
2749 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2750 	int error = 0, lladdr_changed = 0;
2751 	uint8_t eaddr[ETHER_ADDR_LEN];
2752 	struct iff_filter iff;
2753 	u_int32_t event_code = 0;
2754 	boolean_t input_broadcast;
2755 	int media_active;
2756 	boolean_t wifi_infra = FALSE;
2757 
2758 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
2759 	if (ifs == NULL) {
2760 		return ENOENT;
2761 	}
2762 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2763 		return EINVAL;
2764 	}
2765 
2766 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2767 		return EINVAL;
2768 	}
2769 
2770 	/* If it's in the span list, it can't be a member. */
2771 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2772 		if (ifs == bif->bif_ifp) {
2773 			return EBUSY;
2774 		}
2775 	}
2776 
2777 	if (ifs->if_bridge == sc) {
2778 		return EEXIST;
2779 	}
2780 
2781 	if (ifs->if_bridge != NULL) {
2782 		return EBUSY;
2783 	}
2784 
2785 	switch (ifs->if_type) {
2786 	case IFT_ETHER:
2787 		if (strcmp(ifs->if_name, "en") == 0 &&
2788 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2789 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2790 			/* XXX is there a better way to identify Wi-Fi STA? */
2791 			wifi_infra = TRUE;
2792 		}
2793 		break;
2794 	case IFT_L2VLAN:
2795 	case IFT_IEEE8023ADLAG:
2796 		break;
2797 	default:
2798 		return EINVAL;
2799 	}
2800 
2801 	/* fail to add the interface if the MTU doesn't match */
2802 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2803 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2804 		    sc->sc_ifp->if_xname,
2805 		    ifs->if_xname);
2806 		return EINVAL;
2807 	}
2808 
2809 	if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2810 		/* there's already an interface that's doing MAC NAT */
2811 		return EBUSY;
2812 	}
2813 
2814 	/* prevent the interface from detaching while we add the member */
2815 	if (!ifnet_is_attached(ifs, 1)) {
2816 		return ENXIO;
2817 	}
2818 
2819 	/* allocate a new member */
2820 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2821 	bif->bif_ifp = ifs;
2822 	ifnet_reference(ifs);
2823 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2824 #if HAS_IF_CAP
2825 	bif->bif_savedcaps = ifs->if_capenable;
2826 #endif /* HAS_IF_CAP */
2827 	bif->bif_sc = sc;
2828 	if (wifi_infra) {
2829 		(void)bridge_mac_nat_enable(sc, bif);
2830 	}
2831 
2832 	/* Allow the first Ethernet member to define the MTU */
2833 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2834 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2835 	}
2836 
2837 	/*
2838 	 * Assign the interface's MAC address to the bridge if it's the first
2839 	 * member and the MAC address of the bridge has not been changed from
2840 	 * the default (randomly) generated one.
2841 	 */
2842 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2843 	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2844 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2845 		sc->sc_ifaddr = ifs;
2846 		ifnet_reference(ifs);   /* for sc_ifaddr */
2847 		lladdr_changed = 1;
2848 	}
2849 
2850 	ifs->if_bridge = sc;
2851 #if BRIDGESTP
2852 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2853 #endif /* BRIDGESTP */
2854 
2855 #if HAS_IF_CAP
2856 	/* Set interface capabilities to the intersection set of all members */
2857 	bridge_mutecaps(sc);
2858 #endif /* HAS_IF_CAP */
2859 
2860 	/*
2861 	 * Respect lock ordering with DLIL lock for the following operations
2862 	 */
2863 	BRIDGE_UNLOCK(sc);
2864 
2865 	/* enable promiscuous mode */
2866 	error = ifnet_set_promiscuous(ifs, 1);
2867 	switch (error) {
2868 	case 0:
2869 		bif->bif_flags |= BIFF_PROMISC;
2870 		break;
2871 	case ENETDOWN:
2872 	case EPWROFF:
2873 		BRIDGE_LOG(LOG_NOTICE, 0,
2874 		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2875 		    ifs->if_xname, error);
2876 		/* Ignore error when device is not up */
2877 		error = 0;
2878 		break;
2879 	default:
2880 		BRIDGE_LOG(LOG_NOTICE, 0,
2881 		    "ifnet_set_promiscuous(%s) failed %d",
2882 		    ifs->if_xname, error);
2883 		BRIDGE_LOCK(sc);
2884 		goto out;
2885 	}
2886 	if (wifi_infra) {
2887 		int this_error;
2888 
2889 		/* Wi-Fi doesn't really support promiscuous, set allmulti */
2890 		bif->bif_flags |= BIFF_WIFI_INFRA;
2891 		this_error = if_allmulti(ifs, 1);
2892 		if (this_error == 0) {
2893 			bif->bif_flags |= BIFF_ALL_MULTI;
2894 #ifdef XNU_PLATFORM_AppleTVOS
2895 			ip6_forwarding = 1;
2896 #endif /* XNU_PLATFORM_AppleTVOS */
2897 		} else {
2898 			BRIDGE_LOG(LOG_NOTICE, 0,
2899 			    "if_allmulti(%s) failed %d, ignoring",
2900 			    ifs->if_xname, this_error);
2901 		}
2902 	}
2903 #if SKYWALK
2904 	/* ensure that the flowswitch is present for native interface */
2905 	if (SKYWALK_NATIVE(ifs)) {
2906 		if (ifnet_attach_flowswitch_nexus(ifs)) {
2907 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2908 		}
2909 	}
2910 	/* remove the netagent on the flowswitch (rdar://75050182) */
2911 	if (if_is_fsw_netagent_enabled()) {
2912 		(void)ifnet_remove_netagent(ifs);
2913 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2914 	}
2915 #endif /* SKYWALK */
2916 
2917 	/*
2918 	 * install an interface filter
2919 	 */
2920 	memset(&iff, 0, sizeof(struct iff_filter));
2921 	iff.iff_cookie = bif;
2922 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2923 	iff.iff_input = bridge_iff_input;
2924 	iff.iff_output = bridge_iff_output;
2925 	iff.iff_event = bridge_iff_event;
2926 	iff.iff_detached = bridge_iff_detached;
2927 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2928 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL | DLIL_IFF_BRIDGE);
2929 	if (error != 0) {
2930 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2931 		BRIDGE_LOCK(sc);
2932 		goto out;
2933 	}
2934 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
2935 
2936 	/*
2937 	 * install a dummy "bridge" protocol
2938 	 */
2939 	if ((error = bridge_attach_protocol(ifs)) != 0) {
2940 		if (error != 0) {
2941 			BRIDGE_LOG(LOG_NOTICE, 0,
2942 			    "bridge_attach_protocol failed %d", error);
2943 			BRIDGE_LOCK(sc);
2944 			goto out;
2945 		}
2946 	}
2947 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
2948 
2949 	if (lladdr_changed &&
2950 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2951 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2952 	}
2953 
2954 	media_active = interface_media_active(ifs);
2955 
2956 	/* disable LRO if needed */
2957 	if (bridge_set_lro(ifs, FALSE)) {
2958 		bif->bif_flags |= BIFF_LRO_DISABLED;
2959 	}
2960 
2961 	/*
2962 	 * No failures past this point. Add the member to the list.
2963 	 */
2964 	BRIDGE_LOCK(sc);
2965 	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2966 	BRIDGE_XLOCK(sc);
2967 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2968 	BRIDGE_XDROP(sc);
2969 
2970 	/* cache the member link status */
2971 	if (media_active != 0) {
2972 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2973 	} else {
2974 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2975 	}
2976 
2977 	/* the new member may change the link status of the bridge interface */
2978 	event_code = bridge_updatelinkstatus(sc);
2979 
2980 	/* check whether we need input broadcast or not */
2981 	input_broadcast = interface_needs_input_broadcast(ifs);
2982 	bif_set_input_broadcast(bif, input_broadcast);
2983 	BRIDGE_UNLOCK(sc);
2984 
2985 	if (event_code != 0) {
2986 		bridge_link_event(bifp, event_code);
2987 	}
2988 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2989 	    "%s input broadcast %s", ifs->if_xname,
2990 	    input_broadcast ? "ENABLED" : "DISABLED");
2991 
2992 	BRIDGE_LOCK(sc);
2993 	bridge_set_tso(sc);
2994 
2995 out:
2996 	/* allow the interface to detach */
2997 	ifnet_decr_iorefcnt(ifs);
2998 
2999 	if (error != 0) {
3000 		if (bif != NULL) {
3001 			bridge_delete_member(sc, bif);
3002 		}
3003 	} else if (IFNET_IS_VMNET(ifs)) {
3004 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
3005 	}
3006 
3007 	return error;
3008 }
3009 
3010 static int
bridge_ioctl_del(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3011 bridge_ioctl_del(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3012 {
3013 	struct ifbreq * __single req = arg;
3014 	struct bridge_iflist *bif;
3015 
3016 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3017 	if (bif == NULL) {
3018 		return ENOENT;
3019 	}
3020 
3021 	bridge_delete_member(sc, bif);
3022 
3023 	return 0;
3024 }
3025 
3026 static int
bridge_ioctl_purge(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3027 bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3028 {
3029 #pragma unused(sc, arg, arg_len)
3030 	return 0;
3031 }
3032 
3033 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3034 bridge_ioctl_gifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3035 {
3036 	struct ifbreq * __single req = arg;
3037 	struct bridge_iflist *bif;
3038 
3039 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3040 	if (bif == NULL) {
3041 		return ENOENT;
3042 	}
3043 
3044 	struct bstp_port *bp;
3045 
3046 	bp = &bif->bif_stp;
3047 	req->ifbr_state = bp->bp_state;
3048 	req->ifbr_priority = bp->bp_priority;
3049 	req->ifbr_path_cost = bp->bp_path_cost;
3050 	req->ifbr_proto = bp->bp_protover;
3051 	req->ifbr_role = bp->bp_role;
3052 	req->ifbr_stpflags = bp->bp_flags;
3053 	req->ifbr_ifsflags = bif->bif_ifflags;
3054 
3055 	/* Copy STP state options as flags */
3056 	if (bp->bp_operedge) {
3057 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
3058 	}
3059 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3060 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3061 	}
3062 	if (bp->bp_ptp_link) {
3063 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3064 	}
3065 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3066 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3067 	}
3068 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3069 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3070 	}
3071 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3072 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3073 	}
3074 
3075 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3076 	req->ifbr_addrcnt = bif->bif_addrcnt;
3077 	req->ifbr_addrmax = bif->bif_addrmax;
3078 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
3079 
3080 	return 0;
3081 }
3082 
3083 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3084 bridge_ioctl_sifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3085 {
3086 	struct ifbreq * __single req = arg;
3087 	struct bridge_iflist *bif;
3088 #if BRIDGESTP
3089 	struct bstp_port *bp;
3090 #endif /* BRIDGESTP */
3091 	errno_t error;
3092 	uint32_t ifsflags;
3093 
3094 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3095 	if (bif == NULL) {
3096 		return ENOENT;
3097 	}
3098 
3099 	ifsflags = req->ifbr_ifsflags;
3100 	if (ifsflags & IFBIF_SPAN) {
3101 		/* SPAN is readonly */
3102 		return EINVAL;
3103 	}
3104 #define CHECKSUM_VIRTIO (IFBIF_CHECKSUM_OFFLOAD | IFBIF_USES_VIRTIO)
3105 	if ((ifsflags & CHECKSUM_VIRTIO) == CHECKSUM_VIRTIO) {
3106 		/* can't specify checksum and virtio */
3107 		return EINVAL;
3108 	}
3109 	if ((ifsflags & IFBIF_MAC_NAT) != 0 &&
3110 	    ((ifsflags & CHECKSUM_VIRTIO) != 0 ||
3111 	    (bif->bif_flags & BIFF_HOST_FILTER) != 0)) {
3112 		/* MAC-NAT can't be used with checksum, host filter, or virtio */
3113 		return EINVAL;
3114 	}
3115 	if ((ifsflags & IFBIF_MAC_NAT) != 0) {
3116 		error = bridge_mac_nat_enable(sc, bif);
3117 		if (error != 0) {
3118 			return error;
3119 		}
3120 	} else if (sc->sc_mac_nat_bif == bif) {
3121 		bridge_mac_nat_disable(sc);
3122 	}
3123 
3124 #if BRIDGESTP
3125 	if (ifsflags & IFBIF_STP) {
3126 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3127 			error = bstp_enable(&bif->bif_stp);
3128 			if (error) {
3129 				return error;
3130 			}
3131 		}
3132 	} else {
3133 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3134 			bstp_disable(&bif->bif_stp);
3135 		}
3136 	}
3137 
3138 	/* Pass on STP flags */
3139 	bp = &bif->bif_stp;
3140 	bstp_set_edge(bp, ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3141 	bstp_set_autoedge(bp, ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3142 	bstp_set_ptp(bp, ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3143 	bstp_set_autoptp(bp, ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3144 #else /* !BRIDGESTP */
3145 	if (ifsflags & IFBIF_STP) {
3146 		return EOPNOTSUPP;
3147 	}
3148 #endif /* !BRIDGESTP */
3149 
3150 	/* Save the bits relating to the bridge */
3151 	bif->bif_ifflags = ifsflags & IFBIFMASK;
3152 
3153 	return 0;
3154 }
3155 
3156 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3157 bridge_ioctl_scache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3158 {
3159 	struct ifbrparam * __single param = arg;
3160 
3161 	sc->sc_brtmax = param->ifbrp_csize;
3162 	bridge_rttrim(sc);
3163 	return 0;
3164 }
3165 
3166 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3167 bridge_ioctl_gcache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3168 {
3169 	struct ifbrparam * __single param = arg;
3170 
3171 	param->ifbrp_csize = sc->sc_brtmax;
3172 
3173 	return 0;
3174 }
3175 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of the 32-bit and 64-bit "get interface list" ioctl
 *	handlers.  It expects `sc', `bifc' and `error' to be in scope in
 *	the function that expands it.
 *
 *	When bifc->ifbic_len is 0 only the required buffer size is
 *	reported.  Otherwise one `struct ifbreq' per member and per span
 *	interface is collected in a temporary buffer, as many as fit in
 *	bifc->ifbic_len, and copied out to bifc->ifbic_req.
 *
 *	The bridge lock is dropped around the allocation and around
 *	copyout(), and is held again when the macro completes.  The
 *	expanding function returns early with 0 when only the size was
 *	requested, and with ENOMEM when the temporary buffer cannot be
 *	allocated.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
	        count++;                                                \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
	        count++;                                                \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                \
	BRIDGE_LOCK(sc);                                                \
	/* a zero-length request legitimately yields no buffer */      \
	if (outbuf == NULL && buflen != 0) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	/* the lists may have changed while unlocked; len bounds us */ \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	/* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq, sizeof(breq)); \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	if (bifc->ifbic_len > 0) {                                      \
	        error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);\
	}                                                               \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
3240 
3241 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3242 bridge_ioctl_gifs64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3243 {
3244 	struct ifbifconf64 * __single bifc = arg;
3245 	int error = 0;
3246 
3247 	BRIDGE_IOCTL_GIFS;
3248 
3249 	return error;
3250 }
3251 
3252 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3253 bridge_ioctl_gifs32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3254 {
3255 	struct ifbifconf32 * __single bifc = arg;
3256 	int error = 0;
3257 
3258 	BRIDGE_IOCTL_GIFS;
3259 
3260 	return error;
3261 }
3262 
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of the 32-bit and 64-bit "get address table" ioctl
 *	handlers.  It expects `sc', `bac', `bareq' and `error' to be in
 *	scope in the function that expands it, and it always returns from
 *	that function.
 *
 *	One `bareq' record per entry of sc_rtlist is collected in a
 *	temporary buffer, as many as fit in bac->ifbac_len, and copied
 *	out to bac->ifbac_req.  ifba_expire is the remaining lifetime of
 *	a dynamic entry and 0 for every other entry, including a dynamic
 *	entry whose expiry time has already passed.
 *
 *	The bridge lock is dropped around the allocation and around
 *	copyout(), and is held again on return.  ENOMEM is returned when
 *	the temporary buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                    \
	BRIDGE_LOCK(sc);                                                    \
	/* an empty table legitimately yields no buffer */                 \
	if (outbuf == NULL && buflen != 0)                                  \
	        return (ENOMEM);                                            \
                                                                            \
	count = 0;                                                          \
	buf = outbuf;                                                       \
	/* the table may have changed while unlocked; len bounds us */     \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	        /* bareq is reused: never inherit the previous expiry */   \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire)                          \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        if (bac->ifbac_len > 0) {                                   \
	                error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);\
	        }                                                           \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3319 
3320 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3321 bridge_ioctl_rts64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3322 {
3323 	struct ifbaconf64 * __single bac = arg;
3324 	struct ifbareq64 bareq;
3325 	int error = 0;
3326 
3327 	BRIDGE_IOCTL_RTS;
3328 	return error;
3329 }
3330 
3331 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3332 bridge_ioctl_rts32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3333 {
3334 	struct ifbaconf32 * __single bac = arg;
3335 	struct ifbareq32 bareq;
3336 	int error = 0;
3337 
3338 	BRIDGE_IOCTL_RTS;
3339 	return error;
3340 }
3341 
3342 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3343 bridge_ioctl_saddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3344 {
3345 	struct ifbareq32 * __single req = arg;
3346 	struct bridge_iflist *bif;
3347 	int error;
3348 
3349 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3350 	if (bif == NULL) {
3351 		return ENOENT;
3352 	}
3353 
3354 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3355 	    req->ifba_flags);
3356 
3357 	return error;
3358 }
3359 
3360 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3361 bridge_ioctl_saddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3362 {
3363 	struct ifbareq64 * __single req = arg;
3364 	struct bridge_iflist *bif;
3365 	int error;
3366 
3367 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3368 	if (bif == NULL) {
3369 		return ENOENT;
3370 	}
3371 
3372 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3373 	    req->ifba_flags);
3374 
3375 	return error;
3376 }
3377 
3378 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3379 bridge_ioctl_sto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3380 {
3381 	struct ifbrparam * __single param = arg;
3382 
3383 	sc->sc_brttimeout = param->ifbrp_ctime;
3384 	return 0;
3385 }
3386 
3387 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3388 bridge_ioctl_gto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3389 {
3390 	struct ifbrparam * __single param = arg;
3391 
3392 	param->ifbrp_ctime = sc->sc_brttimeout;
3393 	return 0;
3394 }
3395 
3396 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3397 bridge_ioctl_daddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3398 {
3399 	struct ifbareq32 * __single req = arg;
3400 
3401 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3402 }
3403 
3404 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3405 bridge_ioctl_daddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3406 {
3407 	struct ifbareq64 * __single req = arg;
3408 
3409 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3410 }
3411 
3412 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3413 bridge_ioctl_flush(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3414 {
3415 	struct ifbreq * __single req = arg;
3416 
3417 	bridge_rtflush(sc, req->ifbr_ifsflags);
3418 	return 0;
3419 }
3420 
3421 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3422 bridge_ioctl_gpri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3423 {
3424 	struct ifbrparam * __single param = arg;
3425 	struct bstp_state *bs = &sc->sc_stp;
3426 
3427 	param->ifbrp_prio = bs->bs_bridge_priority;
3428 	return 0;
3429 }
3430 
3431 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3432 bridge_ioctl_spri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3433 {
3434 #if BRIDGESTP
3435 	struct ifbrparam *param = arg;
3436 
3437 	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3438 #else /* !BRIDGESTP */
3439 #pragma unused(sc, arg)
3440 	return EOPNOTSUPP;
3441 #endif /* !BRIDGESTP */
3442 }
3443 
3444 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3445 bridge_ioctl_ght(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3446 {
3447 	struct ifbrparam * __single param = arg;
3448 	struct bstp_state *bs = &sc->sc_stp;
3449 
3450 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3451 	return 0;
3452 }
3453 
3454 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3455 bridge_ioctl_sht(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3456 {
3457 #if BRIDGESTP
3458 	struct ifbrparam *param = arg;
3459 
3460 	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3461 #else /* !BRIDGESTP */
3462 #pragma unused(sc, arg)
3463 	return EOPNOTSUPP;
3464 #endif /* !BRIDGESTP */
3465 }
3466 
3467 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3468 bridge_ioctl_gfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3469 {
3470 	struct ifbrparam * __single param;
3471 	struct bstp_state *bs;
3472 
3473 	param = arg;
3474 	bs = &sc->sc_stp;
3475 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3476 	return 0;
3477 }
3478 
3479 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3480 bridge_ioctl_sfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3481 {
3482 #if BRIDGESTP
3483 	struct ifbrparam *param = arg;
3484 
3485 	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3486 #else /* !BRIDGESTP */
3487 #pragma unused(sc, arg)
3488 	return EOPNOTSUPP;
3489 #endif /* !BRIDGESTP */
3490 }
3491 
3492 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3493 bridge_ioctl_gma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3494 {
3495 	struct ifbrparam * __single param;
3496 	struct bstp_state *bs;
3497 
3498 	param = arg;
3499 	bs = &sc->sc_stp;
3500 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3501 	return 0;
3502 }
3503 
3504 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3505 bridge_ioctl_sma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3506 {
3507 #if BRIDGESTP
3508 	struct ifbrparam *param = arg;
3509 
3510 	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3511 #else /* !BRIDGESTP */
3512 #pragma unused(sc, arg)
3513 	return EOPNOTSUPP;
3514 #endif /* !BRIDGESTP */
3515 }
3516 
3517 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3518 bridge_ioctl_sifprio(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3519 {
3520 #if BRIDGESTP
3521 	struct ifbreq *req = arg;
3522 	struct bridge_iflist *bif;
3523 
3524 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3525 	if (bif == NULL) {
3526 		return ENOENT;
3527 	}
3528 
3529 	return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3530 #else /* !BRIDGESTP */
3531 #pragma unused(sc, arg)
3532 	return EOPNOTSUPP;
3533 #endif /* !BRIDGESTP */
3534 }
3535 
3536 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3537 bridge_ioctl_sifcost(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3538 {
3539 #if BRIDGESTP
3540 	struct ifbreq *req = arg;
3541 	struct bridge_iflist *bif;
3542 
3543 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3544 	if (bif == NULL) {
3545 		return ENOENT;
3546 	}
3547 
3548 	return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3549 #else /* !BRIDGESTP */
3550 #pragma unused(sc, arg)
3551 	return EOPNOTSUPP;
3552 #endif /* !BRIDGESTP */
3553 }
3554 
3555 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3556 bridge_ioctl_gfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3557 {
3558 	struct ifbrparam * __single param = arg;
3559 
3560 	param->ifbrp_filter = sc->sc_filter_flags;
3561 
3562 	return 0;
3563 }
3564 
3565 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3566 bridge_ioctl_sfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3567 {
3568 	struct ifbrparam * __single param = arg;
3569 
3570 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3571 		return EINVAL;
3572 	}
3573 
3574 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3575 		return EINVAL;
3576 	}
3577 
3578 	sc->sc_filter_flags = param->ifbrp_filter;
3579 
3580 	return 0;
3581 }
3582 
3583 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3584 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3585 {
3586 	struct ifbreq * __single req = arg;
3587 	struct bridge_iflist *bif;
3588 
3589 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3590 	if (bif == NULL) {
3591 		return ENOENT;
3592 	}
3593 
3594 	bif->bif_addrmax = req->ifbr_addrmax;
3595 	return 0;
3596 }
3597 
3598 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3599 bridge_ioctl_addspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3600 {
3601 	struct ifbreq * __single req = arg;
3602 	struct bridge_iflist *bif = NULL;
3603 	struct ifnet *ifs;
3604 
3605 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3606 	if (ifs == NULL) {
3607 		return ENOENT;
3608 	}
3609 
3610 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3611 		return EINVAL;
3612 	}
3613 
3614 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3615 	if (ifs == bif->bif_ifp) {
3616 		return EBUSY;
3617 	}
3618 
3619 	if (ifs->if_bridge != NULL) {
3620 		return EBUSY;
3621 	}
3622 
3623 	switch (ifs->if_type) {
3624 	case IFT_ETHER:
3625 	case IFT_L2VLAN:
3626 	case IFT_IEEE8023ADLAG:
3627 		break;
3628 	default:
3629 		return EINVAL;
3630 	}
3631 
3632 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3633 
3634 	bif->bif_ifp = ifs;
3635 	bif->bif_ifflags = IFBIF_SPAN;
3636 
3637 	ifnet_reference(bif->bif_ifp);
3638 
3639 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3640 
3641 	return 0;
3642 }
3643 
3644 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3645 bridge_ioctl_delspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3646 {
3647 	struct ifbreq * __single req = arg;
3648 	struct bridge_iflist *bif;
3649 	struct ifnet *ifs;
3650 
3651 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3652 	if (ifs == NULL) {
3653 		return ENOENT;
3654 	}
3655 
3656 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3657 	if (ifs == bif->bif_ifp) {
3658 		break;
3659 	}
3660 
3661 	if (bif == NULL) {
3662 		return ENOENT;
3663 	}
3664 
3665 	bridge_delete_span(sc, bif);
3666 
3667 	return 0;
3668 }
3669 
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32() and bridge_ioctl_gbparam64().
 *	Copies values from the bridge's STP state (sc->sc_stp) into `req';
 *	both `sc' and `req' must be in scope where the macro is expanded.
 *	The three timer values are shifted right by 8 bits on the way out.
 *	The root port is reported as the if_index of its interface, or 0
 *	when bs_root_port is NULL.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *stp = &sc->sc_stp;                           \
	struct bstp_port *rootp = stp->bs_root_port;                    \
                                                                        \
	req->ifbop_maxage = stp->bs_bridge_max_age >> 8;                \
	req->ifbop_hellotime = stp->bs_bridge_htime >> 8;               \
	req->ifbop_fwddelay = stp->bs_bridge_fdelay >> 8;               \
                                                                        \
	if (rootp != NULL)                                              \
	        req->ifbop_root_port = rootp->bp_ifp->if_index;         \
	else                                                            \
	        req->ifbop_root_port = 0;                               \
                                                                        \
	req->ifbop_holdcount = stp->bs_txholdcount;                     \
	req->ifbop_priority = stp->bs_bridge_priority;                  \
	req->ifbop_protocol = stp->bs_protover;                         \
	req->ifbop_root_path_cost = stp->bs_root_pv.pv_cost;            \
	req->ifbop_bridgeid = stp->bs_bridge_pv.pv_dbridge_id;          \
	req->ifbop_designated_root = stp->bs_root_pv.pv_root_id;        \
	req->ifbop_designated_bridge = stp->bs_root_pv.pv_dbridge_id;   \
	req->ifbop_last_tc_time.tv_sec = stp->bs_last_tc_time.tv_sec;   \
	req->ifbop_last_tc_time.tv_usec = stp->bs_last_tc_time.tv_usec; \
} while (0)
3694 
3695 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3696 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3697 {
3698 	struct ifbropreq32 * __single req = arg;
3699 
3700 	BRIDGE_IOCTL_GBPARAM;
3701 	return 0;
3702 }
3703 
3704 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3705 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3706 {
3707 	struct ifbropreq64 * __single req = arg;
3708 
3709 	BRIDGE_IOCTL_GBPARAM;
3710 	return 0;
3711 }
3712 
3713 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3714 bridge_ioctl_grte(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3715 {
3716 	struct ifbrparam * __single param = arg;
3717 
3718 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3719 	return 0;
3720 }
3721 
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of bridge_ioctl_gifsstp32() and bridge_ioctl_gifsstp64().
 *	Expects `sc', `bifstp' and `error' in the expanding function and
 *	returns from that function.
 *
 *	- If no member has IFBIF_STP set, or the caller passed a zero
 *	  ifbpstp_len, only the required buffer size is written back to
 *	  ifbpstp_len and 0 is returned.
 *	- Otherwise one struct ifbpstpreq per IFBIF_STP member is built in a
 *	  temporary kernel buffer, limited to what fits in both the caller's
 *	  length and the size computed by the first pass, and copied out to
 *	  ifbpstp_req.  ifbpstp_len is set to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout, and is held again on every return path.  ENOMEM is returned
 *	if the temporary buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0)                \
	                count++;                                        \
	}                                                               \
                                                                        \
	buflen = sizeof (bpreq) * count;                                \
	/* size query, or nothing to report: no buffer is needed */     \
	if (buflen == 0 || bifstp->ifbpstp_len == 0) {                  \
	        bifstp->ifbpstp_len = buflen;                           \
	        return (0);                                             \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                \
	BRIDGE_LOCK(sc);                                                \
	/* never hand a failed allocation to memcpy() below */          \
	if (outbuf == NULL)                                             \
	        return (ENOMEM);                                        \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	/* the list may have changed while unlocked; stay in buflen */  \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (bpreq))                               \
	                break;                                          \
                                                                        \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0)                \
	                continue;                                       \
                                                                        \
	        bp = &bif->bif_stp;                                     \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
                                                                        \
	        memcpy(buf, &bpreq, sizeof (bpreq));                    \
	        count++;                                                \
	        buf += sizeof (bpreq);                                  \
	        len -= sizeof (bpreq);                                  \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	if (bifstp->ifbpstp_len > 0) {                                  \
	        error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);\
	}                                                               \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3779 
3780 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3781 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3782 {
3783 	struct ifbpstpconf32 * __single bifstp = arg;
3784 	int error = 0;
3785 
3786 	BRIDGE_IOCTL_GIFSSTP;
3787 	return error;
3788 }
3789 
3790 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3791 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3792 {
3793 	struct ifbpstpconf64 * __single bifstp = arg;
3794 	int error = 0;
3795 
3796 	BRIDGE_IOCTL_GIFSSTP;
3797 	return error;
3798 }
3799 
3800 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3801 bridge_ioctl_sproto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3802 {
3803 #if BRIDGESTP
3804 	struct ifbrparam *param = arg;
3805 
3806 	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3807 #else /* !BRIDGESTP */
3808 #pragma unused(sc, arg)
3809 	return EOPNOTSUPP;
3810 #endif /* !BRIDGESTP */
3811 }
3812 
3813 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3814 bridge_ioctl_stxhc(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3815 {
3816 #if BRIDGESTP
3817 	struct ifbrparam *param = arg;
3818 
3819 	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3820 #else /* !BRIDGESTP */
3821 #pragma unused(sc, arg)
3822 	return EOPNOTSUPP;
3823 #endif /* !BRIDGESTP */
3824 }
3825 
3826 
3827 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3828 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3829 {
3830 	struct ifbrhostfilter * __single req = arg;
3831 	struct bridge_iflist *bif;
3832 
3833 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3834 	if (bif == NULL) {
3835 		return ENOENT;
3836 	}
3837 
3838 	bzero(req, sizeof(struct ifbrhostfilter));
3839 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3840 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3841 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3842 		    ETHER_ADDR_LEN);
3843 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3844 	}
3845 	return 0;
3846 }
3847 
3848 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3849 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3850 {
3851 	struct ifbrhostfilter * __single req = arg;
3852 	struct bridge_iflist *bif;
3853 
3854 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3855 	if (bif == NULL) {
3856 		return ENOENT;
3857 	}
3858 	if (bif_has_mac_nat(bif)) {
3859 		/* no host filter with MAC-NAT */
3860 		return EINVAL;
3861 	}
3862 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3863 		bif->bif_flags |= BIFF_HOST_FILTER;
3864 
3865 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3866 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3867 			    ETHER_ADDR_LEN);
3868 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3869 			    ETHER_ADDR_LEN) != 0) {
3870 				bif->bif_flags |= BIFF_HF_HWSRC;
3871 			} else {
3872 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3873 			}
3874 		}
3875 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3876 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3877 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3878 				bif->bif_flags |= BIFF_HF_IPSRC;
3879 			} else {
3880 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3881 			}
3882 		}
3883 	} else {
3884 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3885 		    BIFF_HF_IPSRC);
3886 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3887 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3888 	}
3889 
3890 	return 0;
3891 }
3892 
3893 static char *__indexable
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * __indexable buf,unsigned int * len_p)3894 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3895     unsigned int * count_p, char *__indexable buf,
3896     unsigned int * len_p)
3897 {
3898 	unsigned int            count = *count_p;
3899 	struct ifbrmne          ifbmne;
3900 	unsigned int            len = *len_p;
3901 	struct mac_nat_entry    *mne;
3902 	unsigned long           now;
3903 
3904 	bzero(&ifbmne, sizeof(ifbmne));
3905 	LIST_FOREACH(mne, list, mne_list) {
3906 		if (len < sizeof(ifbmne)) {
3907 			break;
3908 		}
3909 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3910 		    "%s", mne->mne_bif->bif_ifp->if_xname);
3911 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3912 		    sizeof(ifbmne.ifbmne_mac));
3913 		now = (unsigned long) net_uptime();
3914 		if (now < mne->mne_expire) {
3915 			ifbmne.ifbmne_expire = mne->mne_expire - now;
3916 		} else {
3917 			ifbmne.ifbmne_expire = 0;
3918 		}
3919 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3920 			ifbmne.ifbmne_af = AF_INET6;
3921 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3922 		} else {
3923 			ifbmne.ifbmne_af = AF_INET;
3924 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
3925 		}
3926 		memcpy(buf, &ifbmne, sizeof(ifbmne));
3927 		count++;
3928 		buf += sizeof(ifbmne);
3929 		len -= sizeof(ifbmne);
3930 	}
3931 	*count_p = count;
3932 	*len_p = len;
3933 	return buf;
3934 }
3935 
3936 /*
3937  * bridge_ioctl_gmnelist()
3938  *   Perform the get mac_nat_entry list ioctl.
3939  *
3940  * Note:
3941  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3942  *   field size/layout except for the last field ifbml_buf, the user-supplied
3943  *   buffer pointer. That is passed in separately via the 'user_addr'
3944  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3945  */
3946 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3947 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3948     user_addr_t user_addr)
3949 {
3950 	unsigned int            count;
3951 	char                    *buf;
3952 	int                     error = 0;
3953 	char                    *outbuf = NULL;
3954 	struct mac_nat_entry    *mne;
3955 	unsigned int            buflen;
3956 	unsigned int            len;
3957 
3958 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
3959 	count = 0;
3960 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3961 		count++;
3962 	}
3963 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3964 		count++;
3965 	}
3966 	buflen = sizeof(struct ifbrmne) * count;
3967 	if (buflen == 0 || mnl->ifbml_len == 0) {
3968 		mnl->ifbml_len = buflen;
3969 		return error;
3970 	}
3971 	BRIDGE_UNLOCK(sc);
3972 	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3973 	BRIDGE_LOCK(sc);
3974 	count = 0;
3975 	buf = outbuf;
3976 	len = min(mnl->ifbml_len, buflen);
3977 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3978 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3979 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
3980 	BRIDGE_UNLOCK(sc);
3981 	if (mnl->ifbml_len > 0) {
3982 		error = copyout(outbuf, user_addr, mnl->ifbml_len);
3983 	}
3984 	kfree_data(outbuf, buflen);
3985 	BRIDGE_LOCK(sc);
3986 	return error;
3987 }
3988 
3989 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3990 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3991 {
3992 	struct ifbrmnelist64 * __single mnl = arg;
3993 
3994 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3995 }
3996 
3997 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3998 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3999 {
4000 	struct ifbrmnelist32 * __single mnl = arg;
4001 
4002 	return bridge_ioctl_gmnelist(sc, arg,
4003 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
4004 }
4005 
4006 /*
4007  * bridge_ioctl_gifstats()
4008  *   Return per-member stats.
4009  *
4010  * Note:
4011  *   The ifbrmreq32 and ifbrmreq64 structures have the same
4012  *   field size/layout except for the last field brmr_buf, the user-supplied
4013  *   buffer pointer. That is passed in separately via the 'user_addr'
4014  *   parameter from the respective 32-bit or 64-bit ioctl routine.
4015  */
4016 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)4017 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
4018     user_addr_t user_addr)
4019 {
4020 	struct bridge_iflist    *bif;
4021 	int                     error = 0;
4022 	unsigned int            buflen;
4023 
4024 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
4025 	if (bif == NULL) {
4026 		error = ENOENT;
4027 		goto done;
4028 	}
4029 
4030 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
4031 	if (buflen == 0 || mreq->brmr_len == 0) {
4032 		mreq->brmr_len = buflen;
4033 		goto done;
4034 	}
4035 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
4036 		error = ENOBUFS;
4037 		goto done;
4038 	}
4039 	mreq->brmr_len = buflen;
4040 	error = copyout(&bif->bif_stats, user_addr, buflen);
4041 done:
4042 	return error;
4043 }
4044 
4045 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4046 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4047 {
4048 	struct ifbrmreq32 * __single mreq = arg;
4049 
4050 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4051 }
4052 
4053 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4054 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4055 {
4056 	struct ifbrmreq64 * __single mreq = arg;
4057 
4058 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4059 }
4060 
4061 /*
4062  * bridge_proto_attach_changed
4063  *
4064  *	Called when protocol attachment on the interface changes.
4065  */
4066 static void
bridge_proto_attach_changed(struct ifnet * ifp)4067 bridge_proto_attach_changed(struct ifnet *ifp)
4068 {
4069 	boolean_t changed = FALSE;
4070 	struct bridge_iflist *bif;
4071 	boolean_t input_broadcast;
4072 	struct bridge_softc * __single sc = ifp->if_bridge;
4073 
4074 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4075 	if (sc == NULL) {
4076 		return;
4077 	}
4078 	input_broadcast = interface_needs_input_broadcast(ifp);
4079 	BRIDGE_LOCK(sc);
4080 	bif = bridge_lookup_member_if(sc, ifp);
4081 	if (bif != NULL) {
4082 		changed = bif_set_input_broadcast(bif, input_broadcast);
4083 	}
4084 	BRIDGE_UNLOCK(sc);
4085 	if (changed) {
4086 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4087 		    "%s input broadcast %s", ifp->if_xname,
4088 		    input_broadcast ? "ENABLED" : "DISABLED");
4089 	}
4090 	return;
4091 }
4092 
4093 /*
4094  * interface_media_active:
4095  *
4096  *	Tells if an interface media is active.
4097  */
4098 static int
interface_media_active(struct ifnet * ifp)4099 interface_media_active(struct ifnet *ifp)
4100 {
4101 	struct ifmediareq   ifmr;
4102 	int status = 0;
4103 
4104 	bzero(&ifmr, sizeof(ifmr));
4105 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4106 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4107 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4108 		}
4109 	}
4110 
4111 	return status;
4112 }
4113 
4114 /*
4115  * bridge_updatelinkstatus:
4116  *
4117  *      Update the media active status of the bridge based on the
4118  *	media active status of its member.
4119  *	If changed, return the corresponding onf/off link event.
4120  */
4121 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4122 bridge_updatelinkstatus(struct bridge_softc *sc)
4123 {
4124 	struct bridge_iflist *bif;
4125 	int active_member = 0;
4126 	u_int32_t event_code = 0;
4127 
4128 	BRIDGE_LOCK_ASSERT_HELD(sc);
4129 
4130 	/*
4131 	 * Find out if we have an active interface
4132 	 */
4133 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4134 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4135 			active_member = 1;
4136 			break;
4137 		}
4138 	}
4139 
4140 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4141 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
4142 		event_code = KEV_DL_LINK_ON;
4143 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4144 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4145 		event_code = KEV_DL_LINK_OFF;
4146 	}
4147 
4148 	return event_code;
4149 }
4150 
4151 /*
4152  * bridge_iflinkevent:
4153  */
4154 static void
bridge_iflinkevent(struct ifnet * ifp)4155 bridge_iflinkevent(struct ifnet *ifp)
4156 {
4157 	struct bridge_softc * __single sc = ifp->if_bridge;
4158 	struct bridge_iflist *bif;
4159 	u_int32_t event_code = 0;
4160 	int media_active;
4161 
4162 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4163 
4164 	/* Check if the interface is a bridge member */
4165 	if (sc == NULL) {
4166 		return;
4167 	}
4168 
4169 	media_active = interface_media_active(ifp);
4170 	BRIDGE_LOCK(sc);
4171 	bif = bridge_lookup_member_if(sc, ifp);
4172 	if (bif != NULL) {
4173 		if (media_active) {
4174 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4175 		} else {
4176 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4177 		}
4178 		if (sc->sc_mac_nat_bif != NULL) {
4179 			bridge_mac_nat_flush_entries(sc, bif);
4180 		}
4181 
4182 		event_code = bridge_updatelinkstatus(sc);
4183 	}
4184 	BRIDGE_UNLOCK(sc);
4185 
4186 	if (event_code != 0) {
4187 		bridge_link_event(sc->sc_ifp, event_code);
4188 	}
4189 }
4190 
4191 /*
4192  * bridge_delayed_callback:
4193  *
4194  *	Makes a delayed call
4195  */
4196 static void
bridge_delayed_callback(void * param,__unused void * param2)4197 bridge_delayed_callback(void *param, __unused void *param2)
4198 {
4199 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4200 	struct bridge_softc *sc = call->bdc_sc;
4201 
4202 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4203 	if (bridge_delayed_callback_delay > 0) {
4204 		struct timespec ts;
4205 
4206 		ts.tv_sec = bridge_delayed_callback_delay;
4207 		ts.tv_nsec = 0;
4208 
4209 		BRIDGE_LOG(LOG_NOTICE, 0,
4210 		    "sleeping for %d seconds",
4211 		    bridge_delayed_callback_delay);
4212 
4213 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4214 		    __func__, &ts);
4215 
4216 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4217 	}
4218 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4219 
4220 	BRIDGE_LOCK(sc);
4221 
4222 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4223 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4224 	    "%s call 0x%llx flags 0x%x",
4225 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4226 	    call->bdc_flags);
4227 }
4228 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4229 
4230 	if (call->bdc_flags & BDCF_CANCELLING) {
4231 		wakeup(call);
4232 	} else {
4233 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
4234 			(*call->bdc_func)(sc);
4235 		}
4236 	}
4237 	call->bdc_flags &= ~BDCF_OUTSTANDING;
4238 	BRIDGE_UNLOCK(sc);
4239 }
4240 
4241 /*
4242  * bridge_schedule_delayed_call:
4243  *
4244  *	Schedule a function to be called on a separate thread
4245  *      The actual call may be scheduled to run at a given time or ASAP.
4246  */
4247 static void
4248 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4249 {
4250 	uint64_t deadline = 0;
4251 	struct bridge_softc *sc = call->bdc_sc;
4252 
4253 	BRIDGE_LOCK_ASSERT_HELD(sc);
4254 
4255 	if ((sc->sc_flags & SCF_DETACHING) ||
4256 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4257 		return;
4258 	}
4259 
4260 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4261 		nanoseconds_to_absolutetime(
4262 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4263 			call->bdc_ts.tv_nsec, &deadline);
4264 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
4265 	}
4266 
4267 	call->bdc_flags = BDCF_OUTSTANDING;
4268 
4269 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4270 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4271 	    "%s call 0x%llx flags 0x%x",
4272 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4273 	    call->bdc_flags);
4274 }
4275 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4276 
4277 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4278 		thread_call_func_delayed(
4279 			(thread_call_func_t)bridge_delayed_callback,
4280 			call, deadline);
4281 	} else {
4282 		if (call->bdc_thread_call == NULL) {
4283 			call->bdc_thread_call = thread_call_allocate(
4284 				(thread_call_func_t)bridge_delayed_callback,
4285 				call);
4286 		}
4287 		thread_call_enter(call->bdc_thread_call);
4288 	}
4289 }
4290 
4291 /*
4292  * bridge_cancel_delayed_call:
4293  *
4294  *	Cancel a queued or running delayed call.
4295  *	If call is running, does not return until the call is done to
4296  *	prevent race condition with the brigde interface getting destroyed
4297  */
4298 static void
4299 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4300 {
4301 	boolean_t result;
4302 	struct bridge_softc *sc = call->bdc_sc;
4303 
4304 	/*
4305 	 * The call was never scheduled
4306 	 */
4307 	if (sc == NULL) {
4308 		return;
4309 	}
4310 
4311 	BRIDGE_LOCK_ASSERT_HELD(sc);
4312 
4313 	call->bdc_flags |= BDCF_CANCELLING;
4314 
4315 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4316 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4317 		    "%s call 0x%llx flags 0x%x",
4318 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4319 		    call->bdc_flags);
4320 		result = thread_call_func_cancel(
4321 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4322 
4323 		if (result) {
4324 			/*
4325 			 * We managed to dequeue the delayed call
4326 			 */
4327 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4328 		} else {
4329 			/*
4330 			 * Wait for delayed call do be done running
4331 			 */
4332 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4333 		}
4334 	}
4335 	call->bdc_flags &= ~BDCF_CANCELLING;
4336 }
4337 
4338 /*
4339  * bridge_cleanup_delayed_call:
4340  *
4341  *	Dispose resource allocated for a delayed call
4342  *	Assume the delayed call is not queued or running .
4343  */
4344 static void
4345 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4346 {
4347 	boolean_t result;
4348 	struct bridge_softc *sc = call->bdc_sc;
4349 
4350 	/*
4351 	 * The call was never scheduled
4352 	 */
4353 	if (sc == NULL) {
4354 		return;
4355 	}
4356 
4357 	BRIDGE_LOCK_ASSERT_HELD(sc);
4358 
4359 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4360 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4361 
4362 	if (call->bdc_thread_call != NULL) {
4363 		result = thread_call_free(call->bdc_thread_call);
4364 		if (result == FALSE) {
4365 			panic("%s thread_call_free() failed for call %p",
4366 			    __func__, call);
4367 		}
4368 		call->bdc_thread_call = NULL;
4369 	}
4370 }
4371 
4372 /*
4373  * bridge_init:
4374  *
4375  *	Initialize a bridge interface.
4376  */
4377 static int
4378 bridge_init(struct ifnet *ifp)
4379 {
4380 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4381 	errno_t error;
4382 
4383 	BRIDGE_LOCK_ASSERT_HELD(sc);
4384 
4385 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4386 		return 0;
4387 	}
4388 
4389 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4390 
4391 	/*
4392 	 * Calling bridge_aging_timer() is OK as there are no entries to
4393 	 * age so we're just going to arm the timer
4394 	 */
4395 	bridge_aging_timer(sc);
4396 #if BRIDGESTP
4397 	if (error == 0) {
4398 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4399 	}
4400 #endif /* BRIDGESTP */
4401 	return error;
4402 }
4403 
4404 /*
4405  * bridge_ifstop:
4406  *
4407  *	Stop the bridge interface.
4408  */
4409 static void
4410 bridge_ifstop(struct ifnet *ifp, int disable)
4411 {
4412 #pragma unused(disable)
4413 	struct bridge_softc * __single sc = ifp->if_softc;
4414 
4415 	BRIDGE_LOCK_ASSERT_HELD(sc);
4416 
4417 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4418 		return;
4419 	}
4420 
4421 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4422 
4423 #if BRIDGESTP
4424 	bstp_stop(&sc->sc_stp);
4425 #endif /* BRIDGESTP */
4426 
4427 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4428 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4429 }
4430 
4431 static const uint32_t checksum_request_flags = (MBUF_CSUM_REQ_TCP |
4432     MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6);
4433 
4434 static const mbuf_csum_performed_flags_t checksum_performed_all_good =
4435     (MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD
4436     | MBUF_CSUM_DID_DATA | MBUF_CSUM_PSEUDO_HDR);
4437 
4438 /*
4439  * bridge_compute_cksum:
4440  *
4441  *	If the packet has checksum flags, compare the hardware checksum
4442  *	capabilities of the source and destination interfaces. If they
4443  *	are the same, there's nothing to do. If they are different,
4444  *	finalize the checksum so that it can be sent on the destination
4445  *	interface.
4446  */
4447 static void
4448 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4449 {
4450 	uint32_t csum_flags;
4451 	uint16_t dst_hw_csum;
4452 	uint32_t did_sw = 0;
4453 	struct ether_header *eh;
4454 	uint16_t src_hw_csum;
4455 
4456 	if (src_if == dst_if) {
4457 		return;
4458 	}
4459 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4460 	if (csum_flags == 0) {
4461 		/* no checksum offload */
4462 		return;
4463 	}
4464 
4465 	/*
4466 	 * if destination/source differ in checksum offload
4467 	 * capabilities, finalize/compute the checksum
4468 	 */
4469 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4470 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4471 	if (dst_hw_csum == src_hw_csum) {
4472 		return;
4473 	}
4474 	eh = mtod(m, struct ether_header *);
4475 	switch (eh->ether_type) {
4476 	case HTONS_ETHERTYPE_IP:
4477 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4478 		break;
4479 	case HTONS_ETHERTYPE_IPV6:
4480 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4481 		break;
4482 	}
4483 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4484 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4485 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4486 	    m->m_pkthdr.csum_flags);
4487 }
4488 
4489 static inline errno_t
4490 bridge_transmit(ifnet_t ifp, mbuf_t m)
4491 {
4492 	struct flowadv  adv = { .code = FADV_SUCCESS };
4493 	errno_t         error;
4494 	int             flags = DLIL_OUTPUT_FLAGS_RAW;
4495 
4496 	flags = (if_bridge_output_skip_filters != 0)
4497 	    ? (DLIL_OUTPUT_FLAGS_RAW | DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS)
4498 	    : DLIL_OUTPUT_FLAGS_RAW;
4499 	error = dlil_output(ifp, 0, m, NULL, NULL, flags, &adv);
4500 	if (error == 0) {
4501 		if (adv.code == FADV_FLOW_CONTROLLED) {
4502 			error = EQFULL;
4503 		} else if (adv.code == FADV_SUSPENDED) {
4504 			error = EQSUSPENDED;
4505 		}
4506 	}
4507 	return error;
4508 }
4509 
4510 static int
4511 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4512     bool *is_fragmented)
4513 {
4514 	int newoff;
4515 
4516 	*is_fragmented = false;
4517 	while (1) {
4518 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4519 		if (newoff < 0) {
4520 			return off;
4521 		} else if (newoff < off) {
4522 			return -1;    /* invalid */
4523 		} else if (newoff == off) {
4524 			return newoff;
4525 		}
4526 		off = newoff;
4527 		proto = *nxtp;
4528 		if (proto == IPPROTO_FRAGMENT) {
4529 			*is_fragmented = true;
4530 		}
4531 	}
4532 }
4533 
4534 #define __ATOMIC_INC(s) os_atomic_inc(&s, relaxed)
4535 
4536 static int
4537 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4538     ip_packet_info_t info_p, struct bripstats * stats_p)
4539 {
4540 	int             error = 0;
4541 	u_int           hlen;
4542 	u_int           ip_hlen;
4543 	u_int           ip_pay_len;
4544 	struct mbuf *   m0 = *mp;
4545 	int             off;
4546 	int             opt_len = 0;
4547 	int             proto = 0;
4548 
4549 	bzero(info_p, sizeof(*info_p));
4550 	if (is_ipv4) {
4551 		struct ip *     ip;
4552 		u_int           ip_total_len;
4553 
4554 		/* IPv4 */
4555 		hlen = mac_hlen + sizeof(struct ip);
4556 		if (m0->m_pkthdr.len < hlen) {
4557 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4558 			    "Short IP packet %d < %d",
4559 			    m0->m_pkthdr.len, hlen);
4560 			error = _EBADIP;
4561 			__ATOMIC_INC(stats_p->bips_bad_ip);
4562 			goto done;
4563 		}
4564 		if (m0->m_len < hlen) {
4565 			*mp = m0 = m_pullup(m0, hlen);
4566 			if (m0 == NULL) {
4567 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4568 				    "m_pullup failed hlen %d",
4569 				    hlen);
4570 				error = ENOBUFS;
4571 				__ATOMIC_INC(stats_p->bips_bad_ip);
4572 				goto done;
4573 			}
4574 		}
4575 		ip = (struct ip *)mtodo(m0, mac_hlen);
4576 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4577 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4578 			    "bad IP version");
4579 			error = _EBADIP;
4580 			__ATOMIC_INC(stats_p->bips_bad_ip);
4581 			goto done;
4582 		}
4583 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4584 		if (ip_hlen < sizeof(struct ip)) {
4585 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4586 			    "bad IP header length %d < %d",
4587 			    ip_hlen,
4588 			    (int)sizeof(struct ip));
4589 			error = _EBADIP;
4590 			__ATOMIC_INC(stats_p->bips_bad_ip);
4591 			goto done;
4592 		}
4593 		hlen = mac_hlen + ip_hlen;
4594 		if (m0->m_len < hlen) {
4595 			*mp = m0 = m_pullup(m0, hlen);
4596 			if (m0 == NULL) {
4597 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4598 				    "m_pullup failed hlen %d",
4599 				    hlen);
4600 				error = ENOBUFS;
4601 				__ATOMIC_INC(stats_p->bips_bad_ip);
4602 				goto done;
4603 			}
4604 			ip = (struct ip *)mtodo(m0, mac_hlen);
4605 		}
4606 
4607 		ip_total_len = ntohs(ip->ip_len);
4608 		if (ip_total_len < ip_hlen) {
4609 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4610 			    "IP total len %d < header len %d",
4611 			    ip_total_len, ip_hlen);
4612 			error = _EBADIP;
4613 			__ATOMIC_INC(stats_p->bips_bad_ip);
4614 			goto done;
4615 		}
4616 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4617 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4618 			    "invalid IP payload length %d > %d",
4619 			    ip_total_len,
4620 			    (m0->m_pkthdr.len - mac_hlen));
4621 			error = _EBADIP;
4622 			__ATOMIC_INC(stats_p->bips_bad_ip);
4623 			goto done;
4624 		}
4625 		ip_pay_len = ip_total_len - ip_hlen;
4626 		info_p->ip_proto = ip->ip_p;
4627 		info_p->ip_hdr = mtodo(m0, mac_hlen);
4628 		info_p->ip_m0_len = m0->m_len - mac_hlen;
4629 		info_p->ip_hlen = ip_hlen;
4630 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4631 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4632 			info_p->ip_is_fragmented = true;
4633 		}
4634 		__ATOMIC_INC(stats_p->bips_ip);
4635 	} else {
4636 		struct ip6_hdr *ip6;
4637 
4638 		/* IPv6 */
4639 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4640 		if (m0->m_pkthdr.len < hlen) {
4641 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4642 			    "short IPv6 packet %d < %d",
4643 			    m0->m_pkthdr.len, hlen);
4644 			error = _EBADIPV6;
4645 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4646 			goto done;
4647 		}
4648 		if (m0->m_len < hlen) {
4649 			*mp = m0 = m_pullup(m0, hlen);
4650 			if (m0 == NULL) {
4651 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4652 				    "m_pullup failed hlen %d",
4653 				    hlen);
4654 				error = ENOBUFS;
4655 				__ATOMIC_INC(stats_p->bips_bad_ip6);
4656 				goto done;
4657 			}
4658 		}
4659 		ip6 = (struct ip6_hdr *)(mtodo(m0, mac_hlen));
4660 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4661 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4662 			    "bad IPv6 version");
4663 			error = _EBADIPV6;
4664 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4665 			goto done;
4666 		}
4667 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4668 		    &info_p->ip_is_fragmented);
4669 		if (off < 0 || m0->m_pkthdr.len < off) {
4670 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4671 			    "ip6_lasthdr() returned %d",
4672 			    off);
4673 			error = _EBADIPV6;
4674 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4675 			goto done;
4676 		}
4677 		ip_hlen = sizeof(*ip6);
4678 		opt_len = off - mac_hlen - ip_hlen;
4679 		if (opt_len < 0) {
4680 			error = _EBADIPV6;
4681 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4682 			goto done;
4683 		}
4684 		ip_pay_len = ntohs(ip6->ip6_plen);
4685 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4686 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4687 			    "invalid IPv6 payload length %d > %d",
4688 			    ip_pay_len,
4689 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4690 			error = _EBADIPV6;
4691 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4692 			goto done;
4693 		}
4694 		info_p->ip_proto = proto;
4695 		info_p->ip_hdr = mtodo(m0, mac_hlen);
4696 		info_p->ip_m0_len = m0->m_len - mac_hlen;
4697 		info_p->ip_hlen = ip_hlen;
4698 		__ATOMIC_INC(stats_p->bips_ip6);
4699 	}
4700 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4701 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4702 	    is_ipv4 ? '4' : '6',
4703 	    proto, ip_hlen, ip_pay_len, opt_len,
4704 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4705 	info_p->ip_pay_len = ip_pay_len;
4706 	info_p->ip_opt_len = opt_len;
4707 	info_p->ip_is_ipv4 = is_ipv4;
4708 done:
4709 	return error;
4710 }
4711 
4712 static int
4713 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4714     ip_packet_info_t info_p, struct bripstats * stats_p)
4715 {
4716 	int             error;
4717 	u_int           hlen;
4718 
4719 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4720 	if (error != 0) {
4721 		goto done;
4722 	}
4723 	if (info_p->ip_proto != IPPROTO_TCP) {
4724 		/* not a TCP frame, not an error, just a bad guess */
4725 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4726 		    "non-TCP (%d) IPv%c frame %d bytes",
4727 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4728 		    (*mp)->m_pkthdr.len);
4729 		goto done;
4730 	}
4731 	if (info_p->ip_is_fragmented) {
4732 		/* both TSO and IP fragmentation don't make sense */
4733 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4734 		    "fragmented TSO packet?");
4735 		__ATOMIC_INC(stats_p->bips_bad_tcp);
4736 		error = _EBADTCP;
4737 		goto done;
4738 	}
4739 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4740 	    info_p->ip_opt_len;
4741 	if ((*mp)->m_len < hlen) {
4742 		*mp = m_pullup(*mp, hlen);
4743 		if (*mp == NULL) {
4744 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4745 			    "m_pullup %d failed",
4746 			    hlen);
4747 			__ATOMIC_INC(stats_p->bips_bad_tcp);
4748 			error = _EBADTCP;
4749 			goto done;
4750 		}
4751 	}
4752 	info_p->ip_proto_hdr = info_p->ip_hdr + info_p->ip_hlen +
4753 	    info_p->ip_opt_len;
4754 done:
4755 	return error;
4756 }
4757 
4758 static inline void
4759 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4760 {
4761 	if (proto == IPPROTO_TCP) {
4762 		__ATOMIC_INC(stats_p->brcs_tcp_checksum);
4763 	} else {
4764 		__ATOMIC_INC(stats_p->brcs_udp_checksum);
4765 	}
4766 	return;
4767 }
4768 
4769 #define ETHER_TYPE_FLAG_NONE    0x00
4770 #define ETHER_TYPE_FLAG_IPV4    0x01
4771 #define ETHER_TYPE_FLAG_IPV6    0x02
4772 #define ETHER_TYPE_FLAG_ARP     0x04
4773 #define ETHER_TYPE_FLAG_IP      (ETHER_TYPE_FLAG_IPV4 | ETHER_TYPE_FLAG_IPV6)
4774 #define ETHER_TYPE_FLAG_IP_ARP  (ETHER_TYPE_FLAG_IP | ETHER_TYPE_FLAG_ARP)
4775 
4776 static inline bool
4777 ether_type_flag_is_ip(ether_type_flag_t flag)
4778 {
4779 	return (flag & ETHER_TYPE_FLAG_IP) != 0;
4780 }
4781 
4782 static inline ether_type_flag_t
4783 ether_type_flag_get(uint16_t ether_type)
4784 {
4785 	ether_type_flag_t flag = ETHER_TYPE_FLAG_NONE;
4786 
4787 	switch (ether_type) {
4788 	case HTONS_ETHERTYPE_IP:
4789 		flag = ETHER_TYPE_FLAG_IPV4;
4790 		break;
4791 	case HTONS_ETHERTYPE_IPV6:
4792 		flag = ETHER_TYPE_FLAG_IPV6;
4793 		break;
4794 	case HTONS_ETHERTYPE_ARP:
4795 		flag = ETHER_TYPE_FLAG_ARP;
4796 		break;
4797 	default:
4798 		break;
4799 	}
4800 	return flag;
4801 }
4802 
4803 static bool
4804 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4805 {
4806 	uint16_t        ether_type;
4807 	bool            is_ip = TRUE;
4808 
4809 	ether_type = ntohs(eh->ether_type);
4810 	switch (ether_type) {
4811 	case ETHERTYPE_IP:
4812 		*is_ipv4 = TRUE;
4813 		break;
4814 	case ETHERTYPE_IPV6:
4815 		*is_ipv4 = FALSE;
4816 		break;
4817 	default:
4818 		is_ip = FALSE;
4819 		break;
4820 	}
4821 	return is_ip;
4822 }
4823 
4824 static errno_t
4825 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4826 {
4827 	struct brcsumstats *csum_stats_p;
4828 	struct ether_header     *eh;
4829 	errno_t         error = 0;
4830 	ip_packet_info  info;
4831 	bool            is_ipv4;
4832 	struct mbuf *   m;
4833 	u_int           mac_hlen = sizeof(struct ether_header);
4834 	uint16_t        sum;
4835 	bool            valid;
4836 
4837 	eh = mtod(*mp, struct ether_header *);
4838 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4839 		goto done;
4840 	}
4841 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4842 	    &stats_p->brms_out_ip);
4843 	m = *mp;
4844 	if (error != 0) {
4845 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4846 		    "bridge_get_ip_proto failed %d",
4847 		    error);
4848 		goto done;
4849 	}
4850 	if (is_ipv4) {
4851 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4852 			/* hardware offloaded IP header checksum */
4853 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4854 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4855 			    "IP checksum HW %svalid",
4856 			    valid ? "" : "in");
4857 			if (!valid) {
4858 				__ATOMIC_INC(stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum);
4859 				error = _EBADIPCHECKSUM;
4860 				goto done;
4861 			}
4862 			__ATOMIC_INC(stats_p->brms_out_cksum_good_hw.brcs_ip_checksum);
4863 		} else {
4864 			/* verify */
4865 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4866 			valid = (sum == 0);
4867 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4868 			    "IP checksum SW %svalid",
4869 			    valid ? "" : "in");
4870 			if (!valid) {
4871 				__ATOMIC_INC(stats_p->brms_out_cksum_bad.brcs_ip_checksum);
4872 				error = _EBADIPCHECKSUM;
4873 				goto done;
4874 			}
4875 			__ATOMIC_INC(stats_p->brms_out_cksum_good.brcs_ip_checksum);
4876 		}
4877 	}
4878 	if (info.ip_is_fragmented) {
4879 		/* can't verify checksum on fragmented packets */
4880 		goto done;
4881 	}
4882 	switch (info.ip_proto) {
4883 	case IPPROTO_TCP:
4884 		__ATOMIC_INC(stats_p->brms_out_ip.bips_tcp);
4885 		break;
4886 	case IPPROTO_UDP:
4887 		__ATOMIC_INC(stats_p->brms_out_ip.bips_udp);
4888 		break;
4889 	default:
4890 		goto done;
4891 	}
4892 	/* check for hardware offloaded UDP/TCP checksum */
4893 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4894 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4895 		/* checksum verified by hardware */
4896 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4897 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4898 		    "IPv%c %s checksum HW 0x%x %svalid",
4899 		    is_ipv4 ? '4' : '6',
4900 		    (info.ip_proto == IPPROTO_TCP)
4901 		    ? "TCP" : "UDP",
4902 		    m->m_pkthdr.csum_data,
4903 		    valid ? "" : "in" );
4904 		if (!valid) {
4905 			/* bad checksum */
4906 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4907 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4908 			    : _EBADTCPCHECKSUM;
4909 		} else {
4910 			/* good checksum */
4911 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4912 		}
4913 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4914 		goto done;
4915 	}
4916 	/* adjust frame to skip mac-layer header */
4917 	_mbuf_adjust_pkthdr_and_data(m, mac_hlen);
4918 	if (is_ipv4) {
4919 		sum = inet_cksum(m, info.ip_proto,
4920 		    info.ip_hlen,
4921 		    info.ip_pay_len);
4922 	} else {
4923 		sum = inet6_cksum(m, info.ip_proto,
4924 		    info.ip_hlen + info.ip_opt_len,
4925 		    info.ip_pay_len - info.ip_opt_len);
4926 	}
4927 	valid = (sum == 0);
4928 	if (valid) {
4929 		csum_stats_p = &stats_p->brms_out_cksum_good;
4930 	} else {
4931 		csum_stats_p = &stats_p->brms_out_cksum_bad;
4932 		error = (info.ip_proto == IPPROTO_TCP)
4933 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4934 	}
4935 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4936 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4937 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4938 	    is_ipv4 ? '4' : '6',
4939 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4940 	    valid ? "" : "in",
4941 	    sum, info.ip_hlen, info.ip_pay_len);
4942 	/* adjust frame back to start of mac-layer header */
4943 	_mbuf_adjust_pkthdr_and_data(m, -mac_hlen);
4944 
4945 done:
4946 	return error;
4947 }
4948 
4949 static mbuf_t
4950 bridge_verify_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * dbif,
4951     mbuf_t in_list, bool is_ipv4)
4952 {
4953 	mbuf_t          next_packet;
4954 	mblist          ret;
4955 
4956 	mblist_init(&ret);
4957 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
4958 		errno_t         error;
4959 
4960 		/* take packet out of the list */
4961 		next_packet = scan->m_nextpkt;
4962 		scan->m_nextpkt = NULL;
4963 
4964 		if (scan->m_pkthdr.rx_seg_cnt > 1) {
4965 			/* LRO packet, compute checksum on large packet */
4966 			scan = bridge_filter_checksum(bridge_ifp, dbif, scan,
4967 			    is_ipv4, false, true);
4968 		} else {
4969 			/* verify checksum */
4970 			error = bridge_verify_checksum(&scan, &dbif->bif_stats);
4971 			if (error != 0) {
4972 				if (scan != NULL) {
4973 					m_freem(scan);
4974 					scan = NULL;
4975 				}
4976 			}
4977 		}
4978 
4979 		/* add it back to the list */
4980 		if (scan != NULL) {
4981 			mblist_append(&ret, scan);
4982 		}
4983 	}
4984 	return ret.head;
4985 }
4986 
4987 
4988 static errno_t
4989 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4990     struct ifbrmstats * stats_p)
4991 {
4992 	uint16_t *      csum_p;
4993 	errno_t         error = 0;
4994 	u_int           hlen;
4995 	struct mbuf *   m0 = *mp;
4996 	u_int           mac_hlen = sizeof(struct ether_header);
4997 	u_int           pkt_hdr_len;
4998 	struct tcphdr * tcp;
4999 	u_int           tcp_hlen;
5000 	struct udphdr * udp;
5001 
5002 	if (info_p->ip_is_ipv4) {
5003 		/* compute IP header checksum */
5004 		struct ip *ip = (struct ip *)info_p->ip_hdr;
5005 		ip->ip_sum = 0;
5006 		ip->ip_sum = inet_cksum(m0, 0, mac_hlen, info_p->ip_hlen);
5007 		__ATOMIC_INC(stats_p->brms_in_computed_cksum.brcs_ip_checksum);
5008 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5009 		    "IPv4 checksum 0x%x",
5010 		    ntohs(ip->ip_sum));
5011 	}
5012 	if (info_p->ip_is_fragmented) {
5013 		/* can't compute checksum on fragmented packets */
5014 		goto done;
5015 	}
5016 	pkt_hdr_len = m0->m_pkthdr.len;
5017 	switch (info_p->ip_proto) {
5018 	case IPPROTO_TCP:
5019 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
5020 		    + sizeof(struct tcphdr);
5021 		if (m0->m_len < hlen) {
5022 			*mp = m0 = m_pullup(m0, hlen);
5023 			if (m0 == NULL) {
5024 				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5025 				error = _EBADTCP;
5026 				goto done;
5027 			}
5028 		}
5029 		tcp = (struct tcphdr *)(info_p->ip_hdr + info_p->ip_hlen
5030 		    + info_p->ip_opt_len);
5031 		tcp_hlen = tcp->th_off << 2;
5032 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
5033 		if (hlen > pkt_hdr_len) {
5034 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5035 			    "bad tcp header length %u",
5036 			    tcp_hlen);
5037 			__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5038 			error = _EBADTCP;
5039 			goto done;
5040 		}
5041 		csum_p = &tcp->th_sum;
5042 		__ATOMIC_INC(stats_p->brms_in_ip.bips_tcp);
5043 		break;
5044 	case IPPROTO_UDP:
5045 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
5046 		if (m0->m_len < hlen) {
5047 			*mp = m0 = m_pullup(m0, hlen);
5048 			if (m0 == NULL) {
5049 				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_udp);
5050 				error = ENOBUFS;
5051 				goto done;
5052 			}
5053 		}
5054 		udp = (struct udphdr *)(info_p->ip_hdr + info_p->ip_hlen
5055 		    + info_p->ip_opt_len);
5056 		csum_p = &udp->uh_sum;
5057 		__ATOMIC_INC(stats_p->brms_in_ip.bips_udp);
5058 		break;
5059 	default:
5060 		/* not TCP or UDP */
5061 		goto done;
5062 	}
5063 	*csum_p = 0;
5064 	/* adjust frame to skip mac-layer header */
5065 	_mbuf_adjust_pkthdr_and_data(m0, mac_hlen);
5066 	if (info_p->ip_is_ipv4) {
5067 		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
5068 		    info_p->ip_pay_len);
5069 	} else {
5070 		*csum_p = inet6_cksum(m0, info_p->ip_proto,
5071 		    info_p->ip_hlen + info_p->ip_opt_len,
5072 		    info_p->ip_pay_len - info_p->ip_opt_len);
5073 	}
5074 	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
5075 		/* RFC 1122 4.1.3.4 */
5076 		*csum_p = 0xffff;
5077 	}
5078 	/* adjust frame back to start of mac-layer header */
5079 	_mbuf_adjust_pkthdr_and_data(m0, -mac_hlen);
5080 	proto_csum_stats_increment(info_p->ip_proto,
5081 	    &stats_p->brms_in_computed_cksum);
5082 
5083 	/* indicate that the checksum is good */
5084 	mbuf_set_csum_performed(m0, checksum_performed_all_good, 0xffff);
5085 
5086 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5087 	    "IPv%c %s set checksum 0x%x",
5088 	    info_p->ip_is_ipv4 ? '4' : '6',
5089 	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5090 	    ntohs(*csum_p));
5091 done:
5092 	return error;
5093 }
5094 
5095 static inline void
5096 bridge_handle_checksum_op(ifnet_t src_ifp, ifnet_t dst_ifp,
5097     mbuf_t m, ChecksumOperation cksum_op)
5098 {
5099 	switch (cksum_op) {
5100 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
5101 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5102 		break;
5103 	case CHECKSUM_OPERATION_FINALIZE:
5104 		/* the checksum might not be correct, finalize now */
5105 		VERIFY(dst_ifp != NULL);
5106 		bridge_finalize_cksum(dst_ifp, m);
5107 		break;
5108 	case CHECKSUM_OPERATION_COMPUTE:
5109 		VERIFY(dst_ifp != NULL && src_ifp != NULL);
5110 		bridge_compute_cksum(src_ifp, dst_ifp, m);
5111 		break;
5112 	default:
5113 		break;
5114 	}
5115 	return;
5116 }
5117 
5118 static uint32_t
5119 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5120 {
5121 	uint32_t tso_mtu;
5122 
5123 	tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5124 	if (tso_mtu == 0) {
5125 		tso_mtu = IP_MAXPACKET;
5126 	}
5127 
5128 #if DEBUG || DEVELOPMENT
5129 #define REDUCED_TSO_MTU         (16 * 1024)
5130 	if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5131 		tso_mtu = REDUCED_TSO_MTU;
5132 	}
5133 #endif /* DEBUG || DEVELOPMENT */
5134 	return tso_mtu;
5135 }
5136 
5137 /*
5138  * tso_hwassist:
5139  * - determine whether the destination interface supports TSO offload
5140  * - if the packet is already marked for offload and the hardware supports
5141  *   it, just allow the packet to continue on
5142  * - if not, parse the packet headers to verify that this is a large TCP
5143  *   packet requiring segmentation; if the hardware doesn't support it
5144  *   set need_sw_tso; otherwise, mark the packet for TSO offload
5145  */
5146 static int
5147 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5148     int * mss_p, bool * need_gso, bool * is_large_tcp)
5149 {
5150 	uint32_t                csum_flags;
5151 	int                     error = 0;
5152 	ip_packet_info          info;
5153 	u_int32_t               if_csum;
5154 	u_int32_t               if_tso;
5155 	u_int32_t               mbuf_tso;
5156 	int                     mss = *mss_p;
5157 	uint8_t                 seg_cnt = 0;
5158 	bool                    supports_cksum = false;
5159 	uint32_t                pkt_mtu;
5160 	struct bripstats        stats;
5161 
5162 	*need_gso = false;
5163 	*is_large_tcp = false;
5164 	if (is_ipv4) {
5165 		/*
5166 		 * Enable both TCP and IP offload if the hardware supports it.
5167 		 * If the hardware doesn't support TCP offload, supports_cksum
5168 		 * will be false so we won't set either offload.
5169 		 */
5170 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5171 		supports_cksum = (if_csum & CSUM_TCP) != 0;
5172 		if_tso = IFNET_TSO_IPV4;
5173 		mbuf_tso = CSUM_TSO_IPV4;
5174 	} else {
5175 		if_csum = (ifp->if_hwassist & CSUM_TCPIPV6);
5176 		supports_cksum = (if_csum & CSUM_TCPIPV6) != 0;
5177 		if_tso = IFNET_TSO_IPV6;
5178 		mbuf_tso = CSUM_TSO_IPV6;
5179 	}
5180 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5181 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
5182 	    ifp->if_xname, supports_cksum ? "" : " not",
5183 	    ifp->if_hwassist, if_csum);
5184 
5185 	/* verify that this is a large TCP frame */
5186 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5187 	    &info, &stats);
5188 	if (error != 0) {
5189 		/* bad packet */
5190 		goto done;
5191 	}
5192 	if (info.ip_proto_hdr == NULL) {
5193 		/* not a TCP packet */
5194 		goto done;
5195 	}
5196 	pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
5197 	if (mss == 0) {
5198 		/* check for LRO */
5199 		seg_cnt = (*mp)->m_pkthdr.rx_seg_cnt;
5200 		if (seg_cnt == 1 || (seg_cnt == 0 && pkt_mtu <= ifp->if_mtu)) {
5201 			/* not actually a large packet */
5202 			goto done;
5203 		}
5204 	}
5205 	*is_large_tcp = true;
5206 	(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5207 	if (mss == 0) {
5208 		uint32_t            hdr_len;
5209 		struct tcphdr *     tcp;
5210 
5211 		tcp = (struct tcphdr *)info.ip_proto_hdr;
5212 		hdr_len = info.ip_hlen + info.ip_opt_len + (tcp->th_off << 2);
5213 
5214 		/* packet isn't marked, mark it now */
5215 		if (seg_cnt != 0) {
5216 			uint32_t    len;
5217 
5218 			/* approximate the MSS using the LRO seg cnt */
5219 			len = mbuf_pkthdr_len(*mp) - hdr_len - ETHER_HDR_LEN;
5220 			mss = len / seg_cnt;
5221 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5222 			    "%s: mss %d = len %d / seg cnt %d",
5223 			    ifp->if_xname, mss, len, seg_cnt);
5224 		} else {
5225 			mss = ifp->if_mtu - hdr_len
5226 			    - if_bridge_tso_reduce_mss_tx;
5227 		}
5228 		assert(mss > 0);
5229 		csum_flags = mbuf_tso;
5230 		if (supports_cksum) {
5231 			csum_flags |= if_csum;
5232 		}
5233 		(*mp)->m_pkthdr.tso_segsz = mss;
5234 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
5235 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5236 	}
5237 	if ((ifp->if_hwassist & if_tso) == 0) {
5238 		/* need gso if no hardware support */
5239 		*need_gso = true;
5240 	} else {
5241 		uint32_t                tso_mtu = 0;
5242 
5243 		tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
5244 		if (pkt_mtu > tso_mtu) {
5245 			/* need gso if tso_mtu too small */
5246 			*need_gso = true;
5247 		}
5248 	}
5249 done:
5250 	*mss_p = mss;
5251 	return error;
5252 }
5253 
5254 /*
5255  * bridge_enqueue:
5256  *
5257  *	Enqueue a packet list on a bridge member interface.
5258  *
5259  */
5260 static int
5261 bridge_enqueue(ifnet_t bridge_ifp, ifnet_t src_if, ifnet_t dst_if,
5262     ether_type_flag_t etypef, mbuf_t in_list, ChecksumOperation orig_cksum_op)
5263 {
5264 	int             enqueue_error = 0;
5265 	mbuf_t          next_packet;
5266 	uint32_t        out_errors = 0;
5267 	mblist          out_list;
5268 
5269 	VERIFY(dst_if != NULL);
5270 
5271 	mblist_init(&out_list);
5272 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5273 		bool            check_gso = false;
5274 		ChecksumOperation cksum_op = orig_cksum_op;
5275 		errno_t         error = 0;
5276 		bool            is_ipv4 = false;
5277 		int             len;
5278 		int             mss = 0;
5279 		bool            need_gso = false;
5280 
5281 		scan->m_flags |= M_PROTO1; /* set to avoid loops */
5282 		next_packet = scan->m_nextpkt;
5283 		scan->m_nextpkt = NULL;
5284 		len = mbuf_pkthdr_len(scan);
5285 		is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
5286 		mss = _mbuf_get_tso_mss(scan);
5287 		if (mss != 0) {
5288 			/* packet is marked for segmentation */
5289 			check_gso = true;
5290 		} else if (scan->m_pkthdr.rx_seg_cnt != 0) {
5291 			/* LRO packet */
5292 			check_gso = true;
5293 		} else if (ether_type_flag_is_ip(etypef) &&
5294 		    len > (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5295 			/*
5296 			 * Need to segment the packet if it is a large frame
5297 			 * and the destination interface does not support TSO.
5298 			 *
5299 			 * Note that with trailers, it's possible for a packet to
5300 			 * be large but not actually require segmentation.
5301 			 */
5302 			check_gso = true;
5303 		}
5304 		if (check_gso) {
5305 			bool    is_large_tcp = false;
5306 
5307 			error = tso_hwassist(&scan, is_ipv4,
5308 			    dst_if, sizeof(struct ether_header), &mss,
5309 			    &need_gso, &is_large_tcp);
5310 			if (is_large_tcp &&
5311 			    cksum_op == CHECKSUM_OPERATION_CLEAR_OFFLOAD) {
5312 				cksum_op = CHECKSUM_OPERATION_NONE;
5313 			}
5314 		}
5315 		if (error != 0) {
5316 			if (scan != NULL) {
5317 				m_freem(scan);
5318 				scan = NULL;
5319 			}
5320 			out_errors++;
5321 		} else if (need_gso) {
5322 			int             mac_hlen = sizeof(struct ether_header);
5323 			mblist          segs;
5324 
5325 			/* segment packets, add to list */
5326 			segs = gso_tcp_transmit(dst_if, scan, mac_hlen,
5327 			    is_ipv4);
5328 			if (segs.head != NULL) {
5329 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5330 				    "%s (%s) append gso #segs %u bytes %u",
5331 				    bridge_ifp->if_xname,
5332 				    dst_if->if_xname,
5333 				    segs.count, segs.bytes);
5334 				mblist_append_list(&out_list, segs);
5335 			} else {
5336 				out_errors++;
5337 			}
5338 		} else {
5339 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5340 			    "%s (%s) append %d bytes mss %d op %d",
5341 			    bridge_ifp->if_xname,
5342 			    dst_if->if_xname,
5343 			    len, mss, cksum_op);
5344 			bridge_handle_checksum_op(src_if, dst_if,
5345 			    scan, cksum_op);
5346 			mblist_append(&out_list, scan);
5347 		}
5348 	}
5349 	if (out_list.head != NULL) {
5350 		enqueue_error = bridge_transmit(dst_if, out_list.head);
5351 		if (enqueue_error != 0) {
5352 			out_errors++;
5353 		}
5354 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5355 		    "%s (%s) bridge_transmit packets %u bytes %u error %d",
5356 		    bridge_ifp->if_xname,
5357 		    dst_if->if_xname,
5358 		    out_list.count, out_list.bytes, enqueue_error);
5359 	}
5360 	if (out_list.count != 0 || out_errors != 0) {
5361 		ifnet_stat_increment_out(bridge_ifp, out_list.count,
5362 		    out_list.bytes, out_errors);
5363 	}
5364 	return enqueue_error;
5365 }
5366 
5367 /*
5368  * bridge_member_output:
5369  *
5370  *	Send output from a bridge member interface.  This
5371  *	performs the bridging function for locally originated
5372  *	packets.
5373  *
5374  *	The mbuf has the Ethernet header already attached.
5375  */
5376 static errno_t
5377 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5378 {
5379 	struct bridge_iflist * bif = NULL;
5380 	ifnet_t bridge_ifp;
5381 	struct ether_header *eh;
5382 	ether_type_flag_t etypef;
5383 	struct ifnet *dst_if = NULL;
5384 	uint16_t vlan;
5385 	struct bridge_iflist *mac_nat_bif;
5386 	ifnet_t mac_nat_ifp;
5387 	mbuf_t m = *data;
5388 
5389 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5390 	    "ifp %s", ifp->if_xname);
5391 	if (m->m_len < ETHER_HDR_LEN) {
5392 		m = m_pullup(m, ETHER_HDR_LEN);
5393 		if (m == NULL) {
5394 			*data = NULL;
5395 			return EJUSTRETURN;
5396 		}
5397 	}
5398 
5399 	eh = mtod(m, struct ether_header *);
5400 	vlan = VLANTAGOF(m);
5401 	etypef = ether_type_flag_get(eh->ether_type);
5402 
5403 	BRIDGE_LOCK(sc);
5404 	mac_nat_bif = sc->sc_mac_nat_bif;
5405 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5406 	if (mac_nat_ifp == ifp) {
5407 		/* record the IP address used by the MAC NAT interface */
5408 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5409 		m = *data;
5410 		if (m == NULL) {
5411 			/* packet was deallocated */
5412 			BRIDGE_UNLOCK(sc);
5413 			return EJUSTRETURN;
5414 		}
5415 	}
5416 	bridge_ifp = sc->sc_ifp;
5417 
5418 	/*
5419 	 * APPLE MODIFICATION
5420 	 * If the packet is an 802.1X ethertype, then only send on the
5421 	 * original output interface.
5422 	 */
5423 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5424 		dst_if = ifp;
5425 		goto sendunicast;
5426 	}
5427 
5428 	/*
5429 	 * If bridge is down, but the original output interface is up,
5430 	 * go ahead and send out that interface.  Otherwise, the packet
5431 	 * is dropped below.
5432 	 */
5433 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5434 		dst_if = ifp;
5435 		goto sendunicast;
5436 	}
5437 
5438 	/*
5439 	 * If the packet is a multicast, or we don't know a better way to
5440 	 * get there, send to all interfaces.
5441 	 */
5442 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5443 		dst_if = NULL;
5444 	} else {
5445 		bif = bridge_rtlookup_bif(sc, eh->ether_dhost, vlan);
5446 		if (bif != NULL) {
5447 			dst_if = bif->bif_ifp;
5448 		}
5449 	}
5450 	if (dst_if == NULL) {
5451 		struct mbuf *mc;
5452 		errno_t error;
5453 
5454 
5455 		bridge_span(sc, etypef, m);
5456 
5457 		BRIDGE_LOCK2REF(sc, error);
5458 		if (error != 0) {
5459 			m_freem(m);
5460 			return EJUSTRETURN;
5461 		}
5462 
5463 		/*
5464 		 * Duplicate and send the packet across all member interfaces
5465 		 * except the originating interface.
5466 		 */
5467 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5468 			dst_if = bif->bif_ifp;
5469 			if (dst_if == ifp) {
5470 				/* skip the originating interface */
5471 				continue;
5472 			}
5473 			/* skip interface with inactive link status */
5474 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5475 				continue;
5476 			}
5477 
5478 			/* skip interface that isn't running */
5479 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5480 				continue;
5481 			}
5482 			/*
5483 			 * If the interface is participating in spanning
5484 			 * tree, make sure the port is in a state that
5485 			 * allows forwarding.
5486 			 */
5487 			if ((bif->bif_ifflags & IFBIF_STP) &&
5488 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5489 				continue;
5490 			}
5491 			/*
5492 			 * If the destination is the MAC NAT interface,
5493 			 * skip sending the packet. The packet can't be sent
5494 			 * if the source MAC is incorrect.
5495 			 */
5496 			if (dst_if == mac_nat_ifp) {
5497 				continue;
5498 			}
5499 
5500 			/* make a deep copy to send on this member interface */
5501 			mc = m_dup(m, M_DONTWAIT);
5502 			if (mc == NULL) {
5503 				(void)ifnet_stat_increment_out(bridge_ifp,
5504 				    0, 0, 1);
5505 				continue;
5506 			}
5507 			(void)bridge_enqueue(bridge_ifp, ifp, dst_if, etypef,
5508 			    mc, CHECKSUM_OPERATION_COMPUTE);
5509 		}
5510 		BRIDGE_UNREF(sc);
5511 
5512 		if ((ifp->if_flags & IFF_RUNNING) == 0) {
5513 			m_freem(m);
5514 			return EJUSTRETURN;
5515 		}
5516 		/* allow packet to continue on the originating interface */
5517 		return 0;
5518 	}
5519 
5520 sendunicast:
5521 	/*
5522 	 * XXX Spanning tree consideration here?
5523 	 */
5524 
5525 	bridge_span(sc, etypef, m);
5526 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5527 		m_freem(m);
5528 		BRIDGE_UNLOCK(sc);
5529 		return EJUSTRETURN;
5530 	}
5531 
5532 	BRIDGE_UNLOCK(sc);
5533 	if (dst_if == ifp) {
5534 		/* allow packet to continue on the originating interface */
5535 		return 0;
5536 	}
5537 	if (dst_if != mac_nat_ifp) {
5538 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, etypef, m,
5539 		    CHECKSUM_OPERATION_COMPUTE);
5540 	} else {
5541 		/*
5542 		 * This is not the original output interface
5543 		 * and the destination is the MAC NAT interface.
5544 		 * Drop the packet because the packet can't be sent
5545 		 * if the source MAC is incorrect.
5546 		 */
5547 		m_freem(m);
5548 	}
5549 	return EJUSTRETURN;
5550 }
5551 
5552 /*
5553  * Output callback.
5554  *
5555  * This routine is called externally from above only when if_bridge_txstart
5556  * is disabled; otherwise it is called internally by bridge_start().
5557  */
5558 static int
5559 bridge_output(struct ifnet *ifp, struct mbuf *m)
5560 {
5561 	struct bridge_iflist *bif;
5562 	struct bridge_softc * __single sc = ifnet_softc(ifp);
5563 	struct ether_header *eh;
5564 	ether_type_flag_t etypef;
5565 	struct ifnet *dst_if = NULL;
5566 	int error = 0;
5567 
5568 	eh = mtod(m, struct ether_header *);
5569 	etypef = ether_type_flag_get(eh->ether_type);
5570 	BRIDGE_LOCK(sc);
5571 
5572 	if (!IS_BCAST_MCAST(m)) {
5573 		bif = bridge_rtlookup_bif(sc, eh->ether_dhost, 0);
5574 		if (bif != NULL) {
5575 			dst_if = bif->bif_ifp;
5576 		}
5577 	}
5578 
5579 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5580 
5581 	BRIDGE_BPF_TAP_OUT(ifp, m);
5582 
5583 	if (dst_if == NULL) {
5584 		/* callee will unlock */
5585 		bridge_broadcast(sc, NULL, etypef, m);
5586 	} else {
5587 		ifnet_t bridge_ifp;
5588 
5589 		bridge_ifp = sc->sc_ifp;
5590 		BRIDGE_UNLOCK(sc);
5591 
5592 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, etypef, m,
5593 		    CHECKSUM_OPERATION_FINALIZE);
5594 	}
5595 
5596 	return error;
5597 }
5598 
5599 static void
5600 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5601 {
5602 	struct ether_header *eh;
5603 	bool is_ipv4;
5604 	uint32_t sw_csum, hwcap;
5605 	uint32_t did_sw;
5606 	uint32_t csum_flags;
5607 
5608 	eh = mtod(m, struct ether_header *);
5609 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5610 		return;
5611 	}
5612 
5613 	/* do in software what the hardware cannot */
5614 	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5615 	csum_flags = m->m_pkthdr.csum_flags;
5616 	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5617 	sw_csum &= IF_HWASSIST_CSUM_MASK;
5618 
5619 	if (is_ipv4) {
5620 		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5621 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5622 			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5623 				uint16_t start =
5624 				    sizeof(*eh) + sizeof(struct ip);
5625 				uint16_t ulpoff =
5626 				    m->m_pkthdr.csum_data & 0xffff;
5627 				m->m_pkthdr.csum_flags |=
5628 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5629 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5630 				m->m_pkthdr.csum_tx_start = start;
5631 			} else {
5632 				sw_csum |= (CSUM_DELAY_DATA &
5633 				    m->m_pkthdr.csum_flags);
5634 			}
5635 		}
5636 		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5637 	} else {
5638 		if ((hwcap & CSUM_PARTIAL) &&
5639 		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5640 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5641 			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5642 				uint16_t start =
5643 				    sizeof(*eh) + sizeof(struct ip6_hdr);
5644 				uint16_t ulpoff =
5645 				    m->m_pkthdr.csum_data & 0xffff;
5646 				m->m_pkthdr.csum_flags |=
5647 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5648 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5649 				m->m_pkthdr.csum_tx_start = start;
5650 			} else {
5651 				sw_csum |= (CSUM_DELAY_IPV6_DATA &
5652 				    m->m_pkthdr.csum_flags);
5653 			}
5654 		}
5655 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5656 	}
5657 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5658 	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5659 	    ifp->if_xname, csum_flags, hwcap, sw_csum,
5660 	    did_sw, m->m_pkthdr.csum_flags);
5661 }
5662 
5663 /*
5664  * bridge_start:
5665  *
5666  *	Start output on a bridge.
5667  *
5668  * This routine is invoked by the start worker thread; because we never call
5669  * it directly, there is no need do deploy any serialization mechanism other
5670  * than what's already used by the worker thread, i.e. this is already single
5671  * threaded.
5672  *
5673  * This routine is called only when if_bridge_txstart is enabled.
5674  */
5675 static void
5676 bridge_start(struct ifnet *ifp)
5677 {
5678 	mbuf_ref_t m;
5679 
5680 	for (;;) {
5681 		if (ifnet_dequeue(ifp, &m) != 0) {
5682 			break;
5683 		}
5684 
5685 		(void) bridge_output(ifp, m);
5686 	}
5687 }
5688 
5689 static void
5690 prepare_input_packet(ifnet_t ifp, mbuf_t m)
5691 {
5692 	mbuf_pkthdr_setrcvif(m, ifp);
5693 	mbuf_pkthdr_setheader(m, mtod(m, void *));
5694 	/* adjust frame to skip mac-layer header */
5695 	_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
5696 }
5697 
5698 static void
5699 mark_tso_checksum_ok(mbuf_t m)
5700 {
5701 	if (_mbuf_get_tso_mss(m) != 0 ||
5702 	    (m->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5703 		mbuf_set_csum_performed(m, checksum_performed_all_good, 0xffff);
5704 	}
5705 }
5706 
5707 static void
5708 inject_input_packet_list(ifnet_t ifp, mbuf_t in_list, bool m_proto1)
5709 {
5710 	for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5711 		/* mark the packets as arriving on the interface */
5712 		BRIDGE_BPF_TAP_IN(ifp, scan);
5713 		if (m_proto1) {
5714 			scan->m_flags |= M_PROTO1; /* set to avoid loops */
5715 		}
5716 		prepare_input_packet(ifp, scan);
5717 		mark_tso_checksum_ok(scan);
5718 	}
5719 	dlil_input_packet_list(ifp, in_list);
5720 	return;
5721 }
5722 
5723 static void
5724 adjust_input_packet_list(mbuf_t in_list)
5725 {
5726 	for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5727 		mbuf_pkthdr_setheader(scan, mtod(scan, void *));
5728 		_mbuf_adjust_pkthdr_and_data(scan, ETHER_HDR_LEN);
5729 	}
5730 }
5731 
5732 static bool
5733 in_addr_is_ours(struct in_addr ip)
5734 {
5735 	struct in_ifaddr *ia;
5736 	bool             ours = false;
5737 
5738 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5739 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5740 		if (ia->ia_addr.sin_addr.s_addr == ip.s_addr) {
5741 			ours = true;
5742 			break;
5743 		}
5744 	}
5745 	lck_rw_done(&in_ifaddr_rwlock);
5746 	return ours;
5747 }
5748 
5749 static bool
5750 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5751 {
5752 	struct in6_addr         dst_ip;
5753 	struct in6_ifaddr       *ia6;
5754 	bool                    ours = false;
5755 
5756 	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5757 		/* need to embed scope ID for comparison */
5758 		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5759 		dst_ip.s6_addr16[1] = htons(ifscope);
5760 		ip6_p = &dst_ip;
5761 	}
5762 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5763 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5764 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5765 		    ia6->ia_addr.sin6_scope_id, ifscope)) {
5766 			ours = true;
5767 			break;
5768 		}
5769 	}
5770 	lck_rw_done(&in6_ifaddr_rwlock);
5771 	return ours;
5772 }
5773 
5774 static bool
5775 ip_packet_info_dst_is_our_ip(ip_packet_info_t info_p, int index)
5776 {
5777 	/* if the destination is our IP address, don't segment */
5778 	bool    our_ip = false;
5779 
5780 	if (info_p->ip_is_ipv4) {
5781 		struct ip *     hdr;
5782 		struct in_addr  dst_ip;
5783 
5784 		hdr = (struct ip *)(info_p->ip_hdr);
5785 		bcopy(&hdr->ip_dst, &dst_ip, sizeof(dst_ip));
5786 		our_ip = in_addr_is_ours(dst_ip);
5787 	} else {
5788 		struct ip6_hdr *        hdr;
5789 
5790 		hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5791 		our_ip = in6_addr_is_ours(&hdr->ip6_dst, index);
5792 	}
5793 	return our_ip;
5794 }
5795 
5796 typedef union {
5797 	struct in_addr  ip;
5798 	struct in6_addr ip6;
5799 } ip_addr, *ip_addr_t;
5800 
5801 static void
5802 ip_packet_info_copy_dst_ip_addr(ip_packet_info_t info_p, ip_addr_t ipaddr)
5803 {
5804 	if (info_p->ip_is_ipv4) {
5805 		struct ip *     hdr;
5806 
5807 		hdr = (struct ip *)(info_p->ip_hdr);
5808 		bcopy(&hdr->ip_dst, &ipaddr->ip, sizeof(ipaddr->ip));
5809 	} else {
5810 		struct ip6_hdr *        hdr;
5811 
5812 		hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5813 		bcopy(&hdr->ip6_dst, &ipaddr->ip6, sizeof(ipaddr->ip6));
5814 	}
5815 }
5816 
5817 static bool
5818 ip_addr_are_equal(ip_addr_t addr1, ip_addr_t addr2, bool is_ipv4)
5819 {
5820 	bool    equal;
5821 
5822 	if (is_ipv4) {
5823 		equal = addr1->ip.s_addr == addr2->ip.s_addr;
5824 	} else {
5825 		equal = IN6_ARE_ADDR_EQUAL(&addr1->ip6, &addr2->ip6);
5826 	}
5827 	return equal;
5828 }
5829 
5830 static bool
5831 ip_addr_is_ours(ip_addr_t ipaddr, int index, bool is_ipv4)
5832 {
5833 	bool    our_ip;
5834 
5835 	if (is_ipv4) {
5836 		our_ip = in_addr_is_ours(ipaddr->ip);
5837 	} else {
5838 		our_ip = in6_addr_is_ours(&ipaddr->ip6, index);
5839 	}
5840 	return our_ip;
5841 }
5842 
5843 static void
5844 bridge_interface_input_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
5845     mblist list, bool bif_uses_virtio)
5846 {
5847 	uint32_t        in_errors = 0;
5848 	bool            is_ipv4;
5849 	mblist          in_list;
5850 	ip_addr         last_ip;
5851 	bool            last_ip_ours = false;
5852 	bool            last_ip_valid = false;
5853 	u_int           mac_hlen;
5854 	bool            may_forward = false;
5855 	mbuf_t          next_packet;
5856 
5857 	switch (etypef) {
5858 	case ETHER_TYPE_FLAG_IPV4:
5859 		is_ipv4 = true;
5860 		may_forward = (ipforwarding != 0);
5861 		break;
5862 	case ETHER_TYPE_FLAG_IPV6:
5863 		is_ipv4 = false;
5864 		may_forward = (ip6_forwarding != 0);
5865 		break;
5866 	}
5867 	if (!may_forward) {
5868 		in_list = list;
5869 		goto done;
5870 	}
5871 
5872 	mblist_init(&in_list);
5873 	mac_hlen = sizeof(struct ether_header);
5874 	bzero(&last_ip, sizeof(last_ip));
5875 	for (mbuf_ref_t scan = list.head; scan != NULL; scan = next_packet) {
5876 		int             error;
5877 		ip_packet_info  info;
5878 		bool            ip_ours;
5879 		struct ifbrmstats stats; /* XXX should really be accounted */
5880 		ip_addr         this_ip;
5881 
5882 		/* take it out of the list */
5883 		next_packet = scan->m_nextpkt;
5884 		scan->m_nextpkt = NULL;
5885 
5886 		/* check for TCP packet and get IP header */
5887 		error = bridge_get_tcp_header(&scan, mac_hlen, is_ipv4,
5888 		    &info, &stats.brms_in_ip);
5889 		if (error != 0) {
5890 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5891 			    "%s bridge_get_tcp_header failed %d",
5892 			    bridge_ifp->if_xname, error);
5893 			if (scan != NULL) {
5894 				m_freem(scan);
5895 				scan = NULL;
5896 			}
5897 			in_errors++;
5898 			continue;
5899 		}
5900 		ip_packet_info_copy_dst_ip_addr(&info, &this_ip);
5901 		if (last_ip_valid &&
5902 		    ip_addr_are_equal(&last_ip, &this_ip, is_ipv4)) {
5903 			/* use cached result */
5904 			ip_ours = last_ip_ours;
5905 		} else {
5906 			ip_ours = ip_addr_is_ours(&this_ip,
5907 			    bridge_ifp->if_index,
5908 			    is_ipv4);
5909 			/* cache the result */
5910 			last_ip_valid = true;
5911 			last_ip_ours = ip_ours;
5912 			last_ip = this_ip;
5913 		}
5914 
5915 		/* if the packet is destined to us, just send it up */
5916 		if (ip_ours) {
5917 			mblist_append(&in_list, scan);
5918 			continue;
5919 		}
5920 		/*
5921 		 * If this is a TCP packet that's marked for TSO or LRO, or
5922 		 * we think it's a large packet, segment it.
5923 		 */
5924 		if (info.ip_proto_hdr != NULL &&
5925 		    (_mbuf_get_tso_mss(scan) != 0 ||
5926 		    scan->m_pkthdr.rx_seg_cnt > 1 ||
5927 		    (!bif_uses_virtio &&
5928 		    (mbuf_pkthdr_len(scan) >
5929 		    (bridge_ifp->if_mtu + ETHER_HDR_LEN))))) {
5930 			mblist          seg;
5931 
5932 			seg = gso_tcp_with_info(bridge_ifp, scan, &info,
5933 			    mac_hlen, is_ipv4, false);
5934 			if (seg.head == NULL) {
5935 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5936 				    "gso_tcp returned no packets");
5937 				in_errors++;
5938 				continue;
5939 			}
5940 			if (seg.count > 1) {
5941 				/* packet was segmented+checksummed */
5942 				mblist_append_list(&in_list, seg);
5943 				continue;
5944 			}
5945 			/* there's just one packet, no segmentation */
5946 			scan = seg.head;
5947 		}
5948 		/* need checksum if it's marked for checksum offload */
5949 		if (bif_uses_virtio &&
5950 		    (scan->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5951 			error = bridge_offload_checksum(&scan, &info, &stats);
5952 			if (error != 0) {
5953 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
5954 				    "%s bridge_offload_checksum failed %d",
5955 				    bridge_ifp->if_xname, error);
5956 				if (scan != NULL) {
5957 					m_freem(scan);
5958 					scan = NULL;
5959 				}
5960 				in_errors++;
5961 				continue;
5962 			}
5963 		}
5964 		mblist_append(&in_list, scan);
5965 	}
5966 
5967 done:
5968 	if (in_list.head != NULL) {
5969 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5970 		    "%s packets %d bytes %d",
5971 		    bridge_ifp->if_xname,
5972 		    in_list.count, in_list.bytes);
5973 		/* Mark the packets as arriving on the bridge interface */
5974 		inject_input_packet_list(bridge_ifp, in_list.head, false);
5975 		ifnet_stat_increment_in(bridge_ifp, in_list.count,
5976 		    in_list.bytes, in_errors);
5977 	} else if (in_errors != 0) {
5978 		ifnet_stat_increment_in(bridge_ifp, 0, 0, in_errors);
5979 	}
5980 	return;
5981 }
5982 
5983 /*
5984  * bridge_broadcast:
5985  *
5986  *	Send a frame to all interfaces that are members of
5987  *	the bridge, except for the one on which the packet
5988  *	arrived.
5989  *
5990  *	NOTE: Releases the lock on return.
5991  */
5992 static void
5993 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
5994     ether_type_flag_t etypef, mbuf_t m)
5995 {
5996 	ifnet_t bridge_ifp;
5997 	struct bridge_iflist *dbif;
5998 	struct ifnet * src_if;
5999 	mbuf_ref_t mc;
6000 	struct mbuf *mc_in;
6001 	int error = 0, used = 0;
6002 	ChecksumOperation cksum_op;
6003 	struct mac_nat_record mnr;
6004 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6005 	boolean_t translate_mac = FALSE;
6006 	uint32_t sc_filter_flags;
6007 	bool is_bcast_mcast;
6008 
6009 	bridge_ifp = sc->sc_ifp;
6010 	if (sbif != NULL) {
6011 		src_if = sbif->bif_ifp;
6012 		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6013 		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6014 			/* get the translation record */
6015 			translate_mac
6016 			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6017 			if (m == NULL) {
6018 				/* packet was deallocated */
6019 				BRIDGE_UNLOCK(sc);
6020 				return;
6021 			}
6022 		}
6023 	} else {
6024 		/*
6025 		 * sbif is NULL when the bridge interface calls
6026 		 * bridge_broadcast().
6027 		 */
6028 		cksum_op = CHECKSUM_OPERATION_FINALIZE;
6029 		src_if = NULL;
6030 	}
6031 
6032 	BRIDGE_LOCK2REF(sc, error);
6033 	if (error) {
6034 		m_freem(m);
6035 		return;
6036 	}
6037 	is_bcast_mcast = IS_BCAST_MCAST(m);
6038 	sc_filter_flags = sc->sc_filter_flags;
6039 	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6040 		ifnet_t         dst_if;
6041 
6042 		dst_if = dbif->bif_ifp;
6043 		if (dst_if == src_if) {
6044 			/* skip the interface that the packet came in on */
6045 			continue;
6046 		}
6047 
6048 		/* Private segments can not talk to each other */
6049 		if (sbif != NULL &&
6050 		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6051 			continue;
6052 		}
6053 
6054 		if ((dbif->bif_ifflags & IFBIF_STP) &&
6055 		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6056 			continue;
6057 		}
6058 
6059 		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6060 		    !is_bcast_mcast) {
6061 			continue;
6062 		}
6063 
6064 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6065 			continue;
6066 		}
6067 
6068 		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
6069 			continue;
6070 		}
6071 
6072 		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6073 			mc = m;
6074 			used = 1;
6075 		} else {
6076 			mc = m_dup(m, M_DONTWAIT);
6077 			if (mc == NULL) {
6078 				(void) ifnet_stat_increment_out(bridge_ifp,
6079 				    0, 0, 1);
6080 				continue;
6081 			}
6082 		}
6083 
6084 		/*
6085 		 * If broadcast input is enabled, do so only if this
6086 		 * is an input packet.
6087 		 */
6088 		if (sbif != NULL && is_bcast_mcast &&
6089 		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6090 			mc_in = m_dup(mc, M_DONTWAIT);
6091 			/* this could fail, but we continue anyways */
6092 		} else {
6093 			mc_in = NULL;
6094 		}
6095 
6096 		/* out */
6097 		if (translate_mac && mac_nat_bif == dbif) {
6098 			/* translate the packet */
6099 			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6100 		}
6101 
6102 		if (mc != NULL && sbif != NULL &&
6103 		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6104 			if (used == 0) {
6105 				/* Keep the layer3 header aligned */
6106 				int i = min(mc->m_pkthdr.len, max_protohdr);
6107 				mc = m_copyup(mc, i, ETHER_ALIGN);
6108 				if (mc == NULL) {
6109 					(void) ifnet_stat_increment_out(
6110 						sc->sc_ifp, 0, 0, 1);
6111 					if (mc_in != NULL) {
6112 						m_freem(mc_in);
6113 						mc_in = NULL;
6114 					}
6115 					continue;
6116 				}
6117 			}
6118 			if (bridge_pf(&mc, dst_if, sc_filter_flags, false) != 0) {
6119 				if (mc_in != NULL) {
6120 					m_freem(mc_in);
6121 					mc_in = NULL;
6122 				}
6123 				continue;
6124 			}
6125 			if (mc == NULL) {
6126 				if (mc_in != NULL) {
6127 					m_freem(mc_in);
6128 					mc_in = NULL;
6129 				}
6130 				continue;
6131 			}
6132 		}
6133 
6134 		if (mc != NULL) {
6135 			/* verify checksum if necessary */
6136 			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6137 			    !bif_has_checksum_offload(sbif)) {
6138 				error = bridge_verify_checksum(&mc,
6139 				    &dbif->bif_stats);
6140 				if (error != 0) {
6141 					if (mc != NULL) {
6142 						m_freem(mc);
6143 					}
6144 					mc = NULL;
6145 				}
6146 			}
6147 			if (mc != NULL) {
6148 				(void) bridge_enqueue(bridge_ifp,
6149 				    NULL, dst_if, etypef, mc, cksum_op);
6150 			}
6151 		}
6152 
6153 		/* in */
6154 		if (mc_in == NULL) {
6155 			continue;
6156 		}
6157 		BRIDGE_BPF_TAP_IN(dst_if, mc_in);
6158 		prepare_input_packet(dst_if, mc_in);
6159 		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6160 		dlil_input_packet_list(dst_if, mc_in);
6161 	}
6162 	if (used == 0) {
6163 		m_freem(m);
6164 	}
6165 
6166 
6167 	BRIDGE_UNREF(sc);
6168 }
6169 
6170 static mbuf_t
6171 copy_packet_list(mbuf_t m)
6172 {
6173 	mblist  ret;
6174 	mbuf_t  next_packet;
6175 
6176 	mblist_init(&ret);
6177 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
6178 		mbuf_t  copy_m;
6179 
6180 		/* take it out of the list */
6181 		next_packet = scan->m_nextpkt;
6182 		scan->m_nextpkt = NULL;
6183 
6184 		/* create a copy and add it to the new list */
6185 		copy_m = m_dup(scan, M_DONTWAIT);
6186 		if (copy_m != NULL) {
6187 			mblist_append(&ret, copy_m);
6188 		}
6189 
6190 		/* put it back in the original list */
6191 		scan->m_nextpkt = next_packet;
6192 	}
6193 	return ret.head;
6194 }
6195 
/*
 * bridge_broadcast_list:
 *
 *      Broadcast a list of packets to all members except `sbif`.
 *      Consumes `m` before returning.
 *
 *	`sbif` is the member the packets arrived on, or NULL when the
 *	bridge interface itself is the sender. `etypef` is tested with
 *	ether_type_flag_is_ip() to decide whether checksum handling
 *	applies to the list.
 *
 *	Ownership of `m`: the list is handed to bridge_enqueue() as-is
 *	only for the final member in sc_iflist (when that member is not
 *	the MAC-NAT member); every other eligible member gets a copy.
 *	If `m` was never handed off it is freed at `done`.
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ether_type_flag_t etypef, mbuf_t m)
{
	bool                    bridge_has_address;
	ifnet_t                 bridge_ifp;
	struct bridge_iflist *  dbif;
	bool                    is_bcast_mcast;
	errno_t                 error = 0;
	ChecksumOperation       cksum_op;
	struct bridge_iflist *  mac_nat_bif = sc->sc_mac_nat_bif;
	ifnet_t                 mac_nat_if = NULL;
	bool                    need_mac_nat = false;
	mbuf_t                  out_mac_nat = NULL;
	ifnet_t                 src_if;
	uint32_t                sc_filter_flags;
	bool                    used = false;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		src_if = sbif->bif_ifp;

		if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
			bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

			/* compute checksum on packets marked with offload */
			m = bridge_checksum_offload_list(bridge_ifp, sbif,
			    m, is_ipv4);
			if (m == NULL) {
				/* nothing left to send; drop the lock and bail */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			cksum_op = CHECKSUM_OPERATION_NONE;
		} else {
			cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		}

		/*
		 * If MAC-NAT is enabled and we'll be sending the packets
		 * over it, verify that it is up and active before
		 * deciding to make a translated copy.
		 */
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			mac_nat_if = mac_nat_bif->bif_ifp;
			if ((mac_nat_if->if_flags & IFF_RUNNING) != 0 &&
			    (mac_nat_bif->bif_flags & BIFF_MEDIA_ACTIVE) != 0) {
				need_mac_nat = true;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast().
		 */
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		src_if = NULL;
	}

	/*
	 * Create a translated copy for packets destined to MAC-NAT interface.
	 * This is done before BRIDGE_LOCK2REF() below, i.e. while the
	 * function still holds whatever lock state it was entered with.
	 */
	if (need_mac_nat) {
		out_mac_nat
		        = bridge_mac_nat_copy_and_translate_list(sc, sbif,
		    mac_nat_if, m);
	}
	/* snapshot softc state that is consulted after BRIDGE_LOCK2REF() */
	sc_filter_flags = sc->sc_filter_flags;
	bridge_has_address = (sc->sc_flags & SCF_ADDRESS_ASSIGNED) != 0;
	/*
	 * NOTE(review): presumably trades the lock for a reference that is
	 * dropped by BRIDGE_UNREF() below, and reports failure via `error`
	 * -- confirm against the macro definition.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		goto done;
	}
	is_bcast_mcast = IS_BCAST_MCAST(m);

	/* make a copy for the bridge interface */
	if (is_bcast_mcast && bridge_has_address) {
		mbuf_t  in_list;

		in_list = copy_packet_list(m);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
		    "%s mcast for us in_m %p",
		    bridge_ifp->if_xname, in_list);
		if (in_list != NULL) {
			inject_input_packet_list(bridge_ifp, in_list, false);
		}
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		ifnet_t         dst_if;
		mbuf_t          in_m = NULL;
		mbuf_t          out_m = NULL;

		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip members that spanning tree has put in discarding state */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/* members without IFBIF_DISCOVER only get broadcast/multicast */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    !is_bcast_mcast) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
			continue;
		}
		if (dbif == mac_nat_bif) {
			/* translated copy was created above, use that */
			out_m = out_mac_nat;
			out_mac_nat = NULL;
		} else if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			/* consume `m` */
			out_m = m;
			used = true;
		} else {
			/* needs a copy */
			out_m = copy_packet_list(m);
		}

		if (out_m == NULL) {
			/* no list to send on this member; count an output error */
			ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
			continue;
		}
		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 *
		 * The copy is taken from `m` here, before out_m (which may
		 * be `m` itself) is handed to the filter/enqueue path below.
		 */
		if (sbif != NULL && is_bcast_mcast &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			in_m = copy_packet_list(m);
			/* this could fail, but we continue anyways */
		} else {
			in_m = NULL;
		}

		/* member packet filtering applies only to received packets */
		if (sbif != NULL &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			out_m = bridge_pf_list(out_m, dst_if,
			    sc_filter_flags, false);
		}
		if (out_m != NULL) {
			/* verify checksum if necessary */
			if (sbif != NULL &&
			    ether_type_flag_is_ip(etypef) &&
			    bif_has_checksum_offload(dbif) &&
			    !bif_has_checksum_offload(sbif)) {
				bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

				out_m = bridge_verify_checksum_list(bridge_ifp,
				    dbif, out_m, is_ipv4);
			}
			if (out_m != NULL) {
				bridge_enqueue(bridge_ifp, src_if, dst_if,
				    etypef, out_m, cksum_op);
			}
		}

		/* in */
		if (in_m != NULL) {
			inject_input_packet_list(dst_if, in_m, true);
		}
	}

	BRIDGE_UNREF(sc);

done:
	/* free the translated copy if the MAC-NAT member never took it */
	if (out_mac_nat != NULL) {
		m_freem_list(out_mac_nat);
	}
	/* free the original list unless it was handed to the last member */
	if (!used) {
		m_freem_list(m);
	}
	return;
}
6393 
6394 #define NEEDED_CSUM_IPV4   (IF_HWASSIST_CSUM_UDP | IF_HWASSIST_CSUM_TCP)
6395 #define NEEDED_CSUM_IPV6   (IF_HWASSIST_CSUM_UDPIPV6 | IF_HWASSIST_CSUM_TCPIPV6)
6396 
6397 static bool
6398 interface_supports_hw_checksum(ifnet_t ifp, bool is_ipv4)
6399 {
6400 	uint32_t        hwcap = IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
6401 	uint32_t        needed = is_ipv4 ? NEEDED_CSUM_IPV4 : NEEDED_CSUM_IPV6;
6402 	bool            supports;
6403 
6404 	supports = (hwcap & needed) == needed;
6405 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM, "%s: does %ssupport checksum",
6406 	    ifp->if_xname, supports ? "" : "not ");
6407 	return supports;
6408 }
6409 
6410 static void
6411 bridge_forward_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6412     ifnet_t dst_if, ether_type_flag_t etypef, mbuf_t m)
6413 {
6414 	bool                    checksum_ok = false;
6415 	ChecksumOperation       cksum_op;
6416 	ifnet_t                 bridge_ifp;
6417 	struct bridge_iflist *  dbif;
6418 	uint32_t                sc_filter_flags;
6419 	ifnet_t                 src_if;
6420 
6421 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6422 		goto drop;
6423 	}
6424 	dbif = bridge_lookup_member_if(sc, dst_if);
6425 	if (dbif == NULL) {
6426 		/* Not a member of the bridge (anymore?) */
6427 		goto drop;
6428 	}
6429 
6430 	/* Private segments can not talk to each other */
6431 	if ((sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) != 0) {
6432 		goto drop;
6433 	}
6434 	bridge_ifp = sc->sc_ifp;
6435 	src_if = sbif->bif_ifp;
6436 	cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6437 	if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6438 		bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6439 
6440 		if (dbif == sc->sc_mac_nat_bif ||
6441 		    (IFNET_IS_VMNET(dst_if) && !bif_uses_virtio(dbif)) ||
6442 		    !interface_supports_hw_checksum(dst_if, is_ipv4)) {
6443 			/* compute checksums now if necessary */
6444 			m = bridge_checksum_offload_list(bridge_ifp, sbif,
6445 			    m, is_ipv4);
6446 			checksum_ok = true;
6447 		} else {
6448 			cksum_op = CHECKSUM_OPERATION_NONE;
6449 		}
6450 	}
6451 
6452 	if (dbif == sc->sc_mac_nat_bif) {
6453 		/* translate the packets before forwarding them */
6454 		if ((etypef & ETHER_TYPE_FLAG_IP_ARP) != 0) {
6455 			m = bridge_mac_nat_translate_list(sc, sbif, dst_if, m);
6456 		}
6457 	} else if (!checksum_ok && ether_type_flag_is_ip(etypef) &&
6458 	    bif_has_checksum_offload(dbif) && !bif_has_checksum_offload(sbif)) {
6459 		bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6460 
6461 		/*
6462 		 * If the destination interface has checksum offload enabled,
6463 		 * verify the checksum now, unless the source interface also has
6464 		 * checksum offload enabled. The checksum in that case has
6465 		 * already just been computed and verifying it is unnecessary.
6466 		 */
6467 		m = bridge_verify_checksum_list(bridge_ifp, dbif, m, is_ipv4);
6468 	}
6469 	sc_filter_flags = sc->sc_filter_flags;
6470 	BRIDGE_UNLOCK(sc);
6471 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6472 		m = bridge_pf_list(m, dst_if, sc_filter_flags, false);
6473 	}
6474 
6475 	/*
6476 	 * We're forwarding inbound packets for which the checksums must
6477 	 * already have been computed and if required, verified, or
6478 	 * packets from a virtio-enabled interface for which we rely
6479 	 * on the packet containing appropriate offload flags.
6480 	 */
6481 	if (m != NULL) {
6482 		bridge_enqueue(bridge_ifp, src_if, dst_if, etypef, m,
6483 		    cksum_op);
6484 	}
6485 	return;
6486 
6487 drop:
6488 	BRIDGE_UNLOCK(sc);
6489 	m_freem_list(m);
6490 	return;
6491 }
6492 
6493 /*
6494  * bridge_span:
6495  *
6496  *	Duplicate a packet out one or more interfaces that are in span mode,
6497  *	the original mbuf is unmodified.
6498  */
6499 static void
6500 bridge_span(struct bridge_softc *sc, ether_type_flag_t etypef, struct mbuf *m)
6501 {
6502 	struct bridge_iflist *bif;
6503 	struct ifnet *dst_if;
6504 	struct mbuf *mc;
6505 
6506 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6507 		return;
6508 	}
6509 
6510 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6511 		dst_if = bif->bif_ifp;
6512 
6513 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6514 			continue;
6515 		}
6516 
6517 		mc = m_copypacket(m, M_DONTWAIT);
6518 		if (mc == NULL) {
6519 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6520 			continue;
6521 		}
6522 
6523 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, etypef, mc,
6524 		    CHECKSUM_OPERATION_NONE);
6525 	}
6526 }
6527 
6528 /*
6529  * bridge_rtupdate:
6530  *
6531  *	Add a bridge routing entry.
6532  */
6533 static int
6534 bridge_rtupdate(struct bridge_softc *sc, const uint8_t dst[ETHER_ADDR_LEN], uint16_t vlan,
6535     struct bridge_iflist *bif, int setflags, uint8_t flags)
6536 {
6537 	struct bridge_rtnode *brt;
6538 	int error;
6539 
6540 	BRIDGE_LOCK_ASSERT_HELD(sc);
6541 
6542 	/* Check the source address is valid and not multicast. */
6543 	if (ETHER_IS_MULTICAST(dst) ||
6544 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6545 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6546 		return EINVAL;
6547 	}
6548 
6549 	/* 802.1p frames map to vlan 1 */
6550 	if (vlan == 0) {
6551 		vlan = 1;
6552 	}
6553 
6554 	/*
6555 	 * A route for this destination might already exist.  If so,
6556 	 * update it, otherwise create a new one.
6557 	 */
6558 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6559 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6560 			sc->sc_brtexceeded++;
6561 			return ENOSPC;
6562 		}
6563 		/* Check per interface address limits (if enabled) */
6564 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6565 			bif->bif_addrexceeded++;
6566 			return ENOSPC;
6567 		}
6568 
6569 		/*
6570 		 * Allocate a new bridge forwarding node, and
6571 		 * initialize the expiration time and Ethernet
6572 		 * address.
6573 		 */
6574 		brt = zalloc_noblock(bridge_rtnode_pool);
6575 		if (brt == NULL) {
6576 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6577 			    "zalloc_nolock failed");
6578 			return ENOMEM;
6579 		}
6580 		bzero(brt, sizeof(struct bridge_rtnode));
6581 
6582 		if (bif->bif_ifflags & IFBIF_STICKY) {
6583 			brt->brt_flags = IFBAF_STICKY;
6584 		} else {
6585 			brt->brt_flags = IFBAF_DYNAMIC;
6586 		}
6587 
6588 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6589 		brt->brt_vlan = vlan;
6590 
6591 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6592 			zfree(bridge_rtnode_pool, brt);
6593 			return error;
6594 		}
6595 		brt->brt_dst = bif;
6596 		bif->bif_addrcnt++;
6597 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6598 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6599 		    "on %s count %u hashsize %u",
6600 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6601 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6602 		    sc->sc_rthash_size);
6603 	}
6604 
6605 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6606 	    brt->brt_dst != bif) {
6607 		brt->brt_dst->bif_addrcnt--;
6608 		brt->brt_dst = bif;
6609 		brt->brt_dst->bif_addrcnt++;
6610 	}
6611 
6612 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6613 		unsigned long now;
6614 
6615 		now = (unsigned long) net_uptime();
6616 		brt->brt_expire = now + sc->sc_brttimeout;
6617 	}
6618 	if (setflags) {
6619 		brt->brt_flags = flags;
6620 	}
6621 
6622 	return 0;
6623 }
6624 
6625 /*
6626  * bridge_rtlookup:
6627  *
6628  *	Lookup the destination interface for an address.
6629  */
6630 static struct bridge_iflist *
6631 bridge_rtlookup_bif(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
6632     uint16_t vlan)
6633 {
6634 	struct bridge_rtnode *brt;
6635 
6636 	BRIDGE_LOCK_ASSERT_HELD(sc);
6637 
6638 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6639 		return NULL;
6640 	}
6641 
6642 	return brt->brt_dst;
6643 }
6644 
6645 /*
6646  * bridge_rttrim:
6647  *
6648  *	Trim the routine table so that we have a number
6649  *	of routing entries less than or equal to the
6650  *	maximum number.
6651  */
6652 static void
6653 bridge_rttrim(struct bridge_softc *sc)
6654 {
6655 	struct bridge_rtnode *brt, *nbrt;
6656 
6657 	BRIDGE_LOCK_ASSERT_HELD(sc);
6658 
6659 	/* Make sure we actually need to do this. */
6660 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6661 		return;
6662 	}
6663 
6664 	/* Force an aging cycle; this might trim enough addresses. */
6665 	bridge_rtage(sc);
6666 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6667 		return;
6668 	}
6669 
6670 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6671 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6672 			bridge_rtnode_destroy(sc, brt);
6673 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6674 				return;
6675 			}
6676 		}
6677 	}
6678 }
6679 
6680 /*
6681  * bridge_aging_timer:
6682  *
6683  *	Aging periodic timer for the bridge routing table.
6684  */
6685 static void
6686 bridge_aging_timer(struct bridge_softc *sc)
6687 {
6688 	BRIDGE_LOCK_ASSERT_HELD(sc);
6689 
6690 	bridge_rtage(sc);
6691 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6692 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6693 		sc->sc_aging_timer.bdc_sc = sc;
6694 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6695 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6696 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6697 	}
6698 }
6699 
6700 /*
6701  * bridge_rtage:
6702  *
6703  *	Perform an aging cycle.
6704  */
6705 static void
6706 bridge_rtage(struct bridge_softc *sc)
6707 {
6708 	struct bridge_rtnode *brt, *nbrt;
6709 	unsigned long now;
6710 
6711 	BRIDGE_LOCK_ASSERT_HELD(sc);
6712 
6713 	now = (unsigned long) net_uptime();
6714 
6715 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6716 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6717 			if (now >= brt->brt_expire) {
6718 				bridge_rtnode_destroy(sc, brt);
6719 			}
6720 		}
6721 	}
6722 	if (sc->sc_mac_nat_bif != NULL) {
6723 		bridge_mac_nat_age_entries(sc, now);
6724 	}
6725 }
6726 
6727 /*
6728  * bridge_rtflush:
6729  *
6730  *	Remove all dynamic addresses from the bridge.
6731  */
6732 static void
6733 bridge_rtflush(struct bridge_softc *sc, int full)
6734 {
6735 	struct bridge_rtnode *brt, *nbrt;
6736 
6737 	BRIDGE_LOCK_ASSERT_HELD(sc);
6738 
6739 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6740 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6741 			bridge_rtnode_destroy(sc, brt);
6742 		}
6743 	}
6744 }
6745 
6746 /*
6747  * bridge_rtdaddr:
6748  *
6749  *	Remove an address from the table.
6750  */
6751 static int
6752 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN], uint16_t vlan)
6753 {
6754 	struct bridge_rtnode *brt;
6755 	int found = 0;
6756 
6757 	BRIDGE_LOCK_ASSERT_HELD(sc);
6758 
6759 	/*
6760 	 * If vlan is zero then we want to delete for all vlans so the lookup
6761 	 * may return more than one.
6762 	 */
6763 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6764 		bridge_rtnode_destroy(sc, brt);
6765 		found = 1;
6766 	}
6767 
6768 	return found ? 0 : ENOENT;
6769 }
6770 
6771 /*
6772  * bridge_rtdelete:
6773  *
6774  *	Delete routes to a specific member interface.
6775  */
6776 static void
6777 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6778 {
6779 	struct bridge_rtnode *brt, *nbrt;
6780 
6781 	BRIDGE_LOCK_ASSERT_HELD(sc);
6782 
6783 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6784 		if (brt->brt_ifp == ifp && (full ||
6785 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6786 			bridge_rtnode_destroy(sc, brt);
6787 		}
6788 	}
6789 }
6790 
6791 /*
6792  * bridge_rtable_init:
6793  *
6794  *	Initialize the route table for this bridge.
6795  */
6796 static int
6797 bridge_rtable_init(struct bridge_softc *sc)
6798 {
6799 	u_int32_t i;
6800 
6801 	sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6802 	    BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6803 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6804 
6805 	for (i = 0; i < sc->sc_rthash_size; i++) {
6806 		LIST_INIT(&sc->sc_rthash[i]);
6807 	}
6808 
6809 	sc->sc_rthash_key = RandomULong();
6810 
6811 	LIST_INIT(&sc->sc_rtlist);
6812 
6813 	return 0;
6814 }
6815 
/*
 * bridge_rthash_delayed_resize:
 *
 *	Resize the routing table hash on a delayed thread call.
 *
 *	Doubles the number of hash buckets when the table holds at least
 *	four entries per bucket, then re-hashes every route node into the
 *	new bucket array under a fresh hash key.
 *
 *	Entered with the bridge lock held. The lock is dropped around the
 *	bucket allocation and re-taken afterwards; SCF_RESIZING is set for
 *	that window and cleared once the lock is held again.
 */
static void
bridge_rthash_delayed_resize(struct bridge_softc *sc)
{
	u_int32_t new_rthash_size = 0;
	u_int32_t old_rthash_size = 0;
	struct _bridge_rtnode_list *new_rthash = NULL;
	struct _bridge_rtnode_list *old_rthash = NULL;
	u_int32_t i;
	struct bridge_rtnode *brt;
	int error = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/*
	 * Four entries per hash bucket is our ideal load factor
	 *
	 * Note: this exit leaves error == 0, so the success branch at
	 * `out` runs with old_rthash == NULL and old_rthash_size == 0.
	 */
	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
		goto out;
	}

	/*
	 * Doubling the number of hash buckets may be too simplistic
	 * especially when facing a spike of new entries
	 */
	new_rthash_size = sc->sc_rthash_size * 2;

	/* mark the resize as in progress while the lock is dropped */
	sc->sc_flags |= SCF_RESIZING;
	BRIDGE_UNLOCK(sc);

	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
	    Z_WAITOK | Z_ZERO);

	BRIDGE_LOCK(sc);
	sc->sc_flags &= ~SCF_RESIZING;

	if (new_rthash == NULL) {
		error = ENOMEM;
		goto out;
	}
	/* the bridge may have started detaching while we were unlocked */
	if ((sc->sc_flags & SCF_DETACHING)) {
		error = ENODEV;
		goto out;
	}
	/*
	 * Fail safe from here on
	 */
	old_rthash = sc->sc_rthash;
	old_rthash_size = sc->sc_rthash_size;
	sc->sc_rthash = new_rthash;
	sc->sc_rthash_size = new_rthash_size;

	/*
	 * Get a new key to force entries to be shuffled around to reduce
	 * the likelihood they will land in the same buckets
	 */
	sc->sc_rthash_key = RandomULong();

	for (i = 0; i < sc->sc_rthash_size; i++) {
		LIST_INIT(&sc->sc_rthash[i]);
	}

	/*
	 * Walk the global route list (linked through brt_list), unlink
	 * each node from its old bucket chain (brt_hash) and insert it
	 * into the new bucket array.
	 */
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
		LIST_REMOVE(brt, brt_hash);
		(void) bridge_rtnode_hash(sc, brt);
	}
out:
	if (error == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
		    "%s new size %u",
		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
		/* release the previous bucket array */
		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
	} else {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
		    "%s failed %d", sc->sc_ifp->if_xname, error);
		/* the table was not switched over; discard the new array */
		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
	}
}
6898 
6899 /*
6900  * Resize the number of hash buckets based on the load factor
6901  * Currently only grow
6902  * Failing to resize the hash table is not fatal
6903  */
6904 static void
6905 bridge_rthash_resize(struct bridge_softc *sc)
6906 {
6907 	BRIDGE_LOCK_ASSERT_HELD(sc);
6908 
6909 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6910 		return;
6911 	}
6912 
6913 	/*
6914 	 * Four entries per hash bucket is our ideal load factor
6915 	 */
6916 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6917 		return;
6918 	}
6919 	/*
6920 	 * Hard limit on the size of the routing hash table
6921 	 */
6922 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6923 		return;
6924 	}
6925 
6926 	sc->sc_resize_call.bdc_sc = sc;
6927 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6928 	bridge_schedule_delayed_call(&sc->sc_resize_call);
6929 }
6930 
/*
 * bridge_rtable_fini:
 *
 *	Deconstruct the route table for this bridge.
 *
 *	Asserts that no route entries remain (sc_brtcnt == 0), frees the
 *	hash bucket array, and clears the bucket pointer and size fields.
 */
static void
bridge_rtable_fini(struct bridge_softc *sc)
{
	/* all route nodes must have been destroyed before teardown */
	KASSERT(sc->sc_brtcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
	kfree_type_counted_by(struct _bridge_rtnode_list, sc->sc_rthash_size,
	    sc->sc_rthash);
	sc->sc_rthash = NULL;
	sc->sc_rthash_size = 0;
}
6946 
6947 /*
6948  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6949  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6950  */
6951 #define mix(a, b, c)                                                    \
6952 do {                                                                    \
6953 	a -= b; a -= c; a ^= (c >> 13);                                 \
6954 	b -= c; b -= a; b ^= (a << 8);                                  \
6955 	c -= a; c -= b; c ^= (b >> 13);                                 \
6956 	a -= b; a -= c; a ^= (c >> 12);                                 \
6957 	b -= c; b -= a; b ^= (a << 16);                                 \
6958 	c -= a; c -= b; c ^= (b >> 5);                                  \
6959 	a -= b; a -= c; a ^= (c >> 3);                                  \
6960 	b -= c; b -= a; b ^= (a << 10);                                 \
6961 	c -= a; c -= b; c ^= (b >> 15);                                 \
6962 } while ( /*CONSTCOND*/ 0)
6963 
6964 static __inline uint32_t
6965 bridge_rthash(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN])
6966 {
6967 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6968 
6969 	b += addr[5] << 8;
6970 	b += addr[4];
6971 	a += addr[3] << 24;
6972 	a += addr[2] << 16;
6973 	a += addr[1] << 8;
6974 	a += addr[0];
6975 
6976 	mix(a, b, c);
6977 
6978 	return c & BRIDGE_RTHASH_MASK(sc);
6979 }
6980 
6981 #undef mix
6982 
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte. Returns 0 when they
 *	are equal; otherwise returns a[i] - b[i] for the first index at
 *	which they differ (negative when `a` sorts first, positive when
 *	`b` does).
 */
static int
bridge_rtnode_addr_cmp(const uint8_t a[ETHER_ADDR_LEN], const uint8_t b[ETHER_ADDR_LEN])
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}
	return 0;
}
6994 
6995 /*
6996  * bridge_rtnode_lookup:
6997  *
6998  *	Look up a bridge route node for the specified destination. Compare the
6999  *	vlan id or if zero then just return the first match.
7000  */
7001 static struct bridge_rtnode *
7002 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
7003     uint16_t vlan)
7004 {
7005 	struct bridge_rtnode *brt;
7006 	uint32_t hash;
7007 	int dir;
7008 
7009 	BRIDGE_LOCK_ASSERT_HELD(sc);
7010 
7011 	hash = bridge_rthash(sc, addr);
7012 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7013 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7014 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7015 			return brt;
7016 		}
7017 		if (dir > 0) {
7018 			return NULL;
7019 		}
7020 	}
7021 
7022 	return NULL;
7023 }
7024 
7025 /*
7026  * bridge_rtnode_hash:
7027  *
7028  *	Insert the specified bridge node into the route hash table.
7029  *	This is used when adding a new node or to rehash when resizing
7030  *	the hash table
7031  */
7032 static int
7033 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7034 {
7035 	struct bridge_rtnode *lbrt;
7036 	uint32_t hash;
7037 	int dir;
7038 
7039 	BRIDGE_LOCK_ASSERT_HELD(sc);
7040 
7041 	hash = bridge_rthash(sc, brt->brt_addr);
7042 
7043 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7044 	if (lbrt == NULL) {
7045 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7046 		goto out;
7047 	}
7048 
7049 	do {
7050 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7051 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7052 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7053 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7054 			    sc->sc_ifp->if_xname,
7055 			    brt->brt_addr[0], brt->brt_addr[1],
7056 			    brt->brt_addr[2], brt->brt_addr[3],
7057 			    brt->brt_addr[4], brt->brt_addr[5]);
7058 			return EEXIST;
7059 		}
7060 		if (dir > 0) {
7061 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7062 			goto out;
7063 		}
7064 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7065 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7066 			goto out;
7067 		}
7068 		lbrt = LIST_NEXT(lbrt, brt_hash);
7069 	} while (lbrt != NULL);
7070 
7071 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7072 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7073 	    sc->sc_ifp->if_xname,
7074 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7075 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7076 out:
7077 	return 0;
7078 }
7079 
7080 /*
7081  * bridge_rtnode_insert:
7082  *
7083  *	Insert the specified bridge node into the route table.  We
7084  *	assume the entry is not already in the table.
7085  */
7086 static int
7087 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7088 {
7089 	int error;
7090 
7091 	error = bridge_rtnode_hash(sc, brt);
7092 	if (error != 0) {
7093 		return error;
7094 	}
7095 
7096 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7097 	sc->sc_brtcnt++;
7098 
7099 	bridge_rthash_resize(sc);
7100 
7101 	return 0;
7102 }
7103 
7104 /*
7105  * bridge_rtnode_destroy:
7106  *
7107  *	Destroy a bridge rtnode.
7108  */
7109 static void
7110 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7111 {
7112 	BRIDGE_LOCK_ASSERT_HELD(sc);
7113 
7114 	LIST_REMOVE(brt, brt_hash);
7115 
7116 	LIST_REMOVE(brt, brt_list);
7117 	sc->sc_brtcnt--;
7118 	brt->brt_dst->bif_addrcnt--;
7119 	zfree(bridge_rtnode_pool, brt);
7120 }
7121 
7122 #if BRIDGESTP
7123 /*
7124  * bridge_rtable_expire:
7125  *
7126  *	Set the expiry time for all routes on an interface.
7127  */
7128 static void
7129 bridge_rtable_expire(struct ifnet *ifp, int age)
7130 {
7131 	struct bridge_softc *sc = ifp->if_bridge;
7132 	struct bridge_rtnode *brt;
7133 
7134 	BRIDGE_LOCK(sc);
7135 
7136 	/*
7137 	 * If the age is zero then flush, otherwise set all the expiry times to
7138 	 * age for the interface
7139 	 */
7140 	if (age == 0) {
7141 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7142 	} else {
7143 		unsigned long now;
7144 
7145 		now = (unsigned long) net_uptime();
7146 
7147 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7148 			/* Cap the expiry time to 'age' */
7149 			if (brt->brt_ifp == ifp &&
7150 			    brt->brt_expire > now + age &&
7151 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7152 				brt->brt_expire = now + age;
7153 			}
7154 		}
7155 	}
7156 	BRIDGE_UNLOCK(sc);
7157 }
7158 
7159 /*
7160  * bridge_state_change:
7161  *
7162  *	Callback from the bridgestp code when a port changes states.
7163  */
7164 static void
7165 bridge_state_change(struct ifnet *ifp, int state)
7166 {
7167 	struct bridge_softc *sc = ifp->if_bridge;
7168 	static const char *stpstates[] = {
7169 		"disabled",
7170 		"listening",
7171 		"learning",
7172 		"forwarding",
7173 		"blocking",
7174 		"discarding"
7175 	};
7176 
7177 	if (log_stp) {
7178 		log(LOG_NOTICE, "%s: state changed to %s on %s",
7179 		    sc->sc_ifp->if_xname,
7180 		    stpstates[state], ifp->if_xname);
7181 	}
7182 }
7183 #endif /* BRIDGESTP */
7184 
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached.
 *
 *	Performs the final teardown of the bridge softc attached to `ifp`:
 *	detaches spanning tree state (when built with BRIDGESTP), frees
 *	the route table, removes the softc from the global bridge list,
 *	releases the ifnet reference, and finally destroys the softc's
 *	mutex and frees the softc itself. `sc` must not be used after
 *	this returns.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* unlink from the global list of bridges under its own mutex */
	lck_mtx_lock(&bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(&bridge_list_mtx);

	ifnet_release(ifp);

	/* the mutex lives inside the softc, so destroy it before freeing */
	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
	kfree_type(struct bridge_softc, sc);
}
7211 
7212 /*
7213  * bridge_link_event:
7214  *
7215  *	Report a data link event on an interface
7216  */
7217 static void
7218 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7219 {
7220 	struct event {
7221 		u_int32_t ifnet_family;
7222 		u_int32_t unit;
7223 		char if_name[IFNAMSIZ];
7224 	};
7225 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7226 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7227 	struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
7228 
7229 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7230 	    "%s event_code %u - %s", ifp->if_xname,
7231 	    event_code, dlil_kev_dl_code_str(event_code));
7232 	header->total_size   = sizeof(message);
7233 	header->vendor_code  = KEV_VENDOR_APPLE;
7234 	header->kev_class    = KEV_NETWORK_CLASS;
7235 	header->kev_subclass = KEV_DL_SUBCLASS;
7236 	header->event_code   = event_code;
7237 	data->ifnet_family   = ifnet_family(ifp);
7238 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7239 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7240 	ifnet_event(ifp, header);
7241 }
7242 
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host-filter drop. Increments the `reason` counter in
 *	bridge_hostfilter_stats, emits a debug log carrying the supplied
 *	function name, line number and the stringified reason, and sets
 *	the variable `error` in the expanding scope to EINVAL.
 *
 *	The expansion is a plain brace block: it does not transfer
 *	control, so the caller decides what happens after the drop is
 *	recorded, and it requires a variable named `error` to be in scope.
 */
#define BRIDGE_HF_DROP(reason, func, line) {                            \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d" #reason, func, line);                \
	        error = EINVAL;                                         \
	}
7249 
7250 static int
7251 bridge_host_filter_arp(struct bridge_iflist *bif, mbuf_t *data)
7252 {
7253 	struct ether_arp *ea;
7254 	struct ether_header *eh;
7255 	int error = EINVAL;
7256 	mbuf_t m = *data;
7257 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7258 
7259 	/*
7260 	 * Make the Ethernet and ARP headers contiguous
7261 	 */
7262 	if (mbuf_pkthdr_len(m) < minlen) {
7263 		BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7264 		goto done;
7265 	}
7266 	if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7267 		BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7268 		    __func__, __LINE__);
7269 		goto done;
7270 	}
7271 	m = *data;
7272 
7273 	/*
7274 	 * Restrict Ethernet protocols to ARP and IP/IPv6
7275 	 */
7276 	eh = mtod(m, struct ether_header *);
7277 	ea = (struct ether_arp *)(eh + 1);
7278 	if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7279 		BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7280 		    __func__, __LINE__);
7281 		goto done;
7282 	}
7283 	if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7284 		BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7285 		    __func__, __LINE__);
7286 		goto done;
7287 	}
7288 	/*
7289 	 * Verify the address lengths are correct
7290 	 */
7291 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7292 		BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7293 		goto done;
7294 	}
7295 	if (ea->arp_pln != sizeof(struct in_addr)) {
7296 		BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7297 		    __func__, __LINE__);
7298 		goto done;
7299 	}
7300 	/*
7301 	 * Allow only ARP request or ARP reply
7302 	 */
7303 	if (ea->arp_op != HTONS_ARPOP_REQUEST &&
7304 	    ea->arp_op != HTONS_ARPOP_REPLY) {
7305 		BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7306 		goto done;
7307 	}
7308 	if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7309 		/*
7310 		 * Verify source hardware address matches
7311 		 */
7312 		if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7313 		    ETHER_ADDR_LEN) != 0) {
7314 			BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7315 			goto done;
7316 		}
7317 	}
7318 	if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7319 		/*
7320 		 * Verify source protocol address:
7321 		 * May be null for an ARP probe
7322 		 */
7323 		if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7324 		    sizeof(struct in_addr)) != 0 &&
7325 		    bcmp(ea->arp_spa, &inaddr_any,
7326 		    sizeof(struct in_addr)) != 0) {
7327 			BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7328 			goto done;
7329 		}
7330 	}
7331 	bridge_hostfilter_stats.brhf_arp_ok += 1;
7332 	error = 0;
7333 done:
7334 	return error;
7335 }
7336 
7337 /*
7338  * MAC NAT
7339  */
7340 
7341 static errno_t
7342 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7343 {
7344 	errno_t         error = 0;
7345 
7346 	BRIDGE_LOCK_ASSERT_HELD(sc);
7347 
7348 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7349 		error = EINVAL;
7350 		goto done;
7351 	}
7352 	if (sc->sc_mac_nat_bif != NULL) {
7353 		if (sc->sc_mac_nat_bif != bif) {
7354 			error = EBUSY;
7355 		}
7356 		goto done;
7357 	}
7358 	sc->sc_mac_nat_bif = bif;
7359 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7360 	bridge_mac_nat_populate_entries(sc);
7361 
7362 done:
7363 	return error;
7364 }
7365 
7366 static void
7367 bridge_mac_nat_disable(struct bridge_softc *sc)
7368 {
7369 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7370 
7371 	assert(mac_nat_bif != NULL);
7372 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7373 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7374 	sc->sc_mac_nat_bif = NULL;
7375 	return;
7376 }
7377 
7378 static void
7379 mac_nat_entry_print2(struct mac_nat_entry *mne,
7380     const char ifname[IFNAMSIZ], const char *msg1, const char *msg2)
7381 {
7382 	int             af;
7383 	char            etopbuf[24];
7384 	char            ntopbuf[MAX_IPv6_STR_LEN];
7385 	const char      *space;
7386 
7387 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7388 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7389 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7390 	if (msg2 == NULL) {
7391 		msg2 = "";
7392 		space = "";
7393 	} else {
7394 		space = " ";
7395 	}
7396 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7397 	    "%.*s %s%s%s %p (%s, %s, %s)", IFNAMSIZ, ifname, msg1, space, msg2, mne,
7398 	    mne->mne_bif->bif_ifp->if_xname, ntopbuf, etopbuf);
7399 }
7400 
/*
 * mac_nat_entry_print:
 *
 * Log a MAC NAT entry with a single message; convenience wrapper around
 * mac_nat_entry_print2() with no second message.
 */
static void
mac_nat_entry_print(struct mac_nat_entry *mne,
    const char ifname[IFNAMSIZ], const char *msg)
{
	mac_nat_entry_print2(mne, ifname, msg, NULL);
}
7407 
7408 static struct mac_nat_entry *
7409 bridge_lookup_mac_nat_entry_ipv4(const struct bridge_softc *sc, const struct in_addr *ip)
7410 {
7411 	struct mac_nat_entry    *mne;
7412 	struct mac_nat_entry    *ret_mne = NULL;
7413 
7414 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7415 		if (mne->mne_ip.s_addr == ip->s_addr) {
7416 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7417 				mac_nat_entry_print(mne, sc->sc_if_xname,
7418 				    "found");
7419 			}
7420 			ret_mne = mne;
7421 			break;
7422 		}
7423 	}
7424 
7425 	return ret_mne;
7426 }
7427 
7428 static struct mac_nat_entry *
7429 bridge_lookup_mac_nat_entry_ipv6(const struct bridge_softc *sc, const struct in6_addr *ip6)
7430 {
7431 	struct mac_nat_entry    *mne;
7432 	struct mac_nat_entry    *ret_mne = NULL;
7433 
7434 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7435 		if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7436 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7437 				mac_nat_entry_print(mne, sc->sc_if_xname,
7438 				    "found");
7439 			}
7440 			ret_mne = mne;
7441 			break;
7442 		}
7443 	}
7444 
7445 	return ret_mne;
7446 }
7447 
7448 static void
7449 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7450     struct mac_nat_entry *mne, const char *reason)
7451 {
7452 	LIST_REMOVE(mne, mne_list);
7453 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7454 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7455 	}
7456 	zfree(bridge_mne_pool, mne);
7457 	sc->sc_mne_count--;
7458 }
7459 
7460 static struct mac_nat_entry *
7461 bridge_create_mac_nat_entry_common(struct bridge_softc *sc,
7462     struct bridge_iflist *bif, const char eaddr[ETHER_ADDR_LEN])
7463 {
7464 	struct mac_nat_entry *mne;
7465 
7466 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7467 		sc->sc_mne_allocation_failures++;
7468 		return NULL;
7469 	}
7470 
7471 	mne = zalloc_noblock(bridge_mne_pool);
7472 	if (mne == NULL) {
7473 		sc->sc_mne_allocation_failures++;
7474 		return NULL;
7475 	}
7476 
7477 	sc->sc_mne_count++;
7478 	bzero(mne, sizeof(*mne));
7479 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7480 
7481 	mne->mne_bif = bif;
7482 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7483 
7484 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7485 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7486 	}
7487 
7488 	return mne;
7489 }
7490 
7491 static struct mac_nat_entry *
7492 bridge_create_mac_nat_entry_ipv4(struct bridge_softc *sc,
7493     struct bridge_iflist *bif, const struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7494 {
7495 	struct mac_nat_entry *mne;
7496 
7497 	mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7498 	if (mne == NULL) {
7499 		return NULL;
7500 	}
7501 
7502 	bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7503 	LIST_INSERT_HEAD(&sc->sc_mne_list, mne, mne_list);
7504 
7505 	return mne;
7506 }
7507 
7508 static struct mac_nat_entry *
7509 bridge_create_mac_nat_entry_ipv6(struct bridge_softc *sc,
7510     struct bridge_iflist *bif, const struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7511 {
7512 	struct mac_nat_entry *mne;
7513 
7514 	mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7515 	if (mne == NULL) {
7516 		return NULL;
7517 	}
7518 
7519 	bcopy(ip6, &mne->mne_ip6, sizeof(mne->mne_ip6));
7520 	mne->mne_flags |= MNE_FLAGS_IPV6;
7521 	LIST_INSERT_HEAD(&sc->sc_mne_list_v6, mne, mne_list);
7522 
7523 	return mne;
7524 }
7525 
7526 static struct mac_nat_entry *
7527 bridge_update_mac_nat_entry_common(struct bridge_softc *sc, struct bridge_iflist *bif,
7528     struct mac_nat_entry *mne, const char eaddr[ETHER_ADDR_LEN])
7529 {
7530 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7531 
7532 	if (mne->mne_bif == mac_nat_bif) {
7533 		/* the MAC NAT interface takes precedence */
7534 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7535 			if (mne->mne_bif != bif) {
7536 				mac_nat_entry_print2(mne,
7537 				    sc->sc_if_xname, "reject",
7538 				    bif->bif_ifp->if_xname);
7539 			}
7540 		}
7541 	} else if (mne->mne_bif != bif) {
7542 		const char *__null_terminated old_if = mne->mne_bif->bif_ifp->if_xname;
7543 
7544 		mne->mne_bif = bif;
7545 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7546 			mac_nat_entry_print2(mne,
7547 			    sc->sc_if_xname, "replaced",
7548 			    old_if);
7549 		}
7550 		bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7551 	}
7552 
7553 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7554 
7555 	return mne;
7556 }
7557 
7558 static struct mac_nat_entry *
7559 bridge_update_mac_nat_entry_ipv4(struct bridge_softc *sc,
7560     struct bridge_iflist *bif, struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7561 {
7562 	struct mac_nat_entry *mne;
7563 
7564 	mne = bridge_lookup_mac_nat_entry_ipv4(sc, ip);
7565 	if (mne != NULL) {
7566 		return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7567 	}
7568 
7569 	mne = bridge_create_mac_nat_entry_ipv4(sc, bif, ip, eaddr);
7570 	return mne;
7571 }
7572 
7573 static struct mac_nat_entry *
7574 bridge_update_mac_nat_entry_ipv6(struct bridge_softc *sc,
7575     struct bridge_iflist *bif, struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7576 {
7577 	struct mac_nat_entry *mne;
7578 
7579 	mne = bridge_lookup_mac_nat_entry_ipv6(sc, ip6);
7580 	if (mne != NULL) {
7581 		return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7582 	}
7583 
7584 	mne = bridge_create_mac_nat_entry_ipv6(sc, bif, ip6, eaddr);
7585 	return mne;
7586 }
7587 
7588 static void
7589 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7590     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7591 {
7592 	struct mac_nat_entry *mne;
7593 	struct mac_nat_entry *tmne;
7594 
7595 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7596 		if (bif != NULL && mne->mne_bif != bif) {
7597 			continue;
7598 		}
7599 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7600 	}
7601 }
7602 
7603 /*
7604  * bridge_mac_nat_flush_entries:
7605  *
7606  * Flush MAC NAT entries for the specified member. Flush all entries if
7607  * the member is the one that requires MAC NAT, otherwise just flush the
7608  * ones for the specified member.
7609  */
7610 static void
7611 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7612 {
7613 	struct bridge_iflist *flush_bif;
7614 
7615 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7616 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7617 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7618 }
7619 
7620 static void
7621 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7622 {
7623 	errno_t                 error;
7624 	ifnet_t                 ifp;
7625 	uint16_t                addresses_count = 0;
7626 	ifaddr_t                * __counted_by(addresses_count) list;
7627 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
7628 
7629 	assert(mac_nat_bif != NULL);
7630 	ifp = mac_nat_bif->bif_ifp;
7631 	error = ifnet_get_address_list_family_with_count(ifp, &list, &addresses_count, 0);
7632 	if (error != 0) {
7633 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7634 		    "ifnet_get_address_list(%s) failed %d",
7635 		    ifp->if_xname, error);
7636 		return;
7637 	}
7638 
7639 	for (uint16_t i = 0; i < addresses_count; ++i) {
7640 		sa_family_t af;
7641 
7642 		af = ifaddr_address_family(list[i]);
7643 		switch (af) {
7644 		case AF_INET: {
7645 			struct sockaddr_in sin;
7646 
7647 			error = ifaddr_address(list[i], (struct sockaddr *)&sin, sizeof(sin));
7648 			if (error != 0) {
7649 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7650 				    "ifaddr_address failed %d",
7651 				    error);
7652 				break;
7653 			}
7654 
7655 			bridge_create_mac_nat_entry_ipv4(sc, mac_nat_bif, &sin.sin_addr, IF_LLADDR(ifp));
7656 			break;
7657 		}
7658 
7659 		case AF_INET6: {
7660 			struct sockaddr_in6 sin6;
7661 
7662 			error = ifaddr_address(list[i], (struct sockaddr *)&sin6, sizeof(sin6));
7663 			if (error != 0) {
7664 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7665 				    "ifaddr_address failed %d",
7666 				    error);
7667 				break;
7668 			}
7669 
7670 			if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
7671 				/* remove scope ID */
7672 				sin6.sin6_addr.s6_addr16[1] = 0;
7673 			}
7674 
7675 			bridge_create_mac_nat_entry_ipv6(sc, mac_nat_bif, &sin6.sin6_addr, IF_LLADDR(ifp));
7676 			break;
7677 		}
7678 
7679 		default:
7680 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7681 			    "ifaddr_address_family unknown %d",
7682 			    af);
7683 			break;
7684 		}
7685 	}
7686 
7687 	ifnet_address_list_free_counted_by(list, addresses_count);
7688 	return;
7689 }
7690 
7691 static void
7692 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7693     struct mac_nat_entry_list *list, unsigned long now)
7694 {
7695 	struct mac_nat_entry *mne;
7696 	struct mac_nat_entry *tmne;
7697 
7698 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7699 		if (now >= mne->mne_expire) {
7700 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7701 		}
7702 	}
7703 }
7704 
7705 static void
7706 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7707 {
7708 	if (sc->sc_mac_nat_bif == NULL) {
7709 		return;
7710 	}
7711 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7712 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7713 }
7714 
7715 static const char *
7716 get_in_out_string(boolean_t is_output)
7717 {
7718 	return (const char * __null_terminated)(is_output ? "OUT" : "IN");
7719 }
7720 
7721 /*
7722  * is_valid_arp_packet:
7723  *	Verify that this is a valid ARP packet.
7724  *
7725  *	Returns TRUE if the packet is valid, FALSE otherwise.
7726  */
7727 static boolean_t
7728 is_valid_arp_packet(mbuf_t *data, bool is_output,
7729     struct ether_header **eh_p, struct ether_arp **ea_p)
7730 {
7731 	struct ether_arp *ea;
7732 	struct ether_header *eh;
7733 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7734 	boolean_t is_valid = FALSE;
7735 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7736 
7737 	if (mbuf_pkthdr_len(*data) < minlen) {
7738 		BRIDGE_LOG(LOG_DEBUG, flags,
7739 		    "ARP %s short frame %lu < %lu",
7740 		    get_in_out_string(is_output),
7741 		    mbuf_pkthdr_len(*data), minlen);
7742 		goto done;
7743 	}
7744 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7745 		BRIDGE_LOG(LOG_DEBUG, flags,
7746 		    "ARP %s size %lu mbuf_pullup fail",
7747 		    get_in_out_string(is_output),
7748 		    minlen);
7749 		*data = NULL;
7750 		goto done;
7751 	}
7752 
7753 	/* validate ARP packet */
7754 	eh = mtod(*data, struct ether_header *);
7755 	ea = (struct ether_arp *)(eh + 1);
7756 	if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7757 		BRIDGE_LOG(LOG_DEBUG, flags,
7758 		    "ARP %s htype not ethernet",
7759 		    get_in_out_string(is_output));
7760 		goto done;
7761 	}
7762 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7763 		BRIDGE_LOG(LOG_DEBUG, flags,
7764 		    "ARP %s hlen not ethernet",
7765 		    get_in_out_string(is_output));
7766 		goto done;
7767 	}
7768 	if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7769 		BRIDGE_LOG(LOG_DEBUG, flags,
7770 		    "ARP %s ptype not IP",
7771 		    get_in_out_string(is_output));
7772 		goto done;
7773 	}
7774 	if (ea->arp_pln != sizeof(struct in_addr)) {
7775 		BRIDGE_LOG(LOG_DEBUG, flags,
7776 		    "ARP %s plen not IP",
7777 		    get_in_out_string(is_output));
7778 		goto done;
7779 	}
7780 	is_valid = TRUE;
7781 	*ea_p = ea;
7782 	*eh_p = eh;
7783 done:
7784 	return is_valid;
7785 }
7786 
7787 static struct mac_nat_entry *
7788 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7789 {
7790 	struct ether_arp        * __single ea;
7791 	struct ether_header     * __single eh;
7792 	struct mac_nat_entry    *mne = NULL;
7793 	u_short                 op;
7794 	struct in_addr          tpa;
7795 
7796 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7797 		goto done;
7798 	}
7799 	op = ea->arp_op;
7800 	switch (op) {
7801 	case HTONS_ARPOP_REQUEST:
7802 	case HTONS_ARPOP_REPLY:
7803 		/* only care about REQUEST and REPLY */
7804 		break;
7805 	default:
7806 		goto done;
7807 	}
7808 
7809 	/* check the target IP address for a NAT entry */
7810 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7811 	if (tpa.s_addr != 0) {
7812 		mne = bridge_lookup_mac_nat_entry_ipv4(sc, &tpa);
7813 	}
7814 	if (mne != NULL) {
7815 		if (op == HTONS_ARPOP_REPLY) {
7816 			/* translate the MAC address */
7817 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7818 				char    mac_src[24];
7819 				char    mac_dst[24];
7820 
7821 				ether_ntop(mac_src, sizeof(mac_src),
7822 				    ea->arp_tha);
7823 				ether_ntop(mac_dst, sizeof(mac_dst),
7824 				    mne->mne_mac);
7825 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7826 				    "%s %s ARP %s -> %s",
7827 				    sc->sc_if_xname,
7828 				    mne->mne_bif->bif_ifp->if_xname,
7829 				    mac_src, mac_dst);
7830 			}
7831 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7832 		}
7833 	} else {
7834 		/* handle conflicting ARP (sender matches mne) */
7835 		struct in_addr spa;
7836 
7837 		bcopy(ea->arp_spa, &spa, sizeof(spa));
7838 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7839 			/* check the source IP for a NAT entry */
7840 			mne = bridge_lookup_mac_nat_entry_ipv4(sc, &spa);
7841 		}
7842 	}
7843 
7844 done:
7845 	return mne;
7846 }
7847 
7848 static boolean_t
7849 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7850     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7851 {
7852 	struct ether_arp        * __single ea;
7853 	struct ether_header     * __single eh;
7854 	struct in_addr          ip;
7855 	struct mac_nat_entry    *mne = NULL;
7856 	u_short                 op;
7857 	boolean_t               translate = FALSE;
7858 
7859 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7860 		goto done;
7861 	}
7862 	op = ea->arp_op;
7863 	switch (op) {
7864 	case HTONS_ARPOP_REQUEST:
7865 	case HTONS_ARPOP_REPLY:
7866 		/* only care about REQUEST and REPLY */
7867 		break;
7868 	default:
7869 		goto done;
7870 	}
7871 
7872 	bcopy(ea->arp_spa, &ip, sizeof(ip));
7873 	if (ip.s_addr == 0) {
7874 		goto done;
7875 	}
7876 	/* XXX validate IP address: no multicast/broadcast */
7877 	mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
7878 	    (const char *)ea->arp_sha);
7879 	if (mnr != NULL && mne != NULL) {
7880 		/* record the offset to do the replacement */
7881 		translate = TRUE;
7882 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
7883 	}
7884 
7885 done:
7886 	return translate;
7887 }
7888 
7889 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
7890 	                         + sizeof(struct ip))
7891 static uint8_t * __indexable
7892 get_ether_ip_header_ptr(mbuf_t *data, boolean_t is_output)
7893 {
7894 	uint8_t         *header = NULL;
7895 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7896 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
7897 
7898 	if (mbuf_pkthdr_len(*data) < minlen) {
7899 		BRIDGE_LOG(LOG_DEBUG, flags,
7900 		    "IP %s short frame %lu < %lu",
7901 		    get_in_out_string(is_output),
7902 		    mbuf_pkthdr_len(*data), minlen);
7903 		goto done;
7904 	}
7905 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7906 		BRIDGE_LOG(LOG_DEBUG, flags,
7907 		    "IP %s size %lu mbuf_pullup fail",
7908 		    get_in_out_string(is_output),
7909 		    minlen);
7910 		*data = NULL;
7911 		goto done;
7912 	}
7913 	header = mtod(*data, uint8_t *);
7914 done:
7915 	return header;
7916 }
7917 
7918 static struct mac_nat_entry *
7919 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
7920 {
7921 	struct in_addr          dst;
7922 	uint8_t                 *header;
7923 	struct ip               *iphdr;
7924 	struct mac_nat_entry    *mne = NULL;
7925 
7926 	header = get_ether_ip_header_ptr(data, FALSE);
7927 	if (header == NULL) {
7928 		goto done;
7929 	}
7930 	iphdr = (struct ip *)(void *)(header + sizeof(struct ether_header));
7931 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
7932 	/* XXX validate IP address */
7933 	if (dst.s_addr == 0) {
7934 		goto done;
7935 	}
7936 	mne = bridge_lookup_mac_nat_entry_ipv4(sc, &dst);
7937 done:
7938 	return mne;
7939 }
7940 
7941 static void
7942 bridge_mac_nat_udp_output(struct bridge_softc *sc,
7943     struct bridge_iflist *bif, mbuf_t m,
7944     uint8_t ip_header_len, struct mac_nat_record *mnr)
7945 {
7946 	uint16_t        dp_flags;
7947 	errno_t         error;
7948 	size_t          offset;
7949 	struct udphdr   udphdr;
7950 
7951 	/* copy the UDP header */
7952 	offset = sizeof(struct ether_header) + ip_header_len;
7953 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
7954 	if (error != 0) {
7955 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7956 		    "mbuf_copydata udphdr failed %d",
7957 		    error);
7958 		return;
7959 	}
7960 	if (udphdr.uh_sport != HTONS_IPPORT_BOOTPC ||
7961 	    udphdr.uh_dport != HTONS_IPPORT_BOOTPS) {
7962 		/* not a BOOTP/DHCP packet */
7963 		return;
7964 	}
7965 	/* check whether the broadcast bit is already set */
7966 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
7967 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
7968 	if (error != 0) {
7969 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7970 		    "mbuf_copydata dp_flags failed %d",
7971 		    error);
7972 		return;
7973 	}
7974 	if ((dp_flags & HTONS_DHCP_FLAGS_BROADCAST) != 0) {
7975 		/* it's already set, nothing to do */
7976 		return;
7977 	}
7978 	/* broadcast bit needs to be set */
7979 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
7980 	mnr->mnr_ip_header_len = ip_header_len;
7981 	if (udphdr.uh_sum != 0) {
7982 		uint16_t        delta;
7983 
7984 		/* adjust checksum to take modified dp_flags into account */
7985 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
7986 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
7987 	}
7988 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7989 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
7990 	    sc->sc_if_xname,
7991 	    bif->bif_ifp->if_xname,
7992 	    ntohs(mnr->mnr_ip_dhcp_flags),
7993 	    ntohs(mnr->mnr_ip_udp_csum));
7994 	return;
7995 }
7996 
7997 static boolean_t
7998 bridge_mac_nat_ip_output(struct bridge_softc *sc,
7999     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8000 {
8001 #pragma unused(mnr)
8002 	uint8_t                 *header;
8003 	struct ether_header     *eh;
8004 	struct in_addr          ip;
8005 	struct ip               *iphdr;
8006 	uint8_t                 ip_header_len;
8007 	struct mac_nat_entry    *mne = NULL;
8008 	boolean_t               translate = FALSE;
8009 
8010 	header = get_ether_ip_header_ptr(data, TRUE);
8011 	if (header == NULL) {
8012 		goto done;
8013 	}
8014 
8015 	eh = (struct ether_header *)header;
8016 	iphdr = (struct ip *)(header + sizeof(*eh));
8017 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8018 	if (ip_header_len < sizeof(ip)) {
8019 		/* bogus IP header */
8020 		goto done;
8021 	}
8022 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8023 	/* XXX validate the source address */
8024 	if (ip.s_addr != 0) {
8025 		mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8026 		    (const char *)eh->ether_shost);
8027 	}
8028 	if (mnr != NULL) {
8029 		if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8030 			/* handle DHCP must broadcast */
8031 			bridge_mac_nat_udp_output(sc, bif, *data,
8032 			    ip_header_len, mnr);
8033 		}
8034 		translate = TRUE;
8035 	}
8036 done:
8037 	return translate;
8038 }
8039 
8040 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8041 	                         + sizeof(struct ip6_hdr))
8042 static uint8_t * __indexable
8043 get_ether_ipv6_header_ptr(mbuf_t *data, size_t plen, boolean_t is_output)
8044 {
8045 	uint8_t         *header = NULL;
8046 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8047 	size_t          minlen = ETHER_IPV6_HEADER_LEN + plen;
8048 
8049 	if (mbuf_pkthdr_len(*data) < minlen) {
8050 		BRIDGE_LOG(LOG_DEBUG, flags,
8051 		    "IP %s short frame %lu < %lu",
8052 		    get_in_out_string(is_output),
8053 		    mbuf_pkthdr_len(*data), minlen);
8054 		goto done;
8055 	}
8056 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8057 		BRIDGE_LOG(LOG_DEBUG, flags,
8058 		    "IP %s size %lu mbuf_pullup fail",
8059 		    get_in_out_string(is_output),
8060 		    minlen);
8061 		*data = NULL;
8062 		goto done;
8063 	}
8064 	header = mtod(*data, uint8_t *);
8065 done:
8066 	return header;
8067 }
8068 
8069 #include <netinet/icmp6.h>
8070 #include <netinet6/nd6.h>
8071 
8072 #define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8073 
/*
 * bridge_mac_nat_icmpv6_output:
 *
 * Inspect an outbound ICMPv6 packet for the neighbor discovery messages
 * that may carry a link-layer address option (neighbor solicit/advert,
 * router solicit/advert). If such an option is present and well-formed,
 * record in `mnr` the offset of the link-layer address from the start of
 * the Ethernet header along with the ICMPv6 length, ICMPv6 type, and IPv6
 * header length. The packet itself is not modified here.
 *
 * Parameters:
 *   data	packet; may be replaced by the pullups performed here, and
 *		is set to NULL by get_ether_ipv6_header_ptr() if a pullup
 *		fails
 *   ip6h	IPv6 header of the packet on entry; only used to read the
 *		payload length before the first pullup
 *   saddrp	source address of the packet; for a DAD probe (unspecified
 *		source) it is overwritten with the solicited target address
 *   mnr	record to fill in
 *
 * Any malformed or uninteresting packet causes an early return with `mnr`
 * left untouched.
 */
static void
bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
    struct bridge_iflist *bif,
    mbuf_t *data, struct ip6_hdr *ip6h,
    struct in6_addr *saddrp,
    struct mac_nat_record *mnr)
{
	uint8_t *header;
	struct ether_header *eh;
	struct icmp6_hdr *icmp6;
	uint8_t         icmp6_type;
	uint32_t        icmp6len;
	int             lladdrlen = 0;
	char            *lladdr = NULL;
	unsigned int    off = sizeof(*ip6h);

	/* the ICMPv6 length is taken from the IPv6 payload length field */
	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
	if (icmp6len < sizeof(*icmp6)) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "short IPv6 payload length %d < %lu",
		    icmp6len, sizeof(*icmp6));
		return;
	}

	/* pullup IP6 header + ICMPv6 header */
	header = get_ether_ipv6_header_ptr(data, sizeof(*icmp6), TRUE);
	if (header == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 header");
		return;
	}
	/* re-derive the header pointers from the (possibly new) buffer */
	eh = (struct ether_header *)header;
	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
	icmp6_type = icmp6->icmp6_type;
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
	case ND_NEIGHBOR_ADVERT:
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT:
		break;
	default:
		/* not a message type that needs translation */
		return;
	}

	/* pullup IP6 header + payload */
	header = get_ether_ipv6_header_ptr(data, icmp6len, TRUE);
	if (header == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 + payload");
		return;
	}
	/* re-derive the header pointers again after the second pullup */
	eh = (struct ether_header *)header;
	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);

	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT: {
		struct nd_neighbor_solicit *nd_ns;
		union nd_opts ndopts;
		boolean_t is_dad_probe;
		struct in6_addr taddr;

		if (icmp6len < sizeof(*nd_ns)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_ns %d < %lu",
			    icmp6len, sizeof(*nd_ns));
			return;
		}

		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}

		/* parse options */
		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			/* presumably sets lladdr/lladdrlen from the option */
			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len,
			    lladdr, lladdrlen);
		}
		/* an unspecified source address marks a DAD probe */
		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
		if (lladdr != NULL) {
			/* a DAD probe must not carry a source lladdr option */
			if (is_dad_probe) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "bad ND6 DAD packet");
				return;
			}
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		if (is_dad_probe) {
			/* node is trying to use taddr, create an mne for taddr */
			*saddrp = taddr;
		}
		break;
	}
	case ND_NEIGHBOR_ADVERT: {
		struct nd_neighbor_advert *nd_na;
		union nd_opts ndopts;
		struct in6_addr taddr;


		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;

		if (icmp6len < sizeof(*nd_na)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_na %d < %lu",
			    icmp6len, sizeof(*nd_na));
			return;
		}

		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}

		/* parse options */
		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NA option");
			return;
		}
		if (ndopts.nd_opts_tgt_lladdr == NULL) {
			/* no target link-layer address option, nothing to do */
			return;
		}

		ND_OPT_LLADDR(ndopts.nd_opts_tgt_lladdr, nd_opt_len, lladdr, lladdrlen);
		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "target lladdrlen %d != %lu",
			    lladdrlen, ETHER_ND_LLADDR_LEN);
			return;
		}
		break;
	}
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT: {
		union nd_opts ndopts;
		uint32_t type_length;
		const char *description;

		/* RA and RS differ only in their fixed header size */
		if (icmp6_type == ND_ROUTER_ADVERT) {
			type_length = sizeof(struct nd_router_advert);
			description = "RA";
		} else {
			type_length = sizeof(struct nd_router_solicit);
			description = "RS";
		}
		if (icmp6len < type_length) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short ND6 %s %d < %d",
			    description, icmp6len, type_length);
			return;
		}

		/* parse options */
		nd6_option_init(((uint8_t *)icmp6) + type_length,
		    icmp6len - type_length, &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 %s option", description);
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len, lladdr, lladdrlen);

			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		break;
	}
	default:
		break;
	}

	/* record where the link-layer address lives, if one was found */
	if (lladdr != NULL) {
		mnr->mnr_ip6_lladdr_offset = (uint16_t)
		    ((uintptr_t)lladdr - (uintptr_t)eh);
		mnr->mnr_ip6_icmp6_len = icmp6len;
		mnr->mnr_ip6_icmp6_type = icmp6_type;
		mnr->mnr_ip6_header_len = off;
		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
			const char *str;

			switch (mnr->mnr_ip6_icmp6_type) {
			case ND_ROUTER_ADVERT:
				str = "ROUTER ADVERT";
				break;
			case ND_ROUTER_SOLICIT:
				str = "ROUTER SOLICIT";
				break;
			case ND_NEIGHBOR_ADVERT:
				str = "NEIGHBOR ADVERT";
				break;
			case ND_NEIGHBOR_SOLICIT:
				str = "NEIGHBOR SOLICIT";
				break;
			default:
				str = "";
				break;
			}
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
			    mnr->mnr_ip6_header_len,
			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
		}
	}
}
8306 
8307 static struct mac_nat_entry *
8308 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8309 {
8310 	struct in6_addr         dst;
8311 	uint8_t                 *header;
8312 	struct ether_header     *eh;
8313 	struct ip6_hdr          *ip6h;
8314 	struct mac_nat_entry    *mne = NULL;
8315 
8316 	header = get_ether_ipv6_header_ptr(data, 0, FALSE);
8317 	if (header == NULL) {
8318 		goto done;
8319 	}
8320 	eh = (struct ether_header *)header;
8321 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8322 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8323 	/* XXX validate IPv6 address */
8324 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8325 		goto done;
8326 	}
8327 	mne = bridge_lookup_mac_nat_entry_ipv6(sc, &dst);
8328 
8329 done:
8330 	return mne;
8331 }
8332 
8333 static boolean_t
8334 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8335     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8336 {
8337 	uint8_t                 *header;
8338 	struct ether_header     *eh;
8339 	ether_addr_t            ether_shost;
8340 	struct ip6_hdr          *ip6h;
8341 	struct in6_addr         saddr;
8342 	boolean_t               translate;
8343 
8344 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8345 	header = get_ether_ipv6_header_ptr(data, 0, TRUE);
8346 	if (header == NULL) {
8347 		translate = FALSE;
8348 		goto done;
8349 	}
8350 	eh = (struct ether_header *)header;
8351 	bcopy(eh->ether_shost, &ether_shost, sizeof(ether_shost));
8352 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8353 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8354 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8355 		bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8356 	}
8357 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8358 		goto done;
8359 	}
8360 	(void)bridge_update_mac_nat_entry_ipv6(sc, bif, &saddr,
8361 	    (const char *)ether_shost.octet);
8362 
8363 done:
8364 	return translate;
8365 }
8366 
8367 /*
8368  * Function: bridge_mac_nat_input:
8369  *
8370  * Purpose:
8371  *   Process a unicast packet arriving on the external interface `external_ifp`.
8372  *
8373  *   If the packet is ARP, IPv4, or IPv6, lookup the address from the packet in
8374  *   the mac_nat_entry table. If an entry is found, and the interface is
8375  *   not `external_ifp`, replace the destination MAC address in the
8376  *   ethernet header with the corresponding internal MAC address, and return
8377  *   the interface via `*dst_if`.
8378  *
8379  * Returns:
8380  *   NULL if the packet was deallocated during processing.
8381  *
8382  *   Otherwise, returns non-NULL packet that should:
8383  *   1) if `*dst_if` is NULL, continue on as an input packet
8384  *      over `external_ifp`, OR
8385  *   2) if `*dst_if` is not NULL, be delivered as an output packet
8386  *      over `*dst_if`.
8387  */
8388 static mbuf_t
8389 bridge_mac_nat_input(struct bridge_softc *sc, ifnet_t external_ifp,
8390     mbuf_t m, ifnet_t * dst_if)
8391 {
8392 	struct ether_header     *eh;
8393 	mbuf_t                  m0 = m;
8394 	struct mac_nat_entry    *mne = NULL;
8395 
8396 	BRIDGE_LOCK_ASSERT_HELD(sc);
8397 	*dst_if = NULL;
8398 	eh = mtod(m, struct ether_header *);
8399 	switch (eh->ether_type) {
8400 	case HTONS_ETHERTYPE_ARP:
8401 		mne = bridge_mac_nat_arp_input(sc, &m);
8402 		break;
8403 	case HTONS_ETHERTYPE_IP:
8404 		mne = bridge_mac_nat_ip_input(sc, &m);
8405 		break;
8406 	case HTONS_ETHERTYPE_IPV6:
8407 		mne = bridge_mac_nat_ipv6_input(sc, &m);
8408 		break;
8409 	default:
8410 		break;
8411 	}
8412 	if (m != NULL & mne != NULL) {
8413 		*dst_if = mne->mne_bif->bif_ifp;
8414 		if (*dst_if == external_ifp) {
8415 			/* receive packet for ifp */
8416 			*dst_if = NULL;
8417 		} else {
8418 			/* replace the destination MAC with internal one */
8419 			if (m != m0) {
8420 				/* it may have changed */
8421 				eh = mtod(m, struct ether_header *);
8422 			}
8423 			bcopy(mne->mne_mac, eh->ether_dhost,
8424 			    sizeof(eh->ether_dhost));
8425 		}
8426 	}
8427 	return m;
8428 }
8429 
8430 
8431 static mblist
8432 bridge_mac_nat_input_list(struct bridge_softc *sc, ifnet_t external_ifp,
8433     mbuf_t m, mbuf_t * forward_head)
8434 {
8435 	mblist          forward;
8436 	mbuf_t          next_packet;
8437 	mblist          ret;
8438 
8439 	mblist_init(&ret);
8440 	mblist_init(&forward);
8441 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8442 		ifnet_ref_t     dst_if;
8443 
8444 		/* take packet out of the list */
8445 		next_packet = scan->m_nextpkt;
8446 		scan->m_nextpkt = NULL;
8447 
8448 		scan = bridge_mac_nat_input(sc, external_ifp, scan, &dst_if);
8449 		if (scan != NULL) {
8450 			if (dst_if != NULL) {
8451 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8452 				    "%s MAC-NAT input translate to %s",
8453 				    sc->sc_if_xname, dst_if->if_xname);
8454 				/* use rcvif to store the egress interface */
8455 				mbuf_pkthdr_setrcvif(scan, dst_if);
8456 				/* add it to the forwarding list */
8457 				mblist_append(&forward, scan);
8458 			} else {
8459 				/* add it to the "continue on as input" list */
8460 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8461 				    "%s MAC-NAT input for %s",
8462 				    sc->sc_if_xname,
8463 				    external_ifp->if_xname);
8464 				mblist_append(&ret, scan);
8465 			}
8466 		}
8467 	}
8468 	*forward_head = forward.head;
8469 	return ret;
8470 }
8471 
8472 /*
8473  * bridge_mac_nat_translate_list:
8474  * Process a list of packets destined to the MAC-NAT interface `dst_if`
8475  * from the bridge member `sbif`.
8476  *
8477  * For each packet in the list, update the MAC-NAT record, and if
8478  * translation is required, translate it.
8479  *
8480  * Returns the list of packets that should be delivered to the MAC-NAT
8481  * interface.
8482  */
8483 static mbuf_t
8484 bridge_mac_nat_translate_list(struct bridge_softc * sc,
8485     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8486 {
8487 	mbuf_t          next_packet;
8488 	mblist          ret;
8489 
8490 	mblist_init(&ret);
8491 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
8492 		struct mac_nat_record   mnr;
8493 		bool                    translate_mac;
8494 
8495 		/* take packet out of the list */
8496 		next_packet = scan->m_nextpkt;
8497 		scan->m_nextpkt = NULL;
8498 		translate_mac = bridge_mac_nat_output(sc, sbif, &scan, &mnr);
8499 		if (scan != NULL) {
8500 			if (translate_mac) {
8501 				bridge_mac_nat_translate(&scan, &mnr,
8502 				    IF_LLADDR(dst_if));
8503 			}
8504 			if (scan != NULL) {
8505 				/* add it back to the list */
8506 				mblist_append(&ret, scan);
8507 			}
8508 		}
8509 	}
8510 	return ret.head;
8511 }
8512 
8513 /*
8514  * bridge_mac_nat_copy_and_translate_list:
8515  * Same as bridge_mac_nat_translate_list() except that a copy of the
8516  * packet list is returned instead.
8517  *
8518  * The packet list `m` is left unaltered.
8519  */
8520 static mbuf_t
8521 bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
8522     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8523 {
8524 	mbuf_t          next_packet;
8525 	mblist          ret;
8526 
8527 	mblist_init(&ret);
8528 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8529 		mbuf_ref_t              mc = NULL;
8530 		struct mac_nat_record   mnr;
8531 		bool                    translate_mac;
8532 
8533 		/* take packet out of the list, make a copy, put it back */
8534 		next_packet = scan->m_nextpkt;
8535 		scan->m_nextpkt = NULL;
8536 		mc = m_dup(scan, M_DONTWAIT);
8537 		scan->m_nextpkt = next_packet;
8538 		if (mc == NULL) {
8539 			continue;
8540 		}
8541 		translate_mac = bridge_mac_nat_output(sc, sbif, &mc, &mnr);
8542 		if (mc != NULL) {
8543 			if (translate_mac) {
8544 				bridge_mac_nat_translate(&mc, &mnr,
8545 				    IF_LLADDR(dst_if));
8546 			}
8547 			if (mc != NULL) {
8548 				/* add it to the new list */
8549 				mblist_append(&ret, mc);
8550 			}
8551 		}
8552 	}
8553 	return ret.head;
8554 }
8555 
8556 static void
8557 bridge_mac_nat_forward_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
8558     mbuf_t m)
8559 {
8560 	int             count = 0;
8561 	ifnet_t         dst_if;
8562 	mblist          list;
8563 	int             n_lists = 0;
8564 	mbuf_t          next_packet;
8565 
8566 	mblist_init(&list);
8567 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8568 		ifnet_t         this_if;
8569 
8570 		next_packet = scan->m_nextpkt;
8571 		this_if = mbuf_pkthdr_rcvif(scan);
8572 		mbuf_pkthdr_setrcvif(scan, NULL);
8573 		if (list.head == NULL) {
8574 			/* start a new list */
8575 			list.head = list.tail = scan;
8576 			count = 1;
8577 			dst_if = this_if;
8578 		} else if (dst_if != this_if) {
8579 			/* send up the previous chain */
8580 			if (list.tail != NULL) {
8581 				/* terminate the list */
8582 				list.tail->m_nextpkt = NULL;
8583 			}
8584 			n_lists++;
8585 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8586 			    "(%s): sublist %u pkts %u",
8587 			    dst_if->if_xname, n_lists, count);
8588 			bridge_enqueue(bridge_ifp, NULL,
8589 			    dst_if, etypef, list.head,
8590 			    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
8591 
8592 			/* start new list */
8593 			list.head = list.tail = scan;
8594 			count = 1;
8595 			dst_if = this_if;
8596 		} else {
8597 			count++;
8598 			list.tail = scan;
8599 		}
8600 		if (next_packet == NULL) {
8601 			/* last list */
8602 			n_lists++;
8603 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8604 			    "(%s): sublist %u pkts %u",
8605 			    dst_if->if_xname, n_lists, count);
8606 			bridge_enqueue(bridge_ifp, NULL,
8607 			    dst_if, etypef, list.head,
8608 			    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
8609 		}
8610 	}
8611 	return;
8612 }
8613 
8614 /*
8615  * bridge_mac_nat_output:
8616  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8617  * from the interface 'bif'.
8618  *
8619  * Create a mac_nat_entry containing the source IP address and MAC address
8620  * from the packet. Populate a mac_nat_record with information detailing
8621  * how to translate the packet. Translation takes place later by calling
8622  * `bridge_mac_nat_translate()`.
8623  *
8624  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8625  * interface is generating an output packet. No translation is required in this
8626  * case, we just record the IP address used to prevent another bif from
8627  * claiming our IP address.
8628  *
8629  * Returns:
8630  * TRUE if the packet should be translated (*mnr updated as well),
8631  * FALSE otherwise.
8632  *
8633  * *data may be updated to point at a different mbuf chain or NULL if
8634  * the chain was deallocated during processing.
8635  */
8636 
8637 static boolean_t
8638 bridge_mac_nat_output(struct bridge_softc *sc,
8639     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8640 {
8641 	struct ether_header     *eh;
8642 	boolean_t               translate = FALSE;
8643 
8644 	BRIDGE_LOCK_ASSERT_HELD(sc);
8645 	assert(sc->sc_mac_nat_bif != NULL);
8646 
8647 	eh = mtod(*data, struct ether_header *);
8648 	if (mnr != NULL) {
8649 		bzero(mnr, sizeof(*mnr));
8650 		mnr->mnr_ether_type = eh->ether_type;
8651 	}
8652 	switch (eh->ether_type) {
8653 	case HTONS_ETHERTYPE_ARP:
8654 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8655 		break;
8656 	case HTONS_ETHERTYPE_IP:
8657 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8658 		break;
8659 	case HTONS_ETHERTYPE_IPV6:
8660 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8661 		break;
8662 	default:
8663 		break;
8664 	}
8665 	return translate;
8666 }
8667 
8668 static void
8669 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8670     const char eaddr[ETHER_ADDR_LEN])
8671 {
8672 	errno_t                 error;
8673 
8674 	if (mnr->mnr_arp_offset == 0) {
8675 		return;
8676 	}
8677 	/* replace the source hardware address */
8678 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8679 	    ETHER_ADDR_LEN, eaddr,
8680 	    MBUF_DONTWAIT);
8681 	if (error != 0) {
8682 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8683 		    "mbuf_copyback failed");
8684 		m_freem(*data);
8685 		*data = NULL;
8686 	}
8687 	return;
8688 }
8689 
8690 static void
8691 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8692 {
8693 	errno_t         error;
8694 	size_t          offset;
8695 
8696 	if (mnr->mnr_ip_header_len == 0) {
8697 		return;
8698 	}
8699 	/* update the UDP checksum */
8700 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8701 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8702 	    sizeof(mnr->mnr_ip_udp_csum),
8703 	    &mnr->mnr_ip_udp_csum,
8704 	    MBUF_DONTWAIT);
8705 	if (error != 0) {
8706 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8707 		    "mbuf_copyback uh_sum failed");
8708 		m_freem(*data);
8709 		*data = NULL;
8710 	}
8711 	/* update the DHCP must broadcast flag */
8712 	offset += sizeof(struct udphdr);
8713 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8714 	    sizeof(mnr->mnr_ip_dhcp_flags),
8715 	    &mnr->mnr_ip_dhcp_flags,
8716 	    MBUF_DONTWAIT);
8717 	if (error != 0) {
8718 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8719 		    "mbuf_copyback dp_flags failed");
8720 		m_freem(*data);
8721 		*data = NULL;
8722 	}
8723 }
8724 
8725 static void
8726 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8727     const char eaddr[ETHER_ADDR_LEN])
8728 {
8729 	uint16_t        cksum;
8730 	errno_t         error;
8731 	mbuf_t          m = *data;
8732 
8733 	if (mnr->mnr_ip6_header_len == 0) {
8734 		return;
8735 	}
8736 	switch (mnr->mnr_ip6_icmp6_type) {
8737 	case ND_ROUTER_ADVERT:
8738 	case ND_ROUTER_SOLICIT:
8739 	case ND_NEIGHBOR_SOLICIT:
8740 	case ND_NEIGHBOR_ADVERT:
8741 		if (mnr->mnr_ip6_lladdr_offset == 0) {
8742 			/* nothing to do */
8743 			return;
8744 		}
8745 		break;
8746 	default:
8747 		return;
8748 	}
8749 
8750 	/*
8751 	 * replace the lladdr
8752 	 */
8753 	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8754 	    ETHER_ADDR_LEN, eaddr,
8755 	    MBUF_DONTWAIT);
8756 	if (error != 0) {
8757 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8758 		    "mbuf_copyback lladdr failed");
8759 		m_freem(m);
8760 		*data = NULL;
8761 		return;
8762 	}
8763 
8764 	/*
8765 	 * recompute the icmp6 checksum
8766 	 */
8767 
8768 	/* skip past the ethernet header */
8769 	_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
8770 
8771 #define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
8772 	/* set the checksum to zero */
8773 	cksum = 0;
8774 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8775 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8776 	if (error != 0) {
8777 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8778 		    "mbuf_copyback cksum=0 failed");
8779 		m_freem(m);
8780 		*data = NULL;
8781 		return;
8782 	}
8783 	/* compute and set the new checksum */
8784 	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8785 	    mnr->mnr_ip6_icmp6_len);
8786 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8787 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8788 	if (error != 0) {
8789 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8790 		    "mbuf_copyback cksum failed");
8791 		m_freem(m);
8792 		*data = NULL;
8793 		return;
8794 	}
8795 	/* restore the ethernet header */
8796 	_mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
8797 	return;
8798 }
8799 
8800 static void
8801 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8802     const char eaddr[ETHER_ADDR_LEN])
8803 {
8804 	struct ether_header     *eh;
8805 
8806 	/* replace the source ethernet address with the single MAC */
8807 	eh = mtod(*data, struct ether_header *);
8808 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8809 	switch (mnr->mnr_ether_type) {
8810 	case HTONS_ETHERTYPE_ARP:
8811 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
8812 		break;
8813 
8814 	case HTONS_ETHERTYPE_IP:
8815 		bridge_mac_nat_ip_translate(data, mnr);
8816 		break;
8817 
8818 	case HTONS_ETHERTYPE_IPV6:
8819 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8820 		break;
8821 
8822 	default:
8823 		break;
8824 	}
8825 	return;
8826 }
8827 
8828 /*
8829  * bridge packet filtering
8830  */
8831 
8832 /*
8833  * Perform basic checks on header size since
8834  * pfil assumes ip_input has already processed
8835  * it for it.  Cut-and-pasted from ip_input.c.
8836  * Given how simple the IPv6 version is,
8837  * does the IPv4 version really need to be
8838  * this complicated?
8839  *
8840  * XXX Should we update ipstat here, or not?
8841  * XXX Right now we update ipstat but not
8842  * XXX csum_counter.
8843  */
8844 static int
8845 bridge_ip_checkbasic(struct mbuf **mp)
8846 {
8847 	struct mbuf *m = *mp;
8848 	struct ip *ip;
8849 	int len, hlen;
8850 	u_short sum;
8851 
8852 	if (*mp == NULL) {
8853 		return -1;
8854 	}
8855 
8856 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8857 		/* max_linkhdr is already rounded up to nearest 4-byte */
8858 		if ((m = m_copyup(m, sizeof(struct ip),
8859 		    max_linkhdr)) == NULL) {
8860 			/* XXXJRT new stat, please */
8861 			ipstat.ips_toosmall++;
8862 			goto bad;
8863 		}
8864 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8865 		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8866 			ipstat.ips_toosmall++;
8867 			goto bad;
8868 		}
8869 	}
8870 	ip = mtod(m, struct ip *);
8871 	if (ip == NULL) {
8872 		goto bad;
8873 	}
8874 
8875 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8876 		ipstat.ips_badvers++;
8877 		goto bad;
8878 	}
8879 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8880 	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
8881 		ipstat.ips_badhlen++;
8882 		goto bad;
8883 	}
8884 	if (hlen > m->m_len) {
8885 		if ((m = m_pullup(m, hlen)) == 0) {
8886 			ipstat.ips_badhlen++;
8887 			goto bad;
8888 		}
8889 		ip = mtod(m, struct ip *);
8890 		if (ip == NULL) {
8891 			goto bad;
8892 		}
8893 	}
8894 
8895 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8896 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8897 	} else {
8898 		if (hlen == sizeof(struct ip)) {
8899 			sum = in_cksum_hdr(ip);
8900 		} else {
8901 			sum = in_cksum(m, hlen);
8902 		}
8903 	}
8904 	if (sum) {
8905 		ipstat.ips_badsum++;
8906 		goto bad;
8907 	}
8908 
8909 	/* Retrieve the packet length. */
8910 	len = ntohs(ip->ip_len);
8911 
8912 	/*
8913 	 * Check for additional length bogosity
8914 	 */
8915 	if (len < hlen) {
8916 		ipstat.ips_badlen++;
8917 		goto bad;
8918 	}
8919 
8920 	/*
8921 	 * Check that the amount of data in the buffers
8922 	 * is as at least much as the IP header would have us expect.
8923 	 * Drop packet if shorter than we expect.
8924 	 */
8925 	if (m->m_pkthdr.len < len) {
8926 		ipstat.ips_tooshort++;
8927 		goto bad;
8928 	}
8929 
8930 	/* Checks out, proceed */
8931 	*mp = m;
8932 	return 0;
8933 
8934 bad:
8935 	*mp = m;
8936 	return -1;
8937 }
8938 
8939 /*
8940  * Same as above, but for IPv6.
8941  * Cut-and-pasted from ip6_input.c.
8942  * XXX Should we update ip6stat, or not?
8943  */
8944 static int
8945 bridge_ip6_checkbasic(struct mbuf **mp)
8946 {
8947 	struct mbuf *m = *mp;
8948 	struct ip6_hdr *ip6;
8949 
8950 	/*
8951 	 * If the IPv6 header is not aligned, slurp it up into a new
8952 	 * mbuf with space for link headers, in the event we forward
8953 	 * it.  Otherwise, if it is aligned, make sure the entire base
8954 	 * IPv6 header is in the first mbuf of the chain.
8955 	 */
8956 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8957 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8958 		/* max_linkhdr is already rounded up to nearest 4-byte */
8959 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8960 		    max_linkhdr)) == NULL) {
8961 			/* XXXJRT new stat, please */
8962 			ip6stat.ip6s_toosmall++;
8963 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8964 			goto bad;
8965 		}
8966 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8967 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8968 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8969 			ip6stat.ip6s_toosmall++;
8970 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8971 			goto bad;
8972 		}
8973 	}
8974 
8975 	ip6 = mtod(m, struct ip6_hdr *);
8976 
8977 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8978 		ip6stat.ip6s_badvers++;
8979 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8980 		goto bad;
8981 	}
8982 
8983 	/* Checks out, proceed */
8984 	*mp = m;
8985 	return 0;
8986 
8987 bad:
8988 	*mp = m;
8989 	return -1;
8990 }
8991 
8992 /*
8993  * the PF routines expect to be called from ip_input, so we
8994  * need to do and undo here some of the same processing.
8995  *
8996  * XXX : this is heavily inspired on bridge_pfil()
8997  */
8998 static int
8999 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9000     bool input)
9001 {
9002 	/*
9003 	 * XXX : mpetit : heavily inspired by bridge_pfil()
9004 	 */
9005 
9006 	int snap, error, i, hlen;
9007 	struct ether_header *eh1, eh2;
9008 	struct ip *ip;
9009 	struct llc llc1;
9010 	u_int16_t ether_type;
9011 
9012 	snap = 0;
9013 	error = -1;     /* Default error if not error == 0 */
9014 
9015 	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9016 		return 0; /* filtering is disabled */
9017 	}
9018 	i = min((*mp)->m_pkthdr.len, max_protohdr);
9019 	if ((*mp)->m_len < i) {
9020 		*mp = m_pullup(*mp, i);
9021 		if (*mp == NULL) {
9022 			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9023 			return -1;
9024 		}
9025 	}
9026 
9027 	eh1 = mtod(*mp, struct ether_header *);
9028 	ether_type = ntohs(eh1->ether_type);
9029 
9030 	/*
9031 	 * Check for SNAP/LLC.
9032 	 */
9033 	if (ether_type < ETHERMTU) {
9034 		struct llc *llc2 = (struct llc *)(eh1 + 1);
9035 
9036 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9037 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
9038 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
9039 		    llc2->llc_control == LLC_UI) {
9040 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
9041 			snap = 1;
9042 		}
9043 	}
9044 
9045 	/*
9046 	 * If we're trying to filter bridge traffic, don't look at anything
9047 	 * other than IP and ARP traffic.  If the filter doesn't understand
9048 	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
9049 	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9050 	 * but of course we don't have an AppleTalk filter to begin with.
9051 	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9052 	 * ARP traffic.)
9053 	 */
9054 	switch (ether_type) {
9055 	case ETHERTYPE_ARP:
9056 	case ETHERTYPE_REVARP:
9057 		return 0;         /* Automatically pass */
9058 
9059 	case ETHERTYPE_IP:
9060 	case ETHERTYPE_IPV6:
9061 		break;
9062 	default:
9063 		/*
9064 		 * Check to see if the user wants to pass non-ip
9065 		 * packets, these will not be checked by pf and
9066 		 * passed unconditionally so the default is to drop.
9067 		 */
9068 		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9069 			goto bad;
9070 		}
9071 		break;
9072 	}
9073 
9074 	/* Strip off the Ethernet header and keep a copy. */
9075 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9076 	m_adj(*mp, ETHER_HDR_LEN);
9077 
9078 	/* Strip off snap header, if present */
9079 	if (snap) {
9080 		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9081 		m_adj(*mp, sizeof(struct llc));
9082 	}
9083 
9084 	/*
9085 	 * Check the IP header for alignment and errors
9086 	 */
9087 	switch (ether_type) {
9088 	case ETHERTYPE_IP:
9089 		error = bridge_ip_checkbasic(mp);
9090 		break;
9091 	case ETHERTYPE_IPV6:
9092 		error = bridge_ip6_checkbasic(mp);
9093 		break;
9094 	default:
9095 		error = 0;
9096 		break;
9097 	}
9098 	if (error) {
9099 		goto bad;
9100 	}
9101 
9102 	error = 0;
9103 
9104 	/*
9105 	 * Run the packet through pf rules
9106 	 */
9107 	switch (ether_type) {
9108 	case ETHERTYPE_IP:
9109 		/*
9110 		 * before calling the firewall, swap fields the same as
9111 		 * IP does. here we assume the header is contiguous
9112 		 */
9113 		ip = mtod(*mp, struct ip *);
9114 
9115 		ip->ip_len = ntohs(ip->ip_len);
9116 		ip->ip_off = ntohs(ip->ip_off);
9117 
9118 		if (ifp != NULL) {
9119 			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9120 		}
9121 
9122 		if (*mp == NULL || error != 0) { /* filter may consume */
9123 			break;
9124 		}
9125 
9126 		/* Recalculate the ip checksum and restore byte ordering */
9127 		ip = mtod(*mp, struct ip *);
9128 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9129 		if (hlen < (int)sizeof(struct ip)) {
9130 			goto bad;
9131 		}
9132 		if (hlen > (*mp)->m_len) {
9133 			if ((*mp = m_pullup(*mp, hlen)) == 0) {
9134 				goto bad;
9135 			}
9136 			ip = mtod(*mp, struct ip *);
9137 			if (ip == NULL) {
9138 				goto bad;
9139 			}
9140 		}
9141 		ip->ip_len = htons(ip->ip_len);
9142 		ip->ip_off = htons(ip->ip_off);
9143 		ip->ip_sum = 0;
9144 		if (hlen == sizeof(struct ip)) {
9145 			ip->ip_sum = in_cksum_hdr(ip);
9146 		} else {
9147 			ip->ip_sum = in_cksum(*mp, hlen);
9148 		}
9149 		break;
9150 
9151 	case ETHERTYPE_IPV6:
9152 		if (ifp != NULL) {
9153 			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9154 		}
9155 
9156 		if (*mp == NULL || error != 0) { /* filter may consume */
9157 			break;
9158 		}
9159 		break;
9160 	default:
9161 		error = 0;
9162 		break;
9163 	}
9164 
9165 	if (*mp == NULL) {
9166 		return error;
9167 	}
9168 	if (error != 0) {
9169 		goto bad;
9170 	}
9171 
9172 	error = -1;
9173 
9174 	/*
9175 	 * Finally, put everything back the way it was and return
9176 	 */
9177 	if (snap) {
9178 		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9179 		if (*mp == NULL) {
9180 			return error;
9181 		}
9182 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9183 	}
9184 
9185 	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9186 	if (*mp == NULL) {
9187 		return error;
9188 	}
9189 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9190 
9191 	return 0;
9192 
9193 bad:
9194 	m_freem(*mp);
9195 	*mp = NULL;
9196 	return error;
9197 }
9198 
9199 #if BRIDGESTP
9200 static void
9201 bridge_bstp_input_list(struct bstp_port *bp, struct mbuf *head)
9202 {
9203 	mbuf_t  next_packet = NULL;
9204 
9205 	for (mbuf_t scan = head; scan != NULL; scan = next_packet) {
9206 		next_packet = scan->m_nextpkt;
9207 		scan->m_nextpkt = NULL;
9208 		bstp_input(bp, scan);
9209 	}
9210 }
9211 #endif /* BRIDGESTP */
9212 
9213 static mblist
9214 bridge_filter_arp_list(struct bridge_iflist * bif, mbuf_t m)
9215 {
9216 	mbuf_t          next_packet = NULL;
9217 	mblist          ret;
9218 
9219 	mblist_init(&ret);
9220 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9221 		errno_t                 error;
9222 
9223 		/* take packet out of the list */
9224 		next_packet = scan->m_nextpkt;
9225 		scan->m_nextpkt = NULL;
9226 		/* filter the ARP packet */
9227 		error = bridge_host_filter_arp(bif, &scan);
9228 		if (error != 0 && scan != NULL) {
9229 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9230 				brlog_mbuf_data(scan, 0,
9231 				    sizeof(struct ether_header) +
9232 				    sizeof(struct ip));
9233 			}
9234 			m_freem(scan);
9235 			scan = NULL;
9236 		}
9237 		if (scan != NULL) {
9238 			/* add it to the list */
9239 			mblist_append(&ret, scan);
9240 		}
9241 	}
9242 	return ret;
9243 }
9244 
9245 static mbuf_t
9246 bridge_filter_checksum(ifnet_t bridge_ifp, struct bridge_iflist * bif, mbuf_t m,
9247     bool is_ipv4, bool host_filter, bool checksum)
9248 {
9249 	uint32_t                dbgf = 0;
9250 	errno_t                 error;
9251 	ip_packet_info          info;
9252 	u_int                   mac_hlen = sizeof(struct ether_header);
9253 
9254 	if (host_filter) {
9255 		dbgf |= BR_DBGF_HOSTFILTER;
9256 	}
9257 	if (checksum) {
9258 		dbgf |= BR_DBGF_CHECKSUM;
9259 	}
9260 	/* get the IP protocol header */
9261 	error = bridge_get_ip_proto(&m, mac_hlen, is_ipv4, &info,
9262 	    &bif->bif_stats.brms_in_ip);
9263 	if (error != 0) {
9264 		BRIDGE_LOG(LOG_NOTICE, dbgf,
9265 		    "%s(%s) bridge_get_ip_proto failed %d",
9266 		    bridge_ifp->if_xname,
9267 		    bif->bif_ifp->if_xname, error);
9268 		goto drop;
9269 	}
9270 	if (host_filter) {
9271 		bool            drop = true;
9272 
9273 		/* restrict IP protocols */
9274 		switch (info.ip_proto) {
9275 		case IPPROTO_ICMP:
9276 		case IPPROTO_IGMP:
9277 			drop = !is_ipv4;
9278 			break;
9279 		case IPPROTO_TCP:
9280 		case IPPROTO_UDP:
9281 			drop = false;
9282 			break;
9283 		case IPPROTO_ICMPV6:
9284 			drop = is_ipv4;
9285 			break;
9286 		default:
9287 			break;
9288 		}
9289 		if (drop) {
9290 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
9291 			goto drop;
9292 		}
9293 		bridge_hostfilter_stats.brhf_ip_ok += 1;
9294 	}
9295 	if (checksum) {
9296 		/* need to compute IP/UDP/TCP/checksums */
9297 		error = bridge_offload_checksum(&m, &info, &bif->bif_stats);
9298 		if (error != 0) {
9299 			BRIDGE_LOG(LOG_NOTICE, dbgf,
9300 			    "%s(%s) bridge_offload_checksum failed %d",
9301 			    bridge_ifp->if_xname,
9302 			    bif->bif_ifp->if_xname, error);
9303 			goto drop;
9304 		}
9305 	}
9306 	return m;
9307 
9308 drop:
9309 	/* toss the packet */
9310 	if (m != NULL) {
9311 		if (host_filter &&
9312 		    BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9313 			brlog_mbuf_data(m, 0,
9314 			    sizeof(struct ether_header) +
9315 			    sizeof(struct ip));
9316 		}
9317 		m_freem(m);
9318 		m = NULL;
9319 	}
9320 	return NULL;
9321 }
9322 
9323 static mblist
9324 bridge_filter_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9325     mbuf_t in_list, ether_type_flag_t etypef, bool host_filter, bool checksum)
9326 {
9327 	bool                    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
9328 	mbuf_t                  next_packet = NULL;
9329 	mblist                  ret;
9330 
9331 	mblist_init(&ret);
9332 	for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9333 		/* take packet out of the list */
9334 		next_packet = scan->m_nextpkt;
9335 		scan->m_nextpkt = NULL;
9336 		scan = bridge_filter_checksum(bridge_ifp, bif,
9337 		    scan, is_ipv4, host_filter, checksum);
9338 		if (scan != NULL) {
9339 			/* add packet to the list */
9340 			mblist_append(&ret, scan);
9341 		}
9342 	}
9343 	return ret;
9344 }
9345 
9346 static mbuf_t
9347 bridge_checksum_offload_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9348     mbuf_t m, bool is_ipv4)
9349 {
9350 	mblist          ret;
9351 	mbuf_t          next_packet;
9352 
9353 	mblist_init(&ret);
9354 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
9355 		uint32_t        csum_flags;
9356 
9357 		/* take it out of the list */
9358 		next_packet = scan->m_nextpkt;
9359 		scan->m_nextpkt = NULL;
9360 
9361 		csum_flags = scan->m_pkthdr.csum_flags;
9362 		if ((csum_flags & checksum_request_flags) != 0) {
9363 			/* compute the checksum now */
9364 			scan = bridge_filter_checksum(bridge_ifp, bif, scan,
9365 			    is_ipv4, false, true);
9366 			if (scan != NULL) {
9367 				/* clear offload now */
9368 				scan->m_pkthdr.csum_flags &= csum_flags;
9369 			}
9370 		}
9371 		if (scan != NULL) {
9372 			mblist_append(&ret, scan);
9373 		}
9374 	}
9375 	return ret.head;
9376 }
9377 
9378 static mbuf_t
9379 copy_broadcast_packet(mbuf_t m)
9380 {
9381 	mbuf_t  mc;
9382 
9383 	/* make a copy of the packet */
9384 	mc = m_dup(m, M_DONTWAIT);
9385 	if (mc != NULL) {
9386 		struct ether_header *eh;
9387 
9388 		/* make copy look like it is broadcast */
9389 		mc->m_flags |= M_BCAST;
9390 		eh = mtod(mc, struct ether_header *);
9391 		bcopy(etherbroadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN);
9392 	}
9393 	return mc;
9394 }
9395 
9396 static mblist
9397 bridge_find_broadcast_ipv4(mbuf_t in_list, mbuf_t * ip_bcast_head)
9398 {
9399 	mblist          ip_bcast;
9400 	mbuf_t          next_packet = NULL;
9401 	mblist          ret;
9402 
9403 	mblist_init(&ret);
9404 	mblist_init(&ip_bcast);
9405 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
9406 		mbuf_t  bcast_pkt = NULL;
9407 		uint8_t *header;
9408 
9409 		/* take packet out of the list */
9410 		next_packet = scan->m_nextpkt;
9411 		scan->m_nextpkt = NULL;
9412 
9413 		header = get_ether_ip_header_ptr(&scan, FALSE);
9414 		if (header != NULL) {
9415 			struct in_addr  dst;
9416 			struct ip       *iphdr;
9417 
9418 			iphdr = (struct ip *)(header + sizeof(struct ether_header));
9419 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
9420 			if (dst.s_addr == INADDR_BROADCAST) {
9421 				bcast_pkt = copy_broadcast_packet(scan);
9422 			}
9423 		}
9424 		if (bcast_pkt != NULL) {
9425 			/* add packet to broadcast list */
9426 			mblist_append(&ip_bcast, bcast_pkt);
9427 		}
9428 		if (scan != NULL) {
9429 			/* add packet back into the list */
9430 			mblist_append(&ret, scan);
9431 		}
9432 	}
9433 	*ip_bcast_head = ip_bcast.head;
9434 	return ret;
9435 }
9436 
9437 static ifnet_t
9438 bridge_find_member(struct bridge_softc * sc, uint8_t * lladdr,
9439     struct bridge_iflist * sbif)
9440 {
9441 	struct bridge_iflist * bif;
9442 
9443 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
9444 		if (bif == sbif) {
9445 			/* skip the input member */
9446 			continue;
9447 		}
9448 		if (_ether_cmp(IF_LLADDR(bif->bif_ifp), lladdr) == 0) {
9449 			return bif->bif_ifp;
9450 		}
9451 	}
9452 	return NULL;
9453 }
9454 
9455 
/*
 * Function: bridge_input_list
 *
 * Purpose:
 *   Process a list of input packets through the bridge.
 *   The caller ensures that all of the packets in `list` have the
 *   same ethernet header; `eh_in_p` points to a copy of that header.
 *
 * Parameters:
 *   sc          bridge the packets are being processed for
 *   ifp         member interface the packets were received on
 *   eh_in_p     ethernet header shared by every packet in `list`
 *   list        the packets to process
 *   is_promisc  promiscuous indication for the packets; unicast packets
 *               that are not promiscuous are treated as addressed to `ifp`
 *
 * Locking:
 *   Takes the bridge lock once the bridge is known to be running. Every
 *   path that reaches `done` after that point has either called
 *   BRIDGE_UNLOCK() or called a routine that is commented below as
 *   unlocking (bridge_broadcast_list(), bridge_forward_list()).
 *
 * Returns:
 *   The list of packets that were not consumed/freed, after passing them
 *   through adjust_input_packet_list().
 *
 *   A list whose `head` is NULL if all of the packets were consumed.
 *   Note that the consuming paths clear only `head` and `tail`, so
 *   callers should test `head`.
 */
static mblist
bridge_input_list(struct bridge_softc * sc, ifnet_t ifp,
    struct ether_header * eh_in_p, mblist list, bool is_promisc)
{
	struct bridge_iflist *  bif;
	ifnet_t                 bridge_ifp;
	bool                    checksum_offload;
	uint8_t *               dhost;
#if BRIDGESTP
	bool                    discarding = false;
#endif /* BRIDGESTP */
	ifnet_t                 dst_if = NULL;
	errno_t                 error;
	ether_type_flag_t       etypef;
	bool                    host_filter;
	bool                    host_filter_drop = false;
	mbuf_ref_t              ip_bcast = NULL;
	bool                    is_bridge_mac = false;
	bool                    is_broadcast;
	bool                    is_ifp_mac;
	ifnet_t                 member_input = NULL;
	uint8_t *               shost;
	bool                    uses_virtio = false;
	uint16_t                vlan;

	if (ifp->if_bridge == NULL) {
		/* no longer part of bridge */
		goto done;
	}
	bridge_ifp = sc->sc_ifp;
	is_broadcast = IS_BCAST_MCAST(list.head);
	/* unicast and not promiscuous: treated as addressed to `ifp` */
	is_ifp_mac = (!is_broadcast && !is_promisc);
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s count %d head 0x%llx.0x%llx tail 0x%llx.0x%llx",
	    bridge_ifp->if_xname, ifp->if_xname, list.count,
	    (uint64_t)VM_KERNEL_ADDRPERM(list.head),
	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.head, void *)),
	    (uint64_t)VM_KERNEL_ADDRPERM(list.tail),
	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.tail, void *)));

	/* assume we'll return all packets */
	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s not running passing along",
		    bridge_ifp->if_xname);
		goto done;
	}

	/*
	 * NOTE(review): no variable named `m` is declared in this function;
	 * presumably VLANTAGOF() discards its argument -- confirm against
	 * the macro definition.
	 */
	vlan = VLANTAGOF(m);

	/* lookup the bridge member */
	BRIDGE_LOCK(sc);
	bif = bridge_lookup_member_if(sc, ifp);
	if (bif == NULL) {
		BRIDGE_UNLOCK(sc);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s bridge_lookup_member_if failed",
		    bridge_ifp->if_xname);
		goto done;
	}

	uses_virtio = bif_uses_virtio(bif);

	/*
	 * host filter drops packets that:
	 * - are not ARP, IPv4, or IPv6
	 * - have incorrect source MAC address
	 */
	host_filter = (bif->bif_flags & BIFF_HOST_FILTER) != 0;
	etypef = ether_type_flag_get(eh_in_p->ether_type);
	if (host_filter
	    && (etypef & ETHER_TYPE_FLAG_IP_ARP) == 0) {
		/* ether type not one of ARP, IPv4, or IPv6 */
		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
		host_filter_drop = true;
	} else if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
	    bcmp(eh_in_p->ether_shost, bif->bif_hf_hwsrc, ETHER_ADDR_LEN)
	    != 0) {
		/* only allow the single source MAC address */
		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr,
		    __func__, __LINE__);
		host_filter_drop = true;
	}
	if (host_filter_drop) {
		/* the whole list shares the header, so the whole list is dropped */
		BRIDGE_UNLOCK(sc);
		m_freem_list(list.head);
		list.head = list.tail = NULL;
		goto done;
	}

#if BRIDGESTP
	discarding = (bif->bif_ifflags & IFBIF_STP) != 0 &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING;
#endif /* BRIDGESTP */

	dhost = eh_in_p->ether_dhost;
	shost = eh_in_p->ether_shost;
	/*
	 * Reserved multicast address listed in 802.1D section 7.12.6
	 * must not be forwarded by the bridge.
	 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
	 */
	if (is_broadcast) {
		if (IS_MCAST(list.head)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    " multicast: "
			    "%02x:%02x:%02x:%02x:%02x:%02x",
			    dhost[0], dhost[1],
			    dhost[2], dhost[3],
			    dhost[4], dhost[5]);
		}
		/* compare the first five bytes; the last byte selects the range */
		if (bcmp(dhost, bstp_etheraddr, (ETHER_ADDR_LEN - 1)) == 0) {
			if (dhost[5] == BSTP_ETHERADDR_RANGE_FIRST) {
				/* multicast for spanning tree */
#if BRIDGESTP
				bridge_bstp_input_list(&bif->bif_stp, list.head);
#else /* BRIDGESTP */
				m_freem_list(list.head);
#endif /* BRIDGESTP */
				list.head = list.tail = NULL;
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			if (dhost[5] <= BSTP_ETHERADDR_RANGE_LAST) {
				/* allow packet to continue up the stack */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
		}
		/* broadcast to all members */
		os_atomic_add(&bridge_ifp->if_imcasts, list.count, relaxed);
	}

#if BRIDGESTP
	if (discarding) {
		/* packets are returned to the caller, not forwarded */
		BRIDGE_UNLOCK(sc);
		goto done;
	}
#endif /* BRIDGESTP */

	/* If the interface is learning, record the address. */
	if ((bif->bif_ifflags & IFBIF_LEARNING) != 0) {
		error = bridge_rtupdate(sc, shost, vlan, bif, 0, IFBAF_DYNAMIC);
		/*
		 * If the interface has addresses limits then deny any source
		 * that is not in the cache.
		 */
		if (error != 0 && bif->bif_addrmax) {
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0 &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
		BRIDGE_UNLOCK(sc);
		goto done;
	}
#endif /* BRIDGESTP */

	/*
	 * If the packet is not IP, let the host filter drop ARP packets.
	 * Otherwise, if the host filter is enabled or we need to compute
	 * checksums, do that.
	 * Otherwise, if MAC-NAT is enabled and this is an IPv4 packet,
	 * check for IPv4 broadcast packets. Accumulate those in a separate
	 * list `ip_bcast`.
	 */
	checksum_offload = bif_has_checksum_offload(bif);
	if (!ether_type_flag_is_ip(etypef)) {
		/* host filter process ARP */
		if (host_filter) {
			/* host filter check earlier means this must be ARP */
			VERIFY(etypef == ETHER_TYPE_FLAG_ARP);
			list = bridge_filter_arp_list(bif, list.head);
			if (list.head == NULL) {
				VERIFY(list.tail == NULL);
				BRIDGE_UNLOCK(sc);
				goto done;
			}
		}
	} else if (host_filter || checksum_offload) {
		/* host filter and/or checksum */
		list = bridge_filter_checksum_list(bridge_ifp, bif,
		    list.head, etypef, host_filter, checksum_offload);
		if (list.head == NULL) {
			VERIFY(list.tail == NULL);
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	} else if (is_ifp_mac && bif == sc->sc_mac_nat_bif &&
	    etypef == ETHER_TYPE_FLAG_IPV4) {
		/* look for broadcast IPv4 packet */
		list = bridge_find_broadcast_ipv4(list.head, &ip_bcast);
		if (list.head == NULL && ip_bcast == NULL) {
			/* all packets were consumed */
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	}

	/*
	 * If the bridge has an address assigned, and the destination MAC
	 * matches the bridge interface, claim the packets for the bridge
	 * interface.
	 */
	if ((sc->sc_flags & SCF_ADDRESS_ASSIGNED) != 0 &&
	    !is_broadcast && _ether_cmp(dhost, IF_LLADDR(bridge_ifp)) == 0) {
		is_bridge_mac = true;
	}
	if (is_ifp_mac) {
		/* unicast to the interface */
		if (sc->sc_mac_nat_bif == bif) {
			mbuf_ref_t  forward = NULL;

			if (list.head != NULL) {
				/* handle MAC-NAT if enabled */
				list = bridge_mac_nat_input_list(sc, ifp,
				    list.head, &forward);
			}
			if (ip_bcast != NULL) {
				/* forward to all members except this one */
				/* bridge_broadcast_list unlocks */
				bridge_broadcast_list(sc, bif, etypef,
				    ip_bcast);
			} else {
				BRIDGE_UNLOCK(sc);
			}
			/* the bridge lock is no longer held at this point */
			if (forward != NULL) {
				bridge_mac_nat_forward_list(bridge_ifp, etypef,
				    forward);
			}
		} else {
			BRIDGE_UNLOCK(sc);
		}
		/* unicast packets for this interface do not get forwarded */
		goto done;
	}
	if (is_bridge_mac || list.head == NULL) {
		BRIDGE_UNLOCK(sc);
		goto done;
	}
	if (!is_broadcast) {
		/* find where to send the packet */
		dst_if = bridge_rtlookup(sc, dhost, vlan);
		if (ifp == dst_if) {
			/* nothing to forward */
			BRIDGE_UNLOCK(sc);
			goto done;
		}
		if (dst_if == NULL) {
			/* if a member is the dhost, deliver as input */
			member_input = bridge_find_member(sc, dhost, bif);
			if (member_input != NULL) {
				/* grab packets destined to member */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			/* if a member is shost, there's a loop, drop it */
			if (bridge_find_member(sc, shost, bif) != NULL) {
				BRIDGE_UNLOCK(sc);
				m_freem_list(list.head);
				list.head = list.tail = NULL;
				goto done;
			}
		}
	}
	if (dst_if == NULL) {
		mbuf_t  m;

		/*
		 * Broadcast/multicast or unknown destination: a copy is
		 * broadcast; the original list remains in `list`.
		 */
		m = copy_packet_list(list.head);
		if (m != NULL) {
			/* bridge_broadcast_list unlocks */
			bridge_broadcast_list(sc, bif, etypef, m);
		} else {
			BRIDGE_UNLOCK(sc);
		}
	} else {
		/* bridge_forward_list() consumes list and unlocks */
		bridge_forward_list(sc, bif, dst_if, etypef, list.head);
		list.head = list.tail = NULL;
	}

done:
	/* dispose of whatever packets remain in `list` */
	if (list.head != NULL) {
		if (member_input != NULL) {
			/* member gets the packets */
			inject_input_packet_list(member_input, list.head, true);
			list.head = list.tail = NULL;
		} else if (is_bridge_mac) {
			/* bridge consumes all the unicast packets */
			bridge_interface_input_list(bridge_ifp, etypef, list,
			    uses_virtio);
			list.head = list.tail = NULL;
		} else {
			/* remaining packets are handed back to the caller */
			adjust_input_packet_list(list.head);
		}
	}
	return list;
}
9770 
9771 static inline void
9772 update_mbuf_flags(struct ifnet * ifp, mbuf_t m, struct ether_header * eh)
9773 {
9774 	/* duplicate some of the work done in ether_demux */
9775 	if ((eh->ether_dhost[0] & 1) == 0) {
9776 		if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp)) != 0) {
9777 			m->m_flags |= M_PROMISC;
9778 		}
9779 	} else {
9780 		/* Check for broadcast */
9781 		if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
9782 			m->m_flags |= M_BCAST;
9783 		} else {
9784 			m->m_flags |= M_MCAST;
9785 		}
9786 	}
9787 	if (m->m_flags & M_HASFCS) {
9788 		/*
9789 		 * If the M_HASFCS is set by the driver we want to make sure
9790 		 * that we strip off the trailing FCS data before handing it
9791 		 * up the stack.
9792 		 */
9793 		m_adj(m, -ETHER_CRC_LEN);
9794 		m->m_flags &= ~M_HASFCS;
9795 	}
9796 	return;
9797 }
9798 
9799 static mbuf_t
9800 bridge_pf_list(mbuf_t m, ifnet_t ifp, uint32_t sc_filter_flags, bool input)
9801 {
9802 	mbuf_t  next_packet = NULL;
9803 	mblist  ret;
9804 
9805 	mblist_init(&ret);
9806 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9807 		next_packet = scan->m_nextpkt;
9808 
9809 		/* remove packet from list, and pass through PF */
9810 		scan->m_nextpkt = NULL;
9811 		MBUF_INPUT_CHECK(scan, ifp);
9812 		bridge_pf(&scan, ifp, sc_filter_flags, input);
9813 		if (scan != NULL) {
9814 			/* add packet back to the list */
9815 			mblist_append(&ret, scan);
9816 		}
9817 	}
9818 	return ret.head;
9819 }
9820 
9821 static inline bool
9822 bridge_check_frame_header(struct bridge_softc * sc, ifnet_t ifp, mbuf_t m)
9823 {
9824 	bool                    included = false;
9825 	char * __single         header;
9826 	size_t                  header_length = 0;
9827 
9828 	header = m->m_pkthdr.pkt_hdr;
9829 	if (header >= (char *)mbuf_datastart(m) &&
9830 	    header <= mtod(m, char *)) {
9831 		header_length = mtod(m, char *) - header;
9832 		if (header_length >= ETHER_HDR_LEN) {
9833 			included = true;
9834 		}
9835 	}
9836 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9837 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
9838 	    "header length %lu", sc->sc_ifp->if_xname,
9839 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
9840 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
9841 	    (uint64_t)VM_KERNEL_ADDRPERM(header),
9842 	    included ? "inside" : "outside", header_length);
9843 	if (!included) {
9844 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9845 		    "%s: frame_header outside mbuf", ifp->if_xname);
9846 	}
9847 	return included;
9848 }
9849 
9850 
9851 mbuf_t
9852 bridge_early_input(struct ifnet *ifp, mbuf_t in_list, u_int32_t cnt)
9853 {
9854 	struct ether_header eh;
9855 	mblist          list;
9856 	volatile bool   list_is_promisc;
9857 	int             n_lists = 0;
9858 	mbuf_t          next_packet = NULL;
9859 	mblist          ret;
9860 	struct bridge_softc * __single sc = ifp->if_bridge;
9861 	uint32_t        sc_filter_flags;
9862 
9863 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9864 	    "(%s): count %u", ifp->if_xname, cnt);
9865 
9866 	/* run packet list through PF first */
9867 	sc_filter_flags = sc->sc_filter_flags;
9868 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
9869 		in_list = bridge_pf_list(in_list, ifp, sc_filter_flags, true);
9870 	}
9871 
9872 	/* form sublists with the same ethernet header */
9873 	mblist_init(&list);
9874 	mblist_init(&ret);
9875 	for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9876 		struct ether_header *   eh_p;
9877 		volatile bool           is_promisc;
9878 		mblist                  resid;
9879 
9880 		/* take it out of the list */
9881 		next_packet = scan->m_nextpkt;
9882 		scan->m_nextpkt = NULL;
9883 
9884 		/* don't loop the packet */
9885 		if ((scan->m_flags & M_PROTO1) != 0) {
9886 			mblist_append(&ret, scan);
9887 			continue;
9888 		}
9889 		/* Check if this mbuf looks valid */
9890 		MBUF_INPUT_CHECK(scan, ifp);
9891 
9892 		/* if the frame header isn't in the first mbuf, ignore */
9893 		if (!bridge_check_frame_header(sc, ifp, scan)) {
9894 			mblist_append(&ret, scan);
9895 			continue;
9896 		}
9897 		eh_p = __unsafe_forge_single(struct ether_header *,
9898 		    scan->m_pkthdr.pkt_hdr);
9899 		update_mbuf_flags(ifp, scan, eh_p);
9900 
9901 		/* set start back to include ether header */
9902 		_mbuf_adjust_pkthdr_and_data(scan, -ETHER_HDR_LEN);
9903 
9904 		is_promisc = get_and_clear_promisc(scan);
9905 		if (list.head == NULL) {
9906 			/* start a new list */
9907 			mblist_append(&list, scan);
9908 			bcopy(eh_p, &eh, sizeof(eh));
9909 			list_is_promisc = is_promisc;
9910 		} else if (bcmp(eh_p, &eh, sizeof(eh)) != 0) {
9911 			n_lists++;
9912 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9913 			    "(%s): sublist %u pkts %u",
9914 			    ifp->if_xname, n_lists, list.count);
9915 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
9916 				brlog_ether_header(&eh);
9917 			}
9918 			resid = bridge_input_list(sc, ifp, &eh, list,
9919 			    list_is_promisc);
9920 			if (resid.head != NULL) {
9921 				/* add to the packets to be returned */
9922 				mblist_append_list(&ret, resid);
9923 			}
9924 			/* start new list */
9925 			mblist_init(&list);
9926 			mblist_append(&list, scan);
9927 			list_is_promisc = is_promisc;
9928 			bcopy(eh_p, &eh, sizeof(eh));
9929 		} else {
9930 			mblist_append(&list, scan);
9931 			VERIFY(is_promisc == list_is_promisc);
9932 		}
9933 		if (next_packet == NULL) {
9934 			/* last list */
9935 			n_lists++;
9936 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9937 			    "(%s): sublist %u pkts %u",
9938 			    ifp->if_xname, n_lists, list.count);
9939 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
9940 				brlog_ether_header(&eh);
9941 			}
9942 			resid = bridge_input_list(sc, ifp, &eh, list,
9943 			    list_is_promisc);
9944 			if (resid.head != NULL) {
9945 				/* add to the packets to be returned */
9946 				mblist_append_list(&ret, resid);
9947 			}
9948 		}
9949 	}
9950 	return ret.head;
9951 }
9952 
9953 /*
9954  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9955  * All rights reserved.
9956  *
9957  * Redistribution and use in source and binary forms, with or without
9958  * modification, are permitted provided that the following conditions
9959  * are met:
9960  *   1. Redistributions of source code must retain the above copyright
9961  *      notice, this list of conditions and the following disclaimer.
9962  *   2. Redistributions in binary form must reproduce the above copyright
9963  *      notice, this list of conditions and the following disclaimer in the
9964  *      documentation and/or other materials provided with the distribution.
9965  *
9966  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9967  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9968  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9969  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9970  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9971  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9972  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9973  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9974  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9975  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9976  * SUCH DAMAGE.
9977  */
9978 
/*
 * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
 *
 * Create a queue of packets/segments which fit the given mss + hdr_len.
 * m0 points to the mbuf chain to be segmented.
 * This function splits the payload (m0->m_pkthdr.len - hdr_len)
 * into segments of at most `mss` bytes and then copies the first hdr_len
 * bytes from m0 to the front of each segment.
 * If hdr2_buf is not NULL (hdr2_len is the buffer length), it is copied
 * into each segment after the first hdr_len bytes.
 *
 * The first segment reuses m0 itself; the remaining segments are newly
 * allocated (M_NOWAIT).
 *
 * If the packet already fits (m0->m_pkthdr.len <= hdr_len + mss), the
 * returned list contains just m0, unmodified.
 *
 * Returns the list of segments on success. On failure every mbuf in the
 * list, including m0, is freed and the returned list is re-initialized
 * with mblist_init(), so callers can detect failure by a NULL `head`.
 *
 * NOTE(review): a non-positive `mss` combined with a packet longer than
 * hdr_len + mss is not rejected here; `off` would then never advance.
 * Callers are presumably expected to pass mss > 0 -- confirm.
 */

static mblist
m_seg(struct mbuf *m0, int hdr_len, int mss, char * hdr2_buf __sized_by_or_null(hdr2_len), int hdr2_len)
{
	/* `n` counts the segments but its value is never read */
	int off = 0, n, firstlen;
	struct mbuf *mseg;
	int total_len = m0->m_pkthdr.len;
	mblist ret;

	mblist_init(&ret);
	mblist_append(&ret, m0);

	/*
	 * Segmentation useless
	 */
	if (total_len <= hdr_len + mss) {
		n = 1;
		goto done;
	}

	/* normalize: treat a missing or empty optional header as absent */
	if (hdr2_buf == NULL || hdr2_len <= 0) {
		hdr2_buf = NULL;
		hdr2_len = 0;
	}

	off = hdr_len + mss;
	firstlen = mss; /* first segment stored in the original mbuf */
	/*
	 * Byte count of the first segment (m0 is trimmed to this size below).
	 * NOTE(review): this does not include hdr2_len, although the first
	 * segment grows by hdr2_len when the optional header is present.
	 */
	ret.bytes = off;
	for (n = 1; off < total_len; off += mss, n++) {
		struct mbuf *m;
		/*
		 * Copy the header from the original packet
		 * and create a new mbuf chain
		 */
		if (MHLEN < hdr_len) {
			/* header doesn't fit in a plain header mbuf */
			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		} else {
			m = m_gethdr(M_NOWAIT, MT_DATA);
		}

		if (m == NULL) {
#ifdef GSO_DEBUG
			D("MGETHDR error\n");
#endif
			goto err;
		}

		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));

		m->m_len = hdr_len;
		/*
		 * if the optional header is present, copy it
		 */
		if (hdr2_buf != NULL) {
			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
		}

		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
		/*
		 * Shrinking `mss` here also makes the loop increment land
		 * exactly on total_len, which terminates the loop.
		 */
		if (off + mss >= total_len) {           /* last segment */
			mss = total_len - off;
		}
		/*
		 * Copy the payload from original packet
		 */
		mseg = m_copym(m0, off, mss, M_NOWAIT);
		if (mseg == NULL) {
			/* `m` is not on the list yet, free it separately */
			m_freem(m);
#ifdef GSO_DEBUG
			D("m_copym error\n");
#endif
			goto err;
		}
		m_cat(m, mseg);

		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		/*
		 * Copy the checksum flags and data (in_cksum() need this)
		 */
		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;

		mblist_append(&ret, m);
	}

	/*
	 * Update first segment.
	 * If the optional header is present, is necessary
	 * to insert it into the first segment.
	 */
	if (hdr2_buf == NULL) {
		/* negative length: trim everything past the first segment */
		m_adj(m0, hdr_len + firstlen - total_len);
		m0->m_pkthdr.len = hdr_len + firstlen;
	} else {
		/* save the first payload, cut m0 to the header, then rebuild */
		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
		if (mseg == NULL) {
#ifdef GSO_DEBUG
			D("m_copym error\n");
#endif
			goto err;
		}
		m_adj(m0, hdr_len - total_len);
		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
		m_cat(m0, mseg);
		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
	}

done:
	return ret;

err:
	/* free m0 and every segment created so far, return an empty list */
	if (ret.head != NULL) {
		m_freem_list(ret.head);
		mblist_init(&ret);
	}
	return ret;
}
10111 
10112 /*
10113  * Wrappers of IPv4 checksum functions
10114  */
/*
 * Compute the delayed (transport) checksum of an IPv4 packet that still
 * has its link-layer header attached.
 *
 * The first `mac_hlen` bytes are temporarily hidden by advancing m_data
 * and shrinking m_len / m_pkthdr.len, in_delayed_cksum() is called, the
 * CSUM_DELAY_DATA flag is cleared, and the mbuf is restored.
 *
 * m         packet; its first mbuf starts with the link-layer header
 * ip        IPv4 header of the packet
 * mac_hlen  length of the link-layer header
 */
static inline void
gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
{
	/* hide the link-layer header */
	m->m_data += mac_hlen;
	m->m_len -= mac_hlen;
	m->m_pkthdr.len -= mac_hlen;
	/*
	 * NOTE(review): an identifier that is not defined as a macro
	 * evaluates to 0 in #if, so unless __FreeBSD_version is defined to
	 * 1000000 or more the byte-order conversions below are compiled in
	 * -- confirm that this is intended for this platform.
	 */
#if __FreeBSD_version < 1000000
	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
#endif

	in_delayed_cksum(m);

#if __FreeBSD_version < 1000000
	ip->ip_len = htons(ip->ip_len);
#endif
	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	/* expose the link-layer header again */
	m->m_len += mac_hlen;
	m->m_pkthdr.len += mac_hlen;
	m->m_data -= mac_hlen;
}
10135 
/*
 * Compute the IPv4 header checksum of a packet that still has its
 * link-layer header attached, store it in ip->ip_sum, and clear the
 * CSUM_IP flag.
 *
 * m_data is temporarily advanced past the `mac_hlen` bytes of link-layer
 * header so that in_cksum() covers the `ip_hlen` bytes of the IP header.
 * Unlike gso_ipv4_data_cksum(), m_len and m_pkthdr.len are not adjusted.
 *
 * The code does not zero ip->ip_sum itself; the caller visible in this
 * file does so before calling.
 */
static inline void
gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
{
	m->m_data += mac_hlen;

	ip->ip_sum = in_cksum(m, ip_hlen);

	m->m_pkthdr.csum_flags &= ~CSUM_IP;
	m->m_data -= mac_hlen;
}
10146 
/*
 * Structure that contains the state during the TCP segmentation.
 *
 * It is initialized by gso_ip_tcp_init_state() and then carried from
 * segment to segment: `update` re-points the header fields at the
 * current segment, `internal` fixes up that segment's headers.
 */
struct gso_ip_tcp_state {
	/* re-point hdr/tcp and recompute pay_len for the given segment */
	void    (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* finalize the IP and TCP headers of the given segment */
	void    (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* number of bytes accessible through `hdr` */
	u_int ip_m0_len;
	/* IP header (IPv4 or IPv6) of the current segment */
	uint8_t * __counted_by(ip_m0_len) hdr;
	/* TCP header of the current segment */
	struct tcphdr *tcp;
	/* link-layer header length */
	int mac_hlen;
	/* IP header length */
	int ip_hlen;
	/* TCP header length, taken from th_off */
	int tcp_hlen;
	/* mac_hlen + ip_hlen + tcp_hlen */
	int hlen;
	/* payload length of the current segment: m_pkthdr.len - hlen */
	int pay_len;
	/* CSUM_* flags for the checksums that are computed in software */
	int sw_csum;
	/* TCP sequence number (host byte order), advanced by each payload */
	uint32_t tcp_seq;
	/* IPv4 identification (host byte order), incremented per segment */
	uint16_t ip_id;
	/* NOTE(review): not written or read by the code in this section */
	boolean_t is_tx;
};
10168 
10169 /*
10170  * Update the pointers to TCP and IPv4 headers
10171  */
10172 static inline void
10173 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10174 {
10175 	state->hdr = mtodo(m, state->mac_hlen);
10176 	state->ip_m0_len = m->m_len - state->mac_hlen;
10177 	state->ip_hlen = state->ip_hlen;
10178 	state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10179 	state->pay_len = m->m_pkthdr.len - state->hlen;
10180 }
10181 
10182 /*
10183  * Set properly the TCP and IPv4 headers
10184  */
10185 static inline void
10186 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10187 {
10188 	struct ip *ip;
10189 	/*
10190 	 * Update IP header
10191 	 */
10192 	ip = (struct ip *)state->hdr;
10193 	ip->ip_id = htons((state->ip_id)++);
10194 	ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
10195 	/*
10196 	 * TCP Checksum
10197 	 */
10198 	state->tcp->th_sum = 0;
10199 	state->tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
10200 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
10201 	/*
10202 	 * Checksum HW not supported (TCP)
10203 	 */
10204 	if (state->sw_csum & CSUM_DELAY_DATA) {
10205 		gso_ipv4_data_cksum(m, ip, state->mac_hlen);
10206 	}
10207 
10208 	state->tcp_seq += state->pay_len;
10209 	/*
10210 	 * IP Checksum
10211 	 */
10212 	ip->ip_sum = 0;
10213 	/*
10214 	 * Checksum HW not supported (IP)
10215 	 */
10216 	if (state->sw_csum & CSUM_IP) {
10217 		gso_ipv4_hdr_cksum(m, ip, state->mac_hlen, state->ip_hlen);
10218 	}
10219 }
10220 
10221 
10222 /*
10223  * Updates the pointers to TCP and IPv6 headers
10224  */
10225 static inline void
10226 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10227 {
10228 	state->hdr = mtodo(m, state->mac_hlen);
10229 	state->ip_m0_len = m->m_len - state->mac_hlen;
10230 	state->ip_hlen = state->ip_hlen;
10231 	state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10232 	state->pay_len = m->m_pkthdr.len - state->hlen;
10233 }
10234 
10235 /*
10236  * Sets properly the TCP and IPv6 headers
10237  */
10238 static inline void
10239 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10240 {
10241 	struct ip6_hdr *ip6;
10242 
10243 	ip6 = (struct ip6_hdr *)state->hdr;
10244 	ip6->ip6_plen = htons(m->m_pkthdr.len - state->mac_hlen - state->ip_hlen);
10245 	/*
10246 	 * TCP Checksum
10247 	 */
10248 	state->tcp->th_sum = 0;
10249 	state->tcp->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst,
10250 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
10251 	/*
10252 	 * Checksum HW not supported (TCP)
10253 	 */
10254 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
10255 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
10256 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
10257 	}
10258 	state->tcp_seq += state->pay_len;
10259 }
10260 
10261 /*
10262  * Init the state during the TCP segmentation
10263  */
10264 static void
10265 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
10266     bool is_ipv4, int mac_hlen, int ip_hlen,
10267     uint8_t *__counted_by(ip_m0_len) ip_hdr, u_int ip_m0_len,
10268     struct tcphdr * tcp_hdr)
10269 {
10270 #pragma unused(ifp)
10271 
10272 	state->hdr = ip_hdr;
10273 	state->ip_m0_len = ip_m0_len;
10274 	state->ip_hlen = ip_hlen;
10275 	state->tcp = tcp_hdr;
10276 	if (is_ipv4) {
10277 		state->ip_id = ntohs(((struct ip *)state->hdr)->ip_id);
10278 		state->update = gso_ipv4_tcp_update;
10279 		state->internal = gso_ipv4_tcp_internal;
10280 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
10281 	} else {
10282 		state->update = gso_ipv6_tcp_update;
10283 		state->internal = gso_ipv6_tcp_internal;
10284 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
10285 	}
10286 	state->mac_hlen = mac_hlen;
10287 	state->tcp_hlen = state->tcp->th_off << 2;
10288 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
10289 	state->tcp_seq = ntohl(state->tcp->th_seq);
10290 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
10291 	return;
10292 }
10293 
10294 /*
10295  * GSO on TCP/IP (v4 or v6)
10296  *
10297  * Segment the given mbuf and return the list of packets.
10298  *
10299  */
10300 static mblist
10301 gso_ip_tcp(ifnet_t ifp, mbuf_t m0, struct gso_ip_tcp_state *state, bool is_tx)
10302 {
10303 	struct mbuf *m;
10304 	int mss = 0;
10305 #ifdef GSO_STATS
10306 	int total_len = m0->m_pkthdr.len;
10307 #endif /* GSO_STATS */
10308 	mblist  seg;
10309 
10310 	mss = _mbuf_get_tso_mss(m0);
10311 	if (mss == 0 && !is_tx) {
10312 		uint8_t seg_cnt = m0->m_pkthdr.rx_seg_cnt;
10313 
10314 		if (seg_cnt != 0) {
10315 			uint32_t        hdr_len;
10316 			uint32_t        len;
10317 
10318 			/* approximate the MSS using LRO seg cnt */
10319 			hdr_len = state->ip_hlen + state->tcp_hlen;
10320 			len = mbuf_pkthdr_len(m0) - hdr_len - ETHER_HDR_LEN;
10321 			mss = len / seg_cnt;
10322 			m0->m_pkthdr.rx_seg_cnt = 0;
10323 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10324 			    "%s: mss %d = len %d / seg cnt %d",
10325 			    ifp->if_xname, mss, len, seg_cnt);
10326 		}
10327 	}
10328 	if (mss == 0) {
10329 		/* hack: we don't have the actual MSS */
10330 		u_int reduce_mss;
10331 
10332 		reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
10333 		    : if_bridge_tso_reduce_mss_forwarding;
10334 		mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen -
10335 		    reduce_mss;
10336 		assert(mss > 0);
10337 	} else if (is_tx) {
10338 		bool    is_ipv4;
10339 		bool    do_tso = true;
10340 
10341 		if (TSO_IPV4_OK(ifp, m0)) {
10342 			is_ipv4 = true;
10343 		} else if (TSO_IPV6_OK(ifp, m0)) {
10344 			is_ipv4 = false;
10345 		} else {
10346 			do_tso = false;
10347 		}
10348 		if (do_tso) { /* TSO with GSO */
10349 			uint32_t        if_tso_max;
10350 
10351 			if_tso_max = get_if_tso_mtu(ifp, is_ipv4);
10352 			mss = if_tso_max - state->ip_hlen - state->tcp_hlen;
10353 		}
10354 	}
10355 	seg = m_seg(m0, state->hlen, mss, 0, 0);
10356 	if (seg.head == NULL || seg.head->m_nextpkt == NULL) {
10357 		return seg;
10358 	}
10359 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10360 	    "%s %s mss %d nsegs %d",
10361 	    ifp->if_xname,
10362 	    is_tx ? "TX" : "RX",
10363 	    mss, seg.count);
10364 #ifdef GSO_STATS
10365 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
10366 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
10367 	GSOSTAT_ADD(tcp.gsos_osegments, seg.count);
10368 #endif /* GSO_STATS */
10369 
10370 	/* first pkt */
10371 	VERIFY(seg.head == m0);
10372 	m = m0;
10373 
10374 	state->update(state, m);
10375 
10376 	do {
10377 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
10378 
10379 		state->internal(state, m);
10380 		m = m->m_nextpkt;
10381 		state->update(state, m);
10382 		state->tcp->th_flags &= ~TH_CWR;
10383 		state->tcp->th_seq = htonl(state->tcp_seq);
10384 	} while (m->m_nextpkt);
10385 
10386 	/* last pkt */
10387 	state->internal(state, m);
10388 
10389 #ifdef GSO_STATS
10390 	if (!error) {
10391 		GSOSTAT_INC(tcp.gsos_segmented);
10392 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
10393 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
10394 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
10395 	}
10396 #endif /* GSO_STATS */
10397 	return seg;
10398 }
10399 
10400 /*
10401  * GSO for TCP/IPv[46]
10402  */
10403 static mblist
10404 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
10405     u_int mac_hlen, bool is_ipv4, bool is_tx)
10406 {
10407 	uint32_t csum_flags;
10408 	struct gso_ip_tcp_state state;
10409 	struct tcphdr *tcp;
10410 
10411 	assert(info_p->ip_proto_hdr != NULL);
10412 	tcp = (struct tcphdr *)(void *)info_p->ip_proto_hdr;
10413 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
10414 	    info_p->ip_hlen + info_p->ip_opt_len,
10415 	    info_p->ip_hdr, info_p->ip_m0_len, tcp);
10416 	csum_flags = is_ipv4 ? CSUM_DELAY_DATA : CSUM_DELAY_IPV6_DATA; /* XXX */
10417 	m->m_pkthdr.csum_flags = csum_flags;
10418 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
10419 	return gso_ip_tcp(ifp, m, &state, is_tx);
10420 }
10421 
10422 static mblist
10423 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx)
10424 {
10425 	int error;
10426 	ip_packet_info info;
10427 	struct bripstats stats; /* XXX ignored */
10428 	mblist ret;
10429 
10430 	error = bridge_get_tcp_header(&m, mac_hlen, is_ipv4, &info, &stats);
10431 	if (error != 0) {
10432 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10433 		    "%s bridge_get_tcp_header failed %d (%s)",
10434 		    ifp->if_xname, error,
10435 		    is_tx ? "TX" : "RX");
10436 		if (m != NULL) {
10437 			m_freem(m);
10438 			m = NULL;
10439 		}
10440 		goto no_segment;
10441 	}
10442 	if (info.ip_proto_hdr == NULL) {
10443 		/* not actually a TCP packet, no segmentation */
10444 		goto no_segment;
10445 	}
10446 	if (!is_tx && ip_packet_info_dst_is_our_ip(&info, ifp->if_index)) {
10447 		goto no_segment;
10448 	}
10449 	return gso_tcp_with_info(ifp, m, &info, mac_hlen, is_ipv4, is_tx);
10450 
10451 no_segment:
10452 	mblist_init(&ret);
10453 	if (m != NULL) {
10454 		mblist_append(&ret, m);
10455 	}
10456 	return ret;
10457 }
10458