xref: /xnu-11417.140.69/bsd/net/if_bridge.c (revision 43a90889846e00bfb5cf1d255cdc0a701a1e05a4)
1 /*
2  * Copyright (c) 2004-2025 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
66  * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
101  *	  consider heterogenous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/zalloc.h>
131 
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 
142 #include <netinet/in.h> /* for struct arpcom */
143 #include <netinet/tcp.h> /* for struct tcphdr */
144 #include <netinet/in_systm.h>
145 #include <netinet/in_var.h>
146 #define _IP_VHL
147 #include <netinet/ip.h>
148 #include <netinet/ip_var.h>
149 #include <netinet/ip6.h>
150 #include <netinet6/ip6_var.h>
151 #include <netinet/if_ether.h> /* for struct arpcom */
152 #include <net/bridgestp.h>
153 #include <net/if_bridgevar.h>
154 #include <net/if_llc.h>
155 #if NVLAN > 0
156 #include <net/if_vlan_var.h>
157 #endif /* NVLAN > 0 */
158 
159 #include <net/if_ether.h>
160 #include <net/dlil.h>
161 #include <net/kpi_interfacefilter.h>
162 #include <net/pfvar.h>
163 
164 #include <net/route.h>
165 #include <dev/random/randomdev.h>
166 
167 #include <netinet/bootp.h>
168 #include <netinet/dhcp.h>
169 
170 #if SKYWALK
171 #include <skywalk/nexus/netif/nx_netif.h>
172 #endif /* SKYWALK */
173 
174 #include <net/sockaddr_utils.h>
175 #include <net/mblist.h>
176 
177 #include <os/log.h>
178 
/* Both TSO checksum flags (IPv4 and IPv6) combined into one mask. */
#define _TSO_CSUM       (CSUM_TSO_IPV4 | CSUM_TSO_IPV6)

/* File-local in_addr holding INADDR_ANY. */
static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };


/*
 * mbuf flag tests
 * - __M_FLAGS_ARE_SET is true when at least one bit of 'flags' is set
 *   in (m)->m_flags
 * - IS_BCAST_MCAST is therefore true for a packet that carries either
 *   M_BCAST or M_MCAST (or both)
 */
#define __M_FLAGS_ARE_SET(m, flags)     (((m)->m_flags & (flags)) != 0)
#define IS_BCAST(m)                     __M_FLAGS_ARE_SET(m, M_BCAST)
#define IS_MCAST(m)                     __M_FLAGS_ARE_SET(m, M_MCAST)
#define IS_BCAST_MCAST(m)               __M_FLAGS_ARE_SET(m, M_BCAST | M_MCAST)

/*
 * Frequently used protocol constants passed through htons().
 */
#define HTONS_ETHERTYPE_ARP             htons(ETHERTYPE_ARP)
#define HTONS_ETHERTYPE_IP              htons(ETHERTYPE_IP)
#define HTONS_ETHERTYPE_IPV6            htons(ETHERTYPE_IPV6)
#define HTONS_ARPHRD_ETHER              htons(ARPHRD_ETHER)
#define HTONS_ARPOP_REQUEST             htons(ARPOP_REQUEST)
#define HTONS_ARPOP_REPLY               htons(ARPOP_REPLY)
#define HTONS_IPPORT_BOOTPC             htons(IPPORT_BOOTPC)
#define HTONS_IPPORT_BOOTPS             htons(IPPORT_BOOTPS)
#define HTONS_DHCP_FLAGS_BROADCAST      htons(DHCP_FLAGS_BROADCAST)
198 
/*
 * if_bridge_debug, BR_DBGF_*
 * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
 *   to enable additional logs for the corresponding bridge function
 * - "sysctl net.link.bridge.debug" controls the value of
 *   'if_bridge_debug'
 */
static uint32_t if_bridge_debug = 0;
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400
#define BR_DBGF_INPUT_LIST      0x0800

/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
 *   logs must use LOG_NOTICE to ensure they appear by default
 */
static int if_bridge_log_level = LOG_NOTICE;

/*
 * BRIDGE_DBGF_ENABLED
 * - true when at least one of the BR_DBGF_* bits in '__flag' is set in
 *   'if_bridge_debug'
 * - the argument is parenthesized so that an OR of several flags is
 *   masked as a whole instead of being split by operator precedence
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)

/*
 * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
 * - macros to generate the specified log conditionally based on
 *   the specified log level and debug flags
 * - the log is emitted when '__level' is at or below
 *   'if_bridge_log_level', or when any flag in '__dbgf' is enabled
 * - '__string' must be a string literal; BRIDGE_LOG concatenates it
 *   with a "%s: " prefix that is filled in with the function name
 * - BRIDGE_LOG_SIMPLE does not include the function name in the log
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, "%s: " __string,         \
	                       __func__, ## __VA_ARGS__);               \
	        }                                                       \
	} while (0)
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                       \
	} while (0)
255 
/*
 * Bridge locking
 * - _BRIDGE_LOCK/_BRIDGE_UNLOCK take and release the per-bridge mutex
 *   'sc_mtx' directly; BRIDGE_LOCK_ASSERT_* assert its ownership state
 * - when BRIDGE_LOCK_DEBUG is set (it is hard-wired to 1 below), the
 *   BRIDGE_* macros map to bridge_*() functions that are not part of
 *   this section; presumably they implement the same protocol as the
 *   macro bodies in the #else branch while recording the call history
 *   kept in bridge_softc -- confirm against those functions
 * - the #else branch implements the following protocol:
 *   BRIDGE_LOCK2REF: called with the mutex held; sets '_err' to EBUSY
 *     if 'sc_iflist_xcnt' is non-zero, otherwise takes a reference by
 *     incrementing 'sc_iflist_ref' and sets '_err' to 0; the mutex is
 *     released in both cases
 *   BRIDGE_UNREF: re-takes the mutex, drops the reference, and issues
 *     wakeup() on 'sc_cv' when the last reference goes away while
 *     'sc_iflist_xcnt' is non-zero
 *   BRIDGE_XLOCK: called with the mutex held; increments
 *     'sc_iflist_xcnt' and sleeps on 'sc_cv' until 'sc_iflist_ref'
 *     reaches zero
 *   BRIDGE_XDROP: called with the mutex held; decrements
 *     'sc_iflist_xcnt'
 */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)

#define BRIDGE_LOCK_DEBUG      1
#if BRIDGE_LOCK_DEBUG

/* number of slots in each of the lock/unlock caller history arrays */
#define BR_LCKDBG_MAX                   4

#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      (_err) = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)

#else /* !BRIDGE_LOCK_DEBUG */

#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt > 0) {                                \
	        (_err) = EBUSY;                                         \
	} else {                                                        \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
#define BRIDGE_UNREF(_sc)               do {                            \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \
	        _BRIDGE_UNLOCK(_sc);                                    \
	        wakeup(&(_sc)->sc_cv);                                  \
	} else {                                                        \
	        _BRIDGE_UNLOCK(_sc);                                    \
	}                                                               \
} while (0)
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt++;                                        \
	while ((_sc)->sc_iflist_ref > 0)                                \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
} while (0)
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt--;                                        \
} while (0)

#endif /* BRIDGE_LOCK_DEBUG */
311 
/*
 * BRIDGE_BPF_TAP_IN, BRIDGE_BPF_TAP_OUT
 * - pass the mbuf to bpf_tap_in()/bpf_tap_out() with DLT_EN10MB, but
 *   only when the interface's 'if_bpf' pointer is non-NULL
 * - 'ifp' is evaluated more than once, so it must be an expression
 *   without side effects
 */
#define BRIDGE_BPF_TAP_IN(ifp, m)                                       \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_in((ifp), DLT_EN10MB, (m), NULL, 0);    \
	        }                                                       \
	} while(0)

#define BRIDGE_BPF_TAP_OUT(ifp, m)                                      \
	do {                                                            \
	        if ((ifp)->if_bpf != NULL) {                            \
	                bpf_tap_out((ifp), DLT_EN10MB, (m), NULL, 0);   \
	        }                                                       \
	} while(0)
325 
326 
/*
 * Initial size of the route hash table.  Must be a power of two.
 */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

/*
 * Maximum size of the routing hash table
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/*
 * Bucket index mask for the current table size; only meaningful while
 * the size is a power of two.
 */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)

/*
 * BRIDGE_RTHASH_SIZE can be overridden at build time, so enforce the
 * documented constraints instead of relying on the comment alone.
 */
_Static_assert(BRIDGE_RTHASH_SIZE > 0 &&
    (BRIDGE_RTHASH_SIZE & (BRIDGE_RTHASH_SIZE - 1)) == 0,
    "BRIDGE_RTHASH_SIZE must be a power of two");
_Static_assert(BRIDGE_RTHASH_SIZE <= BRIDGE_RTHASH_SIZE_MAX,
    "BRIDGE_RTHASH_SIZE must not exceed BRIDGE_RTHASH_SIZE_MAX");

/*
 * Maximum number of addresses to cache.
 */
#ifndef BRIDGE_RTABLE_MAX
#define BRIDGE_RTABLE_MAX               100
#endif

/*
 * Timeout (in seconds) for entries learned dynamically.
 */
#ifndef BRIDGE_RTABLE_TIMEOUT
#define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
#endif

/*
 * Number of seconds between walks of the route list.
 */
#ifndef BRIDGE_RTABLE_PRUNE_PERIOD
#define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
#endif

/*
 * Number of MAC NAT entries
 * - sized based on 16 clients (including MAC NAT interface)
 *   each with 4 addresses
 */
#ifndef BRIDGE_MAC_NAT_ENTRY_MAX
#define BRIDGE_MAC_NAT_ENTRY_MAX        64
#endif /* BRIDGE_MAC_NAT_ENTRY_MAX */

/*
 * List of capabilities to possibly mask on the member interface.
 */
#define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
/*
 * List of capabilities to disable on the member interface.
 */
#define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
379 
380 /*
381  * Bridge interface list entry.
382  */
383 struct bridge_iflist {
384 	TAILQ_ENTRY(bridge_iflist) bif_next;
385 	struct ifnet            *bif_ifp;       /* member if */
386 	struct bstp_port        bif_stp;        /* STP state */
387 	uint32_t                bif_ifflags;    /* member if flags */
388 	int                     bif_savedcaps;  /* saved capabilities */
389 	uint32_t                bif_addrmax;    /* max # of addresses */
390 	uint32_t                bif_addrcnt;    /* cur. # of addresses */
391 	uint32_t                bif_addrexceeded; /* # of address violations */
392 
393 	interface_filter_t      bif_iff_ref;
394 	struct bridge_softc     *bif_sc;
395 	uint32_t                bif_flags;
396 
397 	/* host filter */
398 	struct in_addr          bif_hf_ipsrc;
399 	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN];
400 
401 	struct ifbrmstats       bif_stats;
402 };
403 
404 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)405 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
406 {
407 	return (bif->bif_ifflags & flags) != 0;
408 }
409 
410 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)411 bif_has_checksum_offload(struct bridge_iflist * bif)
412 {
413 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
414 }
415 
416 static inline bool
bif_has_mac_nat(struct bridge_iflist * bif)417 bif_has_mac_nat(struct bridge_iflist * bif)
418 {
419 	return bif_ifflags_are_set(bif, IFBIF_MAC_NAT);
420 }
421 
422 static inline bool
bif_uses_virtio(struct bridge_iflist * bif)423 bif_uses_virtio(struct bridge_iflist * bif)
424 {
425 	return bif_ifflags_are_set(bif, IFBIF_USES_VIRTIO);
426 }
427 
/*
 * fake errors to make the code clearer
 * - every name below expands to EJUSTRETURN, so callers cannot tell
 *   them apart at run time; they only label the failure at the point
 *   where it is returned
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN

/*
 * BIFF_* flag bits; each value is a distinct single bit.
 */
#define BIFF_PROMISC            0x01    /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
#define BIFF_HOST_FILTER        0x10    /* host filter enabled */
#define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
#define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x400   /* allmulti set */
#define BIFF_LRO_DISABLED       0x800   /* LRO was disabled */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
#endif /* SKYWALK */
453 
454 /*
455  * mac_nat_entry
456  * - translates between an IP address and MAC address on a specific
457  *   bridge interface member
458  */
459 struct mac_nat_entry {
460 	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
461 	struct bridge_iflist    *mne_bif;       /* originating interface */
462 	unsigned long           mne_expire;     /* expiration time */
463 	union {
464 		struct in_addr  mneu_ip;        /* originating IPv4 address */
465 		struct in6_addr mneu_ip6;       /* originating IPv6 address */
466 	} mne_u;
467 	uint8_t                 mne_mac[ETHER_ADDR_LEN];
468 	uint8_t                 mne_flags;
469 	uint8_t                 mne_reserved;
470 };
471 #define mne_ip  mne_u.mneu_ip
472 #define mne_ip6 mne_u.mneu_ip6
473 
474 #define MNE_FLAGS_IPV6          0x01    /* IPv6 address */
475 
476 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
477 
/*
 * mac_nat_record
 * - used by bridge_mac_nat_output() to convey the translation that needs
 *   to take place in bridge_mac_nat_translate
 * - holds enough information so that the translation can be done later
 *   when the destination interface is the MAC-NAT interface
 * - 'mnr_u' is a union: the ARP, IPv4 and IPv6 members share storage,
 *   so only one of them is valid for a given record (presumably chosen
 *   by 'mnr_ether_type' -- confirm against the users)
 */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	union {
		uint16_t        mnru_arp_offset;
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t  mnruip_header_len;
		} mnru_ip;
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessor for the ARP member of the union */
#define mnr_arp_offset  mnr_u.mnru_arp_offset

/* shorthand accessors for the IPv4 member of the union */
#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

/* shorthand accessors for the IPv6 member of the union */
#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
513 
514 /*
515  * Bridge route node.
516  */
517 struct bridge_rtnode {
518 	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
519 	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
520 	struct bridge_iflist    *brt_dst;       /* destination if */
521 	unsigned long           brt_expire;     /* expiration time */
522 	uint8_t                 brt_flags;      /* address flags */
523 	uint8_t                 brt_addr[ETHER_ADDR_LEN];
524 	uint16_t                brt_vlan;       /* vlan id */
525 };
526 
527 #define brt_ifp                 brt_dst->bif_ifp
528 
529 /*
530  * Bridge delayed function call context
531  */
532 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
533 
534 struct bridge_delayed_call {
535 	struct bridge_softc     *bdc_sc;
536 	bridge_delayed_func_t   bdc_func; /* Function to call */
537 	struct timespec         bdc_ts; /* Time to call */
538 	u_int32_t               bdc_flags;
539 	thread_call_t           bdc_thread_call;
540 };
541 
542 #define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
543 #define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
544 
545 /*
546  * Software state for each bridge.
547  */
548 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
549 
550 struct bridge_softc {
551 	struct ifnet            *sc_ifp;        /* make this an interface */
552 	uint32_t                sc_flags;
553 	LIST_ENTRY(bridge_softc) sc_list;
554 	decl_lck_mtx_data(, sc_mtx);
555 	struct _bridge_rtnode_list * __counted_by(sc_rthash_size) sc_rthash;  /* our forwarding table */
556 	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
557 	uint32_t                sc_rthash_key;  /* key for hash */
558 	uint32_t                sc_rthash_size; /* size of the hash table */
559 	struct bridge_delayed_call sc_aging_timer;
560 	struct bridge_delayed_call sc_resize_call;
561 	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
562 	struct bstp_state       sc_stp;         /* STP state */
563 	void                    *sc_cv;
564 	uint32_t                sc_brtmax;      /* max # of addresses */
565 	uint32_t                sc_brtcnt;      /* cur. # of addresses */
566 	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
567 	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
568 	uint32_t                sc_iflist_xcnt; /* refcount for sc_iflist */
569 	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
570 	uint32_t                sc_brtexceeded; /* # of cache drops */
571 	uint32_t                sc_filter_flags; /* ipf and flags */
572 	struct ifnet            *sc_ifaddr;     /* member mac copied from */
573 	u_char                  sc_defaddr[6];  /* Default MAC address */
574 	char                    sc_if_xname[IFNAMSIZ];
575 
576 	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
577 	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
578 	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
579 	uint32_t                sc_mne_max;      /* max # of entries */
580 	uint32_t                sc_mne_count;    /* cur. # of entries */
581 	uint32_t                sc_mne_allocation_failures;
582 #if BRIDGE_LOCK_DEBUG
583 	/*
584 	 * Locking and unlocking calling history
585 	 */
586 	void                    *lock_lr[BR_LCKDBG_MAX];
587 	int                     next_lock_lr;
588 	void                    *unlock_lr[BR_LCKDBG_MAX];
589 	int                     next_unlock_lr;
590 #endif /* BRIDGE_LOCK_DEBUG */
591 };
592 
593 #define SCF_DETACHING            0x01
594 #define SCF_RESIZING             0x02
595 #define SCF_MEDIA_ACTIVE         0x04
596 #define SCF_PROTO_ATTACHED       0x08
597 
/*
 * ChecksumOperation
 * - selects the checksum handling for a packet; passed as an argument
 *   to bridge_enqueue()
 * - the numeric values are explicit; keep them stable
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
604 
605 typedef struct {
606 	u_int           ip_hlen;        /* IP header length */
607 	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
608 	u_int           ip_m0_len;      /* bytes available at ip_hdr (without jumping mbufs) */
609 	u_int           ip_opt_len;     /* IPv6 options headers length */
610 	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
611 	bool            ip_is_ipv4;
612 	bool            ip_is_fragmented;
613 	uint8_t         *__sized_by(ip_m0_len) ip_hdr;   /* pointer to IP header */
614 	uint8_t         *__indexable ip_proto_hdr;   /* ptr to protocol header (TCP) */
615 } ip_packet_info, *ip_packet_info_t;
616 
617 struct bridge_hostfilter_stats bridge_hostfilter_stats;
618 
619 typedef uint8_t ether_type_flag_t;
620 
/*
 * pkt_direction_t
 * - direction of a packet: receive (RX) or transmit (TX)
 * - passed to bridge_enqueue() and bridge_broadcast_list()
 */
typedef enum {
	pkt_direction_RX,
	pkt_direction_TX
} pkt_direction_t;
625 
626 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
627 #if BRIDGE_LOCK_DEBUG
628 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
629 #else
630 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
631 #endif
632 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
633 
634 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
635 
636 static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
637 static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
638 
639 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
640 static int      bridge_clone_destroy(struct ifnet *);
641 
642 static errno_t  bridge_ioctl(struct ifnet *, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)));
643 #if HAS_IF_CAP
644 static void     bridge_mutecaps(struct bridge_softc *);
645 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
646     int);
647 #endif
648 static errno_t bridge_set_tso(struct bridge_softc *);
649 static void     bridge_proto_attach_changed(struct ifnet *);
650 static int      bridge_init(struct ifnet *);
651 static void     bridge_ifstop(struct ifnet *, int);
652 static int      bridge_output(struct ifnet *, struct mbuf *);
653 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
654 static void     bridge_start(struct ifnet *);
655 static mblist   bridge_input_list(struct bridge_softc *, ifnet_t,
656     struct ether_header *, mblist, bool);
657 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
658     mbuf_t *, char **);
659 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
660     mbuf_t *);
661 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
662     mbuf_t *m);
663 static int      bridge_enqueue(ifnet_t, ifnet_t, ifnet_t,
664     ether_type_flag_t, mbuf_t, ChecksumOperation, pkt_direction_t);
665 static mbuf_t   bridge_checksum_offload_list(ifnet_t, struct bridge_iflist *,
666     mbuf_t, bool);
667 static mbuf_t   bridge_filter_checksum(ifnet_t, struct bridge_iflist * bif,
668     mbuf_t m, bool, bool, bool);
669 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
670 
671 static void     bridge_aging_timer(struct bridge_softc *sc);
672 
673 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
674     ether_type_flag_t, mbuf_t);
675 static void     bridge_broadcast_list(struct bridge_softc *,
676     struct bridge_iflist *, ether_type_flag_t, mbuf_t, pkt_direction_t);
677 
678 static void     bridge_span(struct bridge_softc *, ether_type_flag_t, struct mbuf *);
679 
680 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
681     uint16_t, struct bridge_iflist *, int, uint8_t);
682 static struct bridge_iflist * bridge_rtlookup_bif(struct bridge_softc *,
683     const uint8_t[ETHER_ADDR_LEN], uint16_t);
684 static void     bridge_rttrim(struct bridge_softc *);
685 static void     bridge_rtage(struct bridge_softc *);
686 static void     bridge_rtflush(struct bridge_softc *, int);
687 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t[ETHER_ADDR_LEN],
688     uint16_t);
689 
690 static int      bridge_rtable_init(struct bridge_softc *);
691 static void     bridge_rtable_fini(struct bridge_softc *);
692 
693 static void     bridge_rthash_resize(struct bridge_softc *);
694 
695 static int      bridge_rtnode_addr_cmp(const uint8_t[ETHER_ADDR_LEN], const uint8_t[ETHER_ADDR_LEN]);
696 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
697     const uint8_t[ETHER_ADDR_LEN], uint16_t);
698 static int      bridge_rtnode_hash(struct bridge_softc *,
699     struct bridge_rtnode *);
700 static int      bridge_rtnode_insert(struct bridge_softc *,
701     struct bridge_rtnode *);
702 static void     bridge_rtnode_destroy(struct bridge_softc *,
703     struct bridge_rtnode *);
704 #if BRIDGESTP
705 static void     bridge_rtable_expire(struct ifnet *, int);
706 static void     bridge_state_change(struct ifnet *, int);
707 #endif /* BRIDGESTP */
708 
709 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
710     char * __sized_by(IFNAMSIZ) name);
711 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
712     struct ifnet *ifp);
713 static void     bridge_delete_member(struct bridge_softc *,
714     struct bridge_iflist *);
715 static void     bridge_delete_span(struct bridge_softc *,
716     struct bridge_iflist *);
717 
718 static int      bridge_ioctl_add(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
719 static int      bridge_ioctl_del(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
720 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
721 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
722 static int      bridge_ioctl_scache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
723 static int      bridge_ioctl_gcache(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
724 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
725 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
726 static int      bridge_ioctl_rts32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
727 static int      bridge_ioctl_rts64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
728 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
729 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
730 static int      bridge_ioctl_sto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
731 static int      bridge_ioctl_gto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
732 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
733 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
734 static int      bridge_ioctl_flush(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
735 static int      bridge_ioctl_gpri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
736 static int      bridge_ioctl_spri(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
737 static int      bridge_ioctl_ght(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
738 static int      bridge_ioctl_sht(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
739 static int      bridge_ioctl_gfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
740 static int      bridge_ioctl_sfd(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
741 static int      bridge_ioctl_gma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
742 static int      bridge_ioctl_sma(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
743 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
744 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
745 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
746 static int      bridge_ioctl_addspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
747 static int      bridge_ioctl_delspan(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
748 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
749 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
750 static int      bridge_ioctl_grte(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
751 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
752 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
753 static int      bridge_ioctl_sproto(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
754 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
755 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len);
756 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
757 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
758 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
759 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
760 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
761 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
762 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
763 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *__sized_by(arg_len) arg, size_t arg_len);
764 
765 static int      bridge_pf(struct mbuf **, struct ifnet *,
766     uint32_t sc_filter_flags, bool input);
767 static int bridge_ip_checkbasic(struct mbuf **);
768 static int bridge_ip6_checkbasic(struct mbuf **);
769 
770 static void bridge_detach(ifnet_t);
771 static void bridge_link_event(struct ifnet *, u_int32_t);
772 static void bridge_iflinkevent(struct ifnet *);
773 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
774 static int interface_media_active(struct ifnet *);
775 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
776 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
777 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
778 
779 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
780     struct bridge_iflist *);
781 static void bridge_mac_nat_disable(struct bridge_softc *sc);
782 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
783 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
784 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
785     struct bridge_iflist *);
786 static mbuf_t bridge_mac_nat_input(struct bridge_softc *, ifnet_t, mbuf_t,
787     ifnet_t * dst_if);
788 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
789     struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
790 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
791     const char[ETHER_ADDR_LEN]);
792 
793 static mblist bridge_mac_nat_input_list(struct bridge_softc *sc,
794     ifnet_t external_ifp, mbuf_t m, mbuf_t * forward_head);
795 static mbuf_t bridge_mac_nat_translate_list(struct bridge_softc * sc,
796     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
797 static mbuf_t bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
798     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m);
799 
800 static mbuf_t   bridge_pf_list(mbuf_t m, ifnet_t ifp,
801     uint32_t sc_filter_flags, bool input);
802 
803 static inline ifnet_t
bridge_rtlookup(struct bridge_softc * sc,const uint8_t addr[ETHER_ADDR_LEN],uint16_t vlan)804 bridge_rtlookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
805     uint16_t vlan)
806 {
807 	struct bridge_iflist *  bif;
808 	ifnet_t                 ifp = NULL;
809 
810 	bif = bridge_rtlookup_bif(sc, addr, vlan);
811 	if (bif != NULL) {
812 		ifp = bif->bif_ifp;
813 	}
814 	return ifp;
815 }
816 
817 static bool in_addr_is_ours(const struct in_addr);
818 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
819 
820 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
821 
822 static mblist
823 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx);
824 
825 static mblist
826 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
827     u_int mac_hlen, bool is_ipv4, bool is_tx);
828 
829 static inline mblist
gso_tcp_transmit(ifnet_t ifp,mbuf_t m,u_int mac_hlen,bool is_ipv4)830 gso_tcp_transmit(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4)
831 {
832 	return gso_tcp(ifp, m, mac_hlen, is_ipv4, true);
833 }
834 
/*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2); VLANTAGOF()
 * however ignores its mbuf argument and always evaluates to 0.
 */
836 #define VLANTAGOF(_m)   0
837 
838 #define BSTP_ETHERADDR_RANGE_FIRST      0x00
839 #define BSTP_ETHERADDR_RANGE_LAST       0x0f
840 
841 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
842 { 0x01, 0x80, 0xc2, 0x00, 0x00, BSTP_ETHERADDR_RANGE_FIRST };
843 
844 
845 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
846 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
847 
848 #if BRIDGESTP
849 static struct bstp_cb_ops bridge_ops = {
850 	.bcb_state = bridge_state_change,
851 	.bcb_rtage = bridge_rtable_expire
852 };
853 #endif /* BRIDGESTP */
854 
855 SYSCTL_DECL(_net_link);
856 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
857     "Bridge");
858 
859 static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
860 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
861     CTLFLAG_RW | CTLFLAG_LOCKED,
862     &bridge_inherit_mac, 0,
863     "Inherit MAC address from the first bridge member");
864 
865 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
866     CTLFLAG_RW | CTLFLAG_LOCKED,
867     &bridge_rtable_prune_period, 0,
868     "Interval between pruning of routing table");
869 
870 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
871 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
872     CTLFLAG_RW | CTLFLAG_LOCKED,
873     &bridge_rtable_hash_size_max, 0,
874     "Maximum size of the routing hash table");
875 
876 #if BRIDGE_DELAYED_CALLBACK_DEBUG
877 static int bridge_delayed_callback_delay = 0;
878 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
879     CTLFLAG_RW | CTLFLAG_LOCKED,
880     &bridge_delayed_callback_delay, 0,
881     "Delay before calling delayed function");
882 #endif
883 
884 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
885     hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
886     &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
887 
888 #if BRIDGESTP
889 static int log_stp   = 0;   /* log STP state changes */
890 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
891     &log_stp, 0, "Log STP state changes");
892 #endif /* BRIDGESTP */
893 
/*
 * One entry of a bridge control table (bridge_control_table32 /
 * bridge_control_table64).
 */
struct bridge_control {
	/* handler: receives the softc and an argument buffer of arg_len bytes */
	int             (*bc_func)(struct bridge_softc *, void *__sized_by(arg_len) args, size_t arg_len);
	/* size of the handler's argument structure (the tables use sizeof(struct ...)) */
	unsigned int    bc_argsize;
	/* combination of the BC_F_* flags */
	unsigned int    bc_flags;
};
899 
900 #define BC_F_COPYIN             0x01    /* copy arguments in */
901 #define BC_F_COPYOUT            0x02    /* copy arguments out */
902 #define BC_F_SUSER              0x04    /* do super-user check */
903 
904 static const struct bridge_control bridge_control_table32[] = {
905 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
906 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
907 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
908 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
909 
910 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
911 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
912 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
913 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
914 
915 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
916 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
917 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
918 	  .bc_flags = BC_F_COPYOUT },
919 
920 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
921 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
922 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
923 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
924 
925 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
926 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
927 
928 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
929 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
930 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
931 	  .bc_flags = BC_F_COPYOUT },
932 
933 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
934 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
935 
936 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
937 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
938 
939 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
940 	  .bc_flags = BC_F_COPYOUT },
941 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
942 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
943 
944 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
945 	  .bc_flags = BC_F_COPYOUT },
946 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
947 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
948 
949 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
950 	  .bc_flags = BC_F_COPYOUT },
951 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
952 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
953 
954 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
955 	  .bc_flags = BC_F_COPYOUT },
956 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
957 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
958 
959 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
960 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
961 
962 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
963 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
964 
965 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
966 	  .bc_flags = BC_F_COPYOUT },
967 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
968 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
969 
970 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
971 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
972 
973 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
974 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
975 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
976 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
977 
978 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
979 	  .bc_flags = BC_F_COPYOUT },
980 
981 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
982 	  .bc_flags = BC_F_COPYOUT },
983 
984 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
985 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
986 
987 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
988 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
989 
990 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
991 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
992 
993 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
994 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
995 
996 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
997 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
998 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
999 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1000 
1001 	{ .bc_func = bridge_ioctl_gmnelist32,
1002 	  .bc_argsize = sizeof(struct ifbrmnelist32),
1003 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1004 	{ .bc_func = bridge_ioctl_gifstats32,
1005 	  .bc_argsize = sizeof(struct ifbrmreq32),
1006 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1007 };
1008 
1009 static const struct bridge_control bridge_control_table64[] = {
1010 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
1011 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1012 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
1013 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1014 
1015 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
1016 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1017 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
1018 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1019 
1020 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
1021 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1022 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
1023 	  .bc_flags = BC_F_COPYOUT },
1024 
1025 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
1026 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1027 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
1028 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1029 
1030 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
1031 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1032 
1033 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
1034 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1035 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
1036 	  .bc_flags = BC_F_COPYOUT },
1037 
1038 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1039 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1040 
1041 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1042 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1043 
1044 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1045 	  .bc_flags = BC_F_COPYOUT },
1046 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1047 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1048 
1049 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1050 	  .bc_flags = BC_F_COPYOUT },
1051 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1052 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1053 
1054 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1055 	  .bc_flags = BC_F_COPYOUT },
1056 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1057 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1058 
1059 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1060 	  .bc_flags = BC_F_COPYOUT },
1061 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
1062 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1063 
1064 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1065 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1066 
1067 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1068 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1069 
1070 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1071 	  .bc_flags = BC_F_COPYOUT },
1072 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1073 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1074 
1075 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1076 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1077 
1078 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1079 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1080 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1081 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1082 
1083 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1084 	  .bc_flags = BC_F_COPYOUT },
1085 
1086 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1087 	  .bc_flags = BC_F_COPYOUT },
1088 
1089 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1090 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1091 
1092 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1093 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1094 
1095 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1096 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1097 
1098 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1099 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1100 
1101 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1102 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1103 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1104 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1105 
1106 	{ .bc_func = bridge_ioctl_gmnelist64,
1107 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1108 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1109 	{ .bc_func = bridge_ioctl_gifstats64,
1110 	  .bc_argsize = sizeof(struct ifbrmreq64),
1111 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1112 };
1113 
1114 static const unsigned int bridge_control_table_size =
1115     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1116 
1117 static LIST_HEAD(, bridge_softc) bridge_list =
1118     LIST_HEAD_INITIALIZER(bridge_list);
1119 
1120 #define BRIDGENAME      "bridge"
1121 #define BRIDGES_MAX     IF_MAXUNIT
1122 #define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)
1123 
1124 static struct if_clone bridge_cloner =
1125     IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
1126     0, BRIDGES_MAX);
1127 
1128 static int if_bridge_txstart = 0;
1129 SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
1130     &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
1131 
1132 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1133     &if_bridge_debug, 0, "Bridge debug flags");
1134 
1135 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
1136     CTLFLAG_RW | CTLFLAG_LOCKED,
1137     &if_bridge_log_level, 0, "Bridge log level");
1138 
1139 static int if_bridge_output_skip_filters = 1;
1140 SYSCTL_INT(_net_link_bridge, OID_AUTO, output_skip_filters,
1141     CTLFLAG_RW | CTLFLAG_LOCKED,
1142     &if_bridge_output_skip_filters, 0, "Bridge skip output filters");
1143 
1144 int bridge_enable_early_input = 1;   /* DLIL early input */
1145 SYSCTL_INT(_net_link_bridge, OID_AUTO, enable_early_input,
1146     CTLFLAG_RW | CTLFLAG_LOCKED,
1147     &bridge_enable_early_input, 0,
1148     "Bridge enable early input");
1149 
1150 int bridge_allow_lro_num_seg = 1;   /* allow LRO_NUM_SEG to keep LRO enabled */
1151 SYSCTL_INT(_net_link_bridge, OID_AUTO, allow_lro_num_seg,
1152     CTLFLAG_RW | CTLFLAG_LOCKED,
1153     &bridge_allow_lro_num_seg, 0,
1154     "Bridge allow LRO_NUM_SEG to keep LRO enabled");
1155 
1156 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
1157 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
1158 #define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
1159 #define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0
1160 
1161 static u_int if_bridge_tso_reduce_mss_forwarding
1162         = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
1163 static u_int if_bridge_tso_reduce_mss_tx
1164         = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1165 
1166 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1167 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1168 {
1169 	int     changed;
1170 	int     error;
1171 	u_int   new_value;
1172 
1173 	error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1174 	    &changed);
1175 	if (error == 0 && changed != 0) {
1176 		if (new_value > val_max) {
1177 			return EINVAL;
1178 		}
1179 		*val = new_value;
1180 	}
1181 	return error;
1182 }
1183 
1184 static int
1185 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1186 {
1187 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1188     BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1189 }
1190 
1191 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1192     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1193     0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1194     "Bridge tso reduce mss when forwarding");
1195 
1196 static int
1197 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1198 {
1199 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1200     BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1201 }
1202 
1203 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1204     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1205     0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1206     "Bridge tso reduce mss on transmit");
1207 
1208 #if DEBUG || DEVELOPMENT
1209 /*
1210  * net.link.bridge.reduce_tso_mtu
1211  * - when non-zero, the bridge overrides the interface TSO MTU to a lower
1212  *   value (i.e. 16K) to enable testing the "use GSO instead" path
1213  */
1214 static int if_bridge_reduce_tso_mtu = 0;
1215 SYSCTL_INT(_net_link_bridge, OID_AUTO, reduce_tso_mtu,
1216     CTLFLAG_RW | CTLFLAG_LOCKED,
1217     &if_bridge_reduce_tso_mtu, 0, "Bridge interface reduce TSO MTU");
1218 
1219 #endif /* DEBUG || DEVELOPMENT */
1220 
1221 static void brlog_ether_header(struct ether_header *);
1222 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1223 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1224 static void brlog_mbuf(mbuf_t, const char *, const char *);
1225 static void brlog_link(struct bridge_softc * sc);
1226 
1227 #if BRIDGE_LOCK_DEBUG
1228 static void bridge_lock(struct bridge_softc *);
1229 static void bridge_unlock(struct bridge_softc *);
1230 static int bridge_lock2ref(struct bridge_softc *);
1231 static void bridge_unref(struct bridge_softc *);
1232 static void bridge_xlock(struct bridge_softc *);
1233 static void bridge_xdrop(struct bridge_softc *);
1234 
1235 #define DECL_RETURN_ADDR(v) void * __single v = __unsafe_forge_single(void *, __builtin_return_address(0))
1236 
1237 static void
bridge_lock(struct bridge_softc * sc)1238 bridge_lock(struct bridge_softc *sc)
1239 {
1240 	DECL_RETURN_ADDR(lr_saved);
1241 
1242 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1243 
1244 	_BRIDGE_LOCK(sc);
1245 
1246 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1247 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1248 }
1249 
1250 static void
bridge_unlock(struct bridge_softc * sc)1251 bridge_unlock(struct bridge_softc *sc)
1252 {
1253 	DECL_RETURN_ADDR(lr_saved);
1254 
1255 	BRIDGE_LOCK_ASSERT_HELD(sc);
1256 
1257 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1258 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1259 
1260 	_BRIDGE_UNLOCK(sc);
1261 }
1262 
1263 static int
bridge_lock2ref(struct bridge_softc * sc)1264 bridge_lock2ref(struct bridge_softc *sc)
1265 {
1266 	int error = 0;
1267 	DECL_RETURN_ADDR(lr_saved);
1268 
1269 	BRIDGE_LOCK_ASSERT_HELD(sc);
1270 
1271 	if (sc->sc_iflist_xcnt > 0) {
1272 		error = EBUSY;
1273 	} else {
1274 		sc->sc_iflist_ref++;
1275 	}
1276 
1277 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1278 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1279 
1280 	_BRIDGE_UNLOCK(sc);
1281 
1282 	return error;
1283 }
1284 
1285 static void
bridge_unref(struct bridge_softc * sc)1286 bridge_unref(struct bridge_softc *sc)
1287 {
1288 	DECL_RETURN_ADDR(lr_saved);
1289 
1290 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1291 
1292 	_BRIDGE_LOCK(sc);
1293 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1294 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1295 
1296 	sc->sc_iflist_ref--;
1297 
1298 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1299 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1300 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1301 		_BRIDGE_UNLOCK(sc);
1302 		wakeup(&sc->sc_cv);
1303 	} else {
1304 		_BRIDGE_UNLOCK(sc);
1305 	}
1306 }
1307 
1308 static void
bridge_xlock(struct bridge_softc * sc)1309 bridge_xlock(struct bridge_softc *sc)
1310 {
1311 	DECL_RETURN_ADDR(lr_saved);
1312 
1313 	BRIDGE_LOCK_ASSERT_HELD(sc);
1314 
1315 	sc->sc_iflist_xcnt++;
1316 	while (sc->sc_iflist_ref > 0) {
1317 		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1318 		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1319 
1320 		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1321 
1322 		sc->lock_lr[sc->next_lock_lr] = lr_saved;
1323 		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1324 	}
1325 }
1326 
1327 #undef DECL_RETURN_ADDR
1328 
1329 static void
bridge_xdrop(struct bridge_softc * sc)1330 bridge_xdrop(struct bridge_softc *sc)
1331 {
1332 	BRIDGE_LOCK_ASSERT_HELD(sc);
1333 
1334 	sc->sc_iflist_xcnt--;
1335 }
1336 
1337 #endif /* BRIDGE_LOCK_DEBUG */
1338 
1339 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1340 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1341 {
1342 	if (m) {
1343 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1344 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1345 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1346 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1347 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1348 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1349 		    suffix ? suffix : "");
1350 	} else {
1351 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1352 	}
1353 }
1354 
1355 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1356 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1357 {
1358 	if (m) {
1359 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1360 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1361 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1362 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1363 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1364 		    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
1365 		    (unsigned int)mbuf_maxlen(m),
1366 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1367 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1368 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1369 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1370 			brlog_mbuf_pkthdr(m, "", suffix);
1371 		}
1372 	} else {
1373 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1374 	}
1375 }
1376 
1377 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1378 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1379 {
1380 	mbuf_t                  n;
1381 	size_t                  i, j;
1382 	size_t                  pktlen, mlen, maxlen;
1383 	unsigned char   *ptr;
1384 
1385 	pktlen = mbuf_pkthdr_len(m);
1386 
1387 	if (offset > pktlen) {
1388 		return;
1389 	}
1390 
1391 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1392 	n = m;
1393 	mlen = mbuf_len(n);
1394 	ptr = mtod(n, unsigned char *);
1395 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1396 		if (j >= mlen) {
1397 			n = mbuf_next(n);
1398 			if (n == 0) {
1399 				break;
1400 			}
1401 			ptr = mtod(n, unsigned char *);
1402 			mlen = mbuf_len(n);
1403 			j = 0;
1404 		}
1405 		if (i >= offset) {
1406 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1407 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1408 		}
1409 	}
1410 }
1411 
1412 static void
brlog_ether_header(struct ether_header * eh)1413 brlog_ether_header(struct ether_header *eh)
1414 {
1415 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1416 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1417 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1418 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1419 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1420 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1421 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1422 	    ntohs(eh->ether_type));
1423 }
1424 
1425 static char *
ether_ntop(char * __sized_by (len)buf,size_t len,const u_char ap[ETHER_ADDR_LEN])1426 ether_ntop(char * __sized_by(len) buf, size_t len, const u_char ap[ETHER_ADDR_LEN])
1427 {
1428 	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
1429 	    ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
1430 
1431 	return buf;
1432 }
1433 
1434 static void
brlog_link(struct bridge_softc * sc)1435 brlog_link(struct bridge_softc * sc)
1436 {
1437 	int i;
1438 	uint32_t sdl_buffer[(offsetof(struct sockaddr_dl, sdl_data) +
1439 	IFNAMSIZ + ETHER_ADDR_LEN)];
1440 	struct sockaddr_dl *sdl = SDL((uint8_t*)&sdl_buffer); /* SDL requires byte pointer */
1441 	const u_char * lladdr;
1442 	char lladdr_str[48];
1443 
1444 	memset(sdl_buffer, 0, sizeof(sdl_buffer));
1445 	sdl->sdl_family = AF_LINK;
1446 	sdl->sdl_nlen = strbuflen(sc->sc_if_xname);
1447 	sdl->sdl_alen = ETHER_ADDR_LEN;
1448 	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1449 	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1450 	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1451 	lladdr_str[0] = '\0';
1452 	for (i = 0, lladdr = CONST_LLADDR(sdl);
1453 	    i < sdl->sdl_alen;
1454 	    i++, lladdr++) {
1455 		char    byte_str[4];
1456 
1457 		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1458 		    *lladdr);
1459 		strbufcat(lladdr_str, byte_str);
1460 	}
1461 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1462 	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1463 	    " slen %d addr %s", sc->sc_if_xname,
1464 	    sdl->sdl_len, sdl->sdl_index,
1465 	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1466 	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1467 }
1468 
1469 static int
_mbuf_get_tso_mss(mbuf_t m)1470 _mbuf_get_tso_mss(mbuf_t m)
1471 {
1472 	int     mss = 0;
1473 
1474 	if ((m->m_pkthdr.csum_flags & _TSO_CSUM) != 0) {
1475 		mss = m->m_pkthdr.tso_segsz;
1476 	}
1477 	return mss;
1478 }
1479 
1480 /*
1481  * bridgeattach:
1482  *
1483  *	Pseudo-device attach routine.
1484  */
1485 __private_extern__ int
bridgeattach(int n)1486 bridgeattach(int n)
1487 {
1488 #pragma unused(n)
1489 	int error;
1490 
1491 	LIST_INIT(&bridge_list);
1492 
1493 #if BRIDGESTP
1494 	bstp_sys_init();
1495 #endif /* BRIDGESTP */
1496 
1497 	error = if_clone_attach(&bridge_cloner);
1498 	if (error != 0) {
1499 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1500 	}
1501 	return error;
1502 }
1503 
1504 static void
_mbuf_adjust_pkthdr_and_data(mbuf_t m,int len)1505 _mbuf_adjust_pkthdr_and_data(mbuf_t m, int len)
1506 {
1507 	mbuf_setdata(m, mtodo(m, len), mbuf_len(m) - len);
1508 	mbuf_pkthdr_adjustlen(m, -len);
1509 }
1510 
1511 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1512 bridge_ifnet_set_attrs(struct ifnet * ifp)
1513 {
1514 	errno_t         error;
1515 
1516 	error = ifnet_set_mtu(ifp, ETHERMTU);
1517 	if (error != 0) {
1518 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1519 		goto done;
1520 	}
1521 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1522 	if (error != 0) {
1523 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1524 		goto done;
1525 	}
1526 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1527 	if (error != 0) {
1528 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1529 		goto done;
1530 	}
1531 	error = ifnet_set_flags(ifp,
1532 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1533 	    0xffff);
1534 
1535 	if (error != 0) {
1536 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1537 		goto done;
1538 	}
1539 done:
1540 	return error;
1541 }
1542 
1543 static void
bridge_interface_proto_attach_changed(ifnet_t ifp)1544 bridge_interface_proto_attach_changed(ifnet_t ifp)
1545 {
1546 	uint32_t                        proto_count;
1547 	struct bridge_softc * __single  sc = ifp->if_softc;
1548 
1549 	proto_count = if_get_protolist(ifp, NULL, 0);
1550 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1551 	    "%s: proto count %d", ifp->if_xname, proto_count);
1552 
1553 	if (sc == NULL) {
1554 		return;
1555 	}
1556 	BRIDGE_LOCK(sc);
1557 	if ((sc->sc_flags & SCF_DETACHING) != 0) {
1558 		BRIDGE_UNLOCK(sc);
1559 		return;
1560 	}
1561 	if (proto_count >= 2) {
1562 		/* an upper layer protocol is attached */
1563 		sc->sc_flags |= SCF_PROTO_ATTACHED;
1564 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1565 		    "%s: setting SCF_PROTO_ATTACHED", ifp->if_xname);
1566 	} else {
1567 		/* an upper layer protocol was detached */
1568 		sc->sc_flags &= ~SCF_PROTO_ATTACHED;
1569 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
1570 		    "%s: clearing SCF_PROTO_ATTACHED", ifp->if_xname);
1571 	}
1572 	BRIDGE_UNLOCK(sc);
1573 }
1574 
1575 static void
bridge_interface_event(struct ifnet * ifp,__unused protocol_family_t protocol,const struct kev_msg * event)1576 bridge_interface_event(struct ifnet * ifp,
1577     __unused protocol_family_t protocol, const struct kev_msg * event)
1578 {
1579 	int         event_code;
1580 
1581 	if (event->vendor_code != KEV_VENDOR_APPLE
1582 	    || event->kev_class != KEV_NETWORK_CLASS
1583 	    || event->kev_subclass != KEV_DL_SUBCLASS) {
1584 		return;
1585 	}
1586 	event_code = event->event_code;
1587 	switch (event_code) {
1588 	case KEV_DL_PROTO_DETACHED:
1589 	case KEV_DL_PROTO_ATTACHED:
1590 		bridge_interface_proto_attach_changed(ifp);
1591 		break;
1592 	default:
1593 		break;
1594 	}
1595 	return;
1596 }
1597 
1598 /*
1599  * Function: bridge_interface_attach_protocol
1600  * Purpose:
1601  *   Attach a protocol to the bridge to get events on the interface,
1602  *   in particular, whether protocols are attached/detached.
1603  */
1604 static int
bridge_interface_attach_protocol(ifnet_t ifp)1605 bridge_interface_attach_protocol(ifnet_t ifp)
1606 {
1607 	int                                 error;
1608 	struct ifnet_attach_proto_param_v2  reg;
1609 
1610 	bzero(&reg, sizeof(reg));
1611 	reg.event = bridge_interface_event;
1612 
1613 	error = ifnet_attach_protocol_v2(ifp, PF_BRIDGE, &reg);
1614 	if (error != 0) {
1615 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
1616 		    "%s: ifnet_attach_protocol failed, %d",
1617 		    ifp->if_xname, error);
1618 	}
1619 	return error;
1620 }
1621 
1622 static void
bridge_interface_detach_protocol(ifnet_t ifp)1623 bridge_interface_detach_protocol(ifnet_t ifp)
1624 {
1625 	(void)ifnet_detach_protocol(ifp, PF_BRIDGE);
1626 }
1627 
1628 /*
1629  * bridge_clone_create:
1630  *
1631  *	Create a new bridge instance.
1632  */
1633 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1634 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1635 {
1636 #pragma unused(params)
1637 	ifnet_ref_t ifp = NULL;
1638 	struct bridge_softc *sc = NULL;
1639 	struct bridge_softc *sc2 = NULL;
1640 	struct ifnet_init_eparams init_params;
1641 	errno_t error = 0;
1642 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1643 	int fb, retry, has_hostid;
1644 
1645 	sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1646 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1647 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1648 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1649 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1650 	sc->sc_filter_flags = 0;
1651 
1652 	TAILQ_INIT(&sc->sc_iflist);
1653 
1654 	/* use the interface name as the unique id for ifp recycle */
1655 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1656 	    ifc->ifc_name, unit);
1657 	bzero(&init_params, sizeof(init_params));
1658 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1659 	init_params.len                 = sizeof(init_params);
1660 	/* Initialize our routing table. */
1661 	error = bridge_rtable_init(sc);
1662 	if (error != 0) {
1663 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1664 		goto done;
1665 	}
1666 	TAILQ_INIT(&sc->sc_spanlist);
1667 	if (if_bridge_txstart) {
1668 		init_params.start = bridge_start;
1669 	} else {
1670 		init_params.flags = IFNET_INIT_LEGACY;
1671 		init_params.output = bridge_output;
1672 	}
1673 	init_params.uniqueid_len        = strbuflen(sc->sc_if_xname);
1674 	init_params.uniqueid            = sc->sc_if_xname;
1675 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1676 	init_params.name                = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
1677 	init_params.unit                = unit;
1678 	init_params.family              = IFNET_FAMILY_ETHERNET;
1679 	init_params.type                = IFT_BRIDGE;
1680 	init_params.demux               = ether_demux;
1681 	init_params.add_proto           = ether_add_proto;
1682 	init_params.del_proto           = ether_del_proto;
1683 	init_params.check_multi         = ether_check_multi;
1684 	init_params.framer_extended     = ether_frameout_extended;
1685 	init_params.softc               = sc;
1686 	init_params.ioctl               = bridge_ioctl;
1687 	init_params.detach              = bridge_detach;
1688 	init_params.broadcast_addr      = etherbroadcastaddr;
1689 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1690 
1691 	error = ifnet_allocate_extended(&init_params, &ifp);
1692 	if (error != 0) {
1693 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1694 		goto done;
1695 	}
1696 	LIST_INIT(&sc->sc_mne_list);
1697 	LIST_INIT(&sc->sc_mne_list_v6);
1698 	sc->sc_ifp = ifp;
1699 	error = bridge_ifnet_set_attrs(ifp);
1700 	if (error != 0) {
1701 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1702 		    error);
1703 		goto done;
1704 	}
1705 	/*
1706 	 * Generate an ethernet address with a locally administered address.
1707 	 *
1708 	 * Since we are using random ethernet addresses for the bridge, it is
1709 	 * possible that we might have address collisions, so make sure that
1710 	 * this hardware address isn't already in use on another bridge.
1711 	 * The first try uses the "hostid" and falls back to read_frandom();
1712 	 * for "hostid", we use the MAC address of the first-encountered
1713 	 * Ethernet-type interface that is currently configured.
1714 	 */
1715 	fb = 0;
1716 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1717 	for (retry = 1; retry != 0;) {
1718 		if (fb || has_hostid == 0) {
1719 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1720 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1721 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1722 		} else {
1723 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1724 			    ETHER_ADDR_LEN);
1725 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1726 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1727 			sc->sc_defaddr[3] =     /* stir it up a bit */
1728 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1729 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1730 			/*
1731 			 * Mix in the LSB as it's actually pretty significant,
1732 			 * see rdar://14076061
1733 			 */
1734 			sc->sc_defaddr[4] =
1735 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1736 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1737 			    sc->sc_defaddr[5];
1738 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1739 		}
1740 
1741 		fb = 1;
1742 		retry = 0;
1743 		lck_mtx_lock(&bridge_list_mtx);
1744 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1745 			if (_ether_cmp(sc->sc_defaddr,
1746 			    IF_LLADDR(sc2->sc_ifp)) == 0) {
1747 				retry = 1;
1748 			}
1749 		}
1750 		lck_mtx_unlock(&bridge_list_mtx);
1751 	}
1752 
1753 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1754 
1755 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1756 		brlog_link(sc);
1757 	}
1758 	error = ifnet_attach(ifp, NULL);
1759 	if (error != 0) {
1760 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1761 		goto done;
1762 	}
1763 	(void)bridge_interface_attach_protocol(ifp);
1764 
1765 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1766 	    IFT_ETHER);
1767 	if (error != 0) {
1768 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1769 		    error);
1770 		goto done;
1771 	}
1772 
1773 	ifnet_set_offload(ifp,
1774 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1775 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1776 	error = bridge_set_tso(sc);
1777 	if (error != 0) {
1778 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1779 		goto done;
1780 	}
1781 #if BRIDGESTP
1782 	bstp_attach(&sc->sc_stp, &bridge_ops);
1783 #endif /* BRIDGESTP */
1784 
1785 	lck_mtx_lock(&bridge_list_mtx);
1786 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1787 	lck_mtx_unlock(&bridge_list_mtx);
1788 
1789 	/* attach as ethernet */
1790 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1791 	    NULL, NULL);
1792 
1793 done:
1794 	if (error != 0) {
1795 		if (ifp != NULL) {
1796 			bridge_interface_detach_protocol(ifp);
1797 		}
1798 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1799 		/* TBD: Clean up: sc, sc_rthash etc */
1800 	}
1801 
1802 	return error;
1803 }
1804 
1805 /*
1806  * bridge_clone_destroy:
1807  *
1808  *	Destroy a bridge instance.
1809  */
1810 static int
bridge_clone_destroy(struct ifnet * ifp)1811 bridge_clone_destroy(struct ifnet *ifp)
1812 {
1813 	struct bridge_softc * __single sc = ifp->if_softc;
1814 	struct bridge_iflist *bif;
1815 	errno_t error;
1816 
1817 	bridge_interface_detach_protocol(ifp);
1818 
1819 	BRIDGE_LOCK(sc);
1820 	if ((sc->sc_flags & SCF_DETACHING)) {
1821 		BRIDGE_UNLOCK(sc);
1822 		return 0;
1823 	}
1824 	sc->sc_flags |= SCF_DETACHING;
1825 
1826 	bridge_ifstop(ifp, 1);
1827 
1828 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1829 
1830 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1831 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1832 
1833 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1834 	if (error != 0) {
1835 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1836 	}
1837 
1838 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1839 		bridge_delete_member(sc, bif);
1840 	}
1841 
1842 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1843 		bridge_delete_span(sc, bif);
1844 	}
1845 	BRIDGE_UNLOCK(sc);
1846 
1847 	error = ifnet_detach(ifp);
1848 	if (error != 0) {
1849 		panic("%s (%d): ifnet_detach(%p) failed %d",
1850 		    __func__, __LINE__, ifp, error);
1851 	}
1852 	return 0;
1853 }
1854 
/*
 * DRVSPEC:
 *
 *	Shared body of the driver-specific ioctl cases in bridge_ioctl().
 *	The expansion site must provide `sc', `cmd', `error', `args',
 *	`ifd', `bc' and `bridge_control_table'.
 *
 *	In order: validate the command index, check that the direction of
 *	the ioctl matches the handler's copy-out flag, enforce the
 *	superuser requirement, check the argument size, copy the
 *	arguments in when required, run the handler with the bridge lock
 *	held, and copy the result out when required.  The first failure
 *	leaves its error code in `error' and skips the remaining steps.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	if ((cmd == SIOCGDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) ||                      \
	    (cmd == SIOCSDRVSPEC &&                                     \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) {                      \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if ((bc->bc_flags & BC_F_SUSER) != 0) {                         \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) {                        \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error != 0) {                                       \
	                break;                                          \
	        }                                                       \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args, sizeof(args));                \
	BRIDGE_UNLOCK(sc);                                              \
	if (error != 0) {                                               \
	        break;                                                  \
	}                                                               \
                                                                        \
	if ((bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
	}                                                               \
} while (0)
1901 
1902 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1903 interface_needs_input_broadcast(struct ifnet * ifp)
1904 {
1905 	/*
1906 	 * Selectively enable input broadcast only when necessary.
1907 	 * The bridge interface itself attaches a fake protocol
1908 	 * so checking for at least two protocols means that the
1909 	 * interface is being used for something besides bridging
1910 	 * and needs to see broadcast packets from other members.
1911 	 */
1912 	return if_get_protolist(ifp, NULL, 0) >= 2;
1913 }
1914 
1915 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1916 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1917 {
1918 	boolean_t       old_input_broadcast;
1919 
1920 	old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1921 	if (input_broadcast) {
1922 		bif->bif_flags |= BIFF_INPUT_BROADCAST;
1923 	} else {
1924 		bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1925 	}
1926 	return old_input_broadcast != input_broadcast;
1927 }
1928 
1929 /*
1930  * bridge_ioctl:
1931  *
1932  *	Handle a control request from the operator.
1933  */
1934 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * __sized_by (IOCPARM_LEN (cmd))data)1935 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *__sized_by(IOCPARM_LEN(cmd)) data)
1936 {
1937 	struct bridge_softc * __single sc = ifp->if_softc;
1938 	struct ifreq *ifr = (struct ifreq *)data;
1939 	struct bridge_iflist *bif;
1940 	int error = 0;
1941 
1942 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1943 
1944 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1945 	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1946 	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1947 	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1948 	    (char)IOCGROUP(cmd), cmd & 0xff);
1949 
1950 	switch (cmd) {
1951 	case SIOCSIFADDR:
1952 	case SIOCAIFADDR:
1953 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1954 		break;
1955 
1956 	case SIOCGIFMEDIA32:
1957 	case SIOCGIFMEDIA64: {
1958 		// cast to 32bit version to work within bounds with 32bit userspace
1959 		struct ifmediareq32 *ifmr = (struct ifmediareq32 *)data;
1960 		user_addr_t user_addr;
1961 
1962 		user_addr = (cmd == SIOCGIFMEDIA64) ?
1963 		    ((struct ifmediareq64 *)data)->ifmu_ulist :
1964 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
1965 
1966 		ifmr->ifm_status = IFM_AVALID;
1967 		ifmr->ifm_mask = 0;
1968 		ifmr->ifm_count = 1;
1969 
1970 		BRIDGE_LOCK(sc);
1971 		if (!(sc->sc_flags & SCF_DETACHING) &&
1972 		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1973 			ifmr->ifm_status |= IFM_ACTIVE;
1974 			ifmr->ifm_active = ifmr->ifm_current =
1975 			    IFM_ETHER | IFM_AUTO;
1976 		} else {
1977 			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1978 		}
1979 		BRIDGE_UNLOCK(sc);
1980 
1981 		if (user_addr != USER_ADDR_NULL) {
1982 			error = copyout(&ifmr->ifm_current, user_addr,
1983 			    sizeof(int));
1984 		}
1985 		break;
1986 	}
1987 
1988 	case SIOCADDMULTI:
1989 	case SIOCDELMULTI:
1990 		break;
1991 
1992 	case SIOCSDRVSPEC32:
1993 	case SIOCGDRVSPEC32: {
1994 		union {
1995 			struct ifbreq ifbreq;
1996 			struct ifbifconf32 ifbifconf;
1997 			struct ifbareq32 ifbareq;
1998 			struct ifbaconf32 ifbaconf;
1999 			struct ifbrparam ifbrparam;
2000 			struct ifbropreq32 ifbropreq;
2001 		} args;
2002 		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
2003 		const struct bridge_control *bridge_control_table =
2004 		    bridge_control_table32, *bc;
2005 
2006 		DRVSPEC;
2007 
2008 		break;
2009 	}
2010 	case SIOCSDRVSPEC64:
2011 	case SIOCGDRVSPEC64: {
2012 		union {
2013 			struct ifbreq ifbreq;
2014 			struct ifbifconf64 ifbifconf;
2015 			struct ifbareq64 ifbareq;
2016 			struct ifbaconf64 ifbaconf;
2017 			struct ifbrparam ifbrparam;
2018 			struct ifbropreq64 ifbropreq;
2019 		} args;
2020 		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
2021 		const struct bridge_control *bridge_control_table =
2022 		    bridge_control_table64, *bc;
2023 
2024 		DRVSPEC;
2025 
2026 		break;
2027 	}
2028 
2029 	case SIOCSIFFLAGS:
2030 		if (!(ifp->if_flags & IFF_UP) &&
2031 		    (ifp->if_flags & IFF_RUNNING)) {
2032 			/*
2033 			 * If interface is marked down and it is running,
2034 			 * then stop and disable it.
2035 			 */
2036 			BRIDGE_LOCK(sc);
2037 			bridge_ifstop(ifp, 1);
2038 			BRIDGE_UNLOCK(sc);
2039 		} else if ((ifp->if_flags & IFF_UP) &&
2040 		    !(ifp->if_flags & IFF_RUNNING)) {
2041 			/*
2042 			 * If interface is marked up and it is stopped, then
2043 			 * start it.
2044 			 */
2045 			BRIDGE_LOCK(sc);
2046 			error = bridge_init(ifp);
2047 			BRIDGE_UNLOCK(sc);
2048 		}
2049 		break;
2050 
2051 	case SIOCSIFLLADDR:
2052 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
2053 		    ifr->ifr_addr.sa_len);
2054 		if (error != 0) {
2055 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2056 			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
2057 			    error);
2058 		}
2059 		break;
2060 
2061 	case SIOCSIFMTU:
2062 		if (ifr->ifr_mtu < 576) {
2063 			error = EINVAL;
2064 			break;
2065 		}
2066 		BRIDGE_LOCK(sc);
2067 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2068 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2069 			BRIDGE_UNLOCK(sc);
2070 			break;
2071 		}
2072 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2073 			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
2074 				BRIDGE_LOG(LOG_NOTICE, 0,
2075 				    "%s invalid MTU: %u(%s) != %d",
2076 				    sc->sc_ifp->if_xname,
2077 				    bif->bif_ifp->if_mtu,
2078 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
2079 				error = EINVAL;
2080 				break;
2081 			}
2082 		}
2083 		if (!error) {
2084 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
2085 		}
2086 		BRIDGE_UNLOCK(sc);
2087 		break;
2088 
2089 	default:
2090 		error = ether_ioctl(ifp, cmd, data);
2091 		if (error != 0 && error != EOPNOTSUPP) {
2092 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
2093 			    "ifp %s cmd 0x%08lx "
2094 			    "(%c%c [%lu] %c %lu) failed error: %d",
2095 			    ifp->if_xname, cmd,
2096 			    (cmd & IOC_IN) ? 'I' : ' ',
2097 			    (cmd & IOC_OUT) ? 'O' : ' ',
2098 			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
2099 			    cmd & 0xff, error);
2100 		}
2101 		break;
2102 	}
2103 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2104 
2105 	return error;
2106 }
2107 
2108 #if HAS_IF_CAP
2109 /*
2110  * bridge_mutecaps:
2111  *
2112  *	Clear or restore unwanted capabilities on the member interface
2113  */
2114 static void
bridge_mutecaps(struct bridge_softc * sc)2115 bridge_mutecaps(struct bridge_softc *sc)
2116 {
2117 	struct bridge_iflist *bif;
2118 	int enabled, mask;
2119 
2120 	/* Initial bitmask of capabilities to test */
2121 	mask = BRIDGE_IFCAPS_MASK;
2122 
2123 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2124 		/* Every member must support it or its disabled */
2125 		mask &= bif->bif_savedcaps;
2126 	}
2127 
2128 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2129 		enabled = bif->bif_ifp->if_capenable;
2130 		enabled &= ~BRIDGE_IFCAPS_STRIP;
2131 		/* strip off mask bits and enable them again if allowed */
2132 		enabled &= ~BRIDGE_IFCAPS_MASK;
2133 		enabled |= mask;
2134 
2135 		bridge_set_ifcap(sc, bif, enabled);
2136 	}
2137 }
2138 
2139 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2140 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2141 {
2142 	struct ifnet *ifp = bif->bif_ifp;
2143 	struct ifreq ifr;
2144 	int error;
2145 
2146 	bzero(&ifr, sizeof(ifr));
2147 	ifr.ifr_reqcap = set;
2148 
2149 	if (ifp->if_capenable != set) {
2150 		IFF_LOCKGIANT(ifp);
2151 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2152 		IFF_UNLOCKGIANT(ifp);
2153 		if (error) {
2154 			BRIDGE_LOG(LOG_NOTICE, 0,
2155 			    "%s error setting interface capabilities on %s",
2156 			    sc->sc_ifp->if_xname, ifp->if_xname);
2157 		}
2158 	}
2159 }
2160 #endif /* HAS_IF_CAP */
2161 
2162 static errno_t
siocsifcap(struct ifnet * ifp,uint32_t cap_enable)2163 siocsifcap(struct ifnet * ifp, uint32_t cap_enable)
2164 {
2165 	struct ifreq    ifr;
2166 
2167 	bzero(&ifr, sizeof(ifr));
2168 	ifr.ifr_reqcap = cap_enable;
2169 	return ifnet_ioctl(ifp, 0, SIOCSIFCAP, &ifr);
2170 }
2171 
2172 static const char *
enable_disable_str(boolean_t enable)2173 enable_disable_str(boolean_t enable)
2174 {
2175 	return (const char * __null_terminated)(enable ? "enable" : "disable");
2176 }
2177 
2178 static boolean_t
bridge_set_lro(struct ifnet * ifp,boolean_t enable)2179 bridge_set_lro(struct ifnet * ifp, boolean_t enable)
2180 {
2181 	uint32_t        cap_enable;
2182 	uint32_t        cap_supported;
2183 	boolean_t       changed = FALSE;
2184 	boolean_t       lro_enabled;
2185 
2186 	cap_supported = ifnet_capabilities_supported(ifp);
2187 	if ((cap_supported & IFCAP_LRO) == 0) {
2188 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2189 		    "%s doesn't support LRO",
2190 		    ifp->if_xname);
2191 		goto done;
2192 	}
2193 	if (bridge_allow_lro_num_seg != 0 &&
2194 	    (cap_supported & IFCAP_LRO_NUM_SEG) != 0) {
2195 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2196 		    "%s supports LRO_NUM_SEG, leaving LRO enabled",
2197 		    ifp->if_xname);
2198 		goto done;
2199 	}
2200 	cap_enable = ifnet_capabilities_enabled(ifp);
2201 	lro_enabled = (cap_enable & IFCAP_LRO) != 0;
2202 	if (lro_enabled != enable) {
2203 		errno_t         error;
2204 
2205 		if (enable) {
2206 			cap_enable |= IFCAP_LRO;
2207 		} else {
2208 			cap_enable &= ~IFCAP_LRO;
2209 		}
2210 		error = siocsifcap(ifp, cap_enable);
2211 		if (error != 0) {
2212 			BRIDGE_LOG(LOG_NOTICE, 0,
2213 			    "%s %s failed (cap 0x%x) %d",
2214 			    ifp->if_xname,
2215 			    enable_disable_str(enable),
2216 			    cap_enable,
2217 			    error);
2218 		} else {
2219 			changed = TRUE;
2220 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2221 			    "%s %s success (cap 0x%x)",
2222 			    ifp->if_xname,
2223 			    enable_disable_str(enable),
2224 			    cap_enable);
2225 		}
2226 	}
2227 done:
2228 	return changed;
2229 }
2230 
2231 static errno_t
bridge_set_tso(struct bridge_softc * sc)2232 bridge_set_tso(struct bridge_softc *sc)
2233 {
2234 	struct bridge_iflist *bif;
2235 	u_int32_t tso_v4_mtu;
2236 	u_int32_t tso_v6_mtu;
2237 	ifnet_offload_t offload;
2238 	errno_t error = 0;
2239 
2240 	/* By default, support TSO */
2241 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2242 	tso_v4_mtu = IP_MAXPACKET;
2243 	tso_v6_mtu = IP_MAXPACKET;
2244 
2245 	/* Use the lowest common denominator of the members */
2246 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2247 		ifnet_t ifp = bif->bif_ifp;
2248 
2249 		if (ifp == NULL) {
2250 			continue;
2251 		}
2252 
2253 		if (offload & IFNET_TSO_IPV4) {
2254 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2255 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2256 					tso_v4_mtu = ifp->if_tso_v4_mtu;
2257 				}
2258 			} else {
2259 				offload &= ~IFNET_TSO_IPV4;
2260 				tso_v4_mtu = 0;
2261 			}
2262 		}
2263 		if (offload & IFNET_TSO_IPV6) {
2264 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2265 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2266 					tso_v6_mtu = ifp->if_tso_v6_mtu;
2267 				}
2268 			} else {
2269 				offload &= ~IFNET_TSO_IPV6;
2270 				tso_v6_mtu = 0;
2271 			}
2272 		}
2273 	}
2274 
2275 	if (offload != sc->sc_ifp->if_hwassist) {
2276 		error = ifnet_set_offload(sc->sc_ifp, offload);
2277 		if (error != 0) {
2278 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2279 			    "ifnet_set_offload(%s, 0x%x) failed %d",
2280 			    sc->sc_ifp->if_xname, offload, error);
2281 			goto done;
2282 		}
2283 		/*
2284 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2285 		 * as large as the interface MTU
2286 		 */
2287 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2288 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2289 				tso_v4_mtu = sc->sc_ifp->if_mtu;
2290 			}
2291 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2292 			    tso_v4_mtu);
2293 			if (error != 0) {
2294 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2295 				    "ifnet_set_tso_mtu(%s, "
2296 				    "AF_INET, %u) failed %d",
2297 				    sc->sc_ifp->if_xname,
2298 				    tso_v4_mtu, error);
2299 				goto done;
2300 			}
2301 		}
2302 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2303 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2304 				tso_v6_mtu = sc->sc_ifp->if_mtu;
2305 			}
2306 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2307 			    tso_v6_mtu);
2308 			if (error != 0) {
2309 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2310 				    "ifnet_set_tso_mtu(%s, "
2311 				    "AF_INET6, %u) failed %d",
2312 				    sc->sc_ifp->if_xname,
2313 				    tso_v6_mtu, error);
2314 				goto done;
2315 			}
2316 		}
2317 	}
2318 done:
2319 	return error;
2320 }
2321 
2322 static const char *
sanitize_ifname(char * __sized_by (IFNAMSIZ)ifname)2323 sanitize_ifname(char * __sized_by(IFNAMSIZ) ifname)
2324 {
2325 	ifname[IFNAMSIZ - 1] = '\0';
2326 	return __unsafe_null_terminated_from_indexable(ifname, &ifname[IFNAMSIZ - 1]);
2327 }
2328 
2329 /*
2330  * bridge_lookup_member:
2331  *
2332  *	Lookup a bridge member interface.
2333  */
2334 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,char * __sized_by (IFNAMSIZ)name_unsanitized)2335 bridge_lookup_member(struct bridge_softc *sc, char * __sized_by(IFNAMSIZ) name_unsanitized)
2336 {
2337 	struct bridge_iflist *bif;
2338 	struct ifnet *ifp;
2339 	const char * __null_terminated name = sanitize_ifname(name_unsanitized);
2340 
2341 	BRIDGE_LOCK_ASSERT_HELD(sc);
2342 
2343 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2344 		ifp = bif->bif_ifp;
2345 		if (strcmp(ifp->if_xname, name) == 0) {
2346 			return bif;
2347 		}
2348 	}
2349 
2350 	return NULL;
2351 }
2352 
2353 /*
2354  * bridge_lookup_member_if:
2355  *
2356  *	Lookup a bridge member interface by ifnet*.
2357  */
2358 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2359 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2360 {
2361 	struct bridge_iflist *bif;
2362 
2363 	BRIDGE_LOCK_ASSERT_HELD(sc);
2364 
2365 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2366 		if (bif->bif_ifp == member_ifp) {
2367 			return bif;
2368 		}
2369 	}
2370 
2371 	return NULL;
2372 }
2373 
2374 static inline bool
get_and_clear_promisc(mbuf_t m)2375 get_and_clear_promisc(mbuf_t m)
2376 {
2377 	bool    is_promisc;
2378 
2379 	/*
2380 	 * Need to clear the promiscuous flag otherwise the packet will be
2381 	 * dropped by DLIL after processing filters
2382 	 */
2383 	is_promisc = (mbuf_flags(m) & MBUF_PROMISC) != 0;
2384 	if (is_promisc) {
2385 		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
2386 	}
2387 	return is_promisc;
2388 }
2389 
2390 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2391 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2392     mbuf_t *data, char **frame_ptr)
2393 {
2394 #pragma unused(protocol)
2395 	errno_t error = 0;
2396 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2397 	struct bridge_softc *sc = bif->bif_sc;
2398 	int included = 0;
2399 	struct ether_header * eh_p;
2400 	size_t frmlen = 0;
2401 	bool is_promisc;
2402 	mblist list;
2403 	mbuf_t m = *data;
2404 
2405 	if ((m->m_flags & M_PROTO1)) {
2406 		goto out;
2407 	}
2408 
2409 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2410 	    *frame_ptr <= mtod(m, char *)) {
2411 		included = 1;
2412 		frmlen = mtod(m, char *) - *frame_ptr;
2413 	}
2414 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2415 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2416 	    "frmlen %lu", sc->sc_ifp->if_xname,
2417 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2418 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
2419 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2420 	    included ? "inside" : "outside", frmlen);
2421 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2422 		brlog_mbuf(m, "bridge_iff_input[", "");
2423 		brlog_ether_header((struct ether_header *)
2424 		    (void *)*frame_ptr);
2425 		brlog_mbuf_data(m, 0, 20);
2426 	}
2427 	if (included == 0) {
2428 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2429 		goto out;
2430 	}
2431 
2432 	/* Move data pointer to start of frame to the link layer header */
2433 	_mbuf_adjust_pkthdr_and_data(m, -frmlen);
2434 
2435 	/* make sure we can access the ethernet header */
2436 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2437 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2438 		    "short frame %lu < %lu",
2439 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2440 		goto out;
2441 	}
2442 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2443 		error = mbuf_pullup(data, sizeof(struct ether_header));
2444 		if (error != 0) {
2445 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2446 			    "mbuf_pullup(%lu) failed %d",
2447 			    sizeof(struct ether_header),
2448 			    error);
2449 			error = EJUSTRETURN;
2450 			goto out;
2451 		}
2452 		if (m != *data) {
2453 			m = *data;
2454 			*frame_ptr = mtod(m, char *);
2455 		}
2456 	}
2457 	mblist_init(&list);
2458 	mblist_append(&list, m);
2459 	is_promisc = get_and_clear_promisc(m);
2460 	eh_p = __unsafe_forge_single(struct ether_header *, *frame_ptr);
2461 	list = bridge_input_list(sc, ifp, eh_p, list, is_promisc);
2462 	m = *data = list.head;
2463 	if (m == NULL) {
2464 		error = EJUSTRETURN;
2465 	}
2466 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2467 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2468 		brlog_mbuf(m, "bridge_iff_input]", "");
2469 	}
2470 
2471 out:
2472 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2473 
2474 	return error;
2475 }
2476 
2477 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2478 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2479     mbuf_t *data)
2480 {
2481 #pragma unused(protocol)
2482 	errno_t error = 0;
2483 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2484 	struct bridge_softc *sc = bif->bif_sc;
2485 	mbuf_t m = *data;
2486 
2487 	if ((m->m_flags & M_PROTO1)) {
2488 		goto out;
2489 	}
2490 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2491 	    "%s from %s m 0x%llx data 0x%llx",
2492 	    sc->sc_ifp->if_xname, ifp->if_xname,
2493 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2494 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)));
2495 
2496 	error = bridge_member_output(sc, ifp, data);
2497 	if (error != 0 && error != EJUSTRETURN) {
2498 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2499 		    "bridge_member_output failed error %d",
2500 		    error);
2501 	}
2502 out:
2503 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2504 
2505 	return error;
2506 }
2507 
2508 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2509 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2510     const struct kev_msg *event_msg)
2511 {
2512 #pragma unused(protocol)
2513 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2514 	struct bridge_softc *sc = bif->bif_sc;
2515 
2516 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2517 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2518 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2519 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2520 		    "%s event_code %u - %s",
2521 		    ifp->if_xname, event_msg->event_code,
2522 		    dlil_kev_dl_code_str(event_msg->event_code));
2523 
2524 		switch (event_msg->event_code) {
2525 		case KEV_DL_LINK_OFF:
2526 		case KEV_DL_LINK_ON: {
2527 			bridge_iflinkevent(ifp);
2528 #if BRIDGESTP
2529 			bstp_linkstate(ifp, event_msg->event_code);
2530 #endif /* BRIDGESTP */
2531 			break;
2532 		}
2533 		case KEV_DL_SIFFLAGS: {
2534 			if ((ifp->if_flags & IFF_UP) == 0) {
2535 				break;
2536 			}
2537 			if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2538 				errno_t error;
2539 
2540 				error = ifnet_set_promiscuous(ifp, 1);
2541 				if (error != 0) {
2542 					BRIDGE_LOG(LOG_NOTICE, 0,
2543 					    "ifnet_set_promiscuous (%s)"
2544 					    " failed %d", ifp->if_xname,
2545 					    error);
2546 				} else {
2547 					bif->bif_flags |= BIFF_PROMISC;
2548 				}
2549 			}
2550 			if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2551 			    (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2552 				errno_t error;
2553 
2554 				error = if_allmulti(ifp, 1);
2555 				if (error != 0) {
2556 					BRIDGE_LOG(LOG_NOTICE, 0,
2557 					    "if_allmulti (%s)"
2558 					    " failed %d", ifp->if_xname,
2559 					    error);
2560 				} else {
2561 					bif->bif_flags |= BIFF_ALL_MULTI;
2562 #ifdef XNU_PLATFORM_AppleTVOS
2563 					ip6_forwarding = 1;
2564 #endif /* XNU_PLATFORM_AppleTVOS */
2565 				}
2566 			}
2567 			break;
2568 		}
2569 		case KEV_DL_IFCAP_CHANGED: {
2570 			BRIDGE_LOCK(sc);
2571 			bridge_set_tso(sc);
2572 			BRIDGE_UNLOCK(sc);
2573 			break;
2574 		}
2575 		case KEV_DL_PROTO_DETACHED:
2576 		case KEV_DL_PROTO_ATTACHED: {
2577 			bridge_proto_attach_changed(ifp);
2578 			break;
2579 		}
2580 		default:
2581 			break;
2582 		}
2583 	}
2584 }
2585 
2586 /*
2587  * bridge_iff_detached:
2588  *
2589  *      Called when our interface filter has been detached from a
2590  *      member interface.
2591  */
2592 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2593 bridge_iff_detached(void *cookie, ifnet_t ifp)
2594 {
2595 #pragma unused(cookie)
2596 	struct bridge_iflist *bif;
2597 	struct bridge_softc * __single sc = ifp->if_bridge;
2598 
2599 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2600 
2601 	/* Check if the interface is a bridge member */
2602 	if (sc != NULL) {
2603 		BRIDGE_LOCK(sc);
2604 		bif = bridge_lookup_member_if(sc, ifp);
2605 		if (bif != NULL) {
2606 			bridge_delete_member(sc, bif);
2607 		}
2608 		BRIDGE_UNLOCK(sc);
2609 		return;
2610 	}
2611 	/* Check if the interface is a span port */
2612 	lck_mtx_lock(&bridge_list_mtx);
2613 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2614 		BRIDGE_LOCK(sc);
2615 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2616 		if (ifp == bif->bif_ifp) {
2617 			bridge_delete_span(sc, bif);
2618 			break;
2619 		}
2620 		BRIDGE_UNLOCK(sc);
2621 	}
2622 	lck_mtx_unlock(&bridge_list_mtx);
2623 }
2624 
2625 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2626 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2627     char *header)
2628 {
2629 #pragma unused(protocol, packet, header)
2630 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2631 	    ifp->if_xname);
2632 	return 0;
2633 }
2634 
2635 static int
bridge_attach_protocol(struct ifnet * ifp)2636 bridge_attach_protocol(struct ifnet *ifp)
2637 {
2638 	int     error;
2639 	struct ifnet_attach_proto_param reg;
2640 
2641 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2642 	bzero(&reg, sizeof(reg));
2643 	reg.input = bridge_proto_input;
2644 
2645 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2646 	if (error) {
2647 		BRIDGE_LOG(LOG_NOTICE, 0,
2648 		    "ifnet_attach_protocol(%s) failed, %d",
2649 		    ifp->if_xname, error);
2650 	}
2651 
2652 	return error;
2653 }
2654 
2655 static int
bridge_detach_protocol(struct ifnet * ifp)2656 bridge_detach_protocol(struct ifnet *ifp)
2657 {
2658 	int     error;
2659 
2660 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2661 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2662 	if (error) {
2663 		BRIDGE_LOG(LOG_NOTICE, 0,
2664 		    "ifnet_detach_protocol(%s) failed, %d",
2665 		    ifp->if_xname, error);
2666 	}
2667 
2668 	return error;
2669 }
2670 
2671 /*
2672  * bridge_delete_member:
2673  *
2674  *	Delete the specified member interface.
2675  */
2676 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2677 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2678 {
2679 #if SKYWALK
2680 	boolean_t add_netagent = FALSE;
2681 #endif /* SKYWALK */
2682 	uint32_t    bif_flags;
2683 	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2684 	int lladdr_changed = 0, error;
2685 	uint8_t eaddr[ETHER_ADDR_LEN];
2686 	u_int32_t event_code = 0;
2687 
2688 	BRIDGE_LOCK_ASSERT_HELD(sc);
2689 	VERIFY(ifs != NULL);
2690 
2691 	/*
2692 	 * Remove the member from the list first so it cannot be found anymore
2693 	 * when we release the bridge lock below
2694 	 */
2695 	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2696 		bif->bif_flags &= ~BIFF_IN_MEMBER_LIST;
2697 		BRIDGE_XLOCK(sc);
2698 		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2699 		BRIDGE_XDROP(sc);
2700 	}
2701 	if (sc->sc_mac_nat_bif != NULL) {
2702 		if (bif == sc->sc_mac_nat_bif) {
2703 			bridge_mac_nat_disable(sc);
2704 		} else {
2705 			bridge_mac_nat_flush_entries(sc, bif);
2706 		}
2707 	}
2708 #if BRIDGESTP
2709 	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2710 		bstp_disable(&bif->bif_stp);
2711 	}
2712 #endif /* BRIDGESTP */
2713 
2714 	/*
2715 	 * If removing the interface that gave the bridge its mac address, set
2716 	 * the mac address of the bridge to the address of the next member, or
2717 	 * to its default address if no members are left.
2718 	 */
2719 	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2720 		ifnet_release(sc->sc_ifaddr);
2721 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2722 			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2723 			sc->sc_ifaddr = NULL;
2724 		} else {
2725 			struct ifnet *fif =
2726 			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2727 			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2728 			sc->sc_ifaddr = fif;
2729 			ifnet_reference(fif);   /* for sc_ifaddr */
2730 		}
2731 		lladdr_changed = 1;
2732 	}
2733 
2734 #if HAS_IF_CAP
2735 	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
2736 #endif /* HAS_IF_CAP */
2737 
2738 	error = bridge_set_tso(sc);
2739 	if (error != 0) {
2740 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2741 	}
2742 
2743 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2744 
2745 	KASSERT(bif->bif_addrcnt == 0,
2746 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2747 
2748 	/*
2749 	 * Update link status of the bridge based on its remaining members
2750 	 */
2751 	event_code = bridge_updatelinkstatus(sc);
2752 	bif_flags = bif->bif_flags;
2753 	BRIDGE_UNLOCK(sc);
2754 
2755 	/* only perform these steps if the interface is still attached */
2756 	if (ifnet_is_attached(ifs, 1)) {
2757 #if SKYWALK
2758 		add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2759 
2760 		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2761 			ifnet_detach_flowswitch_nexus(ifs);
2762 		}
2763 #endif /* SKYWALK */
2764 		/* disable promiscuous mode */
2765 		if ((bif_flags & BIFF_PROMISC) != 0) {
2766 			(void) ifnet_set_promiscuous(ifs, 0);
2767 		}
2768 		/* disable all multi */
2769 		if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2770 			(void)if_allmulti(ifs, 0);
2771 		}
2772 #if HAS_IF_CAP
2773 		/* re-enable any interface capabilities */
2774 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2775 #endif
2776 		/* detach bridge "protocol" */
2777 		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2778 			(void)bridge_detach_protocol(ifs);
2779 		}
2780 		/* detach interface filter */
2781 		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2782 			iflt_detach(bif->bif_iff_ref);
2783 		}
2784 		/* re-enable LRO */
2785 		if ((bif_flags & BIFF_LRO_DISABLED) != 0) {
2786 			(void)bridge_set_lro(ifs, TRUE);
2787 		}
2788 		ifnet_decr_iorefcnt(ifs);
2789 	}
2790 
2791 	if (lladdr_changed &&
2792 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2793 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2794 	}
2795 
2796 	if (event_code != 0) {
2797 		bridge_link_event(bifp, event_code);
2798 	}
2799 
2800 #if BRIDGESTP
2801 	bstp_destroy(&bif->bif_stp);    /* prepare to free */
2802 #endif /* BRIDGESTP */
2803 
2804 	kfree_type(struct bridge_iflist, bif);
2805 	ifs->if_bridge = NULL;
2806 #if SKYWALK
2807 	if (add_netagent && ifnet_is_attached(ifs, 1)) {
2808 		(void)ifnet_add_netagent(ifs);
2809 		ifnet_decr_iorefcnt(ifs);
2810 	}
2811 #endif /* SKYWALK */
2812 
2813 	ifnet_release(ifs);
2814 
2815 	BRIDGE_LOCK(sc);
2816 }
2817 
2818 /*
2819  * bridge_delete_span:
2820  *
2821  *	Delete the specified span interface.
2822  */
2823 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2824 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2825 {
2826 	BRIDGE_LOCK_ASSERT_HELD(sc);
2827 
2828 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2829 	    ("%s: not a span interface", __func__));
2830 
2831 	ifnet_release(bif->bif_ifp);
2832 
2833 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2834 	kfree_type(struct bridge_iflist, bif);
2835 }
2836 
2837 static int
bridge_ioctl_add(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)2838 bridge_ioctl_add(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
2839 {
2840 	struct ifbreq * __single req = arg;
2841 	struct bridge_iflist *bif = NULL;
2842 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2843 	int error = 0, lladdr_changed = 0;
2844 	uint8_t eaddr[ETHER_ADDR_LEN];
2845 	struct iff_filter iff;
2846 	u_int32_t event_code = 0;
2847 	boolean_t input_broadcast;
2848 	int media_active;
2849 	boolean_t wifi_infra = FALSE;
2850 
2851 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
2852 	if (ifs == NULL) {
2853 		return ENOENT;
2854 	}
2855 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2856 		return EINVAL;
2857 	}
2858 
2859 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2860 		return EINVAL;
2861 	}
2862 
2863 	/* If it's in the span list, it can't be a member. */
2864 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2865 		if (ifs == bif->bif_ifp) {
2866 			return EBUSY;
2867 		}
2868 	}
2869 
2870 	if (ifs->if_bridge == sc) {
2871 		return EEXIST;
2872 	}
2873 
2874 	if (ifs->if_bridge != NULL) {
2875 		return EBUSY;
2876 	}
2877 
2878 	switch (ifs->if_type) {
2879 	case IFT_ETHER:
2880 		if (strcmp(ifs->if_name, "en") == 0 &&
2881 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2882 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2883 			/* XXX is there a better way to identify Wi-Fi STA? */
2884 			wifi_infra = TRUE;
2885 		}
2886 		break;
2887 	case IFT_L2VLAN:
2888 	case IFT_IEEE8023ADLAG:
2889 		break;
2890 	default:
2891 		return EINVAL;
2892 	}
2893 
2894 	/* fail to add the interface if the MTU doesn't match */
2895 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2896 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2897 		    sc->sc_ifp->if_xname,
2898 		    ifs->if_xname);
2899 		return EINVAL;
2900 	}
2901 
2902 	if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2903 		/* there's already an interface that's doing MAC NAT */
2904 		return EBUSY;
2905 	}
2906 
2907 	/* prevent the interface from detaching while we add the member */
2908 	if (!ifnet_is_attached(ifs, 1)) {
2909 		return ENXIO;
2910 	}
2911 
2912 	/* allocate a new member */
2913 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2914 	bif->bif_ifp = ifs;
2915 	ifnet_reference(ifs);
2916 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2917 #if HAS_IF_CAP
2918 	bif->bif_savedcaps = ifs->if_capenable;
2919 #endif /* HAS_IF_CAP */
2920 	bif->bif_sc = sc;
2921 	if (wifi_infra) {
2922 		(void)bridge_mac_nat_enable(sc, bif);
2923 	}
2924 
2925 	/* Allow the first Ethernet member to define the MTU */
2926 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2927 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2928 	}
2929 
2930 	/*
2931 	 * Assign the interface's MAC address to the bridge if it's the first
2932 	 * member and the MAC address of the bridge has not been changed from
2933 	 * the default (randomly) generated one.
2934 	 */
2935 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2936 	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2937 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2938 		sc->sc_ifaddr = ifs;
2939 		ifnet_reference(ifs);   /* for sc_ifaddr */
2940 		lladdr_changed = 1;
2941 	}
2942 
2943 	ifs->if_bridge = sc;
2944 #if BRIDGESTP
2945 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2946 #endif /* BRIDGESTP */
2947 
2948 #if HAS_IF_CAP
2949 	/* Set interface capabilities to the intersection set of all members */
2950 	bridge_mutecaps(sc);
2951 #endif /* HAS_IF_CAP */
2952 
2953 	/*
2954 	 * Respect lock ordering with DLIL lock for the following operations
2955 	 */
2956 	BRIDGE_UNLOCK(sc);
2957 
2958 	/* enable promiscuous mode */
2959 	error = ifnet_set_promiscuous(ifs, 1);
2960 	switch (error) {
2961 	case 0:
2962 		bif->bif_flags |= BIFF_PROMISC;
2963 		break;
2964 	case ENETDOWN:
2965 	case EPWROFF:
2966 		BRIDGE_LOG(LOG_NOTICE, 0,
2967 		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2968 		    ifs->if_xname, error);
2969 		/* Ignore error when device is not up */
2970 		error = 0;
2971 		break;
2972 	default:
2973 		BRIDGE_LOG(LOG_NOTICE, 0,
2974 		    "ifnet_set_promiscuous(%s) failed %d",
2975 		    ifs->if_xname, error);
2976 		BRIDGE_LOCK(sc);
2977 		goto out;
2978 	}
2979 	if (wifi_infra) {
2980 		int this_error;
2981 
2982 		/* Wi-Fi doesn't really support promiscuous, set allmulti */
2983 		bif->bif_flags |= BIFF_WIFI_INFRA;
2984 		this_error = if_allmulti(ifs, 1);
2985 		if (this_error == 0) {
2986 			bif->bif_flags |= BIFF_ALL_MULTI;
2987 #ifdef XNU_PLATFORM_AppleTVOS
2988 			ip6_forwarding = 1;
2989 #endif /* XNU_PLATFORM_AppleTVOS */
2990 		} else {
2991 			BRIDGE_LOG(LOG_NOTICE, 0,
2992 			    "if_allmulti(%s) failed %d, ignoring",
2993 			    ifs->if_xname, this_error);
2994 		}
2995 	}
2996 #if SKYWALK
2997 	/* ensure that the flowswitch is present for native interface */
2998 	if (SKYWALK_NATIVE(ifs)) {
2999 		if (ifnet_attach_flowswitch_nexus(ifs)) {
3000 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
3001 		}
3002 	}
3003 	/* remove the netagent on the flowswitch (rdar://75050182) */
3004 	if (if_is_fsw_netagent_enabled()) {
3005 		(void)ifnet_remove_netagent(ifs);
3006 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
3007 	}
3008 #endif /* SKYWALK */
3009 
3010 	/*
3011 	 * install an interface filter
3012 	 */
3013 	memset(&iff, 0, sizeof(struct iff_filter));
3014 	iff.iff_cookie = bif;
3015 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
3016 	iff.iff_input = bridge_iff_input;
3017 	iff.iff_output = bridge_iff_output;
3018 	iff.iff_event = bridge_iff_event;
3019 	iff.iff_detached = bridge_iff_detached;
3020 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
3021 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL | DLIL_IFF_BRIDGE);
3022 	if (error != 0) {
3023 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
3024 		BRIDGE_LOCK(sc);
3025 		goto out;
3026 	}
3027 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
3028 
3029 	/*
3030 	 * install a dummy "bridge" protocol
3031 	 */
3032 	if ((error = bridge_attach_protocol(ifs)) != 0) {
3033 		if (error != 0) {
3034 			BRIDGE_LOG(LOG_NOTICE, 0,
3035 			    "bridge_attach_protocol failed %d", error);
3036 			BRIDGE_LOCK(sc);
3037 			goto out;
3038 		}
3039 	}
3040 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
3041 
3042 	if (lladdr_changed &&
3043 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
3044 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
3045 	}
3046 
3047 	media_active = interface_media_active(ifs);
3048 
3049 	/* disable LRO if needed */
3050 	if (bridge_set_lro(ifs, FALSE)) {
3051 		bif->bif_flags |= BIFF_LRO_DISABLED;
3052 	}
3053 
3054 	/*
3055 	 * No failures past this point. Add the member to the list.
3056 	 */
3057 	BRIDGE_LOCK(sc);
3058 	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
3059 	BRIDGE_XLOCK(sc);
3060 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
3061 	BRIDGE_XDROP(sc);
3062 
3063 	/* cache the member link status */
3064 	if (media_active != 0) {
3065 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3066 	} else {
3067 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3068 	}
3069 
3070 	/* the new member may change the link status of the bridge interface */
3071 	event_code = bridge_updatelinkstatus(sc);
3072 
3073 	/* check whether we need input broadcast or not */
3074 	input_broadcast = interface_needs_input_broadcast(ifs);
3075 	bif_set_input_broadcast(bif, input_broadcast);
3076 	BRIDGE_UNLOCK(sc);
3077 
3078 	if (event_code != 0) {
3079 		bridge_link_event(bifp, event_code);
3080 	}
3081 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3082 	    "%s input broadcast %s", ifs->if_xname,
3083 	    input_broadcast ? "ENABLED" : "DISABLED");
3084 
3085 	BRIDGE_LOCK(sc);
3086 	bridge_set_tso(sc);
3087 
3088 out:
3089 	/* allow the interface to detach */
3090 	ifnet_decr_iorefcnt(ifs);
3091 
3092 	if (error != 0) {
3093 		if (bif != NULL) {
3094 			bridge_delete_member(sc, bif);
3095 		}
3096 	} else if (IFNET_IS_VMNET(ifs)) {
3097 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
3098 	}
3099 
3100 	return error;
3101 }
3102 
3103 static int
bridge_ioctl_del(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3104 bridge_ioctl_del(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3105 {
3106 	struct ifbreq * __single req = arg;
3107 	struct bridge_iflist *bif;
3108 
3109 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3110 	if (bif == NULL) {
3111 		return ENOENT;
3112 	}
3113 
3114 	bridge_delete_member(sc, bif);
3115 
3116 	return 0;
3117 }
3118 
3119 static int
bridge_ioctl_purge(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3120 bridge_ioctl_purge(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3121 {
3122 #pragma unused(sc, arg, arg_len)
3123 	return 0;
3124 }
3125 
3126 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3127 bridge_ioctl_gifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3128 {
3129 	struct ifbreq * __single req = arg;
3130 	struct bridge_iflist *bif;
3131 
3132 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3133 	if (bif == NULL) {
3134 		return ENOENT;
3135 	}
3136 
3137 	struct bstp_port *bp;
3138 
3139 	bp = &bif->bif_stp;
3140 	req->ifbr_state = bp->bp_state;
3141 	req->ifbr_priority = bp->bp_priority;
3142 	req->ifbr_path_cost = bp->bp_path_cost;
3143 	req->ifbr_proto = bp->bp_protover;
3144 	req->ifbr_role = bp->bp_role;
3145 	req->ifbr_stpflags = bp->bp_flags;
3146 	req->ifbr_ifsflags = bif->bif_ifflags;
3147 
3148 	/* Copy STP state options as flags */
3149 	if (bp->bp_operedge) {
3150 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
3151 	}
3152 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
3153 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
3154 	}
3155 	if (bp->bp_ptp_link) {
3156 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
3157 	}
3158 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
3159 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
3160 	}
3161 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
3162 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
3163 	}
3164 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
3165 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
3166 	}
3167 
3168 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
3169 	req->ifbr_addrcnt = bif->bif_addrcnt;
3170 	req->ifbr_addrmax = bif->bif_addrmax;
3171 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
3172 
3173 	return 0;
3174 }
3175 
3176 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3177 bridge_ioctl_sifflags(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3178 {
3179 	struct ifbreq * __single req = arg;
3180 	struct bridge_iflist *bif;
3181 #if BRIDGESTP
3182 	struct bstp_port *bp;
3183 #endif /* BRIDGESTP */
3184 	errno_t error;
3185 	uint32_t ifsflags;
3186 
3187 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3188 	if (bif == NULL) {
3189 		return ENOENT;
3190 	}
3191 
3192 	ifsflags = req->ifbr_ifsflags;
3193 	if (ifsflags & IFBIF_SPAN) {
3194 		/* SPAN is readonly */
3195 		return EINVAL;
3196 	}
3197 #define CHECKSUM_VIRTIO (IFBIF_CHECKSUM_OFFLOAD | IFBIF_USES_VIRTIO)
3198 	if ((ifsflags & CHECKSUM_VIRTIO) == CHECKSUM_VIRTIO) {
3199 		/* can't specify checksum and virtio */
3200 		return EINVAL;
3201 	}
3202 	if ((ifsflags & IFBIF_MAC_NAT) != 0 &&
3203 	    ((ifsflags & CHECKSUM_VIRTIO) != 0 ||
3204 	    (bif->bif_flags & BIFF_HOST_FILTER) != 0)) {
3205 		/* MAC-NAT can't be used with checksum, host filter, or virtio */
3206 		return EINVAL;
3207 	}
3208 	if ((ifsflags & IFBIF_MAC_NAT) != 0) {
3209 		error = bridge_mac_nat_enable(sc, bif);
3210 		if (error != 0) {
3211 			return error;
3212 		}
3213 	} else if (sc->sc_mac_nat_bif == bif) {
3214 		bridge_mac_nat_disable(sc);
3215 	}
3216 
3217 #if BRIDGESTP
3218 	if (ifsflags & IFBIF_STP) {
3219 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
3220 			error = bstp_enable(&bif->bif_stp);
3221 			if (error) {
3222 				return error;
3223 			}
3224 		}
3225 	} else {
3226 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
3227 			bstp_disable(&bif->bif_stp);
3228 		}
3229 	}
3230 
3231 	/* Pass on STP flags */
3232 	bp = &bif->bif_stp;
3233 	bstp_set_edge(bp, ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
3234 	bstp_set_autoedge(bp, ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
3235 	bstp_set_ptp(bp, ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
3236 	bstp_set_autoptp(bp, ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
3237 #else /* !BRIDGESTP */
3238 	if (ifsflags & IFBIF_STP) {
3239 		return EOPNOTSUPP;
3240 	}
3241 #endif /* !BRIDGESTP */
3242 
3243 	/* Save the bits relating to the bridge */
3244 	bif->bif_ifflags = ifsflags & IFBIFMASK;
3245 
3246 	return 0;
3247 }
3248 
3249 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3250 bridge_ioctl_scache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3251 {
3252 	struct ifbrparam * __single param = arg;
3253 
3254 	sc->sc_brtmax = param->ifbrp_csize;
3255 	bridge_rttrim(sc);
3256 	return 0;
3257 }
3258 
3259 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3260 bridge_ioctl_gcache(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3261 {
3262 	struct ifbrparam * __single param = arg;
3263 
3264 	param->ifbrp_csize = sc->sc_brtmax;
3265 
3266 	return 0;
3267 }
3268 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Body shared by bridge_ioctl_gifs32() and bridge_ioctl_gifs64().
 *	Expects `sc`, `bifc` and `error` to be in scope where it expands.
 *
 *	Builds one struct ifbreq per member and per span interface and
 *	copies them out to bifc->ifbic_req. When bifc->ifbic_len is 0, only
 *	the required buffer size is stored in it and the expanding function
 *	returns 0. The bridge lock is dropped around the allocation and the
 *	copyout and is held again when the macro is done.
 *
 *	`error` is set to ENOMEM if the staging buffer can't be allocated,
 *	otherwise to the copyout() result when there is anything to copy.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
	        count++;                                                \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
	        count++;                                                \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                \
	BRIDGE_LOCK(sc);                                                \
	/* the allocation is not Z_NOFAIL: bail out, lock held */       \
	if (outbuf == NULL && buflen != 0) {                            \
	        error = ENOMEM;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	/* never write more than the user asked for or than we have */  \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	/* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq, sizeof(breq)); \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	/* copyout must not run with the bridge lock held */            \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	if (bifc->ifbic_len > 0) {                                      \
	        error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);\
	}                                                               \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
3333 
3334 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3335 bridge_ioctl_gifs64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3336 {
3337 	struct ifbifconf64 * __single bifc = arg;
3338 	int error = 0;
3339 
3340 	BRIDGE_IOCTL_GIFS;
3341 
3342 	return error;
3343 }
3344 
3345 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3346 bridge_ioctl_gifs32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3347 {
3348 	struct ifbifconf32 * __single bifc = arg;
3349 	int error = 0;
3350 
3351 	BRIDGE_IOCTL_GIFS;
3352 
3353 	return error;
3354 }
3355 
/*
 * BRIDGE_IOCTL_RTS
 *   Common body of bridge_ioctl_rts32() and bridge_ioctl_rts64(): copy the
 *   entries on sc->sc_rtlist out to the user buffer bac->ifbac_req.
 *
 *   The expanding function must provide, under exactly these names:
 *     sc    - the bridge softc; its lock is dropped around the allocation
 *             and around the copyout, and is re-taken afterwards
 *     bac   - the request (struct ifbaconf32 or struct ifbaconf64)
 *     bareq - scratch entry (struct ifbareq32 or struct ifbareq64)
 *     error - int result, initialized to 0
 *
 *   The macro returns from the expanding function. A zero bac->ifbac_len
 *   returns 0 immediately; otherwise bac->ifbac_len is rewritten with the
 *   number of bytes produced. ENOMEM is returned (with ifbac_len set to 0)
 *   if the staging buffer cannot be allocated.
 *
 * Note:
 *   The list can change while the lock is dropped for the allocation, so
 *   the fill loop is bounded by `len' (the smaller of the user length and
 *   the allocated length) rather than by the earlier count.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                    \
	BRIDGE_LOCK(sc);                                                    \
                                                                            \
	count = 0;                                                          \
	/* never write through a failed allocation */                       \
	if (outbuf == NULL && buflen != 0) {                                \
	        error = ENOMEM;                                             \
	        goto out;                                                   \
	}                                                                   \
	buf = outbuf;                                                       \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	        /* bareq is reused: reset so no stale value is reported */  \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire)                          \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        if (bac->ifbac_len > 0) {                                   \
	                error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);\
	        }                                                           \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3412 
3413 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3414 bridge_ioctl_rts64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3415 {
3416 	struct ifbaconf64 * __single bac = arg;
3417 	struct ifbareq64 bareq;
3418 	int error = 0;
3419 
3420 	BRIDGE_IOCTL_RTS;
3421 	return error;
3422 }
3423 
3424 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3425 bridge_ioctl_rts32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3426 {
3427 	struct ifbaconf32 * __single bac = arg;
3428 	struct ifbareq32 bareq;
3429 	int error = 0;
3430 
3431 	BRIDGE_IOCTL_RTS;
3432 	return error;
3433 }
3434 
3435 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3436 bridge_ioctl_saddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3437 {
3438 	struct ifbareq32 * __single req = arg;
3439 	struct bridge_iflist *bif;
3440 	int error;
3441 
3442 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3443 	if (bif == NULL) {
3444 		return ENOENT;
3445 	}
3446 
3447 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3448 	    req->ifba_flags);
3449 
3450 	return error;
3451 }
3452 
3453 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3454 bridge_ioctl_saddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3455 {
3456 	struct ifbareq64 * __single req = arg;
3457 	struct bridge_iflist *bif;
3458 	int error;
3459 
3460 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3461 	if (bif == NULL) {
3462 		return ENOENT;
3463 	}
3464 
3465 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3466 	    req->ifba_flags);
3467 
3468 	return error;
3469 }
3470 
3471 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3472 bridge_ioctl_sto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3473 {
3474 	struct ifbrparam * __single param = arg;
3475 
3476 	sc->sc_brttimeout = param->ifbrp_ctime;
3477 	return 0;
3478 }
3479 
3480 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3481 bridge_ioctl_gto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3482 {
3483 	struct ifbrparam * __single param = arg;
3484 
3485 	param->ifbrp_ctime = sc->sc_brttimeout;
3486 	return 0;
3487 }
3488 
3489 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3490 bridge_ioctl_daddr32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3491 {
3492 	struct ifbareq32 * __single req = arg;
3493 
3494 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3495 }
3496 
3497 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3498 bridge_ioctl_daddr64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3499 {
3500 	struct ifbareq64 * __single req = arg;
3501 
3502 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3503 }
3504 
3505 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3506 bridge_ioctl_flush(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3507 {
3508 	struct ifbreq * __single req = arg;
3509 
3510 	bridge_rtflush(sc, req->ifbr_ifsflags);
3511 	return 0;
3512 }
3513 
3514 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3515 bridge_ioctl_gpri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3516 {
3517 	struct ifbrparam * __single param = arg;
3518 	struct bstp_state *bs = &sc->sc_stp;
3519 
3520 	param->ifbrp_prio = bs->bs_bridge_priority;
3521 	return 0;
3522 }
3523 
3524 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3525 bridge_ioctl_spri(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3526 {
3527 #if BRIDGESTP
3528 	struct ifbrparam *param = arg;
3529 
3530 	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3531 #else /* !BRIDGESTP */
3532 #pragma unused(sc, arg)
3533 	return EOPNOTSUPP;
3534 #endif /* !BRIDGESTP */
3535 }
3536 
3537 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3538 bridge_ioctl_ght(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3539 {
3540 	struct ifbrparam * __single param = arg;
3541 	struct bstp_state *bs = &sc->sc_stp;
3542 
3543 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3544 	return 0;
3545 }
3546 
3547 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3548 bridge_ioctl_sht(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3549 {
3550 #if BRIDGESTP
3551 	struct ifbrparam *param = arg;
3552 
3553 	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3554 #else /* !BRIDGESTP */
3555 #pragma unused(sc, arg)
3556 	return EOPNOTSUPP;
3557 #endif /* !BRIDGESTP */
3558 }
3559 
3560 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3561 bridge_ioctl_gfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3562 {
3563 	struct ifbrparam * __single param;
3564 	struct bstp_state *bs;
3565 
3566 	param = arg;
3567 	bs = &sc->sc_stp;
3568 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3569 	return 0;
3570 }
3571 
3572 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3573 bridge_ioctl_sfd(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3574 {
3575 #if BRIDGESTP
3576 	struct ifbrparam *param = arg;
3577 
3578 	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3579 #else /* !BRIDGESTP */
3580 #pragma unused(sc, arg)
3581 	return EOPNOTSUPP;
3582 #endif /* !BRIDGESTP */
3583 }
3584 
3585 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3586 bridge_ioctl_gma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3587 {
3588 	struct ifbrparam * __single param;
3589 	struct bstp_state *bs;
3590 
3591 	param = arg;
3592 	bs = &sc->sc_stp;
3593 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3594 	return 0;
3595 }
3596 
3597 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3598 bridge_ioctl_sma(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3599 {
3600 #if BRIDGESTP
3601 	struct ifbrparam *param = arg;
3602 
3603 	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3604 #else /* !BRIDGESTP */
3605 #pragma unused(sc, arg)
3606 	return EOPNOTSUPP;
3607 #endif /* !BRIDGESTP */
3608 }
3609 
3610 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3611 bridge_ioctl_sifprio(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3612 {
3613 #if BRIDGESTP
3614 	struct ifbreq *req = arg;
3615 	struct bridge_iflist *bif;
3616 
3617 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3618 	if (bif == NULL) {
3619 		return ENOENT;
3620 	}
3621 
3622 	return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3623 #else /* !BRIDGESTP */
3624 #pragma unused(sc, arg)
3625 	return EOPNOTSUPP;
3626 #endif /* !BRIDGESTP */
3627 }
3628 
3629 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3630 bridge_ioctl_sifcost(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3631 {
3632 #if BRIDGESTP
3633 	struct ifbreq *req = arg;
3634 	struct bridge_iflist *bif;
3635 
3636 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3637 	if (bif == NULL) {
3638 		return ENOENT;
3639 	}
3640 
3641 	return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3642 #else /* !BRIDGESTP */
3643 #pragma unused(sc, arg)
3644 	return EOPNOTSUPP;
3645 #endif /* !BRIDGESTP */
3646 }
3647 
3648 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3649 bridge_ioctl_gfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3650 {
3651 	struct ifbrparam * __single param = arg;
3652 
3653 	param->ifbrp_filter = sc->sc_filter_flags;
3654 
3655 	return 0;
3656 }
3657 
3658 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3659 bridge_ioctl_sfilt(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3660 {
3661 	struct ifbrparam * __single param = arg;
3662 
3663 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3664 		return EINVAL;
3665 	}
3666 
3667 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3668 		return EINVAL;
3669 	}
3670 
3671 	sc->sc_filter_flags = param->ifbrp_filter;
3672 
3673 	return 0;
3674 }
3675 
3676 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3677 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3678 {
3679 	struct ifbreq * __single req = arg;
3680 	struct bridge_iflist *bif;
3681 
3682 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3683 	if (bif == NULL) {
3684 		return ENOENT;
3685 	}
3686 
3687 	bif->bif_addrmax = req->ifbr_addrmax;
3688 	return 0;
3689 }
3690 
3691 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3692 bridge_ioctl_addspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3693 {
3694 	struct ifbreq * __single req = arg;
3695 	struct bridge_iflist *bif = NULL;
3696 	struct ifnet *ifs;
3697 
3698 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3699 	if (ifs == NULL) {
3700 		return ENOENT;
3701 	}
3702 
3703 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3704 		return EINVAL;
3705 	}
3706 
3707 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3708 	if (ifs == bif->bif_ifp) {
3709 		return EBUSY;
3710 	}
3711 
3712 	if (ifs->if_bridge != NULL) {
3713 		return EBUSY;
3714 	}
3715 
3716 	switch (ifs->if_type) {
3717 	case IFT_ETHER:
3718 	case IFT_L2VLAN:
3719 	case IFT_IEEE8023ADLAG:
3720 		break;
3721 	default:
3722 		return EINVAL;
3723 	}
3724 
3725 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3726 
3727 	bif->bif_ifp = ifs;
3728 	bif->bif_ifflags = IFBIF_SPAN;
3729 
3730 	ifnet_reference(bif->bif_ifp);
3731 
3732 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3733 
3734 	return 0;
3735 }
3736 
3737 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3738 bridge_ioctl_delspan(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3739 {
3740 	struct ifbreq * __single req = arg;
3741 	struct bridge_iflist *bif;
3742 	struct ifnet *ifs;
3743 
3744 	ifs = ifunit(sanitize_ifname(req->ifbr_ifsname));
3745 	if (ifs == NULL) {
3746 		return ENOENT;
3747 	}
3748 
3749 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3750 	if (ifs == bif->bif_ifp) {
3751 		break;
3752 	}
3753 
3754 	if (bif == NULL) {
3755 		return ENOENT;
3756 	}
3757 
3758 	bridge_delete_span(sc, bif);
3759 
3760 	return 0;
3761 }
3762 
/*
 * BRIDGE_IOCTL_GBPARAM
 *   Common body of bridge_ioctl_gbparam32() and bridge_ioctl_gbparam64():
 *   fill the ifbop_* fields of `req' from sc->sc_stp.
 *
 *   The expanding function must provide `sc' (the bridge softc) and `req'
 *   (struct ifbropreq32 or struct ifbropreq64) under exactly these names.
 *   The three bridge timer values are reported shifted right by 8 bits;
 *   ifbop_root_port is 0 when there is no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *bs = &sc->sc_stp;                            \
	struct bstp_port *root_port = bs->bs_root_port;                 \
                                                                        \
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8;                 \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8;                \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8;                \
                                                                        \
	if (root_port != NULL) {                                        \
	        req->ifbop_root_port = root_port->bp_ifp->if_index;     \
	} else {                                                        \
	        req->ifbop_root_port = 0;                               \
	}                                                               \
                                                                        \
	req->ifbop_holdcount = bs->bs_txholdcount;                      \
	req->ifbop_priority = bs->bs_bridge_priority;                   \
	req->ifbop_protocol = bs->bs_protover;                          \
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost;             \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id;           \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id;         \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id;    \
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec;    \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec;  \
} while (0)
3787 
3788 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3789 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3790 {
3791 	struct ifbropreq32 * __single req = arg;
3792 
3793 	BRIDGE_IOCTL_GBPARAM;
3794 	return 0;
3795 }
3796 
3797 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3798 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3799 {
3800 	struct ifbropreq64 * __single req = arg;
3801 
3802 	BRIDGE_IOCTL_GBPARAM;
3803 	return 0;
3804 }
3805 
3806 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3807 bridge_ioctl_grte(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3808 {
3809 	struct ifbrparam * __single param = arg;
3810 
3811 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3812 	return 0;
3813 }
3814 
/*
 * BRIDGE_IOCTL_GIFSSTP
 *   Common body of bridge_ioctl_gifsstp32() and bridge_ioctl_gifsstp64():
 *   copy one struct ifbpstpreq per member that has IFBIF_STP set out to
 *   the user buffer bifstp->ifbpstp_req.
 *
 *   The expanding function must provide, under exactly these names:
 *     sc     - the bridge softc; its lock is dropped around the allocation
 *              and around the copyout, and is re-taken afterwards
 *     bifstp - the request (struct ifbpstpconf32 or ifbpstpconf64)
 *     error  - int result, initialized to 0
 *
 *   The macro returns from the expanding function. When the caller passes
 *   ifbpstp_len == 0, only the required length is reported. Otherwise
 *   ifbpstp_len is rewritten with the number of bytes produced. ENOMEM is
 *   returned (request untouched) if the staging buffer cannot be
 *   allocated.
 *
 * Note:
 *   The member list can change while the lock is dropped for the
 *   allocation, so the fill loop is bounded by `len' (the smaller of the
 *   user length and the allocated length) rather than by the earlier
 *   count.
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0)                \
	                count++;                                        \
	}                                                               \
                                                                        \
	buflen = sizeof (bpreq) * count;                                \
	if (bifstp->ifbpstp_len == 0) {                                 \
	        bifstp->ifbpstp_len = buflen;                           \
	        return (0);                                             \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);                \
	BRIDGE_LOCK(sc);                                                \
	/* never write through a failed allocation */                   \
	if (outbuf == NULL && buflen != 0)                              \
	        return (ENOMEM);                                        \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (bpreq))                               \
	                break;                                          \
                                                                        \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0)                \
	                continue;                                       \
                                                                        \
	        bp = &bif->bif_stp;                                     \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
                                                                        \
	        memcpy(buf, &bpreq, sizeof (bpreq));                    \
	        count++;                                                \
	        buf += sizeof (bpreq);                                  \
	        len -= sizeof (bpreq);                                  \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	if (bifstp->ifbpstp_len > 0) {                                  \
	        error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len);\
	}                                                               \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3872 
3873 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3874 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3875 {
3876 	struct ifbpstpconf32 * __single bifstp = arg;
3877 	int error = 0;
3878 
3879 	BRIDGE_IOCTL_GIFSSTP;
3880 	return error;
3881 }
3882 
3883 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3884 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3885 {
3886 	struct ifbpstpconf64 * __single bifstp = arg;
3887 	int error = 0;
3888 
3889 	BRIDGE_IOCTL_GIFSSTP;
3890 	return error;
3891 }
3892 
3893 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3894 bridge_ioctl_sproto(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3895 {
3896 #if BRIDGESTP
3897 	struct ifbrparam *param = arg;
3898 
3899 	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3900 #else /* !BRIDGESTP */
3901 #pragma unused(sc, arg)
3902 	return EOPNOTSUPP;
3903 #endif /* !BRIDGESTP */
3904 }
3905 
3906 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3907 bridge_ioctl_stxhc(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3908 {
3909 #if BRIDGESTP
3910 	struct ifbrparam *param = arg;
3911 
3912 	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3913 #else /* !BRIDGESTP */
3914 #pragma unused(sc, arg)
3915 	return EOPNOTSUPP;
3916 #endif /* !BRIDGESTP */
3917 }
3918 
3919 
3920 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3921 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3922 {
3923 	struct ifbrhostfilter * __single req = arg;
3924 	struct bridge_iflist *bif;
3925 
3926 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3927 	if (bif == NULL) {
3928 		return ENOENT;
3929 	}
3930 
3931 	bzero(req, sizeof(struct ifbrhostfilter));
3932 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3933 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3934 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3935 		    ETHER_ADDR_LEN);
3936 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3937 	}
3938 	return 0;
3939 }
3940 
3941 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)3942 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
3943 {
3944 	struct ifbrhostfilter * __single req = arg;
3945 	struct bridge_iflist *bif;
3946 
3947 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3948 	if (bif == NULL) {
3949 		return ENOENT;
3950 	}
3951 	if (bif_has_mac_nat(bif)) {
3952 		/* no host filter with MAC-NAT */
3953 		return EINVAL;
3954 	}
3955 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3956 		bif->bif_flags |= BIFF_HOST_FILTER;
3957 
3958 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3959 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3960 			    ETHER_ADDR_LEN);
3961 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3962 			    ETHER_ADDR_LEN) != 0) {
3963 				bif->bif_flags |= BIFF_HF_HWSRC;
3964 			} else {
3965 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3966 			}
3967 		}
3968 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3969 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3970 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3971 				bif->bif_flags |= BIFF_HF_IPSRC;
3972 			} else {
3973 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3974 			}
3975 		}
3976 	} else {
3977 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3978 		    BIFF_HF_IPSRC);
3979 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3980 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3981 	}
3982 
3983 	return 0;
3984 }
3985 
3986 static char *__indexable
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * __indexable buf,unsigned int * len_p)3987 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3988     unsigned int * count_p, char *__indexable buf,
3989     unsigned int * len_p)
3990 {
3991 	unsigned int            count = *count_p;
3992 	struct ifbrmne          ifbmne;
3993 	unsigned int            len = *len_p;
3994 	struct mac_nat_entry    *mne;
3995 	unsigned long           now;
3996 
3997 	bzero(&ifbmne, sizeof(ifbmne));
3998 	LIST_FOREACH(mne, list, mne_list) {
3999 		if (len < sizeof(ifbmne)) {
4000 			break;
4001 		}
4002 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
4003 		    "%s", mne->mne_bif->bif_ifp->if_xname);
4004 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
4005 		    sizeof(ifbmne.ifbmne_mac));
4006 		now = (unsigned long) net_uptime();
4007 		if (now < mne->mne_expire) {
4008 			ifbmne.ifbmne_expire = mne->mne_expire - now;
4009 		} else {
4010 			ifbmne.ifbmne_expire = 0;
4011 		}
4012 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
4013 			ifbmne.ifbmne_af = AF_INET6;
4014 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
4015 		} else {
4016 			ifbmne.ifbmne_af = AF_INET;
4017 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
4018 		}
4019 		memcpy(buf, &ifbmne, sizeof(ifbmne));
4020 		count++;
4021 		buf += sizeof(ifbmne);
4022 		len -= sizeof(ifbmne);
4023 	}
4024 	*count_p = count;
4025 	*len_p = len;
4026 	return buf;
4027 }
4028 
4029 /*
4030  * bridge_ioctl_gmnelist()
4031  *   Perform the get mac_nat_entry list ioctl.
4032  *
4033  * Note:
4034  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
4035  *   field size/layout except for the last field ifbml_buf, the user-supplied
4036  *   buffer pointer. That is passed in separately via the 'user_addr'
4037  *   parameter from the respective 32-bit or 64-bit ioctl routine.
4038  */
4039 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)4040 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
4041     user_addr_t user_addr)
4042 {
4043 	unsigned int            count;
4044 	char                    *buf;
4045 	int                     error = 0;
4046 	char                    *outbuf = NULL;
4047 	struct mac_nat_entry    *mne;
4048 	unsigned int            buflen;
4049 	unsigned int            len;
4050 
4051 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
4052 	count = 0;
4053 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
4054 		count++;
4055 	}
4056 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
4057 		count++;
4058 	}
4059 	buflen = sizeof(struct ifbrmne) * count;
4060 	if (buflen == 0 || mnl->ifbml_len == 0) {
4061 		mnl->ifbml_len = buflen;
4062 		return error;
4063 	}
4064 	BRIDGE_UNLOCK(sc);
4065 	outbuf = kalloc_data(buflen, Z_WAITOK | Z_ZERO);
4066 	BRIDGE_LOCK(sc);
4067 	count = 0;
4068 	buf = outbuf;
4069 	len = min(mnl->ifbml_len, buflen);
4070 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
4071 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
4072 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
4073 	BRIDGE_UNLOCK(sc);
4074 	if (mnl->ifbml_len > 0) {
4075 		error = copyout(outbuf, user_addr, mnl->ifbml_len);
4076 	}
4077 	kfree_data(outbuf, buflen);
4078 	BRIDGE_LOCK(sc);
4079 	return error;
4080 }
4081 
4082 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4083 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4084 {
4085 	struct ifbrmnelist64 * __single mnl = arg;
4086 
4087 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
4088 }
4089 
4090 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4091 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4092 {
4093 	struct ifbrmnelist32 * __single mnl = arg;
4094 
4095 	return bridge_ioctl_gmnelist(sc, arg,
4096 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
4097 }
4098 
4099 /*
4100  * bridge_ioctl_gifstats()
4101  *   Return per-member stats.
4102  *
4103  * Note:
4104  *   The ifbrmreq32 and ifbrmreq64 structures have the same
4105  *   field size/layout except for the last field brmr_buf, the user-supplied
4106  *   buffer pointer. That is passed in separately via the 'user_addr'
4107  *   parameter from the respective 32-bit or 64-bit ioctl routine.
4108  */
4109 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)4110 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
4111     user_addr_t user_addr)
4112 {
4113 	struct bridge_iflist    *bif;
4114 	int                     error = 0;
4115 	unsigned int            buflen;
4116 
4117 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
4118 	if (bif == NULL) {
4119 		error = ENOENT;
4120 		goto done;
4121 	}
4122 
4123 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
4124 	if (buflen == 0 || mreq->brmr_len == 0) {
4125 		mreq->brmr_len = buflen;
4126 		goto done;
4127 	}
4128 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
4129 		error = ENOBUFS;
4130 		goto done;
4131 	}
4132 	mreq->brmr_len = buflen;
4133 	error = copyout(&bif->bif_stats, user_addr, buflen);
4134 done:
4135 	return error;
4136 }
4137 
4138 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4139 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4140 {
4141 	struct ifbrmreq32 * __single mreq = arg;
4142 
4143 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4144 }
4145 
4146 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * __sized_by (arg_len)arg,size_t arg_len __unused)4147 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *__sized_by(arg_len) arg, size_t arg_len __unused)
4148 {
4149 	struct ifbrmreq64 * __single mreq = arg;
4150 
4151 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
4152 }
4153 
4154 /*
4155  * bridge_proto_attach_changed
4156  *
4157  *	Called when protocol attachment on the interface changes.
4158  */
4159 static void
bridge_proto_attach_changed(struct ifnet * ifp)4160 bridge_proto_attach_changed(struct ifnet *ifp)
4161 {
4162 	boolean_t changed = FALSE;
4163 	struct bridge_iflist *bif;
4164 	boolean_t input_broadcast;
4165 	struct bridge_softc * __single sc = ifp->if_bridge;
4166 
4167 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4168 	if (sc == NULL) {
4169 		return;
4170 	}
4171 	input_broadcast = interface_needs_input_broadcast(ifp);
4172 	BRIDGE_LOCK(sc);
4173 	bif = bridge_lookup_member_if(sc, ifp);
4174 	if (bif != NULL) {
4175 		changed = bif_set_input_broadcast(bif, input_broadcast);
4176 	}
4177 	BRIDGE_UNLOCK(sc);
4178 	if (changed) {
4179 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
4180 		    "%s input broadcast %s", ifp->if_xname,
4181 		    input_broadcast ? "ENABLED" : "DISABLED");
4182 	}
4183 	return;
4184 }
4185 
4186 /*
4187  * interface_media_active:
4188  *
4189  *	Tells if an interface media is active.
4190  */
4191 static int
interface_media_active(struct ifnet * ifp)4192 interface_media_active(struct ifnet *ifp)
4193 {
4194 	struct ifmediareq   ifmr;
4195 	int status = 0;
4196 
4197 	bzero(&ifmr, sizeof(ifmr));
4198 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
4199 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
4200 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
4201 		}
4202 	}
4203 
4204 	return status;
4205 }
4206 
4207 /*
4208  * bridge_updatelinkstatus:
4209  *
4210  *      Update the media active status of the bridge based on the
4211  *	media active status of its member.
4212  *	If changed, return the corresponding onf/off link event.
4213  */
4214 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)4215 bridge_updatelinkstatus(struct bridge_softc *sc)
4216 {
4217 	struct bridge_iflist *bif;
4218 	int active_member = 0;
4219 	u_int32_t event_code = 0;
4220 
4221 	BRIDGE_LOCK_ASSERT_HELD(sc);
4222 
4223 	/*
4224 	 * Find out if we have an active interface
4225 	 */
4226 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
4227 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
4228 			active_member = 1;
4229 			break;
4230 		}
4231 	}
4232 
4233 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4234 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
4235 		event_code = KEV_DL_LINK_ON;
4236 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
4237 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
4238 		event_code = KEV_DL_LINK_OFF;
4239 	}
4240 
4241 	return event_code;
4242 }
4243 
4244 /*
4245  * bridge_iflinkevent:
4246  */
4247 static void
bridge_iflinkevent(struct ifnet * ifp)4248 bridge_iflinkevent(struct ifnet *ifp)
4249 {
4250 	struct bridge_softc * __single sc = ifp->if_bridge;
4251 	struct bridge_iflist *bif;
4252 	u_int32_t event_code = 0;
4253 	int media_active;
4254 
4255 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4256 
4257 	/* Check if the interface is a bridge member */
4258 	if (sc == NULL) {
4259 		return;
4260 	}
4261 
4262 	media_active = interface_media_active(ifp);
4263 	BRIDGE_LOCK(sc);
4264 	bif = bridge_lookup_member_if(sc, ifp);
4265 	if (bif != NULL) {
4266 		if (media_active) {
4267 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4268 		} else {
4269 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4270 		}
4271 		if (sc->sc_mac_nat_bif != NULL) {
4272 			bridge_mac_nat_flush_entries(sc, bif);
4273 		}
4274 
4275 		event_code = bridge_updatelinkstatus(sc);
4276 	}
4277 	BRIDGE_UNLOCK(sc);
4278 
4279 	if (event_code != 0) {
4280 		bridge_link_event(sc->sc_ifp, event_code);
4281 	}
4282 }
4283 
4284 /*
4285  * bridge_delayed_callback:
4286  *
4287  *	Makes a delayed call
4288  */
4289 static void
bridge_delayed_callback(void * param,__unused void * param2)4290 bridge_delayed_callback(void *param, __unused void *param2)
4291 {
4292 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4293 	struct bridge_softc *sc = call->bdc_sc;
4294 
4295 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4296 	if (bridge_delayed_callback_delay > 0) {
4297 		struct timespec ts;
4298 
4299 		ts.tv_sec = bridge_delayed_callback_delay;
4300 		ts.tv_nsec = 0;
4301 
4302 		BRIDGE_LOG(LOG_NOTICE, 0,
4303 		    "sleeping for %d seconds",
4304 		    bridge_delayed_callback_delay);
4305 
4306 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4307 		    __func__, &ts);
4308 
4309 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4310 	}
4311 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4312 
4313 	BRIDGE_LOCK(sc);
4314 
4315 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4316 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4317 	    "%s call 0x%llx flags 0x%x",
4318 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4319 	    call->bdc_flags);
4320 }
4321 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4322 
4323 	if (call->bdc_flags & BDCF_CANCELLING) {
4324 		wakeup(call);
4325 	} else {
4326 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
4327 			(*call->bdc_func)(sc);
4328 		}
4329 	}
4330 	call->bdc_flags &= ~BDCF_OUTSTANDING;
4331 	BRIDGE_UNLOCK(sc);
4332 }
4333 
4334 /*
4335  * bridge_schedule_delayed_call:
4336  *
4337  *	Schedule a function to be called on a separate thread
4338  *      The actual call may be scheduled to run at a given time or ASAP.
4339  */
4340 static void
4341 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4342 {
4343 	uint64_t deadline = 0;
4344 	struct bridge_softc *sc = call->bdc_sc;
4345 
4346 	BRIDGE_LOCK_ASSERT_HELD(sc);
4347 
4348 	if ((sc->sc_flags & SCF_DETACHING) ||
4349 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4350 		return;
4351 	}
4352 
4353 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4354 		nanoseconds_to_absolutetime(
4355 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4356 			call->bdc_ts.tv_nsec, &deadline);
4357 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
4358 	}
4359 
4360 	call->bdc_flags = BDCF_OUTSTANDING;
4361 
4362 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4363 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4364 	    "%s call 0x%llx flags 0x%x",
4365 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4366 	    call->bdc_flags);
4367 }
4368 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4369 
4370 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4371 		thread_call_func_delayed(
4372 			(thread_call_func_t)bridge_delayed_callback,
4373 			call, deadline);
4374 	} else {
4375 		if (call->bdc_thread_call == NULL) {
4376 			call->bdc_thread_call = thread_call_allocate(
4377 				(thread_call_func_t)bridge_delayed_callback,
4378 				call);
4379 		}
4380 		thread_call_enter(call->bdc_thread_call);
4381 	}
4382 }
4383 
4384 /*
4385  * bridge_cancel_delayed_call:
4386  *
4387  *	Cancel a queued or running delayed call.
4388  *	If call is running, does not return until the call is done to
4389  *	prevent race condition with the brigde interface getting destroyed
4390  */
4391 static void
4392 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4393 {
4394 	boolean_t result;
4395 	struct bridge_softc *sc = call->bdc_sc;
4396 
4397 	/*
4398 	 * The call was never scheduled
4399 	 */
4400 	if (sc == NULL) {
4401 		return;
4402 	}
4403 
4404 	BRIDGE_LOCK_ASSERT_HELD(sc);
4405 
4406 	call->bdc_flags |= BDCF_CANCELLING;
4407 
4408 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4409 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4410 		    "%s call 0x%llx flags 0x%x",
4411 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4412 		    call->bdc_flags);
4413 		result = thread_call_func_cancel(
4414 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4415 
4416 		if (result) {
4417 			/*
4418 			 * We managed to dequeue the delayed call
4419 			 */
4420 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4421 		} else {
4422 			/*
4423 			 * Wait for delayed call do be done running
4424 			 */
4425 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4426 		}
4427 	}
4428 	call->bdc_flags &= ~BDCF_CANCELLING;
4429 }
4430 
4431 /*
4432  * bridge_cleanup_delayed_call:
4433  *
4434  *	Dispose resource allocated for a delayed call
4435  *	Assume the delayed call is not queued or running .
4436  */
4437 static void
4438 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4439 {
4440 	boolean_t result;
4441 	struct bridge_softc *sc = call->bdc_sc;
4442 
4443 	/*
4444 	 * The call was never scheduled
4445 	 */
4446 	if (sc == NULL) {
4447 		return;
4448 	}
4449 
4450 	BRIDGE_LOCK_ASSERT_HELD(sc);
4451 
4452 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4453 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4454 
4455 	if (call->bdc_thread_call != NULL) {
4456 		result = thread_call_free(call->bdc_thread_call);
4457 		if (result == FALSE) {
4458 			panic("%s thread_call_free() failed for call %p",
4459 			    __func__, call);
4460 		}
4461 		call->bdc_thread_call = NULL;
4462 	}
4463 }
4464 
4465 /*
4466  * bridge_init:
4467  *
4468  *	Initialize a bridge interface.
4469  */
4470 static int
4471 bridge_init(struct ifnet *ifp)
4472 {
4473 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4474 	errno_t error;
4475 
4476 	BRIDGE_LOCK_ASSERT_HELD(sc);
4477 
4478 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4479 		return 0;
4480 	}
4481 
4482 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4483 
4484 	/*
4485 	 * Calling bridge_aging_timer() is OK as there are no entries to
4486 	 * age so we're just going to arm the timer
4487 	 */
4488 	bridge_aging_timer(sc);
4489 #if BRIDGESTP
4490 	if (error == 0) {
4491 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4492 	}
4493 #endif /* BRIDGESTP */
4494 	return error;
4495 }
4496 
4497 /*
4498  * bridge_ifstop:
4499  *
4500  *	Stop the bridge interface.
4501  */
4502 static void
4503 bridge_ifstop(struct ifnet *ifp, int disable)
4504 {
4505 #pragma unused(disable)
4506 	struct bridge_softc * __single sc = ifp->if_softc;
4507 
4508 	BRIDGE_LOCK_ASSERT_HELD(sc);
4509 
4510 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4511 		return;
4512 	}
4513 
4514 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4515 
4516 #if BRIDGESTP
4517 	bstp_stop(&sc->sc_stp);
4518 #endif /* BRIDGESTP */
4519 
4520 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4521 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4522 }
4523 
4524 static const uint32_t checksum_request_flags = (MBUF_CSUM_REQ_TCP |
4525     MBUF_CSUM_REQ_UDP | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6);
4526 
4527 static const mbuf_csum_performed_flags_t checksum_performed_all_good =
4528     (MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD
4529     | MBUF_CSUM_DID_DATA | MBUF_CSUM_PSEUDO_HDR);
4530 
4531 /*
4532  * bridge_compute_cksum:
4533  *
4534  *	If the packet has checksum flags, compare the hardware checksum
4535  *	capabilities of the source and destination interfaces. If they
4536  *	are the same, there's nothing to do. If they are different,
4537  *	finalize the checksum so that it can be sent on the destination
4538  *	interface.
4539  */
4540 static void
4541 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4542 {
4543 	uint32_t csum_flags;
4544 	uint16_t dst_hw_csum;
4545 	uint32_t did_sw = 0;
4546 	struct ether_header *eh;
4547 	uint16_t src_hw_csum;
4548 
4549 	if (src_if == dst_if) {
4550 		return;
4551 	}
4552 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4553 	if (csum_flags == 0) {
4554 		/* no checksum offload */
4555 		return;
4556 	}
4557 
4558 	/*
4559 	 * if destination/source differ in checksum offload
4560 	 * capabilities, finalize/compute the checksum
4561 	 */
4562 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4563 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4564 	if (dst_hw_csum == src_hw_csum) {
4565 		return;
4566 	}
4567 	eh = mtod(m, struct ether_header *);
4568 	switch (eh->ether_type) {
4569 	case HTONS_ETHERTYPE_IP:
4570 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4571 		break;
4572 	case HTONS_ETHERTYPE_IPV6:
4573 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4574 		break;
4575 	}
4576 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4577 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4578 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4579 	    m->m_pkthdr.csum_flags);
4580 }
4581 
4582 static inline errno_t
4583 bridge_transmit(ifnet_t ifp, mbuf_t m)
4584 {
4585 	struct flowadv  adv = { .code = FADV_SUCCESS };
4586 	errno_t         error;
4587 	int             flags = DLIL_OUTPUT_FLAGS_RAW;
4588 
4589 	flags = (if_bridge_output_skip_filters != 0)
4590 	    ? (DLIL_OUTPUT_FLAGS_RAW | DLIL_OUTPUT_FLAGS_SKIP_IF_FILTERS)
4591 	    : DLIL_OUTPUT_FLAGS_RAW;
4592 	error = dlil_output(ifp, 0, m, NULL, NULL, flags, &adv);
4593 	if (error == 0) {
4594 		if (adv.code == FADV_FLOW_CONTROLLED) {
4595 			error = EQFULL;
4596 		} else if (adv.code == FADV_SUSPENDED) {
4597 			error = EQSUSPENDED;
4598 		}
4599 	}
4600 	return error;
4601 }
4602 
4603 static int
4604 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4605     bool *is_fragmented)
4606 {
4607 	int newoff;
4608 
4609 	*is_fragmented = false;
4610 	while (1) {
4611 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4612 		if (newoff < 0) {
4613 			return off;
4614 		} else if (newoff < off) {
4615 			return -1;    /* invalid */
4616 		} else if (newoff == off) {
4617 			return newoff;
4618 		}
4619 		off = newoff;
4620 		proto = *nxtp;
4621 		if (proto == IPPROTO_FRAGMENT) {
4622 			*is_fragmented = true;
4623 		}
4624 	}
4625 }
4626 
/*
 * Relaxed atomic increment of the counter lvalue 's'. The argument is
 * parenthesized so that any lvalue expression can be passed safely.
 */
#define __ATOMIC_INC(s) os_atomic_inc(&(s), relaxed)
4628 
4629 static int
4630 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4631     ip_packet_info_t info_p, struct bripstats * stats_p)
4632 {
4633 	int             error = 0;
4634 	u_int           hlen;
4635 	u_int           ip_hlen;
4636 	u_int           ip_pay_len;
4637 	struct mbuf *   m0 = *mp;
4638 	int             off;
4639 	int             opt_len = 0;
4640 	int             proto = 0;
4641 
4642 	bzero(info_p, sizeof(*info_p));
4643 	if (is_ipv4) {
4644 		struct ip *     ip;
4645 		u_int           ip_total_len;
4646 
4647 		/* IPv4 */
4648 		hlen = mac_hlen + sizeof(struct ip);
4649 		if (m0->m_pkthdr.len < hlen) {
4650 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4651 			    "Short IP packet %d < %d",
4652 			    m0->m_pkthdr.len, hlen);
4653 			error = _EBADIP;
4654 			__ATOMIC_INC(stats_p->bips_bad_ip);
4655 			goto done;
4656 		}
4657 		if (m0->m_len < hlen) {
4658 			*mp = m0 = m_pullup(m0, hlen);
4659 			if (m0 == NULL) {
4660 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4661 				    "m_pullup failed hlen %d",
4662 				    hlen);
4663 				error = ENOBUFS;
4664 				__ATOMIC_INC(stats_p->bips_bad_ip);
4665 				goto done;
4666 			}
4667 		}
4668 		ip = (struct ip *)mtodo(m0, mac_hlen);
4669 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4670 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4671 			    "bad IP version");
4672 			error = _EBADIP;
4673 			__ATOMIC_INC(stats_p->bips_bad_ip);
4674 			goto done;
4675 		}
4676 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4677 		if (ip_hlen < sizeof(struct ip)) {
4678 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4679 			    "bad IP header length %d < %d",
4680 			    ip_hlen,
4681 			    (int)sizeof(struct ip));
4682 			error = _EBADIP;
4683 			__ATOMIC_INC(stats_p->bips_bad_ip);
4684 			goto done;
4685 		}
4686 		hlen = mac_hlen + ip_hlen;
4687 		if (m0->m_len < hlen) {
4688 			*mp = m0 = m_pullup(m0, hlen);
4689 			if (m0 == NULL) {
4690 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4691 				    "m_pullup failed hlen %d",
4692 				    hlen);
4693 				error = ENOBUFS;
4694 				__ATOMIC_INC(stats_p->bips_bad_ip);
4695 				goto done;
4696 			}
4697 			ip = (struct ip *)mtodo(m0, mac_hlen);
4698 		}
4699 
4700 		ip_total_len = ntohs(ip->ip_len);
4701 		if (ip_total_len < ip_hlen) {
4702 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4703 			    "IP total len %d < header len %d",
4704 			    ip_total_len, ip_hlen);
4705 			error = _EBADIP;
4706 			__ATOMIC_INC(stats_p->bips_bad_ip);
4707 			goto done;
4708 		}
4709 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4710 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4711 			    "invalid IP payload length %d > %d",
4712 			    ip_total_len,
4713 			    (m0->m_pkthdr.len - mac_hlen));
4714 			error = _EBADIP;
4715 			__ATOMIC_INC(stats_p->bips_bad_ip);
4716 			goto done;
4717 		}
4718 		ip_pay_len = ip_total_len - ip_hlen;
4719 		info_p->ip_proto = ip->ip_p;
4720 		info_p->ip_hdr = mtodo(m0, mac_hlen);
4721 		info_p->ip_m0_len = m0->m_len - mac_hlen;
4722 		info_p->ip_hlen = ip_hlen;
4723 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4724 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4725 			info_p->ip_is_fragmented = true;
4726 		}
4727 		__ATOMIC_INC(stats_p->bips_ip);
4728 	} else {
4729 		struct ip6_hdr *ip6;
4730 
4731 		/* IPv6 */
4732 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4733 		if (m0->m_pkthdr.len < hlen) {
4734 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4735 			    "short IPv6 packet %d < %d",
4736 			    m0->m_pkthdr.len, hlen);
4737 			error = _EBADIPV6;
4738 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4739 			goto done;
4740 		}
4741 		if (m0->m_len < hlen) {
4742 			*mp = m0 = m_pullup(m0, hlen);
4743 			if (m0 == NULL) {
4744 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4745 				    "m_pullup failed hlen %d",
4746 				    hlen);
4747 				error = ENOBUFS;
4748 				__ATOMIC_INC(stats_p->bips_bad_ip6);
4749 				goto done;
4750 			}
4751 		}
4752 		ip6 = (struct ip6_hdr *)(mtodo(m0, mac_hlen));
4753 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4754 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4755 			    "bad IPv6 version");
4756 			error = _EBADIPV6;
4757 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4758 			goto done;
4759 		}
4760 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4761 		    &info_p->ip_is_fragmented);
4762 		if (off < 0 || m0->m_pkthdr.len < off) {
4763 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4764 			    "ip6_lasthdr() returned %d",
4765 			    off);
4766 			error = _EBADIPV6;
4767 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4768 			goto done;
4769 		}
4770 		ip_hlen = sizeof(*ip6);
4771 		opt_len = off - mac_hlen - ip_hlen;
4772 		if (opt_len < 0) {
4773 			error = _EBADIPV6;
4774 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4775 			goto done;
4776 		}
4777 		ip_pay_len = ntohs(ip6->ip6_plen);
4778 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4779 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4780 			    "invalid IPv6 payload length %d > %d",
4781 			    ip_pay_len,
4782 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4783 			error = _EBADIPV6;
4784 			__ATOMIC_INC(stats_p->bips_bad_ip6);
4785 			goto done;
4786 		}
4787 		info_p->ip_proto = proto;
4788 		info_p->ip_hdr = mtodo(m0, mac_hlen);
4789 		info_p->ip_m0_len = m0->m_len - mac_hlen;
4790 		info_p->ip_hlen = ip_hlen;
4791 		__ATOMIC_INC(stats_p->bips_ip6);
4792 	}
4793 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4794 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4795 	    is_ipv4 ? '4' : '6',
4796 	    proto, ip_hlen, ip_pay_len, opt_len,
4797 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4798 	info_p->ip_pay_len = ip_pay_len;
4799 	info_p->ip_opt_len = opt_len;
4800 	info_p->ip_is_ipv4 = is_ipv4;
4801 done:
4802 	return error;
4803 }
4804 
4805 static int
4806 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4807     ip_packet_info_t info_p, struct bripstats * stats_p)
4808 {
4809 	int             error;
4810 	u_int           hlen;
4811 
4812 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4813 	if (error != 0) {
4814 		goto done;
4815 	}
4816 	if (info_p->ip_proto != IPPROTO_TCP) {
4817 		/* not a TCP frame, not an error, just a bad guess */
4818 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4819 		    "non-TCP (%d) IPv%c frame %d bytes",
4820 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4821 		    (*mp)->m_pkthdr.len);
4822 		goto done;
4823 	}
4824 	if (info_p->ip_is_fragmented) {
4825 		/* both TSO and IP fragmentation don't make sense */
4826 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4827 		    "fragmented TSO packet?");
4828 		__ATOMIC_INC(stats_p->bips_bad_tcp);
4829 		error = _EBADTCP;
4830 		goto done;
4831 	}
4832 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4833 	    info_p->ip_opt_len;
4834 	if ((*mp)->m_len < hlen) {
4835 		*mp = m_pullup(*mp, hlen);
4836 		if (*mp == NULL) {
4837 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4838 			    "m_pullup %d failed",
4839 			    hlen);
4840 			__ATOMIC_INC(stats_p->bips_bad_tcp);
4841 			error = _EBADTCP;
4842 			goto done;
4843 		}
4844 	}
4845 	info_p->ip_proto_hdr = info_p->ip_hdr + info_p->ip_hlen +
4846 	    info_p->ip_opt_len;
4847 done:
4848 	return error;
4849 }
4850 
4851 static inline void
4852 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4853 {
4854 	if (proto == IPPROTO_TCP) {
4855 		__ATOMIC_INC(stats_p->brcs_tcp_checksum);
4856 	} else {
4857 		__ATOMIC_INC(stats_p->brcs_udp_checksum);
4858 	}
4859 	return;
4860 }
4861 
/*
 * Bit flags classifying an ethertype; see ether_type_flag_get().
 * The _IP and _IP_ARP values are unions of the single-bit flags.
 */
#define ETHER_TYPE_FLAG_NONE    0x00    /* none of the types below */
#define ETHER_TYPE_FLAG_IPV4    0x01
#define ETHER_TYPE_FLAG_IPV6    0x02
#define ETHER_TYPE_FLAG_ARP     0x04
#define ETHER_TYPE_FLAG_IP      (ETHER_TYPE_FLAG_IPV4 | ETHER_TYPE_FLAG_IPV6)
#define ETHER_TYPE_FLAG_IP_ARP  (ETHER_TYPE_FLAG_IP | ETHER_TYPE_FLAG_ARP)
4868 
4869 static inline bool
4870 ether_type_flag_is_ip(ether_type_flag_t flag)
4871 {
4872 	return (flag & ETHER_TYPE_FLAG_IP) != 0;
4873 }
4874 
4875 static inline ether_type_flag_t
4876 ether_type_flag_get(uint16_t ether_type)
4877 {
4878 	ether_type_flag_t flag = ETHER_TYPE_FLAG_NONE;
4879 
4880 	switch (ether_type) {
4881 	case HTONS_ETHERTYPE_IP:
4882 		flag = ETHER_TYPE_FLAG_IPV4;
4883 		break;
4884 	case HTONS_ETHERTYPE_IPV6:
4885 		flag = ETHER_TYPE_FLAG_IPV6;
4886 		break;
4887 	case HTONS_ETHERTYPE_ARP:
4888 		flag = ETHER_TYPE_FLAG_ARP;
4889 		break;
4890 	default:
4891 		break;
4892 	}
4893 	return flag;
4894 }
4895 
4896 static bool
4897 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4898 {
4899 	uint16_t        ether_type;
4900 	bool            is_ip = TRUE;
4901 
4902 	ether_type = ntohs(eh->ether_type);
4903 	switch (ether_type) {
4904 	case ETHERTYPE_IP:
4905 		*is_ipv4 = TRUE;
4906 		break;
4907 	case ETHERTYPE_IPV6:
4908 		*is_ipv4 = FALSE;
4909 		break;
4910 	default:
4911 		is_ip = FALSE;
4912 		break;
4913 	}
4914 	return is_ip;
4915 }
4916 
4917 static errno_t
4918 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4919 {
4920 	struct brcsumstats *csum_stats_p;
4921 	struct ether_header     *eh;
4922 	errno_t         error = 0;
4923 	ip_packet_info  info;
4924 	bool            is_ipv4;
4925 	struct mbuf *   m;
4926 	u_int           mac_hlen = sizeof(struct ether_header);
4927 	uint16_t        sum;
4928 	bool            valid;
4929 
4930 	eh = mtod(*mp, struct ether_header *);
4931 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4932 		goto done;
4933 	}
4934 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4935 	    &stats_p->brms_out_ip);
4936 	m = *mp;
4937 	if (error != 0) {
4938 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4939 		    "bridge_get_ip_proto failed %d",
4940 		    error);
4941 		goto done;
4942 	}
4943 	if (is_ipv4) {
4944 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4945 			/* hardware offloaded IP header checksum */
4946 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4947 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4948 			    "IP checksum HW %svalid",
4949 			    valid ? "" : "in");
4950 			if (!valid) {
4951 				__ATOMIC_INC(stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum);
4952 				error = _EBADIPCHECKSUM;
4953 				goto done;
4954 			}
4955 			__ATOMIC_INC(stats_p->brms_out_cksum_good_hw.brcs_ip_checksum);
4956 		} else {
4957 			/* verify */
4958 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4959 			valid = (sum == 0);
4960 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4961 			    "IP checksum SW %svalid",
4962 			    valid ? "" : "in");
4963 			if (!valid) {
4964 				__ATOMIC_INC(stats_p->brms_out_cksum_bad.brcs_ip_checksum);
4965 				error = _EBADIPCHECKSUM;
4966 				goto done;
4967 			}
4968 			__ATOMIC_INC(stats_p->brms_out_cksum_good.brcs_ip_checksum);
4969 		}
4970 	}
4971 	if (info.ip_is_fragmented) {
4972 		/* can't verify checksum on fragmented packets */
4973 		goto done;
4974 	}
4975 	switch (info.ip_proto) {
4976 	case IPPROTO_TCP:
4977 		__ATOMIC_INC(stats_p->brms_out_ip.bips_tcp);
4978 		break;
4979 	case IPPROTO_UDP:
4980 		__ATOMIC_INC(stats_p->brms_out_ip.bips_udp);
4981 		break;
4982 	default:
4983 		goto done;
4984 	}
4985 	/* check for hardware offloaded UDP/TCP checksum */
4986 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4987 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4988 		/* checksum verified by hardware */
4989 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4990 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4991 		    "IPv%c %s checksum HW 0x%x %svalid",
4992 		    is_ipv4 ? '4' : '6',
4993 		    (info.ip_proto == IPPROTO_TCP)
4994 		    ? "TCP" : "UDP",
4995 		    m->m_pkthdr.csum_data,
4996 		    valid ? "" : "in" );
4997 		if (!valid) {
4998 			/* bad checksum */
4999 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
5000 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
5001 			    : _EBADTCPCHECKSUM;
5002 		} else {
5003 			/* good checksum */
5004 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
5005 		}
5006 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5007 		goto done;
5008 	}
5009 	/* adjust frame to skip mac-layer header */
5010 	_mbuf_adjust_pkthdr_and_data(m, mac_hlen);
5011 	if (is_ipv4) {
5012 		sum = inet_cksum(m, info.ip_proto,
5013 		    info.ip_hlen,
5014 		    info.ip_pay_len);
5015 	} else {
5016 		sum = inet6_cksum(m, info.ip_proto,
5017 		    info.ip_hlen + info.ip_opt_len,
5018 		    info.ip_pay_len - info.ip_opt_len);
5019 	}
5020 	valid = (sum == 0);
5021 	if (valid) {
5022 		csum_stats_p = &stats_p->brms_out_cksum_good;
5023 	} else {
5024 		csum_stats_p = &stats_p->brms_out_cksum_bad;
5025 		error = (info.ip_proto == IPPROTO_TCP)
5026 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
5027 	}
5028 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
5029 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5030 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
5031 	    is_ipv4 ? '4' : '6',
5032 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5033 	    valid ? "" : "in",
5034 	    sum, info.ip_hlen, info.ip_pay_len);
5035 	/* adjust frame back to start of mac-layer header */
5036 	_mbuf_adjust_pkthdr_and_data(m, -mac_hlen);
5037 
5038 done:
5039 	return error;
5040 }
5041 
5042 static mbuf_t
5043 bridge_verify_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * dbif,
5044     mbuf_t in_list, bool is_ipv4)
5045 {
5046 	mbuf_t          next_packet;
5047 	mblist          ret;
5048 
5049 	mblist_init(&ret);
5050 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
5051 		errno_t         error;
5052 
5053 		/* take packet out of the list */
5054 		next_packet = scan->m_nextpkt;
5055 		scan->m_nextpkt = NULL;
5056 
5057 		if (scan->m_pkthdr.rx_seg_cnt > 1) {
5058 			/* LRO packet, compute checksum on large packet */
5059 			scan = bridge_filter_checksum(bridge_ifp, dbif, scan,
5060 			    is_ipv4, false, true);
5061 		} else {
5062 			/* verify checksum */
5063 			error = bridge_verify_checksum(&scan, &dbif->bif_stats);
5064 			if (error != 0) {
5065 				if (scan != NULL) {
5066 					m_freem(scan);
5067 					scan = NULL;
5068 				}
5069 			}
5070 		}
5071 
5072 		/* add it back to the list */
5073 		if (scan != NULL) {
5074 			mblist_append(&ret, scan);
5075 		}
5076 	}
5077 	return ret.head;
5078 }
5079 
5080 
5081 static errno_t
5082 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
5083     struct ifbrmstats * stats_p)
5084 {
5085 	uint16_t *      csum_p;
5086 	errno_t         error = 0;
5087 	u_int           hlen;
5088 	struct mbuf *   m0 = *mp;
5089 	u_int           mac_hlen = sizeof(struct ether_header);
5090 	u_int           pkt_hdr_len;
5091 	struct tcphdr * tcp;
5092 	u_int           tcp_hlen;
5093 	struct udphdr * udp;
5094 
5095 	if (info_p->ip_is_ipv4) {
5096 		/* compute IP header checksum */
5097 		struct ip *ip = (struct ip *)info_p->ip_hdr;
5098 		ip->ip_sum = 0;
5099 		ip->ip_sum = inet_cksum(m0, 0, mac_hlen, info_p->ip_hlen);
5100 		__ATOMIC_INC(stats_p->brms_in_computed_cksum.brcs_ip_checksum);
5101 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5102 		    "IPv4 checksum 0x%x",
5103 		    ntohs(ip->ip_sum));
5104 	}
5105 	if (info_p->ip_is_fragmented) {
5106 		/* can't compute checksum on fragmented packets */
5107 		goto done;
5108 	}
5109 	pkt_hdr_len = m0->m_pkthdr.len;
5110 	switch (info_p->ip_proto) {
5111 	case IPPROTO_TCP:
5112 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
5113 		    + sizeof(struct tcphdr);
5114 		if (m0->m_len < hlen) {
5115 			*mp = m0 = m_pullup(m0, hlen);
5116 			if (m0 == NULL) {
5117 				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5118 				error = _EBADTCP;
5119 				goto done;
5120 			}
5121 		}
5122 		tcp = (struct tcphdr *)(info_p->ip_hdr + info_p->ip_hlen
5123 		    + info_p->ip_opt_len);
5124 		tcp_hlen = tcp->th_off << 2;
5125 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
5126 		if (hlen > pkt_hdr_len) {
5127 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5128 			    "bad tcp header length %u",
5129 			    tcp_hlen);
5130 			__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_tcp);
5131 			error = _EBADTCP;
5132 			goto done;
5133 		}
5134 		csum_p = &tcp->th_sum;
5135 		__ATOMIC_INC(stats_p->brms_in_ip.bips_tcp);
5136 		break;
5137 	case IPPROTO_UDP:
5138 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
5139 		if (m0->m_len < hlen) {
5140 			*mp = m0 = m_pullup(m0, hlen);
5141 			if (m0 == NULL) {
5142 				__ATOMIC_INC(stats_p->brms_in_ip.bips_bad_udp);
5143 				error = ENOBUFS;
5144 				goto done;
5145 			}
5146 		}
5147 		udp = (struct udphdr *)(info_p->ip_hdr + info_p->ip_hlen
5148 		    + info_p->ip_opt_len);
5149 		csum_p = &udp->uh_sum;
5150 		__ATOMIC_INC(stats_p->brms_in_ip.bips_udp);
5151 		break;
5152 	default:
5153 		/* not TCP or UDP */
5154 		goto done;
5155 	}
5156 	*csum_p = 0;
5157 	/* adjust frame to skip mac-layer header */
5158 	_mbuf_adjust_pkthdr_and_data(m0, mac_hlen);
5159 	if (info_p->ip_is_ipv4) {
5160 		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
5161 		    info_p->ip_pay_len);
5162 	} else {
5163 		*csum_p = inet6_cksum(m0, info_p->ip_proto,
5164 		    info_p->ip_hlen + info_p->ip_opt_len,
5165 		    info_p->ip_pay_len - info_p->ip_opt_len);
5166 	}
5167 	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
5168 		/* RFC 1122 4.1.3.4 */
5169 		*csum_p = 0xffff;
5170 	}
5171 	/* adjust frame back to start of mac-layer header */
5172 	_mbuf_adjust_pkthdr_and_data(m0, -mac_hlen);
5173 	proto_csum_stats_increment(info_p->ip_proto,
5174 	    &stats_p->brms_in_computed_cksum);
5175 
5176 	/* indicate that the checksum is good */
5177 	mbuf_set_csum_performed(m0, checksum_performed_all_good, 0xffff);
5178 
5179 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5180 	    "IPv%c %s set checksum 0x%x",
5181 	    info_p->ip_is_ipv4 ? '4' : '6',
5182 	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
5183 	    ntohs(*csum_p));
5184 done:
5185 	return error;
5186 }
5187 
5188 static inline void
5189 bridge_handle_checksum_op(ifnet_t src_ifp, ifnet_t dst_ifp,
5190     mbuf_t m, ChecksumOperation cksum_op)
5191 {
5192 	switch (cksum_op) {
5193 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
5194 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
5195 		break;
5196 	case CHECKSUM_OPERATION_FINALIZE:
5197 		/* the checksum might not be correct, finalize now */
5198 		VERIFY(dst_ifp != NULL);
5199 		bridge_finalize_cksum(dst_ifp, m);
5200 		break;
5201 	case CHECKSUM_OPERATION_COMPUTE:
5202 		VERIFY(dst_ifp != NULL && src_ifp != NULL);
5203 		bridge_compute_cksum(src_ifp, dst_ifp, m);
5204 		break;
5205 	default:
5206 		break;
5207 	}
5208 	return;
5209 }
5210 
5211 static uint32_t
5212 get_if_tso_mtu(struct ifnet * ifp, bool is_ipv4)
5213 {
5214 	uint32_t tso_mtu;
5215 
5216 	tso_mtu = is_ipv4 ? ifp->if_tso_v4_mtu : ifp->if_tso_v6_mtu;
5217 	if (tso_mtu == 0) {
5218 		tso_mtu = IP_MAXPACKET;
5219 	}
5220 
5221 #if DEBUG || DEVELOPMENT
5222 #define REDUCED_TSO_MTU         (16 * 1024)
5223 	if (if_bridge_reduce_tso_mtu != 0 && tso_mtu > REDUCED_TSO_MTU) {
5224 		tso_mtu = REDUCED_TSO_MTU;
5225 	}
5226 #endif /* DEBUG || DEVELOPMENT */
5227 	return tso_mtu;
5228 }
5229 
5230 /*
5231  * tso_hwassist:
5232  * - determine whether the destination interface supports TSO offload
5233  * - if the packet is already marked for offload and the hardware supports
5234  *   it, just allow the packet to continue on
5235  * - if not, parse the packet headers to verify that this is a large TCP
5236  *   packet requiring segmentation; if the hardware doesn't support it
5237  *   set need_sw_tso; otherwise, mark the packet for TSO offload
5238  */
5239 static int
5240 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
5241     int * mss_p, bool * need_gso, bool * is_large_tcp)
5242 {
5243 	uint32_t                csum_flags;
5244 	int                     error = 0;
5245 	ip_packet_info          info;
5246 	u_int32_t               if_csum;
5247 	u_int32_t               if_tso;
5248 	u_int32_t               mbuf_tso;
5249 	int                     mss = *mss_p;
5250 	uint8_t                 seg_cnt = 0;
5251 	bool                    supports_cksum = false;
5252 	uint32_t                pkt_mtu;
5253 	struct bripstats        stats;
5254 
5255 	*need_gso = false;
5256 	*is_large_tcp = false;
5257 	if (is_ipv4) {
5258 		/*
5259 		 * Enable both TCP and IP offload if the hardware supports it.
5260 		 * If the hardware doesn't support TCP offload, supports_cksum
5261 		 * will be false so we won't set either offload.
5262 		 */
5263 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
5264 		supports_cksum = (if_csum & CSUM_TCP) != 0;
5265 		if_tso = IFNET_TSO_IPV4;
5266 		mbuf_tso = CSUM_TSO_IPV4;
5267 	} else {
5268 		if_csum = (ifp->if_hwassist & CSUM_TCPIPV6);
5269 		supports_cksum = (if_csum & CSUM_TCPIPV6) != 0;
5270 		if_tso = IFNET_TSO_IPV6;
5271 		mbuf_tso = CSUM_TSO_IPV6;
5272 	}
5273 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5274 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
5275 	    ifp->if_xname, supports_cksum ? "" : " not",
5276 	    ifp->if_hwassist, if_csum);
5277 
5278 	/* verify that this is a large TCP frame */
5279 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
5280 	    &info, &stats);
5281 	if (error != 0) {
5282 		/* bad packet */
5283 		goto done;
5284 	}
5285 	if (info.ip_proto_hdr == NULL) {
5286 		/* not a TCP packet */
5287 		goto done;
5288 	}
5289 	pkt_mtu = info.ip_hlen + info.ip_pay_len + info.ip_opt_len;
5290 	if (mss == 0) {
5291 		/* check for LRO */
5292 		seg_cnt = (*mp)->m_pkthdr.rx_seg_cnt;
5293 		if (seg_cnt == 1 || (seg_cnt == 0 && pkt_mtu <= ifp->if_mtu)) {
5294 			/* not actually a large packet */
5295 			goto done;
5296 		}
5297 	}
5298 	if (mss == 0) {
5299 		uint32_t            hdr_len;
5300 		struct tcphdr *     tcp;
5301 
5302 		tcp = (struct tcphdr *)info.ip_proto_hdr;
5303 		hdr_len = info.ip_hlen + info.ip_opt_len + (tcp->th_off << 2);
5304 
5305 		/* packet isn't marked, mark it now */
5306 		if (seg_cnt != 0) {
5307 			uint32_t    len;
5308 
5309 			/* approximate the MSS using the LRO seg cnt */
5310 			len = mbuf_pkthdr_len(*mp) - hdr_len - ETHER_HDR_LEN;
5311 			mss = len / seg_cnt;
5312 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5313 			    "%s: mss %d = len %d / seg cnt %d",
5314 			    ifp->if_xname, mss, len, seg_cnt);
5315 			if (mss <= 0) {
5316 				/* unexpected value */
5317 				mss = 0;
5318 				goto done;
5319 			}
5320 		} else {
5321 			mss = ifp->if_mtu - hdr_len
5322 			    - if_bridge_tso_reduce_mss_tx;
5323 			assert(mss > 0);
5324 		}
5325 		csum_flags = mbuf_tso;
5326 		if (supports_cksum) {
5327 			csum_flags |= if_csum;
5328 		}
5329 		(*mp)->m_pkthdr.tso_segsz = mss;
5330 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
5331 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5332 	}
5333 	*is_large_tcp = true;
5334 	(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
5335 	if ((ifp->if_hwassist & if_tso) == 0) {
5336 		/* need gso if no hardware support */
5337 		*need_gso = true;
5338 	} else {
5339 		uint32_t                tso_mtu = 0;
5340 
5341 		tso_mtu = get_if_tso_mtu(ifp, is_ipv4);
5342 		if (pkt_mtu > tso_mtu) {
5343 			/* need gso if tso_mtu too small */
5344 			*need_gso = true;
5345 		}
5346 	}
5347 done:
5348 	*mss_p = mss;
5349 	return error;
5350 }
5351 
/*
 * bridge_enqueue:
 *
 *	Enqueue a packet list on a bridge member interface.
 *
 *	Each packet of `in_list` (linked through m_nextpkt) is unlinked,
 *	marked M_PROTO1, and then either:
 *	- dropped, if tso_hwassist() reports an error,
 *	- segmented with gso_tcp_transmit(), if software segmentation is
 *	  required for `dst_if`, or
 *	- passed through bridge_handle_checksum_op() and queued as is.
 *	The resulting list is handed to bridge_transmit() in one call.
 *
 *	bridge_ifp	bridge interface; output statistics are charged to it
 *	src_if		interface the packets came from; may be NULL unless
 *			the checksum operation is CHECKSUM_OPERATION_COMPUTE
 *	dst_if		member interface to transmit on (must not be NULL)
 *	etypef		ether type classification shared by the whole list
 *	orig_cksum_op	checksum operation to apply to packets sent unsegmented
 *	direction	pkt_direction_RX enables the LRO (rx_seg_cnt) check
 *
 *	Returns the bridge_transmit() result, or 0 if nothing was transmitted.
 *	Per-packet drops are only reflected in the output error statistics.
 */
static int
bridge_enqueue(ifnet_t bridge_ifp, ifnet_t src_if, ifnet_t dst_if,
    ether_type_flag_t etypef, mbuf_t in_list, ChecksumOperation orig_cksum_op,
    pkt_direction_t direction)
{
	int             enqueue_error = 0;
	mbuf_t          next_packet;
	uint32_t        out_errors = 0;
	mblist          out_list;

	VERIFY(dst_if != NULL);

	mblist_init(&out_list);
	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
		bool            check_gso = false;
		ChecksumOperation cksum_op = orig_cksum_op;
		errno_t         error = 0;
		bool            is_ipv4 = false;
		int             len;
		int             mss = 0;
		bool            need_gso = false;

		scan->m_flags |= M_PROTO1; /* set to avoid loops */
		/* detach the packet; next_packet keeps the iteration going */
		next_packet = scan->m_nextpkt;
		scan->m_nextpkt = NULL;
		len = mbuf_pkthdr_len(scan);
		is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
		mss = _mbuf_get_tso_mss(scan);
		if (mss != 0) {
			/* packet is marked for segmentation */
			check_gso = true;
		} else if (direction == pkt_direction_RX &&
		    scan->m_pkthdr.rx_seg_cnt != 0) {
			/* LRO packet */
			check_gso = true;
		} else if (ether_type_flag_is_ip(etypef) &&
		    len > (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
			/*
			 * Need to segment the packet if it is a large frame
			 * and the destination interface does not support TSO.
			 *
			 * Note that with trailers, it's possible for a packet to
			 * be large but not actually require segmentation.
			 */
			check_gso = true;
		}
		if (check_gso) {
			bool    is_large_tcp = false;

			/*
			 * tso_hwassist() takes &scan, so scan must be
			 * re-checked afterwards (see the NULL test below).
			 */
			error = tso_hwassist(&scan, is_ipv4,
			    dst_if, sizeof(struct ether_header), &mss,
			    &need_gso, &is_large_tcp);
			/*
			 * Do not clear the offload flags on a large TCP
			 * packet: the CLEAR_OFFLOAD request is downgraded to
			 * a no-op.
			 */
			if (is_large_tcp &&
			    cksum_op == CHECKSUM_OPERATION_CLEAR_OFFLOAD) {
				cksum_op = CHECKSUM_OPERATION_NONE;
			}
		}
		if (error != 0) {
			/* bad packet: drop it and count an output error */
			if (scan != NULL) {
				m_freem(scan);
				scan = NULL;
			}
			out_errors++;
		} else if (need_gso) {
			int             mac_hlen = sizeof(struct ether_header);
			mblist          segs;

			/* segment packets, add to list */
			/*
			 * NOTE(review): scan is not referenced again after
			 * this call; presumably gso_tcp_transmit() consumes
			 * it even on failure -- confirm.
			 */
			segs = gso_tcp_transmit(dst_if, scan, mac_hlen,
			    is_ipv4);
			if (segs.head != NULL) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
				    "%s (%s) append gso #segs %u bytes %u",
				    bridge_ifp->if_xname,
				    dst_if->if_xname,
				    segs.count, segs.bytes);
				mblist_append_list(&out_list, segs);
			} else {
				out_errors++;
			}
		} else {
			/* send as is, after applying the checksum operation */
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s (%s) append %d bytes mss %d op %d",
			    bridge_ifp->if_xname,
			    dst_if->if_xname,
			    len, mss, cksum_op);
			bridge_handle_checksum_op(src_if, dst_if,
			    scan, cksum_op);
			mblist_append(&out_list, scan);
		}
	}
	/* transmit everything that survived in a single call */
	if (out_list.head != NULL) {
		enqueue_error = bridge_transmit(dst_if, out_list.head);
		if (enqueue_error != 0) {
			out_errors++;
		}
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
		    "%s (%s) bridge_transmit packets %u bytes %u error %d",
		    bridge_ifp->if_xname,
		    dst_if->if_xname,
		    out_list.count, out_list.bytes, enqueue_error);
	}
	/* account packets, bytes, and errors against the bridge interface */
	if (out_list.count != 0 || out_errors != 0) {
		ifnet_stat_increment_out(bridge_ifp, out_list.count,
		    out_list.bytes, out_errors);
	}
	return enqueue_error;
}
5466 
5467 /*
5468  * bridge_member_output:
5469  *
5470  *	Send output from a bridge member interface.  This
5471  *	performs the bridging function for locally originated
5472  *	packets.
5473  *
5474  *	The mbuf has the Ethernet header already attached.
5475  */
5476 static errno_t
5477 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5478 {
5479 	struct bridge_iflist * bif = NULL;
5480 	ifnet_t bridge_ifp;
5481 	struct ether_header *eh;
5482 	ether_type_flag_t etypef;
5483 	struct ifnet *dst_if = NULL;
5484 	uint16_t vlan;
5485 	struct bridge_iflist *mac_nat_bif;
5486 	ifnet_t mac_nat_ifp;
5487 	mbuf_t m = *data;
5488 
5489 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5490 	    "ifp %s", ifp->if_xname);
5491 	if (m->m_len < ETHER_HDR_LEN) {
5492 		m = m_pullup(m, ETHER_HDR_LEN);
5493 		if (m == NULL) {
5494 			*data = NULL;
5495 			return EJUSTRETURN;
5496 		}
5497 	}
5498 
5499 	BRIDGE_LOCK(sc);
5500 	mac_nat_bif = sc->sc_mac_nat_bif;
5501 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5502 	if (mac_nat_ifp == ifp) {
5503 		/* record the IP address used by the MAC NAT interface */
5504 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5505 		m = *data;
5506 		if (m == NULL) {
5507 			/* packet was deallocated */
5508 			BRIDGE_UNLOCK(sc);
5509 			return EJUSTRETURN;
5510 		}
5511 	}
5512 	bridge_ifp = sc->sc_ifp;
5513 	eh = mtod(m, struct ether_header *);
5514 	vlan = VLANTAGOF(m);
5515 	etypef = ether_type_flag_get(eh->ether_type);
5516 
5517 	/*
5518 	 * APPLE MODIFICATION
5519 	 * If the packet is an 802.1X ethertype, then only send on the
5520 	 * original output interface.
5521 	 */
5522 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5523 		dst_if = ifp;
5524 		goto sendunicast;
5525 	}
5526 
5527 	/*
5528 	 * If bridge is down, but the original output interface is up,
5529 	 * go ahead and send out that interface.  Otherwise, the packet
5530 	 * is dropped below.
5531 	 */
5532 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5533 		dst_if = ifp;
5534 		goto sendunicast;
5535 	}
5536 
5537 	/*
5538 	 * If the packet is a multicast, or we don't know a better way to
5539 	 * get there, send to all interfaces.
5540 	 */
5541 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5542 		dst_if = NULL;
5543 	} else {
5544 		bif = bridge_rtlookup_bif(sc, eh->ether_dhost, vlan);
5545 		if (bif != NULL) {
5546 			dst_if = bif->bif_ifp;
5547 		}
5548 	}
5549 	if (dst_if == NULL) {
5550 		struct mbuf *mc;
5551 		errno_t error;
5552 
5553 
5554 		bridge_span(sc, etypef, m);
5555 
5556 		BRIDGE_LOCK2REF(sc, error);
5557 		if (error != 0) {
5558 			m_freem(m);
5559 			return EJUSTRETURN;
5560 		}
5561 
5562 		/*
5563 		 * Duplicate and send the packet across all member interfaces
5564 		 * except the originating interface.
5565 		 */
5566 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5567 			dst_if = bif->bif_ifp;
5568 			if (dst_if == ifp) {
5569 				/* skip the originating interface */
5570 				continue;
5571 			}
5572 			/* skip interface with inactive link status */
5573 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5574 				continue;
5575 			}
5576 
5577 			/* skip interface that isn't running */
5578 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5579 				continue;
5580 			}
5581 			/*
5582 			 * If the interface is participating in spanning
5583 			 * tree, make sure the port is in a state that
5584 			 * allows forwarding.
5585 			 */
5586 			if ((bif->bif_ifflags & IFBIF_STP) &&
5587 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5588 				continue;
5589 			}
5590 			/*
5591 			 * If the destination is the MAC NAT interface,
5592 			 * skip sending the packet. The packet can't be sent
5593 			 * if the source MAC is incorrect.
5594 			 */
5595 			if (dst_if == mac_nat_ifp) {
5596 				continue;
5597 			}
5598 
5599 			/* make a deep copy to send on this member interface */
5600 			mc = m_dup(m, M_DONTWAIT);
5601 			if (mc == NULL) {
5602 				(void)ifnet_stat_increment_out(bridge_ifp,
5603 				    0, 0, 1);
5604 				continue;
5605 			}
5606 			(void)bridge_enqueue(bridge_ifp, ifp, dst_if, etypef,
5607 			    mc, CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5608 		}
5609 		BRIDGE_UNREF(sc);
5610 
5611 		if ((ifp->if_flags & IFF_RUNNING) == 0) {
5612 			m_freem(m);
5613 			return EJUSTRETURN;
5614 		}
5615 		/* allow packet to continue on the originating interface */
5616 		return 0;
5617 	}
5618 
5619 sendunicast:
5620 	/*
5621 	 * XXX Spanning tree consideration here?
5622 	 */
5623 
5624 	bridge_span(sc, etypef, m);
5625 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5626 		m_freem(m);
5627 		BRIDGE_UNLOCK(sc);
5628 		return EJUSTRETURN;
5629 	}
5630 
5631 	BRIDGE_UNLOCK(sc);
5632 	if (dst_if == ifp) {
5633 		/* allow packet to continue on the originating interface */
5634 		return 0;
5635 	}
5636 	if (dst_if != mac_nat_ifp) {
5637 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, etypef, m,
5638 		    CHECKSUM_OPERATION_COMPUTE, pkt_direction_TX);
5639 	} else {
5640 		/*
5641 		 * This is not the original output interface
5642 		 * and the destination is the MAC NAT interface.
5643 		 * Drop the packet because the packet can't be sent
5644 		 * if the source MAC is incorrect.
5645 		 */
5646 		m_freem(m);
5647 	}
5648 	return EJUSTRETURN;
5649 }
5650 
5651 /*
5652  * Output callback.
5653  *
5654  * This routine is called externally from above only when if_bridge_txstart
5655  * is disabled; otherwise it is called internally by bridge_start().
5656  */
5657 static int
5658 bridge_output(struct ifnet *ifp, struct mbuf *m)
5659 {
5660 	struct bridge_iflist *bif;
5661 	struct bridge_softc * __single sc = ifnet_softc(ifp);
5662 	struct ether_header *eh;
5663 	ether_type_flag_t etypef;
5664 	struct ifnet *dst_if = NULL;
5665 	int error = 0;
5666 
5667 	eh = mtod(m, struct ether_header *);
5668 	etypef = ether_type_flag_get(eh->ether_type);
5669 	BRIDGE_LOCK(sc);
5670 
5671 	if (!IS_BCAST_MCAST(m)) {
5672 		bif = bridge_rtlookup_bif(sc, eh->ether_dhost, 0);
5673 		if (bif != NULL) {
5674 			dst_if = bif->bif_ifp;
5675 		}
5676 	}
5677 
5678 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5679 
5680 	BRIDGE_BPF_TAP_OUT(ifp, m);
5681 
5682 	if (dst_if == NULL) {
5683 		/* callee will unlock */
5684 		bridge_broadcast(sc, NULL, etypef, m);
5685 	} else {
5686 		ifnet_t bridge_ifp;
5687 
5688 		bridge_ifp = sc->sc_ifp;
5689 		BRIDGE_UNLOCK(sc);
5690 
5691 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, etypef, m,
5692 		    CHECKSUM_OPERATION_FINALIZE, pkt_direction_TX);
5693 	}
5694 
5695 	return error;
5696 }
5697 
/*
 * bridge_finalize_cksum:
 *
 *	Finalize the checksums of an outbound IPv4/IPv6 frame `m` (which
 *	starts with its Ethernet header) for transmission on `ifp`.
 *	Checksum work requested in m_pkthdr.csum_flags that is not covered
 *	by the interface's if_hwassist capabilities is passed to
 *	in_finalize_cksum()/in6_finalize_cksum() to be done in software.
 *	Frames that are not IP are left untouched.
 */
static void
bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
{
	struct ether_header *eh;
	bool is_ipv4;
	uint32_t sw_csum, hwcap;
	uint32_t did_sw;
	uint32_t csum_flags;

	eh = mtod(m, struct ether_header *);
	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
		return;
	}

	/* do in software what the hardware cannot */
	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
	/* snapshot of the flags before finalizing; only used for logging */
	csum_flags = m->m_pkthdr.csum_flags;
	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
	sw_csum &= IF_HWASSIST_CSUM_MASK;

	if (is_ipv4) {
		/*
		 * The interface does partial checksum and the delayed data
		 * checksum is not already slated for software.
		 */
		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
				/*
				 * TCP: describe the partial checksum by its
				 * start and stuff offsets. Note that `start`
				 * uses sizeof(struct ip), i.e. a header
				 * without IP options.
				 */
				uint16_t start =
				    sizeof(*eh) + sizeof(struct ip);
				uint16_t ulpoff =
				    m->m_pkthdr.csum_data & 0xffff;
				m->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PARTIAL);
				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
				m->m_pkthdr.csum_tx_start = start;
			} else {
				/* not TCP: do the delayed checksum in software */
				sw_csum |= (CSUM_DELAY_DATA &
				    m->m_pkthdr.csum_flags);
			}
		}
		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
	} else {
		/* IPv6 counterpart of the IPv4 logic above */
		if ((hwcap & CSUM_PARTIAL) &&
		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
				/*
				 * `start` uses sizeof(struct ip6_hdr), i.e.
				 * no extension headers.
				 */
				uint16_t start =
				    sizeof(*eh) + sizeof(struct ip6_hdr);
				uint16_t ulpoff =
				    m->m_pkthdr.csum_data & 0xffff;
				m->m_pkthdr.csum_flags |=
				    (CSUM_DATA_VALID | CSUM_PARTIAL);
				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
				m->m_pkthdr.csum_tx_start = start;
			} else {
				sw_csum |= (CSUM_DELAY_IPV6_DATA &
				    m->m_pkthdr.csum_flags);
			}
		}
		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
	}
	/* did_sw is only reported in the debug log */
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
	    ifp->if_xname, csum_flags, hwcap, sw_csum,
	    did_sw, m->m_pkthdr.csum_flags);
}
5761 
5762 /*
5763  * bridge_start:
5764  *
5765  *	Start output on a bridge.
5766  *
5767  * This routine is invoked by the start worker thread; because we never call
5768  * it directly, there is no need do deploy any serialization mechanism other
5769  * than what's already used by the worker thread, i.e. this is already single
5770  * threaded.
5771  *
5772  * This routine is called only when if_bridge_txstart is enabled.
5773  */
5774 static void
5775 bridge_start(struct ifnet *ifp)
5776 {
5777 	mbuf_ref_t m;
5778 
5779 	for (;;) {
5780 		if (ifnet_dequeue(ifp, &m) != 0) {
5781 			break;
5782 		}
5783 
5784 		(void) bridge_output(ifp, m);
5785 	}
5786 }
5787 
5788 static void
5789 prepare_input_packet(ifnet_t ifp, mbuf_t m)
5790 {
5791 	mbuf_pkthdr_setrcvif(m, ifp);
5792 	mbuf_pkthdr_setheader(m, mtod(m, void *));
5793 	/* adjust frame to skip mac-layer header */
5794 	_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
5795 }
5796 
5797 static void
5798 mark_tso_checksum_ok(mbuf_t m)
5799 {
5800 	if (_mbuf_get_tso_mss(m) != 0 ||
5801 	    (m->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
5802 		mbuf_set_csum_performed(m, checksum_performed_all_good, 0xffff);
5803 	}
5804 }
5805 
5806 static void
5807 inject_input_packet_list(ifnet_t ifp, mbuf_t in_list, bool m_proto1)
5808 {
5809 	for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5810 		/* mark the packets as arriving on the interface */
5811 		BRIDGE_BPF_TAP_IN(ifp, scan);
5812 		if (m_proto1) {
5813 			scan->m_flags |= M_PROTO1; /* set to avoid loops */
5814 		}
5815 		prepare_input_packet(ifp, scan);
5816 		mark_tso_checksum_ok(scan);
5817 	}
5818 	dlil_input_packet_list(ifp, in_list);
5819 	return;
5820 }
5821 
5822 static void
5823 adjust_input_packet_list(mbuf_t in_list)
5824 {
5825 	for (mbuf_t scan = in_list; scan != NULL; scan = scan->m_nextpkt) {
5826 		mbuf_pkthdr_setheader(scan, mtod(scan, void *));
5827 		_mbuf_adjust_pkthdr_and_data(scan, ETHER_HDR_LEN);
5828 	}
5829 }
5830 
5831 static bool
5832 in_addr_is_ours(struct in_addr ip)
5833 {
5834 	struct in_ifaddr *ia;
5835 	bool             ours = false;
5836 
5837 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5838 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5839 		if (ia->ia_addr.sin_addr.s_addr == ip.s_addr) {
5840 			ours = true;
5841 			break;
5842 		}
5843 	}
5844 	lck_rw_done(&in_ifaddr_rwlock);
5845 	return ours;
5846 }
5847 
5848 static bool
5849 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5850 {
5851 	struct in6_addr         dst_ip;
5852 	struct in6_ifaddr       *ia6;
5853 	bool                    ours = false;
5854 
5855 	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5856 		/* need to embed scope ID for comparison */
5857 		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5858 		dst_ip.s6_addr16[1] = htons(ifscope);
5859 		ip6_p = &dst_ip;
5860 	}
5861 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5862 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5863 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5864 		    ia6->ia_addr.sin6_scope_id, ifscope)) {
5865 			ours = true;
5866 			break;
5867 		}
5868 	}
5869 	lck_rw_done(&in6_ifaddr_rwlock);
5870 	return ours;
5871 }
5872 
5873 static bool
5874 ip_packet_info_dst_is_our_ip(ip_packet_info_t info_p, int index)
5875 {
5876 	/* if the destination is our IP address, don't segment */
5877 	bool    our_ip = false;
5878 
5879 	if (info_p->ip_is_ipv4) {
5880 		struct ip *     hdr;
5881 		struct in_addr  dst_ip;
5882 
5883 		hdr = (struct ip *)(info_p->ip_hdr);
5884 		bcopy(&hdr->ip_dst, &dst_ip, sizeof(dst_ip));
5885 		our_ip = in_addr_is_ours(dst_ip);
5886 	} else {
5887 		struct ip6_hdr *        hdr;
5888 
5889 		hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5890 		our_ip = in6_addr_is_ours(&hdr->ip6_dst, index);
5891 	}
5892 	return our_ip;
5893 }
5894 
/*
 * ip_addr:
 *	Storage for either an IPv4 or an IPv6 address. The union carries no
 *	discriminator; users pass a separate is_ipv4 flag to select the
 *	member.
 */
typedef union {
	struct in_addr  ip;     /* IPv4 address */
	struct in6_addr ip6;    /* IPv6 address */
} ip_addr, *ip_addr_t;
5899 
5900 static void
5901 ip_packet_info_copy_dst_ip_addr(ip_packet_info_t info_p, ip_addr_t ipaddr)
5902 {
5903 	if (info_p->ip_is_ipv4) {
5904 		struct ip *     hdr;
5905 
5906 		hdr = (struct ip *)(info_p->ip_hdr);
5907 		bcopy(&hdr->ip_dst, &ipaddr->ip, sizeof(ipaddr->ip));
5908 	} else {
5909 		struct ip6_hdr *        hdr;
5910 
5911 		hdr = (struct ip6_hdr *)(info_p->ip_hdr);
5912 		bcopy(&hdr->ip6_dst, &ipaddr->ip6, sizeof(ipaddr->ip6));
5913 	}
5914 }
5915 
5916 static bool
5917 ip_addr_are_equal(ip_addr_t addr1, ip_addr_t addr2, bool is_ipv4)
5918 {
5919 	bool    equal;
5920 
5921 	if (is_ipv4) {
5922 		equal = addr1->ip.s_addr == addr2->ip.s_addr;
5923 	} else {
5924 		equal = IN6_ARE_ADDR_EQUAL(&addr1->ip6, &addr2->ip6);
5925 	}
5926 	return equal;
5927 }
5928 
5929 static bool
5930 ip_addr_is_ours(ip_addr_t ipaddr, int index, bool is_ipv4)
5931 {
5932 	bool    our_ip;
5933 
5934 	if (is_ipv4) {
5935 		our_ip = in_addr_is_ours(ipaddr->ip);
5936 	} else {
5937 		our_ip = in6_addr_is_ours(&ipaddr->ip6, index);
5938 	}
5939 	return our_ip;
5940 }
5941 
/*
 * bridge_interface_input_list:
 *
 *	Deliver a list of IPv4 or IPv6 frames as input on the bridge
 *	interface itself.
 *
 *	If IP forwarding is disabled for the address family (or `etypef` is
 *	neither IPv4 nor IPv6), the list is injected unmodified. Otherwise
 *	each packet is examined:
 *	- packets addressed to one of our IP addresses are passed up as is
 *	- other TCP packets that are marked for TSO/LRO, or that exceed the
 *	  bridge MTU, are segmented in software
 *	- packets from a virtio member that still carry a checksum offload
 *	  request get their checksum computed
 *
 *	bridge_ifp	the bridge interface
 *	etypef		ether type classification shared by the whole list
 *	list		packets to deliver (linked through m_nextpkt)
 *	bif_uses_virtio	whether the source member uses virtio; selects which
 *			segmentation test applies and enables checksum
 *			computation
 */
static void
bridge_interface_input_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
    mblist list, bool bif_uses_virtio)
{
	uint32_t        in_errors = 0;
	bool            is_ipv4;
	mblist          in_list;
	ip_addr         last_ip;
	bool            last_ip_ours = false;
	bool            last_ip_valid = false;
	u_int           mac_hlen;
	bool            may_forward = false;
	mbuf_t          next_packet;

	/*
	 * is_ipv4 is only assigned for the two IP cases; for any other
	 * etypef may_forward stays false and is_ipv4 is never read.
	 */
	switch (etypef) {
	case ETHER_TYPE_FLAG_IPV4:
		is_ipv4 = true;
		may_forward = (ipforwarding != 0);
		break;
	case ETHER_TYPE_FLAG_IPV6:
		is_ipv4 = false;
		may_forward = (ip6_forwarding != 0);
		break;
	}
	if (!may_forward) {
		/* no forwarding: pass the list up untouched */
		in_list = list;
		goto done;
	}

	mblist_init(&in_list);
	mac_hlen = sizeof(struct ether_header);
	bzero(&last_ip, sizeof(last_ip));
	for (mbuf_ref_t scan = list.head; scan != NULL; scan = next_packet) {
		int             error;
		ip_packet_info  info;
		bool            ip_ours;
		struct ifbrmstats stats; /* XXX should really be accounted */
		ip_addr         this_ip;

		/* take it out of the list */
		next_packet = scan->m_nextpkt;
		scan->m_nextpkt = NULL;

		/* check for TCP packet and get IP header */
		error = bridge_get_tcp_header(&scan, mac_hlen, is_ipv4,
		    &info, &stats.brms_in_ip);
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
			    "%s bridge_get_tcp_header failed %d",
			    bridge_ifp->if_xname, error);
			/* scan was passed by reference and may be NULL now */
			if (scan != NULL) {
				m_freem(scan);
				scan = NULL;
			}
			in_errors++;
			continue;
		}
		/*
		 * Consecutive packets often share a destination: remember
		 * the last address and its lookup result to skip the lookup.
		 */
		ip_packet_info_copy_dst_ip_addr(&info, &this_ip);
		if (last_ip_valid &&
		    ip_addr_are_equal(&last_ip, &this_ip, is_ipv4)) {
			/* use cached result */
			ip_ours = last_ip_ours;
		} else {
			ip_ours = ip_addr_is_ours(&this_ip,
			    bridge_ifp->if_index,
			    is_ipv4);
			/* cache the result */
			last_ip_valid = true;
			last_ip_ours = ip_ours;
			last_ip = this_ip;
		}

		/* if the packet is destined to us, just send it up */
		if (ip_ours) {
			mblist_append(&in_list, scan);
			continue;
		}
		/*
		 * If this is a TCP packet that's marked for TSO or LRO, or
		 * we think it's a large packet, segment it.
		 */
		if (info.ip_proto_hdr != NULL &&
		    ((bif_uses_virtio && _mbuf_get_tso_mss(scan) != 0) ||
		    (!bif_uses_virtio &&
		    (scan->m_pkthdr.rx_seg_cnt > 1 ||
		    (mbuf_pkthdr_len(scan) >
		    (bridge_ifp->if_mtu + ETHER_HDR_LEN)))))) {
			mblist          seg;

			/*
			 * NOTE(review): scan is not freed on the failure
			 * path below; presumably gso_tcp_with_info()
			 * consumes it -- confirm.
			 */
			seg = gso_tcp_with_info(bridge_ifp, scan, &info,
			    mac_hlen, is_ipv4, false);
			if (seg.head == NULL) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
				    "gso_tcp returned no packets");
				in_errors++;
				continue;
			}
			if (seg.count > 1) {
				/* packet was segmented+checksummed */
				mblist_append_list(&in_list, seg);
				continue;
			}
			/* there's just one packet, no segmentation */
			scan = seg.head;
		}
		/* need checksum if it's marked for checksum offload */
		if (bif_uses_virtio &&
		    (scan->m_pkthdr.csum_flags & checksum_request_flags) != 0) {
			error = bridge_offload_checksum(&scan, &info, &stats);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s bridge_offload_checksum failed %d",
				    bridge_ifp->if_xname, error);
				if (scan != NULL) {
					m_freem(scan);
					scan = NULL;
				}
				in_errors++;
				continue;
			}
		}
		mblist_append(&in_list, scan);
	}

done:
	if (in_list.head != NULL) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s packets %d bytes %d",
		    bridge_ifp->if_xname,
		    in_list.count, in_list.bytes);
		/* Mark the packets as arriving on the bridge interface */
		inject_input_packet_list(bridge_ifp, in_list.head, false);
		ifnet_stat_increment_in(bridge_ifp, in_list.count,
		    in_list.bytes, in_errors);
	} else if (in_errors != 0) {
		/* everything was dropped; still account the errors */
		ifnet_stat_increment_in(bridge_ifp, 0, 0, in_errors);
	}
	return;
}
6081 
/*
 * bridge_broadcast:
 *
 *	Send a frame to all interfaces that are members of
 *	the bridge, except for the one on which the packet
 *	arrived.
 *
 *	sc	the bridge; must be locked by the caller
 *	sbif	member the frame arrived on, or NULL when the frame
 *		originates from the bridge interface itself
 *	etypef	ether type classification of the frame
 *	m	the frame; always consumed (sent on the last member in the
 *		list, or freed)
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ether_type_flag_t etypef, mbuf_t m)
{
	ifnet_t bridge_ifp;
	struct bridge_iflist *dbif;
	struct ifnet * src_if;
	mbuf_ref_t mc;
	struct mbuf *mc_in;
	int error = 0, used = 0;
	ChecksumOperation cksum_op;
	struct mac_nat_record mnr;
	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
	boolean_t translate_mac = FALSE;
	uint32_t sc_filter_flags;
	bool is_bcast_mcast;

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		src_if = sbif->bif_ifp;
		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			/* get the translation record */
			translate_mac
			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
			if (m == NULL) {
				/* packet was deallocated */
				BRIDGE_UNLOCK(sc);
				return;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast().
		 */
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		src_if = NULL;
	}

	/*
	 * NOTE(review): on failure we return without an explicit unlock;
	 * presumably BRIDGE_LOCK2REF drops the lock in both cases -- confirm.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		m_freem(m);
		return;
	}
	is_bcast_mcast = IS_BCAST_MCAST(m);
	sc_filter_flags = sc->sc_filter_flags;
	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		ifnet_t         dst_if;

		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip members whose spanning tree state is discarding */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/* unicast frames only go to members with IFBIF_DISCOVER set */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    !is_bcast_mcast) {
			continue;
		}

		/* skip interface that isn't running */
		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		/* skip interface with inactive link status */
		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
			continue;
		}

		/*
		 * The last member in the list gets the original mbuf;
		 * every other member gets a deep copy.
		 */
		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			mc = m;
			used = 1;
		} else {
			mc = m_dup(m, M_DONTWAIT);
			if (mc == NULL) {
				(void) ifnet_stat_increment_out(bridge_ifp,
				    0, 0, 1);
				continue;
			}
		}

		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (sbif != NULL && is_bcast_mcast &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			mc_in = m_dup(mc, M_DONTWAIT);
			/* this could fail, but we continue anyways */
		} else {
			mc_in = NULL;
		}

		/* out */
		if (translate_mac && mac_nat_bif == dbif) {
			/* translate the packet */
			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
		}

		/* run the member packet filter on forwarded (input) packets */
		if (mc != NULL && sbif != NULL &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			if (used == 0) {
				/* Keep the layer3 header aligned */
				int i = min(mc->m_pkthdr.len, max_protohdr);
				mc = m_copyup(mc, i, ETHER_ALIGN);
				if (mc == NULL) {
					(void) ifnet_stat_increment_out(
						sc->sc_ifp, 0, 0, 1);
					if (mc_in != NULL) {
						m_freem(mc_in);
						mc_in = NULL;
					}
					continue;
				}
			}
			/*
			 * bridge_pf() takes &mc: a non-zero result or a NULL
			 * mc afterwards both mean there is nothing to send,
			 * so the input copy is discarded as well.
			 */
			if (bridge_pf(&mc, dst_if, sc_filter_flags, false) != 0) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
			if (mc == NULL) {
				if (mc_in != NULL) {
					m_freem(mc_in);
					mc_in = NULL;
				}
				continue;
			}
		}

		if (mc != NULL) {
			/* verify checksum if necessary */
			/*
			 * i.e. when the destination member has checksum
			 * offload and the source member does not
			 */
			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
			    !bif_has_checksum_offload(sbif)) {
				error = bridge_verify_checksum(&mc,
				    &dbif->bif_stats);
				if (error != 0) {
					if (mc != NULL) {
						m_freem(mc);
					}
					mc = NULL;
				}
			}
			if (mc != NULL) {
				(void) bridge_enqueue(bridge_ifp,
				    NULL, dst_if, etypef, mc, cksum_op,
				    pkt_direction_TX);
			}
		}

		/* in */
		if (mc_in == NULL) {
			continue;
		}
		/* deliver the input copy as if it arrived on dst_if */
		BRIDGE_BPF_TAP_IN(dst_if, mc_in);
		prepare_input_packet(dst_if, mc_in);
		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
		dlil_input_packet_list(dst_if, mc_in);
	}
	/* the original was never handed to a member: free it */
	if (used == 0) {
		m_freem(m);
	}


	BRIDGE_UNREF(sc);
}
6269 
6270 static mbuf_t
6271 copy_packet_list(mbuf_t m)
6272 {
6273 	mblist  ret;
6274 	mbuf_t  next_packet;
6275 
6276 	mblist_init(&ret);
6277 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
6278 		mbuf_t  copy_m;
6279 
6280 		/* take it out of the list */
6281 		next_packet = scan->m_nextpkt;
6282 		scan->m_nextpkt = NULL;
6283 
6284 		/* create a copy and add it to the new list */
6285 		copy_m = m_dup(scan, M_DONTWAIT);
6286 		if (copy_m != NULL) {
6287 			mblist_append(&ret, copy_m);
6288 		}
6289 
6290 		/* put it back in the original list */
6291 		scan->m_nextpkt = next_packet;
6292 	}
6293 	return ret.head;
6294 }
6295 
/*
 * bridge_broadcast_list:
 *
 *      Broadcast a list of packets to all members except `sbif`.
 *      Consumes `m` before returning.
 *
 *      `sbif` is the member the packets arrived on, or NULL when the
 *      bridge interface itself is the caller. `etypef` and `direction`
 *      are handed to bridge_enqueue() for every destination.
 *
 *	NOTE: Releases the lock on return.
 */
static void
bridge_broadcast_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
    ether_type_flag_t etypef, mbuf_t m, pkt_direction_t direction)
{
	ifnet_t                 bridge_ifp;
	bool                    bridge_needs_input;
	struct bridge_iflist *  dbif;
	bool                    is_bcast_mcast;
	errno_t                 error = 0;
	ChecksumOperation       cksum_op;
	struct bridge_iflist *  mac_nat_bif = sc->sc_mac_nat_bif;
	ifnet_t                 mac_nat_if = NULL;
	bool                    need_mac_nat = false;
	mbuf_t                  out_mac_nat = NULL;
	ifnet_t                 src_if;
	uint32_t                sc_filter_flags;
	bool                    used = false; /* true once `m` itself was handed out */

	bridge_ifp = sc->sc_ifp;
	if (sbif != NULL) {
		src_if = sbif->bif_ifp;

		if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
			bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

			/* compute checksum on packets marked with offload */
			m = bridge_checksum_offload_list(bridge_ifp, sbif,
			    m, is_ipv4);
			if (m == NULL) {
				/*
				 * Nothing left to send. The lock is dropped
				 * here because the BRIDGE_LOCK2REF() below is
				 * skipped on this path.
				 */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			cksum_op = CHECKSUM_OPERATION_NONE;
		} else {
			cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
		}

		/*
		 * If MAC-NAT is enabled and we'll be sending the packets
		 * over it, verify that it is up and active before
		 * deciding to make a translated copy.
		 */
		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
			mac_nat_if = mac_nat_bif->bif_ifp;
			if ((mac_nat_if->if_flags & IFF_RUNNING) != 0 &&
			    (mac_nat_bif->bif_flags & BIFF_MEDIA_ACTIVE) != 0) {
				need_mac_nat = true;
			}
		}
	} else {
		/*
		 * sbif is NULL when the bridge interface calls
		 * bridge_broadcast_list() (TBD).
		 */
		cksum_op = CHECKSUM_OPERATION_FINALIZE;
		src_if = NULL;
	}

	/*
	 * Create a translated copy for packets destined to MAC-NAT interface.
	 */
	if (need_mac_nat) {
		out_mac_nat
		        = bridge_mac_nat_copy_and_translate_list(sc, sbif,
		    mac_nat_if, m);
	}
	/* snapshot the softc state used by the member loop below */
	sc_filter_flags = sc->sc_filter_flags;
	bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
	/*
	 * NOTE(review): presumably trades the bridge lock for a reference
	 * (paired with BRIDGE_UNREF() below) and reports failure through
	 * `error` -- confirm against the macro definition.
	 */
	BRIDGE_LOCK2REF(sc, error);
	if (error) {
		goto done;
	}
	is_bcast_mcast = IS_BCAST_MCAST(m);

	/* make a copy for the bridge interface */
	if (sbif != NULL && is_bcast_mcast && bridge_needs_input) {
		mbuf_t  in_list;

		in_list = copy_packet_list(m);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
		    "%s mcast for us in_m %p",
		    bridge_ifp->if_xname, in_list);
		if (in_list != NULL) {
			inject_input_packet_list(bridge_ifp, in_list, false);
		}
	}

	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
		ifnet_t         dst_if;
		mbuf_t          in_m = NULL;
		mbuf_t          out_m = NULL;

		dst_if = dbif->bif_ifp;
		if (dst_if == src_if) {
			/* skip the interface that the packet came in on */
			continue;
		}

		/* Private segments can not talk to each other */
		if (sbif != NULL &&
		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
			continue;
		}

		/* skip members that spanning tree has put in discarding state */
		if ((dbif->bif_ifflags & IFBIF_STP) &&
		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			continue;
		}

		/* non-broadcast/multicast traffic only goes to IFBIF_DISCOVER members */
		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
		    !is_bcast_mcast) {
			continue;
		}

		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
			continue;
		}

		if ((dbif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
			continue;
		}
		if (dbif == mac_nat_bif) {
			/*
			 * translated copy was created above, use that;
			 * it is NULL when no copy was made, which the
			 * out_m check below treats as a failure
			 */
			out_m = out_mac_nat;
			out_mac_nat = NULL;
		} else if (TAILQ_NEXT(dbif, bif_next) == NULL) {
			/* consume `m` */
			out_m = m;
			used = true;
		} else {
			/* needs a copy */
			out_m = copy_packet_list(m);
		}

		if (out_m == NULL) {
			/*
			 * nothing to send to this member; bump the bridge
			 * output stats (last argument is presumably the
			 * error count -- confirm)
			 */
			ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
			continue;
		}
		/*
		 * If broadcast input is enabled, do so only if this
		 * is an input packet.
		 */
		if (sbif != NULL && is_bcast_mcast &&
		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
			in_m = copy_packet_list(m);
			/* this could fail, but we continue anyways */
		} else {
			in_m = NULL;
		}

		/* run the member packet filter on the outbound copy */
		if (sbif != NULL &&
		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
			out_m = bridge_pf_list(out_m, dst_if,
			    sc_filter_flags, false);
		}
		if (out_m != NULL) {
			/* verify checksum if necessary */
			if (sbif != NULL &&
			    ether_type_flag_is_ip(etypef) &&
			    bif_has_checksum_offload(dbif) &&
			    !bif_has_checksum_offload(sbif)) {
				bool is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);

				out_m = bridge_verify_checksum_list(bridge_ifp,
				    dbif, out_m, is_ipv4);
			}
			if (out_m != NULL) {
				bridge_enqueue(bridge_ifp, src_if, dst_if,
				    etypef, out_m, cksum_op, direction);
			}
		}

		/* in */
		if (in_m != NULL) {
			inject_input_packet_list(dst_if, in_m, true);
		}
	}

	BRIDGE_UNREF(sc);

done:
	/* release whatever was not handed off to a destination */
	if (out_mac_nat != NULL) {
		m_freem_list(out_mac_nat);
	}
	if (!used) {
		m_freem_list(m);
	}
	return;
}
6493 
6494 #define NEEDED_CSUM_IPV4   (IF_HWASSIST_CSUM_UDP | IF_HWASSIST_CSUM_TCP)
6495 #define NEEDED_CSUM_IPV6   (IF_HWASSIST_CSUM_UDPIPV6 | IF_HWASSIST_CSUM_TCPIPV6)
6496 
6497 static bool
6498 interface_supports_hw_checksum(ifnet_t ifp, bool is_ipv4)
6499 {
6500 	uint32_t        hwcap = IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
6501 	uint32_t        needed = is_ipv4 ? NEEDED_CSUM_IPV4 : NEEDED_CSUM_IPV6;
6502 	bool            supports;
6503 
6504 	supports = (hwcap & needed) == needed;
6505 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM, "%s: does %ssupport checksum",
6506 	    ifp->if_xname, supports ? "" : "not ");
6507 	return supports;
6508 }
6509 
6510 static void
6511 bridge_forward_list(struct bridge_softc *sc, struct bridge_iflist * sbif,
6512     ifnet_t dst_if, ether_type_flag_t etypef, mbuf_t m)
6513 {
6514 	bool                    checksum_ok = false;
6515 	ChecksumOperation       cksum_op;
6516 	ifnet_t                 bridge_ifp;
6517 	struct bridge_iflist *  dbif;
6518 	uint32_t                sc_filter_flags;
6519 	ifnet_t                 src_if;
6520 
6521 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6522 		goto drop;
6523 	}
6524 	dbif = bridge_lookup_member_if(sc, dst_if);
6525 	if (dbif == NULL) {
6526 		/* Not a member of the bridge (anymore?) */
6527 		goto drop;
6528 	}
6529 
6530 	/* Private segments can not talk to each other */
6531 	if ((sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) != 0) {
6532 		goto drop;
6533 	}
6534 	bridge_ifp = sc->sc_ifp;
6535 	src_if = sbif->bif_ifp;
6536 	cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6537 	if (ether_type_flag_is_ip(etypef) && bif_uses_virtio(sbif)) {
6538 		bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6539 
6540 		if (dbif == sc->sc_mac_nat_bif ||
6541 		    (IFNET_IS_VMNET(dst_if) && !bif_uses_virtio(dbif)) ||
6542 		    !interface_supports_hw_checksum(dst_if, is_ipv4)) {
6543 			/* compute checksums now if necessary */
6544 			m = bridge_checksum_offload_list(bridge_ifp, sbif,
6545 			    m, is_ipv4);
6546 			checksum_ok = true;
6547 		} else {
6548 			cksum_op = CHECKSUM_OPERATION_NONE;
6549 		}
6550 	}
6551 
6552 	if (dbif == sc->sc_mac_nat_bif) {
6553 		/* translate the packets before forwarding them */
6554 		if ((etypef & ETHER_TYPE_FLAG_IP_ARP) != 0) {
6555 			m = bridge_mac_nat_translate_list(sc, sbif, dst_if, m);
6556 		}
6557 	} else if (!checksum_ok && ether_type_flag_is_ip(etypef) &&
6558 	    bif_has_checksum_offload(dbif) && !bif_has_checksum_offload(sbif)) {
6559 		bool    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
6560 
6561 		/*
6562 		 * If the destination interface has checksum offload enabled,
6563 		 * verify the checksum now, unless the source interface also has
6564 		 * checksum offload enabled. The checksum in that case has
6565 		 * already just been computed and verifying it is unnecessary.
6566 		 */
6567 		m = bridge_verify_checksum_list(bridge_ifp, dbif, m, is_ipv4);
6568 	}
6569 	sc_filter_flags = sc->sc_filter_flags;
6570 	BRIDGE_UNLOCK(sc);
6571 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6572 		m = bridge_pf_list(m, dst_if, sc_filter_flags, false);
6573 	}
6574 
6575 	/*
6576 	 * We're forwarding inbound packets for which the checksums must
6577 	 * already have been computed and if required, verified, or
6578 	 * packets from a virtio-enabled interface for which we rely
6579 	 * on the packet containing appropriate offload flags.
6580 	 */
6581 	if (m != NULL) {
6582 		bridge_enqueue(bridge_ifp, src_if, dst_if, etypef, m,
6583 		    cksum_op, pkt_direction_RX);
6584 	}
6585 	return;
6586 
6587 drop:
6588 	BRIDGE_UNLOCK(sc);
6589 	m_freem_list(m);
6590 	return;
6591 }
6592 
6593 /*
6594  * bridge_span:
6595  *
6596  *	Duplicate a packet out one or more interfaces that are in span mode,
6597  *	the original mbuf is unmodified.
6598  */
6599 static void
6600 bridge_span(struct bridge_softc *sc, ether_type_flag_t etypef, struct mbuf *m)
6601 {
6602 	struct bridge_iflist *bif;
6603 	struct ifnet *dst_if;
6604 	struct mbuf *mc;
6605 
6606 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6607 		return;
6608 	}
6609 
6610 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6611 		dst_if = bif->bif_ifp;
6612 
6613 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6614 			continue;
6615 		}
6616 
6617 		mc = m_copypacket(m, M_DONTWAIT);
6618 		if (mc == NULL) {
6619 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6620 			continue;
6621 		}
6622 
6623 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, etypef, mc,
6624 		    CHECKSUM_OPERATION_NONE, pkt_direction_TX);
6625 	}
6626 }
6627 
6628 /*
6629  * bridge_rtupdate:
6630  *
6631  *	Add a bridge routing entry.
6632  */
6633 static int
6634 bridge_rtupdate(struct bridge_softc *sc, const uint8_t dst[ETHER_ADDR_LEN], uint16_t vlan,
6635     struct bridge_iflist *bif, int setflags, uint8_t flags)
6636 {
6637 	struct bridge_rtnode *brt;
6638 	int error;
6639 
6640 	BRIDGE_LOCK_ASSERT_HELD(sc);
6641 
6642 	/* Check the source address is valid and not multicast. */
6643 	if (ETHER_IS_MULTICAST(dst) ||
6644 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6645 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6646 		return EINVAL;
6647 	}
6648 
6649 	/* 802.1p frames map to vlan 1 */
6650 	if (vlan == 0) {
6651 		vlan = 1;
6652 	}
6653 
6654 	/*
6655 	 * A route for this destination might already exist.  If so,
6656 	 * update it, otherwise create a new one.
6657 	 */
6658 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6659 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6660 			sc->sc_brtexceeded++;
6661 			return ENOSPC;
6662 		}
6663 		/* Check per interface address limits (if enabled) */
6664 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6665 			bif->bif_addrexceeded++;
6666 			return ENOSPC;
6667 		}
6668 
6669 		/*
6670 		 * Allocate a new bridge forwarding node, and
6671 		 * initialize the expiration time and Ethernet
6672 		 * address.
6673 		 */
6674 		brt = zalloc_noblock(bridge_rtnode_pool);
6675 		if (brt == NULL) {
6676 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6677 			    "zalloc_nolock failed");
6678 			return ENOMEM;
6679 		}
6680 		bzero(brt, sizeof(struct bridge_rtnode));
6681 
6682 		if (bif->bif_ifflags & IFBIF_STICKY) {
6683 			brt->brt_flags = IFBAF_STICKY;
6684 		} else {
6685 			brt->brt_flags = IFBAF_DYNAMIC;
6686 		}
6687 
6688 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6689 		brt->brt_vlan = vlan;
6690 
6691 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6692 			zfree(bridge_rtnode_pool, brt);
6693 			return error;
6694 		}
6695 		brt->brt_dst = bif;
6696 		bif->bif_addrcnt++;
6697 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6698 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6699 		    "on %s count %u hashsize %u",
6700 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6701 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6702 		    sc->sc_rthash_size);
6703 	}
6704 
6705 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6706 	    brt->brt_dst != bif) {
6707 		brt->brt_dst->bif_addrcnt--;
6708 		brt->brt_dst = bif;
6709 		brt->brt_dst->bif_addrcnt++;
6710 	}
6711 
6712 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6713 		unsigned long now;
6714 
6715 		now = (unsigned long) net_uptime();
6716 		brt->brt_expire = now + sc->sc_brttimeout;
6717 	}
6718 	if (setflags) {
6719 		brt->brt_flags = flags;
6720 	}
6721 
6722 	return 0;
6723 }
6724 
6725 /*
6726  * bridge_rtlookup:
6727  *
6728  *	Lookup the destination interface for an address.
6729  */
6730 static struct bridge_iflist *
6731 bridge_rtlookup_bif(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
6732     uint16_t vlan)
6733 {
6734 	struct bridge_rtnode *brt;
6735 
6736 	BRIDGE_LOCK_ASSERT_HELD(sc);
6737 
6738 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6739 		return NULL;
6740 	}
6741 
6742 	return brt->brt_dst;
6743 }
6744 
6745 /*
6746  * bridge_rttrim:
6747  *
6748  *	Trim the routine table so that we have a number
6749  *	of routing entries less than or equal to the
6750  *	maximum number.
6751  */
6752 static void
6753 bridge_rttrim(struct bridge_softc *sc)
6754 {
6755 	struct bridge_rtnode *brt, *nbrt;
6756 
6757 	BRIDGE_LOCK_ASSERT_HELD(sc);
6758 
6759 	/* Make sure we actually need to do this. */
6760 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6761 		return;
6762 	}
6763 
6764 	/* Force an aging cycle; this might trim enough addresses. */
6765 	bridge_rtage(sc);
6766 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6767 		return;
6768 	}
6769 
6770 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6771 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6772 			bridge_rtnode_destroy(sc, brt);
6773 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6774 				return;
6775 			}
6776 		}
6777 	}
6778 }
6779 
6780 /*
6781  * bridge_aging_timer:
6782  *
6783  *	Aging periodic timer for the bridge routing table.
6784  */
6785 static void
6786 bridge_aging_timer(struct bridge_softc *sc)
6787 {
6788 	BRIDGE_LOCK_ASSERT_HELD(sc);
6789 
6790 	bridge_rtage(sc);
6791 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6792 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6793 		sc->sc_aging_timer.bdc_sc = sc;
6794 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6795 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6796 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6797 	}
6798 }
6799 
6800 /*
6801  * bridge_rtage:
6802  *
6803  *	Perform an aging cycle.
6804  */
6805 static void
6806 bridge_rtage(struct bridge_softc *sc)
6807 {
6808 	struct bridge_rtnode *brt, *nbrt;
6809 	unsigned long now;
6810 
6811 	BRIDGE_LOCK_ASSERT_HELD(sc);
6812 
6813 	now = (unsigned long) net_uptime();
6814 
6815 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6816 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6817 			if (now >= brt->brt_expire) {
6818 				bridge_rtnode_destroy(sc, brt);
6819 			}
6820 		}
6821 	}
6822 	if (sc->sc_mac_nat_bif != NULL) {
6823 		bridge_mac_nat_age_entries(sc, now);
6824 	}
6825 }
6826 
6827 /*
6828  * bridge_rtflush:
6829  *
6830  *	Remove all dynamic addresses from the bridge.
6831  */
6832 static void
6833 bridge_rtflush(struct bridge_softc *sc, int full)
6834 {
6835 	struct bridge_rtnode *brt, *nbrt;
6836 
6837 	BRIDGE_LOCK_ASSERT_HELD(sc);
6838 
6839 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6840 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6841 			bridge_rtnode_destroy(sc, brt);
6842 		}
6843 	}
6844 }
6845 
6846 /*
6847  * bridge_rtdaddr:
6848  *
6849  *	Remove an address from the table.
6850  */
6851 static int
6852 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN], uint16_t vlan)
6853 {
6854 	struct bridge_rtnode *brt;
6855 	int found = 0;
6856 
6857 	BRIDGE_LOCK_ASSERT_HELD(sc);
6858 
6859 	/*
6860 	 * If vlan is zero then we want to delete for all vlans so the lookup
6861 	 * may return more than one.
6862 	 */
6863 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6864 		bridge_rtnode_destroy(sc, brt);
6865 		found = 1;
6866 	}
6867 
6868 	return found ? 0 : ENOENT;
6869 }
6870 
6871 /*
6872  * bridge_rtdelete:
6873  *
6874  *	Delete routes to a specific member interface.
6875  */
6876 static void
6877 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6878 {
6879 	struct bridge_rtnode *brt, *nbrt;
6880 
6881 	BRIDGE_LOCK_ASSERT_HELD(sc);
6882 
6883 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6884 		if (brt->brt_ifp == ifp && (full ||
6885 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6886 			bridge_rtnode_destroy(sc, brt);
6887 		}
6888 	}
6889 }
6890 
6891 /*
6892  * bridge_rtable_init:
6893  *
6894  *	Initialize the route table for this bridge.
6895  */
6896 static int
6897 bridge_rtable_init(struct bridge_softc *sc)
6898 {
6899 	u_int32_t i;
6900 
6901 	sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6902 	    BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6903 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6904 
6905 	for (i = 0; i < sc->sc_rthash_size; i++) {
6906 		LIST_INIT(&sc->sc_rthash[i]);
6907 	}
6908 
6909 	sc->sc_rthash_key = RandomULong();
6910 
6911 	LIST_INIT(&sc->sc_rtlist);
6912 
6913 	return 0;
6914 }
6915 
6916 /*
6917  * bridge_rthash_delayed_resize:
6918  *
6919  *	Resize the routing table hash on a delayed thread call.
6920  */
6921 static void
6922 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6923 {
6924 	u_int32_t new_rthash_size = 0;
6925 	u_int32_t old_rthash_size = 0;
6926 	struct _bridge_rtnode_list *new_rthash = NULL;
6927 	struct _bridge_rtnode_list *old_rthash = NULL;
6928 	u_int32_t i;
6929 	struct bridge_rtnode *brt;
6930 	int error = 0;
6931 
6932 	BRIDGE_LOCK_ASSERT_HELD(sc);
6933 
6934 	/*
6935 	 * Four entries per hash bucket is our ideal load factor
6936 	 */
6937 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6938 		goto out;
6939 	}
6940 
6941 	/*
6942 	 * Doubling the number of hash buckets may be too simplistic
6943 	 * especially when facing a spike of new entries
6944 	 */
6945 	new_rthash_size = sc->sc_rthash_size * 2;
6946 
6947 	sc->sc_flags |= SCF_RESIZING;
6948 	BRIDGE_UNLOCK(sc);
6949 
6950 	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6951 	    Z_WAITOK | Z_ZERO);
6952 
6953 	BRIDGE_LOCK(sc);
6954 	sc->sc_flags &= ~SCF_RESIZING;
6955 
6956 	if (new_rthash == NULL) {
6957 		error = ENOMEM;
6958 		goto out;
6959 	}
6960 	if ((sc->sc_flags & SCF_DETACHING)) {
6961 		error = ENODEV;
6962 		goto out;
6963 	}
6964 	/*
6965 	 * Fail safe from here on
6966 	 */
6967 	old_rthash = sc->sc_rthash;
6968 	old_rthash_size = sc->sc_rthash_size;
6969 	sc->sc_rthash = new_rthash;
6970 	sc->sc_rthash_size = new_rthash_size;
6971 
6972 	/*
6973 	 * Get a new key to force entries to be shuffled around to reduce
6974 	 * the likelihood they will land in the same buckets
6975 	 */
6976 	sc->sc_rthash_key = RandomULong();
6977 
6978 	for (i = 0; i < sc->sc_rthash_size; i++) {
6979 		LIST_INIT(&sc->sc_rthash[i]);
6980 	}
6981 
6982 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6983 		LIST_REMOVE(brt, brt_hash);
6984 		(void) bridge_rtnode_hash(sc, brt);
6985 	}
6986 out:
6987 	if (error == 0) {
6988 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6989 		    "%s new size %u",
6990 		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
6991 		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
6992 	} else {
6993 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
6994 		    "%s failed %d", sc->sc_ifp->if_xname, error);
6995 		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
6996 	}
6997 }
6998 
6999 /*
7000  * Resize the number of hash buckets based on the load factor
7001  * Currently only grow
7002  * Failing to resize the hash table is not fatal
7003  */
7004 static void
7005 bridge_rthash_resize(struct bridge_softc *sc)
7006 {
7007 	BRIDGE_LOCK_ASSERT_HELD(sc);
7008 
7009 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
7010 		return;
7011 	}
7012 
7013 	/*
7014 	 * Four entries per hash bucket is our ideal load factor
7015 	 */
7016 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
7017 		return;
7018 	}
7019 	/*
7020 	 * Hard limit on the size of the routing hash table
7021 	 */
7022 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
7023 		return;
7024 	}
7025 
7026 	sc->sc_resize_call.bdc_sc = sc;
7027 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
7028 	bridge_schedule_delayed_call(&sc->sc_resize_call);
7029 }
7030 
/*
 * bridge_rtable_fini:
 *
 *	Deconstruct the route table for this bridge: free the hash bucket
 *	array and reset the bucket pointer and count in the softc.
 *	All route nodes are expected to be gone already (asserted below).
 */
static void
bridge_rtable_fini(struct bridge_softc *sc)
{
	/* no route node may still be in the table at this point */
	KASSERT(sc->sc_brtcnt == 0,
	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
	kfree_type_counted_by(struct _bridge_rtnode_list, sc->sc_rthash_size,
	    sc->sc_rthash);
	/* leave no stale pointer or size behind */
	sc->sc_rthash = NULL;
	sc->sc_rthash_size = 0;
}
7046 
7047 /*
7048  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
7049  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
7050  */
7051 #define mix(a, b, c)                                                    \
7052 do {                                                                    \
7053 	a -= b; a -= c; a ^= (c >> 13);                                 \
7054 	b -= c; b -= a; b ^= (a << 8);                                  \
7055 	c -= a; c -= b; c ^= (b >> 13);                                 \
7056 	a -= b; a -= c; a ^= (c >> 12);                                 \
7057 	b -= c; b -= a; b ^= (a << 16);                                 \
7058 	c -= a; c -= b; c ^= (b >> 5);                                  \
7059 	a -= b; a -= c; a ^= (c >> 3);                                  \
7060 	b -= c; b -= a; b ^= (a << 10);                                 \
7061 	c -= a; c -= b; c ^= (b >> 15);                                 \
7062 } while ( /*CONSTCOND*/ 0)
7063 
7064 static __inline uint32_t
7065 bridge_rthash(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN])
7066 {
7067 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7068 
7069 	b += addr[5] << 8;
7070 	b += addr[4];
7071 	a += addr[3] << 24;
7072 	a += addr[2] << 16;
7073 	a += addr[1] << 8;
7074 	a += addr[0];
7075 
7076 	mix(a, b, c);
7077 
7078 	return c & BRIDGE_RTHASH_MASK(sc);
7079 }
7080 
7081 #undef mix
7082 
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte, most significant
 *	byte first. Returns 0 when they are equal, otherwise the
 *	difference (a[i] - b[i]) of the first pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t a[ETHER_ADDR_LEN], const uint8_t b[ETHER_ADDR_LEN])
{
	for (int idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int delta = (int)a[idx] - (int)b[idx];

		if (delta != 0) {
			return delta;
		}
	}

	return 0;
}
7094 
7095 /*
7096  * bridge_rtnode_lookup:
7097  *
7098  *	Look up a bridge route node for the specified destination. Compare the
7099  *	vlan id or if zero then just return the first match.
7100  */
7101 static struct bridge_rtnode *
7102 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t addr[ETHER_ADDR_LEN],
7103     uint16_t vlan)
7104 {
7105 	struct bridge_rtnode *brt;
7106 	uint32_t hash;
7107 	int dir;
7108 
7109 	BRIDGE_LOCK_ASSERT_HELD(sc);
7110 
7111 	hash = bridge_rthash(sc, addr);
7112 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7113 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7114 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7115 			return brt;
7116 		}
7117 		if (dir > 0) {
7118 			return NULL;
7119 		}
7120 	}
7121 
7122 	return NULL;
7123 }
7124 
7125 /*
7126  * bridge_rtnode_hash:
7127  *
7128  *	Insert the specified bridge node into the route hash table.
7129  *	This is used when adding a new node or to rehash when resizing
7130  *	the hash table
7131  */
7132 static int
7133 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7134 {
7135 	struct bridge_rtnode *lbrt;
7136 	uint32_t hash;
7137 	int dir;
7138 
7139 	BRIDGE_LOCK_ASSERT_HELD(sc);
7140 
7141 	hash = bridge_rthash(sc, brt->brt_addr);
7142 
7143 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7144 	if (lbrt == NULL) {
7145 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7146 		goto out;
7147 	}
7148 
7149 	do {
7150 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7151 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7152 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7153 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7154 			    sc->sc_ifp->if_xname,
7155 			    brt->brt_addr[0], brt->brt_addr[1],
7156 			    brt->brt_addr[2], brt->brt_addr[3],
7157 			    brt->brt_addr[4], brt->brt_addr[5]);
7158 			return EEXIST;
7159 		}
7160 		if (dir > 0) {
7161 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7162 			goto out;
7163 		}
7164 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7165 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7166 			goto out;
7167 		}
7168 		lbrt = LIST_NEXT(lbrt, brt_hash);
7169 	} while (lbrt != NULL);
7170 
7171 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7172 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7173 	    sc->sc_ifp->if_xname,
7174 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7175 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7176 out:
7177 	return 0;
7178 }
7179 
7180 /*
7181  * bridge_rtnode_insert:
7182  *
7183  *	Insert the specified bridge node into the route table.  We
7184  *	assume the entry is not already in the table.
7185  */
7186 static int
7187 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7188 {
7189 	int error;
7190 
7191 	error = bridge_rtnode_hash(sc, brt);
7192 	if (error != 0) {
7193 		return error;
7194 	}
7195 
7196 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7197 	sc->sc_brtcnt++;
7198 
7199 	bridge_rthash_resize(sc);
7200 
7201 	return 0;
7202 }
7203 
7204 /*
7205  * bridge_rtnode_destroy:
7206  *
7207  *	Destroy a bridge rtnode.
7208  */
7209 static void
7210 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7211 {
7212 	BRIDGE_LOCK_ASSERT_HELD(sc);
7213 
7214 	LIST_REMOVE(brt, brt_hash);
7215 
7216 	LIST_REMOVE(brt, brt_list);
7217 	sc->sc_brtcnt--;
7218 	brt->brt_dst->bif_addrcnt--;
7219 	zfree(bridge_rtnode_pool, brt);
7220 }
7221 
7222 #if BRIDGESTP
7223 /*
7224  * bridge_rtable_expire:
7225  *
7226  *	Set the expiry time for all routes on an interface.
7227  */
7228 static void
7229 bridge_rtable_expire(struct ifnet *ifp, int age)
7230 {
7231 	struct bridge_softc *sc = ifp->if_bridge;
7232 	struct bridge_rtnode *brt;
7233 
7234 	BRIDGE_LOCK(sc);
7235 
7236 	/*
7237 	 * If the age is zero then flush, otherwise set all the expiry times to
7238 	 * age for the interface
7239 	 */
7240 	if (age == 0) {
7241 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7242 	} else {
7243 		unsigned long now;
7244 
7245 		now = (unsigned long) net_uptime();
7246 
7247 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7248 			/* Cap the expiry time to 'age' */
7249 			if (brt->brt_ifp == ifp &&
7250 			    brt->brt_expire > now + age &&
7251 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7252 				brt->brt_expire = now + age;
7253 			}
7254 		}
7255 	}
7256 	BRIDGE_UNLOCK(sc);
7257 }
7258 
7259 /*
7260  * bridge_state_change:
7261  *
7262  *	Callback from the bridgestp code when a port changes states.
7263  */
7264 static void
7265 bridge_state_change(struct ifnet *ifp, int state)
7266 {
7267 	struct bridge_softc *sc = ifp->if_bridge;
7268 	static const char *stpstates[] = {
7269 		"disabled",
7270 		"listening",
7271 		"learning",
7272 		"forwarding",
7273 		"blocking",
7274 		"discarding"
7275 	};
7276 
7277 	if (log_stp) {
7278 		log(LOG_NOTICE, "%s: state changed to %s on %s",
7279 		    sc->sc_ifp->if_xname,
7280 		    stpstates[state], ifp->if_xname);
7281 	}
7282 }
7283 #endif /* BRIDGESTP */
7284 
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached.
 *	Tears down the per-bridge state attached to `ifp` and frees the
 *	softc; `sc` must not be used after this function returns.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* take the bridge off the global list of bridges */
	lck_mtx_lock(&bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(&bridge_list_mtx);

	ifnet_release(ifp);

	/* finally destroy the bridge lock and free the softc itself */
	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
	kfree_type(struct bridge_softc, sc);
}
7311 
7312 /*
7313  * bridge_link_event:
7314  *
7315  *	Report a data link event on an interface
7316  */
7317 static void
7318 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7319 {
7320 	struct event {
7321 		u_int32_t ifnet_family;
7322 		u_int32_t unit;
7323 		char if_name[IFNAMSIZ];
7324 	};
7325 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7326 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7327 	struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
7328 
7329 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7330 	    "%s event_code %u - %s", ifp->if_xname,
7331 	    event_code, dlil_kev_dl_code_str(event_code));
7332 	header->total_size   = sizeof(message);
7333 	header->vendor_code  = KEV_VENDOR_APPLE;
7334 	header->kev_class    = KEV_NETWORK_CLASS;
7335 	header->kev_subclass = KEV_DL_SUBCLASS;
7336 	header->event_code   = event_code;
7337 	data->ifnet_family   = ifnet_family(ifp);
7338 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7339 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7340 	ifnet_event(ifp, header);
7341 }
7342 
/*
 * BRIDGE_HF_DROP:
 *
 *	Account for a host-filter drop: bump the bridge_hostfilter_stats
 *	counter named `reason`, emit a host-filter debug log tagged with
 *	`func`/`line`, and set the caller's local `error` variable to
 *	EINVAL.  The calling function must have an `error` lvalue in scope.
 *
 *	Wrapped in do { } while (0) so that the invocation, followed by
 *	its semicolon, is a single statement and can be used safely in an
 *	unbraced if/else.
 */
#define BRIDGE_HF_DROP(reason, func, line) do {                         \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d" #reason, func, line);                \
	        error = EINVAL;                                         \
} while (0)
7349 
7350 static int
7351 bridge_host_filter_arp(struct bridge_iflist *bif, mbuf_t *data)
7352 {
7353 	struct ether_arp *ea;
7354 	struct ether_header *eh;
7355 	int error = EINVAL;
7356 	mbuf_t m = *data;
7357 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7358 
7359 	/*
7360 	 * Make the Ethernet and ARP headers contiguous
7361 	 */
7362 	if (mbuf_pkthdr_len(m) < minlen) {
7363 		BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7364 		goto done;
7365 	}
7366 	if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7367 		BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7368 		    __func__, __LINE__);
7369 		goto done;
7370 	}
7371 	m = *data;
7372 
7373 	/*
7374 	 * Restrict Ethernet protocols to ARP and IP/IPv6
7375 	 */
7376 	eh = mtod(m, struct ether_header *);
7377 	ea = (struct ether_arp *)(eh + 1);
7378 	if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7379 		BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7380 		    __func__, __LINE__);
7381 		goto done;
7382 	}
7383 	if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7384 		BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7385 		    __func__, __LINE__);
7386 		goto done;
7387 	}
7388 	/*
7389 	 * Verify the address lengths are correct
7390 	 */
7391 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7392 		BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7393 		goto done;
7394 	}
7395 	if (ea->arp_pln != sizeof(struct in_addr)) {
7396 		BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7397 		    __func__, __LINE__);
7398 		goto done;
7399 	}
7400 	/*
7401 	 * Allow only ARP request or ARP reply
7402 	 */
7403 	if (ea->arp_op != HTONS_ARPOP_REQUEST &&
7404 	    ea->arp_op != HTONS_ARPOP_REPLY) {
7405 		BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7406 		goto done;
7407 	}
7408 	if ((bif->bif_flags & BIFF_HF_HWSRC) != 0) {
7409 		/*
7410 		 * Verify source hardware address matches
7411 		 */
7412 		if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7413 		    ETHER_ADDR_LEN) != 0) {
7414 			BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7415 			goto done;
7416 		}
7417 	}
7418 	if ((bif->bif_flags & BIFF_HF_IPSRC) != 0) {
7419 		/*
7420 		 * Verify source protocol address:
7421 		 * May be null for an ARP probe
7422 		 */
7423 		if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7424 		    sizeof(struct in_addr)) != 0 &&
7425 		    bcmp(ea->arp_spa, &inaddr_any,
7426 		    sizeof(struct in_addr)) != 0) {
7427 			BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7428 			goto done;
7429 		}
7430 	}
7431 	bridge_hostfilter_stats.brhf_arp_ok += 1;
7432 	error = 0;
7433 done:
7434 	return error;
7435 }
7436 
7437 /*
7438  * MAC NAT
7439  */
7440 
7441 static errno_t
7442 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7443 {
7444 	errno_t         error = 0;
7445 
7446 	BRIDGE_LOCK_ASSERT_HELD(sc);
7447 
7448 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7449 		error = EINVAL;
7450 		goto done;
7451 	}
7452 	if (sc->sc_mac_nat_bif != NULL) {
7453 		if (sc->sc_mac_nat_bif != bif) {
7454 			error = EBUSY;
7455 		}
7456 		goto done;
7457 	}
7458 	sc->sc_mac_nat_bif = bif;
7459 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7460 	bridge_mac_nat_populate_entries(sc);
7461 
7462 done:
7463 	return error;
7464 }
7465 
7466 static void
7467 bridge_mac_nat_disable(struct bridge_softc *sc)
7468 {
7469 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7470 
7471 	assert(mac_nat_bif != NULL);
7472 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7473 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7474 	sc->sc_mac_nat_bif = NULL;
7475 	return;
7476 }
7477 
7478 static void
7479 mac_nat_entry_print2(struct mac_nat_entry *mne,
7480     const char ifname[IFNAMSIZ], const char *msg1, const char *msg2)
7481 {
7482 	int             af;
7483 	char            etopbuf[24];
7484 	char            ntopbuf[MAX_IPv6_STR_LEN];
7485 	const char      *space;
7486 
7487 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7488 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7489 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7490 	if (msg2 == NULL) {
7491 		msg2 = "";
7492 		space = "";
7493 	} else {
7494 		space = " ";
7495 	}
7496 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7497 	    "%.*s %s%s%s %p (%s, %s, %s)", IFNAMSIZ, ifname, msg1, space, msg2, mne,
7498 	    mne->mne_bif->bif_ifp->if_xname, ntopbuf, etopbuf);
7499 }
7500 
/*
 * mac_nat_entry_print:
 *
 *	Debug-log a MAC NAT entry with a single message; shorthand for
 *	mac_nat_entry_print2() with no second message.
 */
static void
mac_nat_entry_print(struct mac_nat_entry *mne,
    const char ifname[IFNAMSIZ], const char *msg)
{
	mac_nat_entry_print2(mne, ifname, msg, NULL);
}
7507 
7508 static struct mac_nat_entry *
7509 bridge_lookup_mac_nat_entry_ipv4(const struct bridge_softc *sc, const struct in_addr *ip)
7510 {
7511 	struct mac_nat_entry    *mne;
7512 	struct mac_nat_entry    *ret_mne = NULL;
7513 
7514 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7515 		if (mne->mne_ip.s_addr == ip->s_addr) {
7516 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7517 				mac_nat_entry_print(mne, sc->sc_if_xname,
7518 				    "found");
7519 			}
7520 			ret_mne = mne;
7521 			break;
7522 		}
7523 	}
7524 
7525 	return ret_mne;
7526 }
7527 
7528 static struct mac_nat_entry *
7529 bridge_lookup_mac_nat_entry_ipv6(const struct bridge_softc *sc, const struct in6_addr *ip6)
7530 {
7531 	struct mac_nat_entry    *mne;
7532 	struct mac_nat_entry    *ret_mne = NULL;
7533 
7534 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7535 		if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7536 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7537 				mac_nat_entry_print(mne, sc->sc_if_xname,
7538 				    "found");
7539 			}
7540 			ret_mne = mne;
7541 			break;
7542 		}
7543 	}
7544 
7545 	return ret_mne;
7546 }
7547 
7548 static void
7549 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7550     struct mac_nat_entry *mne, const char *reason)
7551 {
7552 	LIST_REMOVE(mne, mne_list);
7553 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7554 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7555 	}
7556 	zfree(bridge_mne_pool, mne);
7557 	sc->sc_mne_count--;
7558 }
7559 
7560 static struct mac_nat_entry *
7561 bridge_create_mac_nat_entry_common(struct bridge_softc *sc,
7562     struct bridge_iflist *bif, const char eaddr[ETHER_ADDR_LEN])
7563 {
7564 	struct mac_nat_entry *mne;
7565 
7566 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7567 		sc->sc_mne_allocation_failures++;
7568 		return NULL;
7569 	}
7570 
7571 	mne = zalloc_noblock(bridge_mne_pool);
7572 	if (mne == NULL) {
7573 		sc->sc_mne_allocation_failures++;
7574 		return NULL;
7575 	}
7576 
7577 	sc->sc_mne_count++;
7578 	bzero(mne, sizeof(*mne));
7579 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7580 
7581 	mne->mne_bif = bif;
7582 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7583 
7584 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7585 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7586 	}
7587 
7588 	return mne;
7589 }
7590 
7591 static struct mac_nat_entry *
7592 bridge_create_mac_nat_entry_ipv4(struct bridge_softc *sc,
7593     struct bridge_iflist *bif, const struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7594 {
7595 	struct mac_nat_entry *mne;
7596 
7597 	mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7598 	if (mne == NULL) {
7599 		return NULL;
7600 	}
7601 
7602 	bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7603 	LIST_INSERT_HEAD(&sc->sc_mne_list, mne, mne_list);
7604 
7605 	return mne;
7606 }
7607 
7608 static struct mac_nat_entry *
7609 bridge_create_mac_nat_entry_ipv6(struct bridge_softc *sc,
7610     struct bridge_iflist *bif, const struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7611 {
7612 	struct mac_nat_entry *mne;
7613 
7614 	mne = bridge_create_mac_nat_entry_common(sc, bif, eaddr);
7615 	if (mne == NULL) {
7616 		return NULL;
7617 	}
7618 
7619 	bcopy(ip6, &mne->mne_ip6, sizeof(mne->mne_ip6));
7620 	mne->mne_flags |= MNE_FLAGS_IPV6;
7621 	LIST_INSERT_HEAD(&sc->sc_mne_list_v6, mne, mne_list);
7622 
7623 	return mne;
7624 }
7625 
7626 static struct mac_nat_entry *
7627 bridge_update_mac_nat_entry_common(struct bridge_softc *sc, struct bridge_iflist *bif,
7628     struct mac_nat_entry *mne, const char eaddr[ETHER_ADDR_LEN])
7629 {
7630 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7631 
7632 	if (mne->mne_bif == mac_nat_bif) {
7633 		/* the MAC NAT interface takes precedence */
7634 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7635 			if (mne->mne_bif != bif) {
7636 				mac_nat_entry_print2(mne,
7637 				    sc->sc_if_xname, "reject",
7638 				    bif->bif_ifp->if_xname);
7639 			}
7640 		}
7641 	} else if (mne->mne_bif != bif) {
7642 		const char *__null_terminated old_if = mne->mne_bif->bif_ifp->if_xname;
7643 
7644 		mne->mne_bif = bif;
7645 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7646 			mac_nat_entry_print2(mne,
7647 			    sc->sc_if_xname, "replaced",
7648 			    old_if);
7649 		}
7650 		bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7651 	}
7652 
7653 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7654 
7655 	return mne;
7656 }
7657 
7658 static struct mac_nat_entry *
7659 bridge_update_mac_nat_entry_ipv4(struct bridge_softc *sc,
7660     struct bridge_iflist *bif, struct in_addr *ip, const char eaddr[ETHER_ADDR_LEN])
7661 {
7662 	struct mac_nat_entry *mne;
7663 
7664 	mne = bridge_lookup_mac_nat_entry_ipv4(sc, ip);
7665 	if (mne != NULL) {
7666 		return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7667 	}
7668 
7669 	mne = bridge_create_mac_nat_entry_ipv4(sc, bif, ip, eaddr);
7670 	return mne;
7671 }
7672 
7673 static struct mac_nat_entry *
7674 bridge_update_mac_nat_entry_ipv6(struct bridge_softc *sc,
7675     struct bridge_iflist *bif, struct in6_addr *ip6, const char eaddr[ETHER_ADDR_LEN])
7676 {
7677 	struct mac_nat_entry *mne;
7678 
7679 	mne = bridge_lookup_mac_nat_entry_ipv6(sc, ip6);
7680 	if (mne != NULL) {
7681 		return bridge_update_mac_nat_entry_common(sc, bif, mne, eaddr);
7682 	}
7683 
7684 	mne = bridge_create_mac_nat_entry_ipv6(sc, bif, ip6, eaddr);
7685 	return mne;
7686 }
7687 
7688 static void
7689 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7690     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7691 {
7692 	struct mac_nat_entry *mne;
7693 	struct mac_nat_entry *tmne;
7694 
7695 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7696 		if (bif != NULL && mne->mne_bif != bif) {
7697 			continue;
7698 		}
7699 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7700 	}
7701 }
7702 
7703 /*
7704  * bridge_mac_nat_flush_entries:
7705  *
7706  * Flush MAC NAT entries for the specified member. Flush all entries if
7707  * the member is the one that requires MAC NAT, otherwise just flush the
7708  * ones for the specified member.
7709  */
7710 static void
7711 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7712 {
7713 	struct bridge_iflist *flush_bif;
7714 
7715 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7716 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7717 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7718 }
7719 
7720 static void
7721 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7722 {
7723 	errno_t                 error;
7724 	ifnet_t                 ifp;
7725 	uint16_t                addresses_count = 0;
7726 	ifaddr_t                * __counted_by(addresses_count) list;
7727 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
7728 
7729 	assert(mac_nat_bif != NULL);
7730 	ifp = mac_nat_bif->bif_ifp;
7731 	error = ifnet_get_address_list_family_with_count(ifp, &list, &addresses_count, 0);
7732 	if (error != 0) {
7733 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7734 		    "ifnet_get_address_list(%s) failed %d",
7735 		    ifp->if_xname, error);
7736 		return;
7737 	}
7738 
7739 	for (uint16_t i = 0; i < addresses_count; ++i) {
7740 		sa_family_t af;
7741 
7742 		af = ifaddr_address_family(list[i]);
7743 		switch (af) {
7744 		case AF_INET: {
7745 			struct sockaddr_in sin;
7746 
7747 			error = ifaddr_address(list[i], (struct sockaddr *)&sin, sizeof(sin));
7748 			if (error != 0) {
7749 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7750 				    "ifaddr_address failed %d",
7751 				    error);
7752 				break;
7753 			}
7754 
7755 			bridge_create_mac_nat_entry_ipv4(sc, mac_nat_bif, &sin.sin_addr, IF_LLADDR(ifp));
7756 			break;
7757 		}
7758 
7759 		case AF_INET6: {
7760 			struct sockaddr_in6 sin6;
7761 
7762 			error = ifaddr_address(list[i], (struct sockaddr *)&sin6, sizeof(sin6));
7763 			if (error != 0) {
7764 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7765 				    "ifaddr_address failed %d",
7766 				    error);
7767 				break;
7768 			}
7769 
7770 			if (IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) {
7771 				/* remove scope ID */
7772 				sin6.sin6_addr.s6_addr16[1] = 0;
7773 			}
7774 
7775 			bridge_create_mac_nat_entry_ipv6(sc, mac_nat_bif, &sin6.sin6_addr, IF_LLADDR(ifp));
7776 			break;
7777 		}
7778 
7779 		default:
7780 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7781 			    "ifaddr_address_family unknown %d",
7782 			    af);
7783 			break;
7784 		}
7785 	}
7786 
7787 	ifnet_address_list_free_counted_by(list, addresses_count);
7788 	return;
7789 }
7790 
7791 static void
7792 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7793     struct mac_nat_entry_list *list, unsigned long now)
7794 {
7795 	struct mac_nat_entry *mne;
7796 	struct mac_nat_entry *tmne;
7797 
7798 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7799 		if (now >= mne->mne_expire) {
7800 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7801 		}
7802 	}
7803 }
7804 
7805 static void
7806 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7807 {
7808 	if (sc->sc_mac_nat_bif == NULL) {
7809 		return;
7810 	}
7811 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7812 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7813 }
7814 
7815 static const char *
7816 get_in_out_string(boolean_t is_output)
7817 {
7818 	return (const char * __null_terminated)(is_output ? "OUT" : "IN");
7819 }
7820 
7821 /*
7822  * is_valid_arp_packet:
7823  *	Verify that this is a valid ARP packet.
7824  *
7825  *	Returns TRUE if the packet is valid, FALSE otherwise.
7826  */
7827 static boolean_t
7828 is_valid_arp_packet(mbuf_t *data, bool is_output,
7829     struct ether_header **eh_p, struct ether_arp **ea_p)
7830 {
7831 	struct ether_arp *ea;
7832 	struct ether_header *eh;
7833 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7834 	boolean_t is_valid = FALSE;
7835 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7836 
7837 	if (mbuf_pkthdr_len(*data) < minlen) {
7838 		BRIDGE_LOG(LOG_DEBUG, flags,
7839 		    "ARP %s short frame %lu < %lu",
7840 		    get_in_out_string(is_output),
7841 		    mbuf_pkthdr_len(*data), minlen);
7842 		goto done;
7843 	}
7844 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7845 		BRIDGE_LOG(LOG_DEBUG, flags,
7846 		    "ARP %s size %lu mbuf_pullup fail",
7847 		    get_in_out_string(is_output),
7848 		    minlen);
7849 		*data = NULL;
7850 		goto done;
7851 	}
7852 
7853 	/* validate ARP packet */
7854 	eh = mtod(*data, struct ether_header *);
7855 	ea = (struct ether_arp *)(eh + 1);
7856 	if (ea->arp_hrd != HTONS_ARPHRD_ETHER) {
7857 		BRIDGE_LOG(LOG_DEBUG, flags,
7858 		    "ARP %s htype not ethernet",
7859 		    get_in_out_string(is_output));
7860 		goto done;
7861 	}
7862 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7863 		BRIDGE_LOG(LOG_DEBUG, flags,
7864 		    "ARP %s hlen not ethernet",
7865 		    get_in_out_string(is_output));
7866 		goto done;
7867 	}
7868 	if (ea->arp_pro != HTONS_ETHERTYPE_IP) {
7869 		BRIDGE_LOG(LOG_DEBUG, flags,
7870 		    "ARP %s ptype not IP",
7871 		    get_in_out_string(is_output));
7872 		goto done;
7873 	}
7874 	if (ea->arp_pln != sizeof(struct in_addr)) {
7875 		BRIDGE_LOG(LOG_DEBUG, flags,
7876 		    "ARP %s plen not IP",
7877 		    get_in_out_string(is_output));
7878 		goto done;
7879 	}
7880 	is_valid = TRUE;
7881 	*ea_p = ea;
7882 	*eh_p = eh;
7883 done:
7884 	return is_valid;
7885 }
7886 
7887 static struct mac_nat_entry *
7888 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7889 {
7890 	struct ether_arp        * __single ea;
7891 	struct ether_header     * __single eh;
7892 	struct mac_nat_entry    *mne = NULL;
7893 	u_short                 op;
7894 	struct in_addr          tpa;
7895 
7896 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7897 		goto done;
7898 	}
7899 	op = ea->arp_op;
7900 	switch (op) {
7901 	case HTONS_ARPOP_REQUEST:
7902 	case HTONS_ARPOP_REPLY:
7903 		/* only care about REQUEST and REPLY */
7904 		break;
7905 	default:
7906 		goto done;
7907 	}
7908 
7909 	/* check the target IP address for a NAT entry */
7910 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7911 	if (tpa.s_addr != 0) {
7912 		mne = bridge_lookup_mac_nat_entry_ipv4(sc, &tpa);
7913 	}
7914 	if (mne != NULL) {
7915 		if (op == HTONS_ARPOP_REPLY) {
7916 			/* translate the MAC address */
7917 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7918 				char    mac_src[24];
7919 				char    mac_dst[24];
7920 
7921 				ether_ntop(mac_src, sizeof(mac_src),
7922 				    ea->arp_tha);
7923 				ether_ntop(mac_dst, sizeof(mac_dst),
7924 				    mne->mne_mac);
7925 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7926 				    "%s %s ARP %s -> %s",
7927 				    sc->sc_if_xname,
7928 				    mne->mne_bif->bif_ifp->if_xname,
7929 				    mac_src, mac_dst);
7930 			}
7931 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7932 		}
7933 	} else {
7934 		/* handle conflicting ARP (sender matches mne) */
7935 		struct in_addr spa;
7936 
7937 		bcopy(ea->arp_spa, &spa, sizeof(spa));
7938 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7939 			/* check the source IP for a NAT entry */
7940 			mne = bridge_lookup_mac_nat_entry_ipv4(sc, &spa);
7941 		}
7942 	}
7943 
7944 done:
7945 	return mne;
7946 }
7947 
7948 static boolean_t
7949 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7950     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7951 {
7952 	struct ether_arp        * __single ea;
7953 	struct ether_header     * __single eh;
7954 	struct in_addr          ip;
7955 	struct mac_nat_entry    *mne = NULL;
7956 	u_short                 op;
7957 	boolean_t               translate = FALSE;
7958 
7959 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7960 		goto done;
7961 	}
7962 	op = ea->arp_op;
7963 	switch (op) {
7964 	case HTONS_ARPOP_REQUEST:
7965 	case HTONS_ARPOP_REPLY:
7966 		/* only care about REQUEST and REPLY */
7967 		break;
7968 	default:
7969 		goto done;
7970 	}
7971 
7972 	bcopy(ea->arp_spa, &ip, sizeof(ip));
7973 	if (ip.s_addr == 0) {
7974 		goto done;
7975 	}
7976 	/* XXX validate IP address: no multicast/broadcast */
7977 	mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
7978 	    (const char *)ea->arp_sha);
7979 	if (mnr != NULL && mne != NULL) {
7980 		/* record the offset to do the replacement */
7981 		translate = TRUE;
7982 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
7983 	}
7984 
7985 done:
7986 	return translate;
7987 }
7988 
7989 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
7990 	                         + sizeof(struct ip))
7991 static uint8_t * __indexable
7992 get_ether_ip_header_ptr(mbuf_t *data, boolean_t is_output)
7993 {
7994 	uint8_t         *header = NULL;
7995 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7996 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
7997 
7998 	if (mbuf_pkthdr_len(*data) < minlen) {
7999 		BRIDGE_LOG(LOG_DEBUG, flags,
8000 		    "IP %s short frame %lu < %lu",
8001 		    get_in_out_string(is_output),
8002 		    mbuf_pkthdr_len(*data), minlen);
8003 		goto done;
8004 	}
8005 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8006 		BRIDGE_LOG(LOG_DEBUG, flags,
8007 		    "IP %s size %lu mbuf_pullup fail",
8008 		    get_in_out_string(is_output),
8009 		    minlen);
8010 		*data = NULL;
8011 		goto done;
8012 	}
8013 	header = mtod(*data, uint8_t *);
8014 done:
8015 	return header;
8016 }
8017 
8018 static struct mac_nat_entry *
8019 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8020 {
8021 	struct in_addr          dst;
8022 	uint8_t                 *header;
8023 	struct ip               *iphdr;
8024 	struct mac_nat_entry    *mne = NULL;
8025 
8026 	header = get_ether_ip_header_ptr(data, FALSE);
8027 	if (header == NULL) {
8028 		goto done;
8029 	}
8030 	iphdr = (struct ip *)(void *)(header + sizeof(struct ether_header));
8031 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8032 	/* XXX validate IP address */
8033 	if (dst.s_addr == 0) {
8034 		goto done;
8035 	}
8036 	mne = bridge_lookup_mac_nat_entry_ipv4(sc, &dst);
8037 done:
8038 	return mne;
8039 }
8040 
8041 static void
8042 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8043     struct bridge_iflist *bif, mbuf_t m,
8044     uint8_t ip_header_len, struct mac_nat_record *mnr)
8045 {
8046 	uint16_t        dp_flags;
8047 	errno_t         error;
8048 	size_t          offset;
8049 	struct udphdr   udphdr;
8050 
8051 	/* copy the UDP header */
8052 	offset = sizeof(struct ether_header) + ip_header_len;
8053 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8054 	if (error != 0) {
8055 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8056 		    "mbuf_copydata udphdr failed %d",
8057 		    error);
8058 		return;
8059 	}
8060 	if (udphdr.uh_sport != HTONS_IPPORT_BOOTPC ||
8061 	    udphdr.uh_dport != HTONS_IPPORT_BOOTPS) {
8062 		/* not a BOOTP/DHCP packet */
8063 		return;
8064 	}
8065 	/* check whether the broadcast bit is already set */
8066 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8067 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8068 	if (error != 0) {
8069 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8070 		    "mbuf_copydata dp_flags failed %d",
8071 		    error);
8072 		return;
8073 	}
8074 	if ((dp_flags & HTONS_DHCP_FLAGS_BROADCAST) != 0) {
8075 		/* it's already set, nothing to do */
8076 		return;
8077 	}
8078 	/* broadcast bit needs to be set */
8079 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8080 	mnr->mnr_ip_header_len = ip_header_len;
8081 	if (udphdr.uh_sum != 0) {
8082 		uint16_t        delta;
8083 
8084 		/* adjust checksum to take modified dp_flags into account */
8085 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8086 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8087 	}
8088 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8089 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8090 	    sc->sc_if_xname,
8091 	    bif->bif_ifp->if_xname,
8092 	    ntohs(mnr->mnr_ip_dhcp_flags),
8093 	    ntohs(mnr->mnr_ip_udp_csum));
8094 	return;
8095 }
8096 
8097 static boolean_t
8098 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8099     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8100 {
8101 #pragma unused(mnr)
8102 	uint8_t                 *header;
8103 	struct ether_header     *eh;
8104 	struct in_addr          ip;
8105 	struct ip               *iphdr;
8106 	uint8_t                 ip_header_len;
8107 	struct mac_nat_entry    *mne = NULL;
8108 	boolean_t               translate = FALSE;
8109 
8110 	header = get_ether_ip_header_ptr(data, TRUE);
8111 	if (header == NULL) {
8112 		goto done;
8113 	}
8114 
8115 	eh = (struct ether_header *)header;
8116 	iphdr = (struct ip *)(header + sizeof(*eh));
8117 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8118 	if (ip_header_len < sizeof(ip)) {
8119 		/* bogus IP header */
8120 		goto done;
8121 	}
8122 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8123 	/* XXX validate the source address */
8124 	if (ip.s_addr != 0) {
8125 		mne = bridge_update_mac_nat_entry_ipv4(sc, bif, &ip,
8126 		    (const char *)eh->ether_shost);
8127 	}
8128 	if (mnr != NULL) {
8129 		if (ip.s_addr == 0 && iphdr->ip_p == IPPROTO_UDP) {
8130 			/* handle DHCP must broadcast */
8131 			bridge_mac_nat_udp_output(sc, bif, *data,
8132 			    ip_header_len, mnr);
8133 		}
8134 		translate = TRUE;
8135 	}
8136 done:
8137 	return translate;
8138 }
8139 
8140 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8141 	                         + sizeof(struct ip6_hdr))
8142 static uint8_t * __indexable
8143 get_ether_ipv6_header_ptr(mbuf_t *data, size_t plen, boolean_t is_output)
8144 {
8145 	uint8_t         *header = NULL;
8146 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8147 	size_t          minlen = ETHER_IPV6_HEADER_LEN + plen;
8148 
8149 	if (mbuf_pkthdr_len(*data) < minlen) {
8150 		BRIDGE_LOG(LOG_DEBUG, flags,
8151 		    "IP %s short frame %lu < %lu",
8152 		    get_in_out_string(is_output),
8153 		    mbuf_pkthdr_len(*data), minlen);
8154 		goto done;
8155 	}
8156 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8157 		BRIDGE_LOG(LOG_DEBUG, flags,
8158 		    "IP %s size %lu mbuf_pullup fail",
8159 		    get_in_out_string(is_output),
8160 		    minlen);
8161 		*data = NULL;
8162 		goto done;
8163 	}
8164 	header = mtod(*data, uint8_t *);
8165 done:
8166 	return header;
8167 }
8168 
8169 #include <netinet/icmp6.h>
8170 #include <netinet6/nd6.h>
8171 
/*
 * Size of an ND option header plus one Ethernet address; the link-layer
 * address option lengths parsed below are compared against this value.
 */
#define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8173 
/*
 * bridge_mac_nat_icmpv6_output:
 *
 *	Outbound ICMPv6 handling for MAC NAT.  Only the neighbor
 *	discovery messages NS, NA, RS and RA are examined; any other
 *	ICMPv6 type returns without touching `mnr`.
 *
 *	The ICMPv6 message is assumed to start right after the fixed IPv6
 *	header (no extension headers are skipped).  The whole payload, as
 *	given by ip6_plen, is pulled up and its ND options are parsed.
 *	If a source (NS/RS/RA) or target (NA) link-layer address option
 *	of Ethernet size is present, its offset from the Ethernet header
 *	is recorded in `mnr` together with the ICMPv6 length and type and
 *	the IPv6 header length.
 *
 *	For a DAD probe (NS with an unspecified source address) `*saddrp`
 *	is overwritten with the NS target address.
 *
 *	`ip6h` is the caller's view of the IPv6 header; `*data` may be
 *	replaced, or set to NULL, by the pullups.
 */
static void
bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
    struct bridge_iflist *bif,
    mbuf_t *data, struct ip6_hdr *ip6h,
    struct in6_addr *saddrp,
    struct mac_nat_record *mnr)
{
	uint8_t *header;
	struct ether_header *eh;
	struct icmp6_hdr *icmp6;
	uint8_t         icmp6_type;
	uint32_t        icmp6len;
	int             lladdrlen = 0;
	char            *lladdr = NULL;
	unsigned int    off = sizeof(*ip6h);

	/* the IPv6 payload length is taken as the ICMPv6 message length */
	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
	if (icmp6len < sizeof(*icmp6)) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "short IPv6 payload length %d < %lu",
		    icmp6len, sizeof(*icmp6));
		return;
	}

	/* pullup IP6 header + ICMPv6 header */
	header = get_ether_ipv6_header_ptr(data, sizeof(*icmp6), TRUE);
	if (header == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 header");
		return;
	}
	/* re-derive every header pointer from the pulled-up data */
	eh = (struct ether_header *)header;
	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);
	icmp6_type = icmp6->icmp6_type;
	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
	case ND_NEIGHBOR_ADVERT:
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT:
		break;
	default:
		/* not a neighbor discovery message we care about */
		return;
	}

	/* pullup IP6 header + payload */
	header = get_ether_ipv6_header_ptr(data, icmp6len, TRUE);
	if (header == NULL) {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
		    "failed to pullup icmp6 + payload");
		return;
	}
	/* the second pullup may have moved the data: derive pointers again */
	eh = (struct ether_header *)header;
	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
	icmp6 = (struct icmp6_hdr *)(header + sizeof(*eh) + off);

	switch (icmp6_type) {
	case ND_NEIGHBOR_SOLICIT: {
		struct nd_neighbor_solicit *nd_ns;
		union nd_opts ndopts;
		boolean_t is_dad_probe;
		struct in6_addr taddr;

		if (icmp6len < sizeof(*nd_ns)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_ns %d < %lu",
			    icmp6len, sizeof(*nd_ns));
			return;
		}

		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}

		/* parse options */
		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NS option");
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len,
			    lladdr, lladdrlen);
		}
		/* an unspecified source address marks a DAD probe */
		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
		if (lladdr != NULL) {
			/* a DAD probe carrying a source lladdr option is rejected */
			if (is_dad_probe) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "bad ND6 DAD packet");
				return;
			}
			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		if (is_dad_probe) {
			/* node is trying use taddr, create an mne for taddr */
			*saddrp = taddr;
		}
		break;
	}
	case ND_NEIGHBOR_ADVERT: {
		struct nd_neighbor_advert *nd_na;
		union nd_opts ndopts;
		struct in6_addr taddr;


		/* pointer is only formed here; it is read after the length check */
		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;

		if (icmp6len < sizeof(*nd_na)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short nd_na %d < %lu",
			    icmp6len, sizeof(*nd_na));
			return;
		}

		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid target ignored");
			return;
		}

		/* parse options */
		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 NA option");
			return;
		}
		if (ndopts.nd_opts_tgt_lladdr == NULL) {
			/* no target link-layer address option, nothing to do */
			return;
		}

		ND_OPT_LLADDR(ndopts.nd_opts_tgt_lladdr, nd_opt_len, lladdr, lladdrlen);
		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "target lladdrlen %d != %lu",
			    lladdrlen, ETHER_ND_LLADDR_LEN);
			return;
		}
		break;
	}
	case ND_ROUTER_ADVERT:
	case ND_ROUTER_SOLICIT: {
		union nd_opts ndopts;
		uint32_t type_length;
		const char *description;

		/* RA and RS differ only in fixed-part size and log label */
		if (icmp6_type == ND_ROUTER_ADVERT) {
			type_length = sizeof(struct nd_router_advert);
			description = "RA";
		} else {
			type_length = sizeof(struct nd_router_solicit);
			description = "RS";
		}
		if (icmp6len < type_length) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "short ND6 %s %d < %d",
			    description, icmp6len, type_length);
			return;
		}

		/* parse options */
		nd6_option_init(((uint8_t *)icmp6) + type_length,
		    icmp6len - type_length, &ndopts);
		if (nd6_options(&ndopts) < 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "invalid ND6 %s option", description);
			return;
		}
		if (ndopts.nd_opts_src_lladdr != NULL) {
			ND_OPT_LLADDR(ndopts.nd_opts_src_lladdr, nd_opt_len, lladdr, lladdrlen);

			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
				    "source lladdrlen %d != %lu",
				    lladdrlen, ETHER_ND_LLADDR_LEN);
				return;
			}
		}
		break;
	}
	default:
		break;
	}

	/* a link-layer address option was found: record where it is */
	if (lladdr != NULL) {
		mnr->mnr_ip6_lladdr_offset = (uint16_t)
		    ((uintptr_t)lladdr - (uintptr_t)eh);
		mnr->mnr_ip6_icmp6_len = icmp6len;
		mnr->mnr_ip6_icmp6_type = icmp6_type;
		mnr->mnr_ip6_header_len = off;
		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
			const char *str;

			switch (mnr->mnr_ip6_icmp6_type) {
			case ND_ROUTER_ADVERT:
				str = "ROUTER ADVERT";
				break;
			case ND_ROUTER_SOLICIT:
				str = "ROUTER SOLICIT";
				break;
			case ND_NEIGHBOR_ADVERT:
				str = "NEIGHBOR ADVERT";
				break;
			case ND_NEIGHBOR_SOLICIT:
				str = "NEIGHBOR SOLICIT";
				break;
			default:
				str = "";
				break;
			}
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
			    mnr->mnr_ip6_header_len,
			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
		}
	}
}
8406 
8407 static struct mac_nat_entry *
8408 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8409 {
8410 	struct in6_addr         dst;
8411 	uint8_t                 *header;
8412 	struct ether_header     *eh;
8413 	struct ip6_hdr          *ip6h;
8414 	struct mac_nat_entry    *mne = NULL;
8415 
8416 	header = get_ether_ipv6_header_ptr(data, 0, FALSE);
8417 	if (header == NULL) {
8418 		goto done;
8419 	}
8420 	eh = (struct ether_header *)header;
8421 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8422 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8423 	/* XXX validate IPv6 address */
8424 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8425 		goto done;
8426 	}
8427 	mne = bridge_lookup_mac_nat_entry_ipv6(sc, &dst);
8428 
8429 done:
8430 	return mne;
8431 }
8432 
8433 static boolean_t
8434 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8435     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8436 {
8437 	uint8_t                 *header;
8438 	struct ether_header     *eh;
8439 	ether_addr_t            ether_shost;
8440 	struct ip6_hdr          *ip6h;
8441 	struct in6_addr         saddr;
8442 	boolean_t               translate;
8443 
8444 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8445 	header = get_ether_ipv6_header_ptr(data, 0, TRUE);
8446 	if (header == NULL) {
8447 		translate = FALSE;
8448 		goto done;
8449 	}
8450 	eh = (struct ether_header *)header;
8451 	bcopy(eh->ether_shost, &ether_shost, sizeof(ether_shost));
8452 	ip6h = (struct ip6_hdr *)(header + sizeof(*eh));
8453 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8454 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8455 		bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8456 	}
8457 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8458 		goto done;
8459 	}
8460 	(void)bridge_update_mac_nat_entry_ipv6(sc, bif, &saddr,
8461 	    (const char *)ether_shost.octet);
8462 
8463 done:
8464 	return translate;
8465 }
8466 
8467 /*
8468  * Function: bridge_mac_nat_input:
8469  *
8470  * Purpose:
8471  *   Process a unicast packet arriving on the external interface `external_ifp`.
8472  *
8473  *   If the packet is ARP, IPv4, or IPv6, lookup the address from the packet in
8474  *   the mac_nat_entry table. If an entry is found, and the interface is
8475  *   not `external_ifp`, replace the destination MAC address in the
8476  *   ethernet header with the corresponding internal MAC address, and return
8477  *   the interface via `*dst_if`.
8478  *
8479  * Returns:
8480  *   NULL if the packet was deallocated during processing.
8481  *
8482  *   Otherwise, returns non-NULL packet that should:
8483  *   1) if `*dst_if` is NULL, continue on as an input packet
8484  *      over `external_ifp`, OR
8485  *   2) if `*dst_if` is not NULL, be delivered as an output packet
8486  *      over `*dst_if`.
8487  */
8488 static mbuf_t
8489 bridge_mac_nat_input(struct bridge_softc *sc, ifnet_t external_ifp,
8490     mbuf_t m, ifnet_t * dst_if)
8491 {
8492 	struct ether_header     *eh;
8493 	mbuf_t                  m0 = m;
8494 	struct mac_nat_entry    *mne = NULL;
8495 
8496 	BRIDGE_LOCK_ASSERT_HELD(sc);
8497 	*dst_if = NULL;
8498 	eh = mtod(m, struct ether_header *);
8499 	switch (eh->ether_type) {
8500 	case HTONS_ETHERTYPE_ARP:
8501 		mne = bridge_mac_nat_arp_input(sc, &m);
8502 		break;
8503 	case HTONS_ETHERTYPE_IP:
8504 		mne = bridge_mac_nat_ip_input(sc, &m);
8505 		break;
8506 	case HTONS_ETHERTYPE_IPV6:
8507 		mne = bridge_mac_nat_ipv6_input(sc, &m);
8508 		break;
8509 	default:
8510 		break;
8511 	}
8512 	if (m != NULL & mne != NULL) {
8513 		*dst_if = mne->mne_bif->bif_ifp;
8514 		if (*dst_if == external_ifp) {
8515 			/* receive packet for ifp */
8516 			*dst_if = NULL;
8517 		} else {
8518 			/* replace the destination MAC with internal one */
8519 			if (m != m0) {
8520 				/* it may have changed */
8521 				eh = mtod(m, struct ether_header *);
8522 			}
8523 			bcopy(mne->mne_mac, eh->ether_dhost,
8524 			    sizeof(eh->ether_dhost));
8525 		}
8526 	}
8527 	return m;
8528 }
8529 
8530 
8531 static mblist
8532 bridge_mac_nat_input_list(struct bridge_softc *sc, ifnet_t external_ifp,
8533     mbuf_t m, mbuf_t * forward_head)
8534 {
8535 	mblist          forward;
8536 	mbuf_t          next_packet;
8537 	mblist          ret;
8538 
8539 	mblist_init(&ret);
8540 	mblist_init(&forward);
8541 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8542 		ifnet_ref_t     dst_if;
8543 
8544 		/* take packet out of the list */
8545 		next_packet = scan->m_nextpkt;
8546 		scan->m_nextpkt = NULL;
8547 
8548 		scan = bridge_mac_nat_input(sc, external_ifp, scan, &dst_if);
8549 		if (scan != NULL) {
8550 			if (dst_if != NULL) {
8551 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8552 				    "%s MAC-NAT input translate to %s",
8553 				    sc->sc_if_xname, dst_if->if_xname);
8554 				/* use rcvif to store the egress interface */
8555 				mbuf_pkthdr_setrcvif(scan, dst_if);
8556 				/* add it to the forwarding list */
8557 				mblist_append(&forward, scan);
8558 			} else {
8559 				/* add it to the "continue on as input" list */
8560 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8561 				    "%s MAC-NAT input for %s",
8562 				    sc->sc_if_xname,
8563 				    external_ifp->if_xname);
8564 				mblist_append(&ret, scan);
8565 			}
8566 		}
8567 	}
8568 	*forward_head = forward.head;
8569 	return ret;
8570 }
8571 
8572 /*
8573  * bridge_mac_nat_translate_list:
8574  * Process a list of packets destined to the MAC-NAT interface `dst_if`
8575  * from the bridge member `sbif`.
8576  *
8577  * For each packet in the list, update the MAC-NAT record, and if
8578  * translation is required, translate it.
8579  *
8580  * Returns the list of packets that should be delivered to the MAC-NAT
8581  * interface.
8582  */
8583 static mbuf_t
8584 bridge_mac_nat_translate_list(struct bridge_softc * sc,
8585     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8586 {
8587 	mbuf_t          next_packet;
8588 	mblist          ret;
8589 
8590 	mblist_init(&ret);
8591 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
8592 		struct mac_nat_record   mnr;
8593 		bool                    translate_mac;
8594 
8595 		/* take packet out of the list */
8596 		next_packet = scan->m_nextpkt;
8597 		scan->m_nextpkt = NULL;
8598 		translate_mac = bridge_mac_nat_output(sc, sbif, &scan, &mnr);
8599 		if (scan != NULL) {
8600 			if (translate_mac) {
8601 				bridge_mac_nat_translate(&scan, &mnr,
8602 				    IF_LLADDR(dst_if));
8603 			}
8604 			if (scan != NULL) {
8605 				/* add it back to the list */
8606 				mblist_append(&ret, scan);
8607 			}
8608 		}
8609 	}
8610 	return ret.head;
8611 }
8612 
8613 /*
8614  * bridge_mac_nat_copy_and_translate_list:
8615  * Same as bridge_mac_nat_translate_list() except that a copy of the
8616  * packet list is returned instead.
8617  *
8618  * The packet list `m` is left unaltered.
8619  */
8620 static mbuf_t
8621 bridge_mac_nat_copy_and_translate_list(struct bridge_softc * sc,
8622     struct bridge_iflist *sbif, ifnet_t dst_if, mbuf_t m)
8623 {
8624 	mbuf_t          next_packet;
8625 	mblist          ret;
8626 
8627 	mblist_init(&ret);
8628 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8629 		mbuf_ref_t              mc = NULL;
8630 		struct mac_nat_record   mnr;
8631 		bool                    translate_mac;
8632 
8633 		/* take packet out of the list, make a copy, put it back */
8634 		next_packet = scan->m_nextpkt;
8635 		scan->m_nextpkt = NULL;
8636 		mc = m_dup(scan, M_DONTWAIT);
8637 		scan->m_nextpkt = next_packet;
8638 		if (mc == NULL) {
8639 			continue;
8640 		}
8641 		translate_mac = bridge_mac_nat_output(sc, sbif, &mc, &mnr);
8642 		if (mc != NULL) {
8643 			if (translate_mac) {
8644 				bridge_mac_nat_translate(&mc, &mnr,
8645 				    IF_LLADDR(dst_if));
8646 			}
8647 			if (mc != NULL) {
8648 				/* add it to the new list */
8649 				mblist_append(&ret, mc);
8650 			}
8651 		}
8652 	}
8653 	return ret.head;
8654 }
8655 
8656 static void
8657 bridge_mac_nat_forward_list(ifnet_t bridge_ifp, ether_type_flag_t etypef,
8658     mbuf_t m)
8659 {
8660 	int             count = 0;
8661 	ifnet_t         dst_if;
8662 	mblist          list;
8663 	int             n_lists = 0;
8664 	mbuf_t          next_packet;
8665 
8666 	mblist_init(&list);
8667 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
8668 		ifnet_t         this_if;
8669 
8670 		next_packet = scan->m_nextpkt;
8671 		this_if = mbuf_pkthdr_rcvif(scan);
8672 		mbuf_pkthdr_setrcvif(scan, NULL);
8673 		if (list.head == NULL) {
8674 			/* start a new list */
8675 			list.head = list.tail = scan;
8676 			count = 1;
8677 			dst_if = this_if;
8678 		} else if (dst_if != this_if) {
8679 			/* send up the previous chain */
8680 			if (list.tail != NULL) {
8681 				/* terminate the list */
8682 				list.tail->m_nextpkt = NULL;
8683 			}
8684 			n_lists++;
8685 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8686 			    "(%s): sublist %u pkts %u",
8687 			    dst_if->if_xname, n_lists, count);
8688 			bridge_enqueue(bridge_ifp, NULL,
8689 			    dst_if, etypef, list.head,
8690 			    CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8691 
8692 			/* start new list */
8693 			list.head = list.tail = scan;
8694 			count = 1;
8695 			dst_if = this_if;
8696 		} else {
8697 			count++;
8698 			list.tail = scan;
8699 		}
8700 		if (next_packet == NULL) {
8701 			/* last list */
8702 			n_lists++;
8703 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8704 			    "(%s): sublist %u pkts %u",
8705 			    dst_if->if_xname, n_lists, count);
8706 			bridge_enqueue(bridge_ifp, NULL,
8707 			    dst_if, etypef, list.head,
8708 			    CHECKSUM_OPERATION_CLEAR_OFFLOAD, pkt_direction_RX);
8709 		}
8710 	}
8711 	return;
8712 }
8713 
8714 /*
8715  * bridge_mac_nat_output:
8716  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8717  * from the interface 'bif'.
8718  *
8719  * Create a mac_nat_entry containing the source IP address and MAC address
8720  * from the packet. Populate a mac_nat_record with information detailing
8721  * how to translate the packet. Translation takes place later by calling
8722  * `bridge_mac_nat_translate()`.
8723  *
8724  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8725  * interface is generating an output packet. No translation is required in this
8726  * case, we just record the IP address used to prevent another bif from
8727  * claiming our IP address.
8728  *
8729  * Returns:
8730  * TRUE if the packet should be translated (*mnr updated as well),
8731  * FALSE otherwise.
8732  *
8733  * *data may be updated to point at a different mbuf chain or NULL if
8734  * the chain was deallocated during processing.
8735  */
8736 
8737 static boolean_t
8738 bridge_mac_nat_output(struct bridge_softc *sc,
8739     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8740 {
8741 	struct ether_header     *eh;
8742 	boolean_t               translate = FALSE;
8743 
8744 	BRIDGE_LOCK_ASSERT_HELD(sc);
8745 	assert(sc->sc_mac_nat_bif != NULL);
8746 
8747 	eh = mtod(*data, struct ether_header *);
8748 	if (mnr != NULL) {
8749 		bzero(mnr, sizeof(*mnr));
8750 		mnr->mnr_ether_type = eh->ether_type;
8751 	}
8752 	switch (eh->ether_type) {
8753 	case HTONS_ETHERTYPE_ARP:
8754 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8755 		break;
8756 	case HTONS_ETHERTYPE_IP:
8757 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8758 		break;
8759 	case HTONS_ETHERTYPE_IPV6:
8760 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8761 		break;
8762 	default:
8763 		break;
8764 	}
8765 	return translate;
8766 }
8767 
8768 static void
8769 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8770     const char eaddr[ETHER_ADDR_LEN])
8771 {
8772 	errno_t                 error;
8773 
8774 	if (mnr->mnr_arp_offset == 0) {
8775 		return;
8776 	}
8777 	/* replace the source hardware address */
8778 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8779 	    ETHER_ADDR_LEN, eaddr,
8780 	    MBUF_DONTWAIT);
8781 	if (error != 0) {
8782 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8783 		    "mbuf_copyback failed");
8784 		m_freem(*data);
8785 		*data = NULL;
8786 	}
8787 	return;
8788 }
8789 
8790 static void
8791 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8792 {
8793 	errno_t         error;
8794 	size_t          offset;
8795 
8796 	if (mnr->mnr_ip_header_len == 0) {
8797 		return;
8798 	}
8799 	/* update the UDP checksum */
8800 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8801 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8802 	    sizeof(mnr->mnr_ip_udp_csum),
8803 	    &mnr->mnr_ip_udp_csum,
8804 	    MBUF_DONTWAIT);
8805 	if (error != 0) {
8806 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8807 		    "mbuf_copyback uh_sum failed");
8808 		m_freem(*data);
8809 		*data = NULL;
8810 	}
8811 	/* update the DHCP must broadcast flag */
8812 	offset += sizeof(struct udphdr);
8813 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8814 	    sizeof(mnr->mnr_ip_dhcp_flags),
8815 	    &mnr->mnr_ip_dhcp_flags,
8816 	    MBUF_DONTWAIT);
8817 	if (error != 0) {
8818 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8819 		    "mbuf_copyback dp_flags failed");
8820 		m_freem(*data);
8821 		*data = NULL;
8822 	}
8823 }
8824 
8825 static void
8826 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8827     const char eaddr[ETHER_ADDR_LEN])
8828 {
8829 	uint16_t        cksum;
8830 	errno_t         error;
8831 	mbuf_t          m = *data;
8832 
8833 	if (mnr->mnr_ip6_header_len == 0) {
8834 		return;
8835 	}
8836 	switch (mnr->mnr_ip6_icmp6_type) {
8837 	case ND_ROUTER_ADVERT:
8838 	case ND_ROUTER_SOLICIT:
8839 	case ND_NEIGHBOR_SOLICIT:
8840 	case ND_NEIGHBOR_ADVERT:
8841 		if (mnr->mnr_ip6_lladdr_offset == 0) {
8842 			/* nothing to do */
8843 			return;
8844 		}
8845 		break;
8846 	default:
8847 		return;
8848 	}
8849 
8850 	/*
8851 	 * replace the lladdr
8852 	 */
8853 	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8854 	    ETHER_ADDR_LEN, eaddr,
8855 	    MBUF_DONTWAIT);
8856 	if (error != 0) {
8857 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8858 		    "mbuf_copyback lladdr failed");
8859 		m_freem(m);
8860 		*data = NULL;
8861 		return;
8862 	}
8863 
8864 	/*
8865 	 * recompute the icmp6 checksum
8866 	 */
8867 
8868 	/* skip past the ethernet header */
8869 	_mbuf_adjust_pkthdr_and_data(m, ETHER_HDR_LEN);
8870 
8871 #define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
8872 	/* set the checksum to zero */
8873 	cksum = 0;
8874 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8875 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8876 	if (error != 0) {
8877 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8878 		    "mbuf_copyback cksum=0 failed");
8879 		m_freem(m);
8880 		*data = NULL;
8881 		return;
8882 	}
8883 	/* compute and set the new checksum */
8884 	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8885 	    mnr->mnr_ip6_icmp6_len);
8886 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8887 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8888 	if (error != 0) {
8889 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8890 		    "mbuf_copyback cksum failed");
8891 		m_freem(m);
8892 		*data = NULL;
8893 		return;
8894 	}
8895 	/* restore the ethernet header */
8896 	_mbuf_adjust_pkthdr_and_data(m, -ETHER_HDR_LEN);
8897 	return;
8898 }
8899 
8900 static void
8901 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8902     const char eaddr[ETHER_ADDR_LEN])
8903 {
8904 	struct ether_header     *eh;
8905 
8906 	/* replace the source ethernet address with the single MAC */
8907 	eh = mtod(*data, struct ether_header *);
8908 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8909 	switch (mnr->mnr_ether_type) {
8910 	case HTONS_ETHERTYPE_ARP:
8911 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
8912 		break;
8913 
8914 	case HTONS_ETHERTYPE_IP:
8915 		bridge_mac_nat_ip_translate(data, mnr);
8916 		break;
8917 
8918 	case HTONS_ETHERTYPE_IPV6:
8919 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8920 		break;
8921 
8922 	default:
8923 		break;
8924 	}
8925 	return;
8926 }
8927 
8928 /*
8929  * bridge packet filtering
8930  */
8931 
8932 /*
8933  * Perform basic checks on header size since
8934  * pfil assumes ip_input has already processed
8935  * it for it.  Cut-and-pasted from ip_input.c.
8936  * Given how simple the IPv6 version is,
8937  * does the IPv4 version really need to be
8938  * this complicated?
8939  *
8940  * XXX Should we update ipstat here, or not?
8941  * XXX Right now we update ipstat but not
8942  * XXX csum_counter.
8943  */
8944 static int
8945 bridge_ip_checkbasic(struct mbuf **mp)
8946 {
8947 	struct mbuf *m = *mp;
8948 	struct ip *ip;
8949 	int len, hlen;
8950 	u_short sum;
8951 
8952 	if (*mp == NULL) {
8953 		return -1;
8954 	}
8955 
8956 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8957 		/* max_linkhdr is already rounded up to nearest 4-byte */
8958 		if ((m = m_copyup(m, sizeof(struct ip),
8959 		    max_linkhdr)) == NULL) {
8960 			/* XXXJRT new stat, please */
8961 			ipstat.ips_toosmall++;
8962 			goto bad;
8963 		}
8964 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8965 		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8966 			ipstat.ips_toosmall++;
8967 			goto bad;
8968 		}
8969 	}
8970 	ip = mtod(m, struct ip *);
8971 	if (ip == NULL) {
8972 		goto bad;
8973 	}
8974 
8975 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8976 		ipstat.ips_badvers++;
8977 		goto bad;
8978 	}
8979 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8980 	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
8981 		ipstat.ips_badhlen++;
8982 		goto bad;
8983 	}
8984 	if (hlen > m->m_len) {
8985 		if ((m = m_pullup(m, hlen)) == 0) {
8986 			ipstat.ips_badhlen++;
8987 			goto bad;
8988 		}
8989 		ip = mtod(m, struct ip *);
8990 		if (ip == NULL) {
8991 			goto bad;
8992 		}
8993 	}
8994 
8995 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8996 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8997 	} else {
8998 		if (hlen == sizeof(struct ip)) {
8999 			sum = in_cksum_hdr(ip);
9000 		} else {
9001 			sum = in_cksum(m, hlen);
9002 		}
9003 	}
9004 	if (sum) {
9005 		ipstat.ips_badsum++;
9006 		goto bad;
9007 	}
9008 
9009 	/* Retrieve the packet length. */
9010 	len = ntohs(ip->ip_len);
9011 
9012 	/*
9013 	 * Check for additional length bogosity
9014 	 */
9015 	if (len < hlen) {
9016 		ipstat.ips_badlen++;
9017 		goto bad;
9018 	}
9019 
9020 	/*
9021 	 * Check that the amount of data in the buffers
9022 	 * is as at least much as the IP header would have us expect.
9023 	 * Drop packet if shorter than we expect.
9024 	 */
9025 	if (m->m_pkthdr.len < len) {
9026 		ipstat.ips_tooshort++;
9027 		goto bad;
9028 	}
9029 
9030 	/* Checks out, proceed */
9031 	*mp = m;
9032 	return 0;
9033 
9034 bad:
9035 	*mp = m;
9036 	return -1;
9037 }
9038 
9039 /*
9040  * Same as above, but for IPv6.
9041  * Cut-and-pasted from ip6_input.c.
9042  * XXX Should we update ip6stat, or not?
9043  */
9044 static int
9045 bridge_ip6_checkbasic(struct mbuf **mp)
9046 {
9047 	struct mbuf *m = *mp;
9048 	struct ip6_hdr *ip6;
9049 
9050 	/*
9051 	 * If the IPv6 header is not aligned, slurp it up into a new
9052 	 * mbuf with space for link headers, in the event we forward
9053 	 * it.  Otherwise, if it is aligned, make sure the entire base
9054 	 * IPv6 header is in the first mbuf of the chain.
9055 	 */
9056 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9057 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9058 		/* max_linkhdr is already rounded up to nearest 4-byte */
9059 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9060 		    max_linkhdr)) == NULL) {
9061 			/* XXXJRT new stat, please */
9062 			ip6stat.ip6s_toosmall++;
9063 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9064 			goto bad;
9065 		}
9066 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9067 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9068 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9069 			ip6stat.ip6s_toosmall++;
9070 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9071 			goto bad;
9072 		}
9073 	}
9074 
9075 	ip6 = mtod(m, struct ip6_hdr *);
9076 
9077 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9078 		ip6stat.ip6s_badvers++;
9079 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9080 		goto bad;
9081 	}
9082 
9083 	/* Checks out, proceed */
9084 	*mp = m;
9085 	return 0;
9086 
9087 bad:
9088 	*mp = m;
9089 	return -1;
9090 }
9091 
9092 /*
9093  * the PF routines expect to be called from ip_input, so we
9094  * need to do and undo here some of the same processing.
9095  *
9096  * XXX : this is heavily inspired on bridge_pfil()
9097  */
9098 static int
9099 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9100     bool input)
9101 {
9102 	/*
9103 	 * XXX : mpetit : heavily inspired by bridge_pfil()
9104 	 */
9105 
9106 	int snap, error, i, hlen;
9107 	struct ether_header *eh1, eh2;
9108 	struct ip *ip;
9109 	struct llc llc1;
9110 	u_int16_t ether_type;
9111 
9112 	snap = 0;
9113 	error = -1;     /* Default error if not error == 0 */
9114 
9115 	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9116 		return 0; /* filtering is disabled */
9117 	}
9118 	i = min((*mp)->m_pkthdr.len, max_protohdr);
9119 	if ((*mp)->m_len < i) {
9120 		*mp = m_pullup(*mp, i);
9121 		if (*mp == NULL) {
9122 			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9123 			return -1;
9124 		}
9125 	}
9126 
9127 	eh1 = mtod(*mp, struct ether_header *);
9128 	ether_type = ntohs(eh1->ether_type);
9129 
9130 	/*
9131 	 * Check for SNAP/LLC.
9132 	 */
9133 	if (ether_type < ETHERMTU) {
9134 		struct llc *llc2 = (struct llc *)(eh1 + 1);
9135 
9136 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9137 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
9138 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
9139 		    llc2->llc_control == LLC_UI) {
9140 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
9141 			snap = 1;
9142 		}
9143 	}
9144 
9145 	/*
9146 	 * If we're trying to filter bridge traffic, don't look at anything
9147 	 * other than IP and ARP traffic.  If the filter doesn't understand
9148 	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
9149 	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9150 	 * but of course we don't have an AppleTalk filter to begin with.
9151 	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9152 	 * ARP traffic.)
9153 	 */
9154 	switch (ether_type) {
9155 	case ETHERTYPE_ARP:
9156 	case ETHERTYPE_REVARP:
9157 		return 0;         /* Automatically pass */
9158 
9159 	case ETHERTYPE_IP:
9160 	case ETHERTYPE_IPV6:
9161 		break;
9162 	default:
9163 		/*
9164 		 * Check to see if the user wants to pass non-ip
9165 		 * packets, these will not be checked by pf and
9166 		 * passed unconditionally so the default is to drop.
9167 		 */
9168 		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9169 			goto bad;
9170 		}
9171 		break;
9172 	}
9173 
9174 	/* Strip off the Ethernet header and keep a copy. */
9175 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9176 	m_adj(*mp, ETHER_HDR_LEN);
9177 
9178 	/* Strip off snap header, if present */
9179 	if (snap) {
9180 		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9181 		m_adj(*mp, sizeof(struct llc));
9182 	}
9183 
9184 	/*
9185 	 * Check the IP header for alignment and errors
9186 	 */
9187 	switch (ether_type) {
9188 	case ETHERTYPE_IP:
9189 		error = bridge_ip_checkbasic(mp);
9190 		break;
9191 	case ETHERTYPE_IPV6:
9192 		error = bridge_ip6_checkbasic(mp);
9193 		break;
9194 	default:
9195 		error = 0;
9196 		break;
9197 	}
9198 	if (error) {
9199 		goto bad;
9200 	}
9201 
9202 	error = 0;
9203 
9204 	/*
9205 	 * Run the packet through pf rules
9206 	 */
9207 	switch (ether_type) {
9208 	case ETHERTYPE_IP:
9209 		/*
9210 		 * before calling the firewall, swap fields the same as
9211 		 * IP does. here we assume the header is contiguous
9212 		 */
9213 		ip = mtod(*mp, struct ip *);
9214 
9215 		ip->ip_len = ntohs(ip->ip_len);
9216 		ip->ip_off = ntohs(ip->ip_off);
9217 
9218 		if (ifp != NULL) {
9219 			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9220 		}
9221 
9222 		if (*mp == NULL || error != 0) { /* filter may consume */
9223 			break;
9224 		}
9225 
9226 		/* Recalculate the ip checksum and restore byte ordering */
9227 		ip = mtod(*mp, struct ip *);
9228 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9229 		if (hlen < (int)sizeof(struct ip)) {
9230 			goto bad;
9231 		}
9232 		if (hlen > (*mp)->m_len) {
9233 			if ((*mp = m_pullup(*mp, hlen)) == 0) {
9234 				goto bad;
9235 			}
9236 			ip = mtod(*mp, struct ip *);
9237 			if (ip == NULL) {
9238 				goto bad;
9239 			}
9240 		}
9241 		ip->ip_len = htons(ip->ip_len);
9242 		ip->ip_off = htons(ip->ip_off);
9243 		ip->ip_sum = 0;
9244 		if (hlen == sizeof(struct ip)) {
9245 			ip->ip_sum = in_cksum_hdr(ip);
9246 		} else {
9247 			ip->ip_sum = in_cksum(*mp, hlen);
9248 		}
9249 		break;
9250 
9251 	case ETHERTYPE_IPV6:
9252 		if (ifp != NULL) {
9253 			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9254 		}
9255 
9256 		if (*mp == NULL || error != 0) { /* filter may consume */
9257 			break;
9258 		}
9259 		break;
9260 	default:
9261 		error = 0;
9262 		break;
9263 	}
9264 
9265 	if (*mp == NULL) {
9266 		return error;
9267 	}
9268 	if (error != 0) {
9269 		goto bad;
9270 	}
9271 
9272 	error = -1;
9273 
9274 	/*
9275 	 * Finally, put everything back the way it was and return
9276 	 */
9277 	if (snap) {
9278 		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9279 		if (*mp == NULL) {
9280 			return error;
9281 		}
9282 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9283 	}
9284 
9285 	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9286 	if (*mp == NULL) {
9287 		return error;
9288 	}
9289 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9290 
9291 	return 0;
9292 
9293 bad:
9294 	m_freem(*mp);
9295 	*mp = NULL;
9296 	return error;
9297 }
9298 
9299 #if BRIDGESTP
9300 static void
9301 bridge_bstp_input_list(struct bstp_port *bp, struct mbuf *head)
9302 {
9303 	mbuf_t  next_packet = NULL;
9304 
9305 	for (mbuf_t scan = head; scan != NULL; scan = next_packet) {
9306 		next_packet = scan->m_nextpkt;
9307 		scan->m_nextpkt = NULL;
9308 		bstp_input(bp, scan);
9309 	}
9310 }
9311 #endif /* BRIDGESTP */
9312 
9313 static mblist
9314 bridge_filter_arp_list(struct bridge_iflist * bif, mbuf_t m)
9315 {
9316 	mbuf_t          next_packet = NULL;
9317 	mblist          ret;
9318 
9319 	mblist_init(&ret);
9320 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9321 		errno_t                 error;
9322 
9323 		/* take packet out of the list */
9324 		next_packet = scan->m_nextpkt;
9325 		scan->m_nextpkt = NULL;
9326 		/* filter the ARP packet */
9327 		error = bridge_host_filter_arp(bif, &scan);
9328 		if (error != 0 && scan != NULL) {
9329 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9330 				brlog_mbuf_data(scan, 0,
9331 				    sizeof(struct ether_header) +
9332 				    sizeof(struct ip));
9333 			}
9334 			m_freem(scan);
9335 			scan = NULL;
9336 		}
9337 		if (scan != NULL) {
9338 			/* add it to the list */
9339 			mblist_append(&ret, scan);
9340 		}
9341 	}
9342 	return ret;
9343 }
9344 
9345 static mbuf_t
9346 bridge_filter_checksum(ifnet_t bridge_ifp, struct bridge_iflist * bif, mbuf_t m,
9347     bool is_ipv4, bool host_filter, bool checksum)
9348 {
9349 	uint32_t                dbgf = 0;
9350 	errno_t                 error;
9351 	ip_packet_info          info;
9352 	u_int                   mac_hlen = sizeof(struct ether_header);
9353 
9354 	if (host_filter) {
9355 		dbgf |= BR_DBGF_HOSTFILTER;
9356 	}
9357 	if (checksum) {
9358 		dbgf |= BR_DBGF_CHECKSUM;
9359 	}
9360 	/* get the IP protocol header */
9361 	error = bridge_get_ip_proto(&m, mac_hlen, is_ipv4, &info,
9362 	    &bif->bif_stats.brms_in_ip);
9363 	if (error != 0) {
9364 		BRIDGE_LOG(LOG_NOTICE, dbgf,
9365 		    "%s(%s) bridge_get_ip_proto failed %d",
9366 		    bridge_ifp->if_xname,
9367 		    bif->bif_ifp->if_xname, error);
9368 		goto drop;
9369 	}
9370 	if (host_filter) {
9371 		bool            drop = true;
9372 
9373 		/* restrict IP protocols */
9374 		switch (info.ip_proto) {
9375 		case IPPROTO_ICMP:
9376 		case IPPROTO_IGMP:
9377 			drop = !is_ipv4;
9378 			break;
9379 		case IPPROTO_TCP:
9380 		case IPPROTO_UDP:
9381 			drop = false;
9382 			break;
9383 		case IPPROTO_ICMPV6:
9384 			drop = is_ipv4;
9385 			break;
9386 		default:
9387 			break;
9388 		}
9389 		if (drop) {
9390 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
9391 			goto drop;
9392 		}
9393 		bridge_hostfilter_stats.brhf_ip_ok += 1;
9394 	}
9395 	if (checksum) {
9396 		/* need to compute IP/UDP/TCP/checksums */
9397 		error = bridge_offload_checksum(&m, &info, &bif->bif_stats);
9398 		if (error != 0) {
9399 			BRIDGE_LOG(LOG_NOTICE, dbgf,
9400 			    "%s(%s) bridge_offload_checksum failed %d",
9401 			    bridge_ifp->if_xname,
9402 			    bif->bif_ifp->if_xname, error);
9403 			goto drop;
9404 		}
9405 	}
9406 	return m;
9407 
9408 drop:
9409 	/* toss the packet */
9410 	if (m != NULL) {
9411 		if (host_filter &&
9412 		    BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
9413 			brlog_mbuf_data(m, 0,
9414 			    sizeof(struct ether_header) +
9415 			    sizeof(struct ip));
9416 		}
9417 		m_freem(m);
9418 		m = NULL;
9419 	}
9420 	return NULL;
9421 }
9422 
9423 static mblist
9424 bridge_filter_checksum_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9425     mbuf_t in_list, ether_type_flag_t etypef, bool host_filter, bool checksum)
9426 {
9427 	bool                    is_ipv4 = (etypef == ETHER_TYPE_FLAG_IPV4);
9428 	mbuf_t                  next_packet = NULL;
9429 	mblist                  ret;
9430 
9431 	mblist_init(&ret);
9432 	for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9433 		/* take packet out of the list */
9434 		next_packet = scan->m_nextpkt;
9435 		scan->m_nextpkt = NULL;
9436 		scan = bridge_filter_checksum(bridge_ifp, bif,
9437 		    scan, is_ipv4, host_filter, checksum);
9438 		if (scan != NULL) {
9439 			/* add packet to the list */
9440 			mblist_append(&ret, scan);
9441 		}
9442 	}
9443 	return ret;
9444 }
9445 
9446 static mbuf_t
9447 bridge_checksum_offload_list(ifnet_t bridge_ifp, struct bridge_iflist * bif,
9448     mbuf_t m, bool is_ipv4)
9449 {
9450 	mblist          ret;
9451 	mbuf_t          next_packet;
9452 
9453 	mblist_init(&ret);
9454 	for (mbuf_t scan = m; scan != NULL; scan = next_packet) {
9455 		uint32_t        csum_flags;
9456 
9457 		/* take it out of the list */
9458 		next_packet = scan->m_nextpkt;
9459 		scan->m_nextpkt = NULL;
9460 
9461 		csum_flags = scan->m_pkthdr.csum_flags;
9462 		if ((csum_flags & checksum_request_flags) != 0) {
9463 			/* compute the checksum now */
9464 			scan = bridge_filter_checksum(bridge_ifp, bif, scan,
9465 			    is_ipv4, false, true);
9466 			if (scan != NULL) {
9467 				/* clear offload now */
9468 				scan->m_pkthdr.csum_flags &= csum_flags;
9469 			}
9470 		}
9471 		if (scan != NULL) {
9472 			mblist_append(&ret, scan);
9473 		}
9474 	}
9475 	return ret.head;
9476 }
9477 
9478 static mbuf_t
9479 copy_broadcast_packet(mbuf_t m)
9480 {
9481 	mbuf_t  mc;
9482 
9483 	/* make a copy of the packet */
9484 	mc = m_dup(m, M_DONTWAIT);
9485 	if (mc != NULL) {
9486 		struct ether_header *eh;
9487 
9488 		/* make copy look like it is broadcast */
9489 		mc->m_flags |= M_BCAST;
9490 		eh = mtod(mc, struct ether_header *);
9491 		bcopy(etherbroadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN);
9492 	}
9493 	return mc;
9494 }
9495 
9496 static mblist
9497 bridge_find_broadcast_ipv4(mbuf_t in_list, mbuf_t * ip_bcast_head)
9498 {
9499 	mblist          ip_bcast;
9500 	mbuf_t          next_packet = NULL;
9501 	mblist          ret;
9502 
9503 	mblist_init(&ret);
9504 	mblist_init(&ip_bcast);
9505 	for (mbuf_ref_t scan = in_list; scan != NULL; scan = next_packet) {
9506 		mbuf_t  bcast_pkt = NULL;
9507 		uint8_t *header;
9508 
9509 		/* take packet out of the list */
9510 		next_packet = scan->m_nextpkt;
9511 		scan->m_nextpkt = NULL;
9512 
9513 		header = get_ether_ip_header_ptr(&scan, FALSE);
9514 		if (header != NULL) {
9515 			struct in_addr  dst;
9516 			struct ip       *iphdr;
9517 
9518 			iphdr = (struct ip *)(header + sizeof(struct ether_header));
9519 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
9520 			if (dst.s_addr == INADDR_BROADCAST) {
9521 				bcast_pkt = copy_broadcast_packet(scan);
9522 			}
9523 		}
9524 		if (bcast_pkt != NULL) {
9525 			/* add packet to broadcast list */
9526 			mblist_append(&ip_bcast, bcast_pkt);
9527 		}
9528 		if (scan != NULL) {
9529 			/* add packet back into the list */
9530 			mblist_append(&ret, scan);
9531 		}
9532 	}
9533 	*ip_bcast_head = ip_bcast.head;
9534 	return ret;
9535 }
9536 
9537 static ifnet_t
9538 bridge_find_member(struct bridge_softc * sc, uint8_t * lladdr,
9539     struct bridge_iflist * sbif)
9540 {
9541 	struct bridge_iflist * bif;
9542 
9543 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
9544 		if (bif == sbif) {
9545 			/* skip the input member */
9546 			continue;
9547 		}
9548 		if (_ether_cmp(IF_LLADDR(bif->bif_ifp), lladdr) == 0) {
9549 			return bif->bif_ifp;
9550 		}
9551 	}
9552 	return NULL;
9553 }
9554 
9555 
/*
 * Function: bridge_input_list
 *
 * Purpose:
 *   Process a list of input packets received on member `ifp` through
 *   the bridge.
 *   The caller ensures that all of the packets in `list` have the same
 *   ethernet header; `eh_in_p` points to a copy of that header.
 *   `is_promisc` is the promiscuous indication the caller extracted from
 *   the packets of the list.
 *
 * Returns:
 *   The list of packets that were not consumed/freed by the bridge.
 *   `head` (and `tail`) of the returned list are NULL if all of the
 *   packets were consumed.
 */
static mblist
bridge_input_list(struct bridge_softc * sc, ifnet_t ifp,
    struct ether_header * eh_in_p, mblist list, bool is_promisc)
{
	struct bridge_iflist *  bif;
	ifnet_t                 bridge_ifp;
	bool                    bridge_needs_input;
	bool                    checksum_offload;
	uint8_t *               dhost;
#if BRIDGESTP
	bool                    discarding = false;
#endif /* BRIDGESTP */
	ifnet_t                 dst_if = NULL;
	errno_t                 error;
	ether_type_flag_t       etypef;
	bool                    host_filter;
	bool                    host_filter_drop = false;
	mbuf_ref_t              ip_bcast = NULL;
	bool                    is_bridge_mac = false;
	bool                    is_broadcast;
	bool                    is_ifp_mac;
	ifnet_t                 member_input = NULL;
	uint8_t *               shost;
	bool                    uses_virtio = false;
	uint16_t                vlan;

	if (ifp->if_bridge == NULL) {
		/* no longer part of bridge */
		goto done;
	}
	bridge_ifp = sc->sc_ifp;
	is_broadcast = IS_BCAST_MCAST(list.head);
	/* neither broadcast/multicast nor flagged promiscuous */
	is_ifp_mac = (!is_broadcast && !is_promisc);
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s count %d head 0x%llx.0x%llx tail 0x%llx.0x%llx",
	    bridge_ifp->if_xname, ifp->if_xname, list.count,
	    (uint64_t)VM_KERNEL_ADDRPERM(list.head),
	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.head, void *)),
	    (uint64_t)VM_KERNEL_ADDRPERM(list.tail),
	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(list.tail, void *)));

	/* assume we'll return all packets */
	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s not running passing along",
		    bridge_ifp->if_xname);
		goto done;
	}

	/*
	 * NOTE(review): no variable named `m` is in scope at this point, so
	 * this presumably only compiles because VLANTAGOF() discards its
	 * argument -- confirm against the macro definition.
	 */
	vlan = VLANTAGOF(m);

	/*
	 * lookup the bridge member
	 *
	 * The bridge lock is held from here on: every path to `done` below
	 * either unlocks explicitly or calls a function that unlocks.
	 */
	BRIDGE_LOCK(sc);
	bif = bridge_lookup_member_if(sc, ifp);
	if (bif == NULL) {
		BRIDGE_UNLOCK(sc);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s bridge_lookup_member_if failed",
		    bridge_ifp->if_xname);
		goto done;
	}

	uses_virtio = bif_uses_virtio(bif);

	/*
	 * host filter drops packets that:
	 * - are not ARP, IPv4, or IPv6
	 * - have incorrect source MAC address
	 */
	host_filter = (bif->bif_flags & BIFF_HOST_FILTER) != 0;
	etypef = ether_type_flag_get(eh_in_p->ether_type);
	if (host_filter
	    && (etypef & ETHER_TYPE_FLAG_IP_ARP) == 0) {
		/* ether type not one of ARP, IPv4, or IPv6 */
		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
		host_filter_drop = true;
	} else if ((bif->bif_flags & BIFF_HF_HWSRC) != 0 &&
	    bcmp(eh_in_p->ether_shost, bif->bif_hf_hwsrc, ETHER_ADDR_LEN)
	    != 0) {
		/* only allow the single source MAC address */
		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr,
		    __func__, __LINE__);
		host_filter_drop = true;
	}
	if (host_filter_drop) {
		/* the whole list shares the header, so the whole list goes */
		BRIDGE_UNLOCK(sc);
		m_freem_list(list.head);
		list.head = list.tail = NULL;
		goto done;
	}

#if BRIDGESTP
	discarding = (bif->bif_ifflags & IFBIF_STP) != 0 &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING;
#endif /* BRIDGESTP */

	dhost = eh_in_p->ether_dhost;
	shost = eh_in_p->ether_shost;
	/*
	 * Reserved multicast address listed in 802.1D section 7.12.6
	 * must not be forwarded by the bridge.
	 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
	 */
	if (is_broadcast) {
		if (IS_MCAST(list.head)) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    " multicast: "
			    "%02x:%02x:%02x:%02x:%02x:%02x",
			    dhost[0], dhost[1],
			    dhost[2], dhost[3],
			    dhost[4], dhost[5]);
		}
		/* compare all but the last byte of the reserved address */
		if (bcmp(dhost, bstp_etheraddr, (ETHER_ADDR_LEN - 1)) == 0) {
			if (dhost[5] == BSTP_ETHERADDR_RANGE_FIRST) {
				/* multicast for spanning tree */
#if BRIDGESTP
				bridge_bstp_input_list(&bif->bif_stp, list.head);
#else /* BRIDGESTP */
				m_freem_list(list.head);
#endif /* BRIDGESTP */
				list.head = list.tail = NULL;
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			if (dhost[5] <= BSTP_ETHERADDR_RANGE_LAST) {
				/* allow packet to continue up the stack */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
		}
		/* broadcast to all members */
		os_atomic_add(&bridge_ifp->if_imcasts, list.count, relaxed);
	}

#if BRIDGESTP
	if (discarding) {
		BRIDGE_UNLOCK(sc);
		goto done;
	}
#endif /* BRIDGESTP */

	/* If the interface is learning, record the address. */
	if ((bif->bif_ifflags & IFBIF_LEARNING) != 0) {
		error = bridge_rtupdate(sc, shost, vlan, bif, 0, IFBAF_DYNAMIC);
		/*
		 * If the interface has addresses limits then deny any source
		 * that is not in the cache.
		 */
		if (error != 0 && bif->bif_addrmax) {
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	}
#if BRIDGESTP
	if ((bif->bif_ifflags & IFBIF_STP) != 0 &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
		BRIDGE_UNLOCK(sc);
		goto done;
	}
#endif /* BRIDGESTP */

	/*
	 * If the packet is not IP, let the host filter drop ARP packets.
	 * Otherwise, if the host filter is enabled or we need to compute
	 * checksums, do that.
	 * Otherwise, if MAC-NAT is enabled and this is an IPv4 packet,
	 * check for IPv4 broadcast packets. Accumulate those in a separate
	 * list `ip_bcast`.
	 */
	checksum_offload = bif_has_checksum_offload(bif);
	if (!ether_type_flag_is_ip(etypef)) {
		/* host filter process ARP */
		if (host_filter) {
			/* host filter check earlier means this must be ARP */
			VERIFY(etypef == ETHER_TYPE_FLAG_ARP);
			list = bridge_filter_arp_list(bif, list.head);
			if (list.head == NULL) {
				VERIFY(list.tail == NULL);
				BRIDGE_UNLOCK(sc);
				goto done;
			}
		}
	} else if (host_filter || checksum_offload) {
		/* host filter and/or checksum */
		list = bridge_filter_checksum_list(bridge_ifp, bif,
		    list.head, etypef, host_filter, checksum_offload);
		if (list.head == NULL) {
			VERIFY(list.tail == NULL);
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	} else if (is_ifp_mac && bif == sc->sc_mac_nat_bif &&
	    etypef == ETHER_TYPE_FLAG_IPV4) {
		/* look for broadcast IPv4 packet */
		list = bridge_find_broadcast_ipv4(list.head, &ip_bcast);
		if (list.head == NULL && ip_bcast == NULL) {
			/* all packets were consumed */
			BRIDGE_UNLOCK(sc);
			goto done;
		}
	}

	/*
	 * If the bridge has ULP attached, and the destination MAC
	 * matches the bridge interface, claim the packets for the bridge
	 * interface.
	 */
	bridge_needs_input = (sc->sc_flags & SCF_PROTO_ATTACHED) != 0;
	if (bridge_needs_input &&
	    !is_broadcast && _ether_cmp(dhost, IF_LLADDR(bridge_ifp)) == 0) {
		is_bridge_mac = true;
	}
	if (is_ifp_mac) {
		/* unicast to the interface */
		if (sc->sc_mac_nat_bif == bif) {
			mbuf_ref_t  forward = NULL;

			if (list.head != NULL) {
				/* handle MAC-NAT if enabled */
				list = bridge_mac_nat_input_list(sc, ifp,
				    list.head, &forward);
			}
			if (ip_bcast != NULL) {
				/* forward to all members except this one */
				/* bridge_broadcast_list unlocks */
				bridge_broadcast_list(sc, bif, etypef,
				    ip_bcast, pkt_direction_RX);
			} else {
				BRIDGE_UNLOCK(sc);
			}
			/* the lock has been dropped on both branches above */
			if (forward != NULL) {
				bridge_mac_nat_forward_list(bridge_ifp, etypef,
				    forward);
			}
		} else {
			BRIDGE_UNLOCK(sc);
		}
		/* unicast packets for this interface do not get forwarded */
		goto done;
	}
	if (is_bridge_mac || list.head == NULL) {
		BRIDGE_UNLOCK(sc);
		goto done;
	}
	if (!is_broadcast) {
		/* find where to send the packet */
		dst_if = bridge_rtlookup(sc, dhost, vlan);
		if (ifp == dst_if) {
			/* nothing to forward */
			BRIDGE_UNLOCK(sc);
			goto done;
		}
		if (dst_if == NULL) {
			/* if a member is the dhost, deliver as input */
			member_input = bridge_find_member(sc, dhost, bif);
			if (member_input != NULL) {
				/* grab packets destined to member */
				BRIDGE_UNLOCK(sc);
				goto done;
			}
			/* if a member is shost, there's a loop, drop it */
			if (bridge_find_member(sc, shost, bif) != NULL) {
				BRIDGE_UNLOCK(sc);
				m_freem_list(list.head);
				list.head = list.tail = NULL;
				goto done;
			}
		}
	}
	if (dst_if == NULL) {
		mbuf_t  m;

		/*
		 * No single destination: broadcast a copy of the list and
		 * keep the originals in `list` for the `done` handling.
		 */
		m = copy_packet_list(list.head);
		if (m != NULL) {
			/* bridge_broadcast_list unlocks */
			bridge_broadcast_list(sc, bif, etypef, m,
			    pkt_direction_RX);
		} else {
			BRIDGE_UNLOCK(sc);
		}
	} else {
		/* bridge_forward_list() consumes list and unlocks */
		bridge_forward_list(sc, bif, dst_if, etypef, list.head);
		list.head = list.tail = NULL;
	}

done:
	/*
	 * Dispose of whatever is left in `list`: inject it into the member
	 * that owns the destination MAC, hand it to the bridge interface,
	 * or adjust it and return it to the caller.
	 */
	if (list.head != NULL) {
		if (member_input != NULL) {
			/* member gets the packets */
			inject_input_packet_list(member_input, list.head, true);
			list.head = list.tail = NULL;
		} else if (is_bridge_mac) {
			/* bridge consumes all the unicast packets */
			bridge_interface_input_list(bridge_ifp, etypef, list,
			    uses_virtio);
			list.head = list.tail = NULL;
		} else {
			adjust_input_packet_list(list.head);
		}
	}
	return list;
}
9873 
9874 static inline void
9875 update_mbuf_flags(struct ifnet * ifp, mbuf_t m, struct ether_header * eh)
9876 {
9877 	/* duplicate some of the work done in ether_demux */
9878 	if ((eh->ether_dhost[0] & 1) == 0) {
9879 		if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp)) != 0) {
9880 			m->m_flags |= M_PROMISC;
9881 		}
9882 	} else {
9883 		/* Check for broadcast */
9884 		if (_ether_cmp(etherbroadcastaddr, eh->ether_dhost) == 0) {
9885 			m->m_flags |= M_BCAST;
9886 		} else {
9887 			m->m_flags |= M_MCAST;
9888 		}
9889 	}
9890 	if (m->m_flags & M_HASFCS) {
9891 		/*
9892 		 * If the M_HASFCS is set by the driver we want to make sure
9893 		 * that we strip off the trailing FCS data before handing it
9894 		 * up the stack.
9895 		 */
9896 		m_adj(m, -ETHER_CRC_LEN);
9897 		m->m_flags &= ~M_HASFCS;
9898 	}
9899 	return;
9900 }
9901 
9902 static mbuf_t
9903 bridge_pf_list(mbuf_t m, ifnet_t ifp, uint32_t sc_filter_flags, bool input)
9904 {
9905 	mbuf_t  next_packet = NULL;
9906 	mblist  ret;
9907 
9908 	mblist_init(&ret);
9909 	for (mbuf_ref_t scan = m; scan != NULL; scan = next_packet) {
9910 		next_packet = scan->m_nextpkt;
9911 
9912 		/* remove packet from list, and pass through PF */
9913 		scan->m_nextpkt = NULL;
9914 		MBUF_INPUT_CHECK(scan, ifp);
9915 		bridge_pf(&scan, ifp, sc_filter_flags, input);
9916 		if (scan != NULL) {
9917 			/* add packet back to the list */
9918 			mblist_append(&ret, scan);
9919 		}
9920 	}
9921 	return ret.head;
9922 }
9923 
9924 static inline bool
9925 bridge_check_frame_header(struct bridge_softc * sc, ifnet_t ifp, mbuf_t m)
9926 {
9927 	bool                    included = false;
9928 	char * __single         header;
9929 	size_t                  header_length = 0;
9930 
9931 	header = m->m_pkthdr.pkt_hdr;
9932 	if (header >= (char *)mbuf_datastart(m) &&
9933 	    header <= mtod(m, char *)) {
9934 		header_length = mtod(m, char *) - header;
9935 		if (header_length >= ETHER_HDR_LEN) {
9936 			included = true;
9937 		}
9938 	}
9939 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9940 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
9941 	    "header length %lu", sc->sc_ifp->if_xname,
9942 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
9943 	    (uint64_t)VM_KERNEL_ADDRPERM(mtod(m, void *)),
9944 	    (uint64_t)VM_KERNEL_ADDRPERM(header),
9945 	    included ? "inside" : "outside", header_length);
9946 	if (!included) {
9947 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
9948 		    "%s: frame_header outside mbuf", ifp->if_xname);
9949 	}
9950 	return included;
9951 }
9952 
9953 
9954 mbuf_t
9955 bridge_early_input(struct ifnet *ifp, mbuf_t in_list, u_int32_t cnt)
9956 {
9957 	struct ether_header eh;
9958 	mblist          list;
9959 	volatile bool   list_is_promisc;
9960 	int             n_lists = 0;
9961 	mbuf_t          next_packet = NULL;
9962 	mblist          ret;
9963 	struct bridge_softc * __single sc = ifp->if_bridge;
9964 	uint32_t        sc_filter_flags;
9965 
9966 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
9967 	    "(%s): count %u", ifp->if_xname, cnt);
9968 
9969 	/* run packet list through PF first */
9970 	sc_filter_flags = sc->sc_filter_flags;
9971 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
9972 		in_list = bridge_pf_list(in_list, ifp, sc_filter_flags, true);
9973 	}
9974 
9975 	/* form sublists with the same ethernet header */
9976 	mblist_init(&list);
9977 	mblist_init(&ret);
9978 	for (mbuf_t scan = in_list; scan != NULL; scan = next_packet) {
9979 		struct ether_header *   eh_p;
9980 		volatile bool           is_promisc;
9981 		mblist                  resid;
9982 
9983 		/* take it out of the list */
9984 		next_packet = scan->m_nextpkt;
9985 		scan->m_nextpkt = NULL;
9986 
9987 		/* don't loop the packet */
9988 		if ((scan->m_flags & M_PROTO1) != 0) {
9989 			mblist_append(&ret, scan);
9990 			continue;
9991 		}
9992 		/* Check if this mbuf looks valid */
9993 		MBUF_INPUT_CHECK(scan, ifp);
9994 
9995 		/* if the frame header isn't in the first mbuf, ignore */
9996 		if (!bridge_check_frame_header(sc, ifp, scan)) {
9997 			mblist_append(&ret, scan);
9998 			continue;
9999 		}
10000 		eh_p = __unsafe_forge_single(struct ether_header *,
10001 		    scan->m_pkthdr.pkt_hdr);
10002 		update_mbuf_flags(ifp, scan, eh_p);
10003 
10004 		/* set start back to include ether header */
10005 		_mbuf_adjust_pkthdr_and_data(scan, -ETHER_HDR_LEN);
10006 
10007 		is_promisc = get_and_clear_promisc(scan);
10008 		if (list.head == NULL) {
10009 			/* start a new list */
10010 			mblist_append(&list, scan);
10011 			bcopy(eh_p, &eh, sizeof(eh));
10012 			list_is_promisc = is_promisc;
10013 		} else if (bcmp(eh_p, &eh, sizeof(eh)) != 0) {
10014 			n_lists++;
10015 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10016 			    "(%s): sublist %u pkts %u",
10017 			    ifp->if_xname, n_lists, list.count);
10018 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10019 				brlog_ether_header(&eh);
10020 			}
10021 			resid = bridge_input_list(sc, ifp, &eh, list,
10022 			    list_is_promisc);
10023 			if (resid.head != NULL) {
10024 				/* add to the packets to be returned */
10025 				mblist_append_list(&ret, resid);
10026 			}
10027 			/* start new list */
10028 			mblist_init(&list);
10029 			mblist_append(&list, scan);
10030 			list_is_promisc = is_promisc;
10031 			bcopy(eh_p, &eh, sizeof(eh));
10032 		} else {
10033 			mblist_append(&list, scan);
10034 			VERIFY(is_promisc == list_is_promisc);
10035 		}
10036 		if (next_packet == NULL) {
10037 			/* last list */
10038 			n_lists++;
10039 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT_LIST,
10040 			    "(%s): sublist %u pkts %u",
10041 			    ifp->if_xname, n_lists, list.count);
10042 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT_LIST)) {
10043 				brlog_ether_header(&eh);
10044 			}
10045 			resid = bridge_input_list(sc, ifp, &eh, list,
10046 			    list_is_promisc);
10047 			if (resid.head != NULL) {
10048 				/* add to the packets to be returned */
10049 				mblist_append_list(&ret, resid);
10050 			}
10051 		}
10052 	}
10053 	return ret.head;
10054 }
10055 
10056 /*
10057  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
10058  * All rights reserved.
10059  *
10060  * Redistribution and use in source and binary forms, with or without
10061  * modification, are permitted provided that the following conditions
10062  * are met:
10063  *   1. Redistributions of source code must retain the above copyright
10064  *      notice, this list of conditions and the following disclaimer.
10065  *   2. Redistributions in binary form must reproduce the above copyright
10066  *      notice, this list of conditions and the following disclaimer in the
10067  *      documentation and/or other materials provided with the distribution.
10068  *
10069  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
10070  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
10071  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
10072  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
10073  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
10074  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
10075  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
10076  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
10077  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
10078  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
10079  * SUCH DAMAGE.
10080  */
10081 
10082 /*
10083  * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
10084  *
10085  * Create a queue of packets/segments which fit the given mss + hdr_len.
10086  * m0 points to mbuf chain to be segmented.
10087  * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
10088  * into segments of length MSS bytes and then copy the first hdr_len bytes
10089  * from m0 at the top of each segment.
10090  * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
10091  * in each segment after the first hdr_len bytes
10092  *
10093  * Return the new queue with the segments on success, NULL on failure.
10094  * (the mbuf queue is freed in this case).
10095  */
10096 
10097 static mblist
10098 m_seg(struct mbuf *m0, int hdr_len, int mss, char * hdr2_buf __sized_by_or_null(hdr2_len), int hdr2_len)
10099 {
10100 	int off = 0, n, firstlen;
10101 	struct mbuf *mseg;
10102 	int total_len = m0->m_pkthdr.len;
10103 	mblist ret;
10104 
10105 	mblist_init(&ret);
10106 	mblist_append(&ret, m0);
10107 
10108 	/*
10109 	 * Segmentation useless
10110 	 */
10111 	if (total_len <= hdr_len + mss) {
10112 		n = 1;
10113 		goto done;
10114 	}
10115 	if (hdr2_buf == NULL || hdr2_len <= 0) {
10116 		hdr2_buf = NULL;
10117 		hdr2_len = 0;
10118 	}
10119 
10120 	off = hdr_len + mss;
10121 	firstlen = mss; /* first segment stored in the original mbuf */
10122 	ret.bytes = off;
10123 	for (n = 1; off < total_len; off += mss, n++) {
10124 		struct mbuf *m;
10125 		/*
10126 		 * Copy the header from the original packet
10127 		 * and create a new mbuf chain
10128 		 */
10129 		if (MHLEN < hdr_len) {
10130 			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
10131 		} else {
10132 			m = m_gethdr(M_NOWAIT, MT_DATA);
10133 		}
10134 
10135 		if (m == NULL) {
10136 #ifdef GSO_DEBUG
10137 			D("MGETHDR error\n");
10138 #endif
10139 			goto err;
10140 		}
10141 
10142 		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
10143 
10144 		m->m_len = hdr_len;
10145 		/*
10146 		 * if the optional header is present, copy it
10147 		 */
10148 		if (hdr2_buf != NULL) {
10149 			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
10150 		}
10151 
10152 		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
10153 		if (off + mss >= total_len) {           /* last segment */
10154 			mss = total_len - off;
10155 		}
10156 		/*
10157 		 * Copy the payload from original packet
10158 		 */
10159 		mseg = m_copym(m0, off, mss, M_NOWAIT);
10160 		if (mseg == NULL) {
10161 			m_freem(m);
10162 #ifdef GSO_DEBUG
10163 			D("m_copym error\n");
10164 #endif
10165 			goto err;
10166 		}
10167 		m_cat(m, mseg);
10168 
10169 		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
10170 		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
10171 		/*
10172 		 * Copy the checksum flags and data (in_cksum() need this)
10173 		 */
10174 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
10175 		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
10176 		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
10177 
10178 		mblist_append(&ret, m);
10179 	}
10180 
10181 	/*
10182 	 * Update first segment.
10183 	 * If the optional header is present, is necessary
10184 	 * to insert it into the first segment.
10185 	 */
10186 	if (hdr2_buf == NULL) {
10187 		m_adj(m0, hdr_len + firstlen - total_len);
10188 		m0->m_pkthdr.len = hdr_len + firstlen;
10189 	} else {
10190 		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
10191 		if (mseg == NULL) {
10192 #ifdef GSO_DEBUG
10193 			D("m_copym error\n");
10194 #endif
10195 			goto err;
10196 		}
10197 		m_adj(m0, hdr_len - total_len);
10198 		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
10199 		m_cat(m0, mseg);
10200 		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
10201 	}
10202 
10203 done:
10204 	return ret;
10205 
10206 err:
10207 	if (ret.head != NULL) {
10208 		m_freem_list(ret.head);
10209 		mblist_init(&ret);
10210 	}
10211 	return ret;
10212 }
10213 
/*
 * Wrappers of IPv4 checksum functions
 */

/*
 * Run in_delayed_cksum() on `m` with the first mbuf temporarily advanced
 * past the `mac_hlen` bytes of link-layer header, then restore the mbuf
 * and clear CSUM_DELAY_DATA from its checksum flags.
 *
 * `ip` is the IPv4 header of the packet.
 */
static inline void
gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
{
	/* hide the link-layer header from in_delayed_cksum() */
	m->m_data += mac_hlen;
	m->m_len -= mac_hlen;
	m->m_pkthdr.len -= mac_hlen;
	/*
	 * NOTE(review): an identifier that is not defined evaluates to 0 in
	 * #if, so this condition is true unless __FreeBSD_version is defined
	 * to a larger value -- confirm the byte swap is intended in this
	 * build.
	 */
#if __FreeBSD_version < 1000000
	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
#endif

	in_delayed_cksum(m);

#if __FreeBSD_version < 1000000
	/* back to network byte order */
	ip->ip_len = htons(ip->ip_len);
#endif
	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	/* undo the adjustments made at the top */
	m->m_len += mac_hlen;
	m->m_pkthdr.len += mac_hlen;
	m->m_data -= mac_hlen;
}
10237 
/*
 * Store in ip->ip_sum the result of in_cksum() over the first `ip_hlen`
 * bytes that follow the `mac_hlen` bytes of link-layer header in `m`,
 * and clear CSUM_IP from the mbuf's checksum flags.
 *
 * The caller is expected to have zeroed ip->ip_sum beforehand
 * (gso_ipv4_tcp_internal() does so).
 */
static inline void
gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
{
	/* make the mbuf data start at the IP header for in_cksum() */
	m->m_data += mac_hlen;

	ip->ip_sum = in_cksum(m, ip_hlen);

	m->m_pkthdr.csum_flags &= ~CSUM_IP;
	/* restore the data pointer */
	m->m_data -= mac_hlen;
}
10248 
/*
 * Structure that contains the state during the TCP segmentation
 */
struct gso_ip_tcp_state {
	/* points `hdr`, `tcp` and `pay_len` at a given segment */
	void    (*update)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* fixes up the IP and TCP headers of a given segment */
	void    (*internal)
	(struct gso_ip_tcp_state*, struct mbuf*);
	/* number of bytes addressable through `hdr` */
	u_int ip_m0_len;
	/* IP header (struct ip or struct ip6_hdr) of the current segment */
	uint8_t * __counted_by(ip_m0_len) hdr;
	/* TCP header of the current segment */
	struct tcphdr *tcp;
	/* link-layer header length */
	int mac_hlen;
	/* IP header length */
	int ip_hlen;
	/* TCP header length, in bytes */
	int tcp_hlen;
	/* mac_hlen + ip_hlen + tcp_hlen */
	int hlen;
	/* payload length of the current segment */
	int pay_len;
	/* CSUM_* flags of the checksums to compute in software */
	int sw_csum;
	/* sequence number, advanced by pay_len for each segment */
	uint32_t tcp_seq;
	/* IPv4 identification to use for the next segment */
	uint16_t ip_id;
	/* NOTE(review): not written by gso_ip_tcp_init_state() -- confirm use */
	boolean_t is_tx;
};
10270 
10271 /*
10272  * Update the pointers to TCP and IPv4 headers
10273  */
10274 static inline void
10275 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10276 {
10277 	state->hdr = mtodo(m, state->mac_hlen);
10278 	state->ip_m0_len = m->m_len - state->mac_hlen;
10279 	state->ip_hlen = state->ip_hlen;
10280 	state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10281 	state->pay_len = m->m_pkthdr.len - state->hlen;
10282 }
10283 
10284 /*
10285  * Set properly the TCP and IPv4 headers
10286  */
10287 static inline void
10288 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10289 {
10290 	struct ip *ip;
10291 	/*
10292 	 * Update IP header
10293 	 */
10294 	ip = (struct ip *)state->hdr;
10295 	ip->ip_id = htons((state->ip_id)++);
10296 	ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
10297 	/*
10298 	 * TCP Checksum
10299 	 */
10300 	state->tcp->th_sum = 0;
10301 	state->tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
10302 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
10303 	/*
10304 	 * Checksum HW not supported (TCP)
10305 	 */
10306 	if (state->sw_csum & CSUM_DELAY_DATA) {
10307 		gso_ipv4_data_cksum(m, ip, state->mac_hlen);
10308 	}
10309 
10310 	state->tcp_seq += state->pay_len;
10311 	/*
10312 	 * IP Checksum
10313 	 */
10314 	ip->ip_sum = 0;
10315 	/*
10316 	 * Checksum HW not supported (IP)
10317 	 */
10318 	if (state->sw_csum & CSUM_IP) {
10319 		gso_ipv4_hdr_cksum(m, ip, state->mac_hlen, state->ip_hlen);
10320 	}
10321 }
10322 
10323 
10324 /*
10325  * Updates the pointers to TCP and IPv6 headers
10326  */
10327 static inline void
10328 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
10329 {
10330 	state->hdr = mtodo(m, state->mac_hlen);
10331 	state->ip_m0_len = m->m_len - state->mac_hlen;
10332 	state->ip_hlen = state->ip_hlen;
10333 	state->tcp = (struct tcphdr *)(state->hdr + state->ip_hlen);
10334 	state->pay_len = m->m_pkthdr.len - state->hlen;
10335 }
10336 
10337 /*
10338  * Sets properly the TCP and IPv6 headers
10339  */
10340 static inline void
10341 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
10342 {
10343 	struct ip6_hdr *ip6;
10344 
10345 	ip6 = (struct ip6_hdr *)state->hdr;
10346 	ip6->ip6_plen = htons(m->m_pkthdr.len - state->mac_hlen - state->ip_hlen);
10347 	/*
10348 	 * TCP Checksum
10349 	 */
10350 	state->tcp->th_sum = 0;
10351 	state->tcp->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst,
10352 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
10353 	/*
10354 	 * Checksum HW not supported (TCP)
10355 	 */
10356 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
10357 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
10358 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
10359 	}
10360 	state->tcp_seq += state->pay_len;
10361 }
10362 
10363 /*
10364  * Init the state during the TCP segmentation
10365  */
10366 static void
10367 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
10368     bool is_ipv4, int mac_hlen, int ip_hlen,
10369     uint8_t *__counted_by(ip_m0_len) ip_hdr, u_int ip_m0_len,
10370     struct tcphdr * tcp_hdr)
10371 {
10372 #pragma unused(ifp)
10373 
10374 	state->hdr = ip_hdr;
10375 	state->ip_m0_len = ip_m0_len;
10376 	state->ip_hlen = ip_hlen;
10377 	state->tcp = tcp_hdr;
10378 	if (is_ipv4) {
10379 		state->ip_id = ntohs(((struct ip *)state->hdr)->ip_id);
10380 		state->update = gso_ipv4_tcp_update;
10381 		state->internal = gso_ipv4_tcp_internal;
10382 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
10383 	} else {
10384 		state->update = gso_ipv6_tcp_update;
10385 		state->internal = gso_ipv6_tcp_internal;
10386 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
10387 	}
10388 	state->mac_hlen = mac_hlen;
10389 	state->tcp_hlen = state->tcp->th_off << 2;
10390 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
10391 	state->tcp_seq = ntohl(state->tcp->th_seq);
10392 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
10393 	return;
10394 }
10395 
10396 /*
10397  * GSO on TCP/IP (v4 or v6)
10398  *
10399  * Segment the given mbuf and return the list of packets.
10400  *
10401  */
10402 static mblist
10403 gso_ip_tcp(ifnet_t ifp, mbuf_t m0, struct gso_ip_tcp_state *state, bool is_tx)
10404 {
10405 	struct mbuf *m;
10406 	int orig_mss;
10407 	int mss = 0;
10408 #ifdef GSO_STATS
10409 	int total_len = m0->m_pkthdr.len;
10410 #endif /* GSO_STATS */
10411 	mblist  seg;
10412 	bool tso_with_gso = false;
10413 
10414 	orig_mss = mss = _mbuf_get_tso_mss(m0);
10415 	if (mss == 0 && !is_tx) {
10416 		uint8_t seg_cnt = m0->m_pkthdr.rx_seg_cnt;
10417 
10418 		if (seg_cnt != 0) {
10419 			uint32_t        hdr_len;
10420 			uint32_t        len;
10421 
10422 			/* approximate the MSS using LRO seg cnt */
10423 			hdr_len = state->ip_hlen + state->tcp_hlen;
10424 			len = mbuf_pkthdr_len(m0) - hdr_len - ETHER_HDR_LEN;
10425 			mss = len / seg_cnt;
10426 			m0->m_pkthdr.rx_seg_cnt = 0;
10427 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10428 			    "%s: mss %d = len %d / seg cnt %d",
10429 			    ifp->if_xname, mss, len, seg_cnt);
10430 		}
10431 	}
10432 	if (mss == 0) {
10433 		/* hack: we don't have the actual MSS */
10434 		u_int reduce_mss;
10435 
10436 		reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
10437 		    : if_bridge_tso_reduce_mss_forwarding;
10438 		mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen -
10439 		    reduce_mss;
10440 		assert(mss > 0);
10441 	} else if (is_tx) {
10442 		bool    is_ipv4;
10443 		bool    do_tso = true;
10444 
10445 		if (TSO_IPV4_OK(ifp, m0)) {
10446 			is_ipv4 = true;
10447 		} else if (TSO_IPV6_OK(ifp, m0)) {
10448 			is_ipv4 = false;
10449 		} else {
10450 			do_tso = false;
10451 		}
10452 		if (do_tso) { /* TSO with GSO */
10453 			uint32_t        if_tso_max;
10454 
10455 			if_tso_max = get_if_tso_mtu(ifp, is_ipv4);
10456 			mss = if_tso_max - state->ip_hlen - state->tcp_hlen
10457 			    - ETHER_HDR_LEN;
10458 			tso_with_gso = true;
10459 		}
10460 	}
10461 	if (!tso_with_gso) {
10462 		/* clear TSO flags */
10463 		m0->m_pkthdr.csum_flags &= ~_TSO_CSUM;
10464 	}
10465 	seg = m_seg(m0, state->hlen, mss, 0, 0);
10466 	if (seg.head == NULL || seg.head->m_nextpkt == NULL) {
10467 		return seg;
10468 	}
10469 	if (tso_with_gso) {
10470 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10471 		    "%s TX gso size %d mss %d nsegs %d",
10472 		    ifp->if_xname,
10473 		    mss, orig_mss, seg.count);
10474 	} else {
10475 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10476 		    "%s %s mss %d nsegs %d",
10477 		    ifp->if_xname,
10478 		    is_tx ? "TX" : "RX",
10479 		    mss, seg.count);
10480 	}
10481 #ifdef GSO_STATS
10482 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
10483 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
10484 	GSOSTAT_ADD(tcp.gsos_osegments, seg.count);
10485 #endif /* GSO_STATS */
10486 
10487 	/* first pkt */
10488 	VERIFY(seg.head == m0);
10489 	m = m0;
10490 
10491 	state->update(state, m);
10492 
10493 	do {
10494 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
10495 
10496 		state->internal(state, m);
10497 		m = m->m_nextpkt;
10498 		state->update(state, m);
10499 		state->tcp->th_flags &= ~TH_CWR;
10500 		state->tcp->th_seq = htonl(state->tcp_seq);
10501 	} while (m->m_nextpkt);
10502 
10503 	/* last pkt */
10504 	state->internal(state, m);
10505 
10506 #ifdef GSO_STATS
10507 	if (!error) {
10508 		GSOSTAT_INC(tcp.gsos_segmented);
10509 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
10510 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
10511 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
10512 	}
10513 #endif /* GSO_STATS */
10514 	return seg;
10515 }
10516 
10517 /*
10518  * GSO for TCP/IPv[46]
10519  */
10520 static mblist
10521 gso_tcp_with_info(ifnet_t ifp, mbuf_t m, ip_packet_info_t info_p,
10522     u_int mac_hlen, bool is_ipv4, bool is_tx)
10523 {
10524 	uint32_t csum_flags;
10525 	struct gso_ip_tcp_state state;
10526 	struct tcphdr *tcp;
10527 
10528 	assert(info_p->ip_proto_hdr != NULL);
10529 	tcp = (struct tcphdr *)(void *)info_p->ip_proto_hdr;
10530 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
10531 	    info_p->ip_hlen + info_p->ip_opt_len,
10532 	    info_p->ip_hdr, info_p->ip_m0_len, tcp);
10533 	csum_flags = is_ipv4 ? CSUM_DELAY_DATA : CSUM_DELAY_IPV6_DATA; /* XXX */
10534 	m->m_pkthdr.csum_flags |= csum_flags;
10535 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
10536 	return gso_ip_tcp(ifp, m, &state, is_tx);
10537 }
10538 
10539 static mblist
10540 gso_tcp(ifnet_t ifp, mbuf_t m, u_int mac_hlen, bool is_ipv4, bool is_tx)
10541 {
10542 	int error;
10543 	ip_packet_info info;
10544 	struct bripstats stats; /* XXX ignored */
10545 	mblist ret;
10546 
10547 	error = bridge_get_tcp_header(&m, mac_hlen, is_ipv4, &info, &stats);
10548 	if (error != 0) {
10549 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
10550 		    "%s bridge_get_tcp_header failed %d (%s)",
10551 		    ifp->if_xname, error,
10552 		    is_tx ? "TX" : "RX");
10553 		if (m != NULL) {
10554 			m_freem(m);
10555 			m = NULL;
10556 		}
10557 		goto no_segment;
10558 	}
10559 	if (info.ip_proto_hdr == NULL) {
10560 		/* not actually a TCP packet, no segmentation */
10561 		goto no_segment;
10562 	}
10563 	if (!is_tx && ip_packet_info_dst_is_our_ip(&info, ifp->if_index)) {
10564 		goto no_segment;
10565 	}
10566 	return gso_tcp_with_info(ifp, m, &info, mac_hlen, is_ipv4, is_tx);
10567 
10568 no_segment:
10569 	mblist_init(&ret);
10570 	if (m != NULL) {
10571 		mblist_append(&ret, m);
10572 	}
10573 	return ret;
10574 }
10575