xref: /xnu-8796.141.3/bsd/net/if_bridge.c (revision 1b191cb58250d0705d8a51287127505aa4bc0789)
1 /*
2  * Copyright (c) 2004-2023 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
66  * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
 *	  consider heterogeneous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/zalloc.h>
131 
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142 
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162 
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166 
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169 
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172 
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176 
177 #include <os/log.h>
178 
179 /*
180  * if_bridge_debug, BR_DBGF_*
181  * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182  *   to enable additional logs for the corresponding bridge function
183  * - "sysctl net.link.bridge.debug" controls the value of
184  *   'if_bridge_debug'
185  */
static uint32_t if_bridge_debug = 0;    /* no debug category enabled initially */
/*
 * Debug categories.  Every BR_DBGF_* value is a distinct single bit, so
 * several categories can be OR-ed together in 'if_bridge_debug'.
 */
#define BR_DBGF_LIFECYCLE       0x0001
#define BR_DBGF_INPUT           0x0002
#define BR_DBGF_OUTPUT          0x0004
#define BR_DBGF_RT_TABLE        0x0008
#define BR_DBGF_DELAYED_CALL    0x0010
#define BR_DBGF_IOCTL           0x0020
#define BR_DBGF_MBUF            0x0040
#define BR_DBGF_MCAST           0x0080
#define BR_DBGF_HOSTFILTER      0x0100
#define BR_DBGF_CHECKSUM        0x0200
#define BR_DBGF_MAC_NAT         0x0400
198 
/*
 * if_bridge_log_level
 * - 'if_bridge_log_level' ensures that by default important logs are
 *   logged regardless of if_bridge_debug by comparing the log level
 *   in BRIDGE_LOG to if_bridge_log_level
 * - "sysctl net.link.bridge.log_level" controls the value of
 *   'if_bridge_log_level'
 * - the default value of 'if_bridge_log_level' is LOG_NOTICE; BRIDGE_LOG
 *   emits a message when its level is <= 'if_bridge_log_level', so
 *   important logs must use LOG_NOTICE (or a numerically lower level)
 *   to ensure they appear by default
 */
static int if_bridge_log_level = LOG_NOTICE;
210 
/*
 * BRIDGE_DBGF_ENABLED
 * - true when at least one of the BR_DBGF_* bits in '__flag' is set in
 *   'if_bridge_debug'
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole; without the
 *   parentheses '&' would bind to the first operand only
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
212 
213 /*
214  * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
215  * - macros to generate the specified log conditionally based on
216  *   the specified log level and debug flags
217  * - BRIDGE_LOG_SIMPLE does not include the function name in the log
218  */
/*
 * BRIDGE_LOG
 * - logs via os_log(), prefixing the message with the calling function's
 *   name, when '__level' is <= if_bridge_log_level or when the '__dbgf'
 *   debug flag is enabled in if_bridge_debug
 * - '__string' must be a string literal (it is concatenated with "%s: ")
 * - '__level' is parenthesized so an expression argument is compared as
 *   a whole
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
	                       __func__, ## __VA_ARGS__);       \
	        }                                                       \
	} while (0)
/*
 * BRIDGE_LOG_SIMPLE
 * - same gating as BRIDGE_LOG ('__level' <= if_bridge_log_level, or the
 *   '__dbgf' debug flag enabled) but the message is logged without the
 *   function-name prefix
 * - '__level' is parenthesized so an expression argument is compared as
 *   a whole
 */
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                    \
	        if ((__level) <= if_bridge_log_level ||         \
	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                               \
	} while (0)
234 
/*
 * Raw operations on the per-bridge mutex 'sc_mtx'.
 * - _BRIDGE_LOCK/_BRIDGE_UNLOCK call lck_mtx_lock()/lck_mtx_unlock()
 *   directly
 * - BRIDGE_LOCK_ASSERT_HELD/NOTHELD wrap LCK_MTX_ASSERT() with
 *   LCK_MTX_ASSERT_OWNED / LCK_MTX_ASSERT_NOTOWNED respectively
 */
#define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
#define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
#define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
241 
242 #define BRIDGE_LOCK_DEBUG      1
243 #if BRIDGE_LOCK_DEBUG
244 
/*
 * Number of slots in each of the lock_lr[] / unlock_lr[] history arrays
 * of struct bridge_softc.
 */
#define BR_LCKDBG_MAX                   4

/*
 * With BRIDGE_LOCK_DEBUG enabled the locking macros are routed to
 * bridge_*() functions rather than expanded inline.  Those functions are
 * defined elsewhere in this file.
 * NOTE: BRIDGE_LOCK2REF assigns the function result to '_err', so '_err'
 * must be a plain lvalue.
 */
#define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
#define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
#define BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
#define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
#define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
#define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)
253 
254 #else /* !BRIDGE_LOCK_DEBUG */
255 
/* Non-debug variants: lock/unlock map straight onto the raw mutex ops. */
#define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
#define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF
 * - entered with the bridge mutex held; always leaves with it released
 * - when no exclusive access is pending (sc_iflist_xcnt == 0) a reference
 *   on the member list is taken and '_err' is set to 0
 * - otherwise no reference is taken and '_err' is set to EBUSY
 */
#define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	if ((_sc)->sc_iflist_xcnt == 0) {                               \
	        (_sc)->sc_iflist_ref++;                                 \
	        (_err) = 0;                                             \
	} else {                                                        \
	        (_err) = EBUSY;                                         \
	}                                                               \
	_BRIDGE_UNLOCK(_sc);                                            \
} while (0)
/*
 * BRIDGE_UNREF
 * - takes the bridge mutex, drops one member-list reference and releases
 *   the mutex again
 * - if that was the last reference while exclusive access is pending
 *   (sc_iflist_xcnt > 0), wakeup() is issued on &sc_cv after the mutex
 *   has been released
 */
#define BRIDGE_UNREF(_sc)               do {                            \
	bool _bridge_unref_wake;                                        \
	_BRIDGE_LOCK(_sc);                                              \
	(_sc)->sc_iflist_ref--;                                         \
	_bridge_unref_wake = ((_sc)->sc_iflist_xcnt > 0) &&             \
	    ((_sc)->sc_iflist_ref == 0);                                \
	_BRIDGE_UNLOCK(_sc);                                            \
	if (_bridge_unref_wake) {                                       \
	        wakeup(&(_sc)->sc_cv);                                  \
	}                                                               \
} while (0)
/*
 * BRIDGE_XLOCK
 * - called with the bridge mutex held
 * - bumps sc_iflist_xcnt, then sleeps on &sc_cv (msleep() is passed the
 *   bridge mutex) until no member-list references remain
 */
#define BRIDGE_XLOCK(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt += 1;                                     \
	while ((_sc)->sc_iflist_ref != 0) {                             \
	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
	            "BRIDGE_XLOCK", NULL);                              \
	}                                                               \
} while (0)
/*
 * BRIDGE_XDROP
 * - called with the bridge mutex held; undoes the sc_iflist_xcnt bump
 *   made by BRIDGE_XLOCK
 */
#define BRIDGE_XDROP(_sc)               do {                            \
	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
	(_sc)->sc_iflist_xcnt -= 1;                                     \
} while (0)
288 
289 #endif /* BRIDGE_LOCK_DEBUG */
290 
#if NBPFILTER > 0
/*
 * BRIDGE_BPF_MTAP_INPUT
 * - passes 'm' to bridge_bpf_input() for the bridge's own interface
 *   (sc_ifp) when sc_bpf_input is non-NULL; the return value is ignored
 * - wrapped in do { } while (0) so the expansion is a single statement
 *   and cannot capture an 'else' that follows the call site
 * - 'sc' is evaluated more than once; do not pass an expression with
 *   side effects
 */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
	do {                                                            \
	        if ((sc)->sc_bpf_input != NULL) {                       \
	                bridge_bpf_input((sc)->sc_ifp, (m), __func__,   \
	                    __LINE__);                                  \
	        }                                                       \
	} while (0)
#else /* NBPFILTER */
/* BPF support not configured: the tap expands to nothing. */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)
#endif /* NBPFILTER */
298 
299 /*
300  * Initial size of the route hash table.  Must be a power of two.
301  */
#ifndef BRIDGE_RTHASH_SIZE
#define BRIDGE_RTHASH_SIZE              16
#endif

/*
 * Maximum size of the routing hash table
 */
#define BRIDGE_RTHASH_SIZE_MAX          2048

/*
 * Bit mask derived from the current table size (size - 1).  It only
 * selects a valid bucket index if sc_rthash_size is a power of two; the
 * initial size is required to be one.
 * NOTE(review): confirm that resizing keeps the size a power of two.
 */
#define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)
312 
313 /*
314  * Maximum number of addresses to cache.
315  */
316 #ifndef BRIDGE_RTABLE_MAX
317 #define BRIDGE_RTABLE_MAX               100
318 #endif
319 
320 
321 /*
322  * Timeout (in seconds) for entries learned dynamically.
323  */
324 #ifndef BRIDGE_RTABLE_TIMEOUT
325 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
326 #endif
327 
328 /*
329  * Number of seconds between walks of the route list.
330  */
331 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
332 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
333 #endif
334 
335 /*
336  * Number of MAC NAT entries
337  * - sized based on 16 clients (including MAC NAT interface)
338  *   each with 4 addresses
339  */
340 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
341 #define BRIDGE_MAC_NAT_ENTRY_MAX        64
342 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
343 
344 /*
345  * List of capabilities to possibly mask on the member interface.
346  */
347 #define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
348 /*
349  * List of capabilities to disable on the member interface.
350  */
351 #define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
352 
353 /*
354  * Bridge interface list entry.
355  */
/*
 * One entry per interface attached to a bridge.  The same structure type
 * is used for both list heads declared in struct bridge_softc
 * (sc_iflist and sc_spanlist).
 */
struct bridge_iflist {
	TAILQ_ENTRY(bridge_iflist) bif_next;    /* list linkage */
	struct ifnet            *bif_ifp;       /* member if */
	struct bstp_port        bif_stp;        /* STP state */
	uint32_t                bif_ifflags;    /* member if flags (IFBIF_*) */
	int                     bif_savedcaps;  /* saved capabilities */
	uint32_t                bif_addrmax;    /* max # of addresses */
	uint32_t                bif_addrcnt;    /* cur. # of addresses */
	uint32_t                bif_addrexceeded; /* # of address violations */

	interface_filter_t      bif_iff_ref;
	struct bridge_softc     *bif_sc;        /* presumably the owning bridge -- confirm */
	uint32_t                bif_flags;      /* presumably BIFF_* bits -- confirm */

	/* host filter */
	struct in_addr          bif_hf_ipsrc;
	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN];

	struct ifbrmstats       bif_stats;
};
376 
377 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 	return (bif->bif_ifflags & flags) == flags;
381 }
382 
383 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388 
/*
 * fake errors to make the code clearer
 * - every name below is an alias for EJUSTRETURN, so callers cannot tell
 *   them apart by value; the distinct names only document which check
 *   failed at the point of use
 */
#define _EBADIP                 EJUSTRETURN
#define _EBADIPCHECKSUM         EJUSTRETURN
#define _EBADIPV6               EJUSTRETURN
#define _EBADUDP                EJUSTRETURN
#define _EBADTCP                EJUSTRETURN
#define _EBADUDPCHECKSUM        EJUSTRETURN
#define _EBADTCPCHECKSUM        EJUSTRETURN
397 
/*
 * BIFF_* member state bits (presumably kept in bridge_iflist.bif_flags --
 * confirm against the users).  Values are single bits; 0x800 is not
 * assigned here.
 */
#define BIFF_PROMISC            0x01    /* promiscuous mode set */
#define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
#define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
#define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
#define BIFF_HOST_FILTER        0x10    /* host filter enabled */
#define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
#define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
#define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
#define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
#define BIFF_WIFI_INFRA         0x200   /* interface is Wi-Fi infra */
#define BIFF_ALL_MULTI          0x400   /* allmulti set */
#if SKYWALK
#define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
#define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
#endif /* SKYWALK */
413 
414 /*
415  * mac_nat_entry
416  * - translates between an IP address and MAC address on a specific
417  *   bridge interface member
418  */
struct mac_nat_entry {
	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
	struct bridge_iflist    *mne_bif;       /* originating interface */
	unsigned long           mne_expire;     /* expiration time */
	union {
		struct in_addr  mneu_ip;        /* originating IPv4 address */
		struct in6_addr mneu_ip6;       /* originating IPv6 address */
	} mne_u;
	uint8_t                 mne_mac[ETHER_ADDR_LEN]; /* MAC address paired with the IP address */
	uint8_t                 mne_flags;      /* presumably MNE_FLAGS_* -- confirm */
	uint8_t                 mne_reserved;
};
/* shorthand accessors for the address union */
#define mne_ip  mne_u.mneu_ip
#define mne_ip6 mne_u.mneu_ip6

#define MNE_FLAGS_IPV6          0x01    /* IPv6 address */
435 
436 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
437 
438 /*
439  * mac_nat_record
440  * - used by bridge_mac_nat_output() to convey the translation that needs
441  *   to take place in bridge_mac_nat_translate
442  * - holds enough information so that the translation can be done later without
443  *   holding the bridge lock
444  */
struct mac_nat_record {
	uint16_t                mnr_ether_type;
	/*
	 * Per-protocol details: one union arm each for ARP, IPv4 and IPv6.
	 * NOTE(review): presumably mnr_ether_type tells which arm is
	 * valid -- confirm against bridge_mac_nat_output()/translate().
	 */
	union {
		uint16_t        mnru_arp_offset;
		struct {
			uint16_t mnruip_dhcp_flags;
			uint16_t mnruip_udp_csum;
			uint8_t  mnruip_header_len;
		} mnru_ip;
		struct {
			uint16_t mnruip6_icmp6_len;
			uint16_t mnruip6_lladdr_offset;
			uint8_t mnruip6_icmp6_type;
			uint8_t mnruip6_header_len;
		} mnru_ip6;
	} mnr_u;
};

/* shorthand accessor for the ARP arm */
#define mnr_arp_offset  mnr_u.mnru_arp_offset

/* shorthand accessors for the IPv4 arm */
#define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
#define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
#define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum

/* shorthand accessors for the IPv6 arm */
#define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
#define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
#define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
#define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
473 
474 /*
475  * Bridge route node.
476  */
struct bridge_rtnode {
	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
	struct bridge_iflist    *brt_dst;       /* destination if */
	unsigned long           brt_expire;     /* expiration time */
	uint8_t                 brt_flags;      /* address flags */
	uint8_t                 brt_addr[ETHER_ADDR_LEN];
	uint16_t                brt_vlan;       /* vlan id */

};
/*
 * Shorthand for the destination member's ifnet.  The expansion
 * dereferences brt_dst, so brt_dst must be non-NULL wherever it is used.
 */
#define brt_ifp                 brt_dst->bif_ifp
488 
489 /*
490  * Bridge delayed function call context
491  */
/* Signature of a function run as a delayed call; it receives the bridge. */
typedef void (*bridge_delayed_func_t)(struct bridge_softc *);

struct bridge_delayed_call {
	struct bridge_softc     *bdc_sc;
	bridge_delayed_func_t   bdc_func; /* Function to call */
	struct timespec         bdc_ts; /* Time to call */
	u_int32_t               bdc_flags; /* presumably BDCF_* bits -- confirm */
	thread_call_t           bdc_thread_call;
};

#define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
#define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
504 
505 /*
506  * Software state for each bridge.
507  */
508 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
509 
510 struct bridge_softc {
511 	struct ifnet            *sc_ifp;        /* make this an interface */
512 	u_int32_t               sc_flags;
513 	LIST_ENTRY(bridge_softc) sc_list;
514 	decl_lck_mtx_data(, sc_mtx);
515 	struct _bridge_rtnode_list *sc_rthash;  /* our forwarding table */
516 	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
517 	uint32_t                sc_rthash_key;  /* key for hash */
518 	uint32_t                sc_rthash_size; /* size of the hash table */
519 	struct bridge_delayed_call sc_aging_timer;
520 	struct bridge_delayed_call sc_resize_call;
521 	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
522 	struct bstp_state       sc_stp;         /* STP state */
523 	bpf_packet_func         sc_bpf_input;
524 	bpf_packet_func         sc_bpf_output;
525 	void                    *sc_cv;
526 	uint32_t                sc_brtmax;      /* max # of addresses */
527 	uint32_t                sc_brtcnt;      /* cur. # of addresses */
528 	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
529 	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
530 	uint32_t                sc_iflist_xcnt; /* refcount for sc_iflist */
531 	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
532 	uint32_t                sc_brtexceeded; /* # of cache drops */
533 	uint32_t                sc_filter_flags; /* ipf and flags */
534 	struct ifnet            *sc_ifaddr;     /* member mac copied from */
535 	u_char                  sc_defaddr[6];  /* Default MAC address */
536 	char                    sc_if_xname[IFNAMSIZ];
537 
538 	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
539 	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
540 	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
541 	uint32_t                sc_mne_max;      /* max # of entries */
542 	uint32_t                sc_mne_count;    /* cur. # of entries */
543 	uint32_t                sc_mne_allocation_failures;
544 #if BRIDGE_LOCK_DEBUG
545 	/*
546 	 * Locking and unlocking calling history
547 	 */
548 	void                    *lock_lr[BR_LCKDBG_MAX];
549 	int                     next_lock_lr;
550 	void                    *unlock_lr[BR_LCKDBG_MAX];
551 	int                     next_unlock_lr;
552 #endif /* BRIDGE_LOCK_DEBUG */
553 };
554 
/* Bridge state bits (presumably kept in bridge_softc.sc_flags -- confirm) */
#define SCF_DETACHING            0x01
#define SCF_RESIZING             0x02
#define SCF_MEDIA_ACTIVE         0x04
558 
/*
 * Checksum handling selector; bridge_enqueue() takes one of these as its
 * last argument.  The numeric values are spelled out explicitly.
 */
typedef enum {
	CHECKSUM_OPERATION_NONE = 0,
	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
	CHECKSUM_OPERATION_FINALIZE = 2,
	CHECKSUM_OPERATION_COMPUTE = 3,
} ChecksumOperation;
565 
/*
 * A single IP header pointer viewed as IPv4, IPv6 or untyped.  All three
 * members alias the same pointer value.
 */
union iphdr {
	struct ip *ip;
	struct ip6_hdr *ip6;
	void * ptr;
};
571 
/*
 * Description of an IP packet's layout.  The code that fills this in is
 * not part of this section of the file.
 */
typedef struct {
	u_int           ip_hlen;        /* IP header length */
	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
	u_int           ip_opt_len;     /* IPv6 options headers length */
	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
	bool            ip_is_ipv4;     /* presumably selects ip_hdr.ip vs. ip_hdr.ip6 -- confirm */
	bool            ip_is_fragmented;
	union iphdr     ip_hdr;         /* pointer to IP header */
	void *          ip_proto_hdr;   /* ptr to protocol header (TCP) */
} ip_packet_info, *ip_packet_info_t;
582 
/* Host filter statistics; not static, so visible outside this file. */
struct bridge_hostfilter_stats bridge_hostfilter_stats;

/* Lock group, attributes and the list mutex declared with them. */
static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
/*
 * NOTE(review): LCK_ATTR_DEBUG is requested only when BRIDGE_LOCK_DEBUG
 * is off.  This may be intentional (the bridge keeps its own lock history
 * when BRIDGE_LOCK_DEBUG is on) but reads as inverted -- confirm.
 */
#if BRIDGE_LOCK_DEBUG
static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
#else
static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
#endif
static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);

/* Initialized from BRIDGE_RTABLE_PRUNE_PERIOD (seconds between walks of the route list). */
static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;

/* kalloc type definitions for struct bridge_rtnode and struct mac_nat_entry. */
static KALLOC_TYPE_DEFINE(bridge_rtnode_pool, struct bridge_rtnode, NET_KT_DEFAULT);
static KALLOC_TYPE_DEFINE(bridge_mne_pool, struct mac_nat_entry, NET_KT_DEFAULT);
597 
598 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
599 static int      bridge_clone_destroy(struct ifnet *);
600 
601 static errno_t  bridge_ioctl(struct ifnet *, u_long, void *);
602 #if HAS_IF_CAP
603 static void     bridge_mutecaps(struct bridge_softc *);
604 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
605     int);
606 #endif
607 static errno_t bridge_set_tso(struct bridge_softc *);
608 static void     bridge_proto_attach_changed(struct ifnet *);
609 static int      bridge_init(struct ifnet *);
610 #if HAS_BRIDGE_DUMMYNET
611 static void     bridge_dummynet(struct mbuf *, struct ifnet *);
612 #endif
613 static void     bridge_ifstop(struct ifnet *, int);
614 static int      bridge_output(struct ifnet *, struct mbuf *);
615 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
616 static void     bridge_start(struct ifnet *);
617 static errno_t  bridge_input(struct ifnet *, mbuf_t *);
618 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
619     mbuf_t *, char **);
620 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
621     mbuf_t *);
622 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
623     mbuf_t *m);
624 
625 static int      bridge_enqueue(ifnet_t, struct ifnet *,
626     struct ifnet *, struct mbuf *, ChecksumOperation);
627 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
628 
629 static void     bridge_forward(struct bridge_softc *, struct bridge_iflist *,
630     struct mbuf *);
631 
632 static void     bridge_aging_timer(struct bridge_softc *sc);
633 
634 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
635     struct mbuf *, int);
636 static void     bridge_span(struct bridge_softc *, struct mbuf *);
637 
638 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
639     uint16_t, struct bridge_iflist *, int, uint8_t);
640 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
641     uint16_t);
642 static void     bridge_rttrim(struct bridge_softc *);
643 static void     bridge_rtage(struct bridge_softc *);
644 static void     bridge_rtflush(struct bridge_softc *, int);
645 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
646     uint16_t);
647 
648 static int      bridge_rtable_init(struct bridge_softc *);
649 static void     bridge_rtable_fini(struct bridge_softc *);
650 
651 static void     bridge_rthash_resize(struct bridge_softc *);
652 
653 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
654 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
655     const uint8_t *, uint16_t);
656 static int      bridge_rtnode_hash(struct bridge_softc *,
657     struct bridge_rtnode *);
658 static int      bridge_rtnode_insert(struct bridge_softc *,
659     struct bridge_rtnode *);
660 static void     bridge_rtnode_destroy(struct bridge_softc *,
661     struct bridge_rtnode *);
662 #if BRIDGESTP
663 static void     bridge_rtable_expire(struct ifnet *, int);
664 static void     bridge_state_change(struct ifnet *, int);
665 #endif /* BRIDGESTP */
666 
667 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
668     const char *name);
669 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
670     struct ifnet *ifp);
671 static void     bridge_delete_member(struct bridge_softc *,
672     struct bridge_iflist *);
673 static void     bridge_delete_span(struct bridge_softc *,
674     struct bridge_iflist *);
675 
676 static int      bridge_ioctl_add(struct bridge_softc *, void *);
677 static int      bridge_ioctl_del(struct bridge_softc *, void *);
678 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
679 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
680 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
681 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
682 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *);
683 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *);
684 static int      bridge_ioctl_rts32(struct bridge_softc *, void *);
685 static int      bridge_ioctl_rts64(struct bridge_softc *, void *);
686 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *);
687 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *);
688 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
689 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
690 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *);
691 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *);
692 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
693 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
694 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
695 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
696 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
697 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
698 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
699 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
700 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
701 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
702 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
703 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
704 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
705 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
706 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *);
707 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *);
708 static int      bridge_ioctl_grte(struct bridge_softc *, void *);
709 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
710 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
711 static int      bridge_ioctl_sproto(struct bridge_softc *, void *);
712 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *);
713 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *);
714 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *);
715 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *);
716 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
717 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *);
718 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
719 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
720 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *);
721 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *);
722 
723 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
724 static int bridge_ip_checkbasic(struct mbuf **);
725 static int bridge_ip6_checkbasic(struct mbuf **);
726 
727 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
728 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
729 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
730 
731 static void bridge_detach(ifnet_t);
732 static void bridge_link_event(struct ifnet *, u_int32_t);
733 static void bridge_iflinkevent(struct ifnet *);
734 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
735 static int interface_media_active(struct ifnet *);
736 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
737 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
738 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
739 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
740 
741 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
742     struct bridge_iflist *);
743 static void bridge_mac_nat_disable(struct bridge_softc *sc);
744 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
745 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
746 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
747     struct bridge_iflist *);
748 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
749     boolean_t *);
750 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
751     struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
752 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
753     const caddr_t);
754 static bool is_broadcast_ip_packet(mbuf_t *);
755 static bool in_addr_is_ours(const struct in_addr);
756 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
757 
758 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
759 
760 static int
761 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
762     boolean_t is_tx);
763 
764 /*
 * The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2).
 *
 * NOTE(review): VLANTAGOF() ignores its argument and always evaluates to 0,
 * which does not match the "1" stated above -- confirm which value is
 * intended before relying on either.
 */
765 #define VLANTAGOF(_m)   0
766 
767 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
768 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
769 
770 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
771 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
772 
773 #if BRIDGESTP
774 static struct bstp_cb_ops bridge_ops = {
775 	.bcb_state = bridge_state_change,
776 	.bcb_rtage = bridge_rtable_expire
777 };
778 #endif /* BRIDGESTP */
779 
780 SYSCTL_DECL(_net_link);
781 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
782     "Bridge");
783 
784 static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
785 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
786     CTLFLAG_RW | CTLFLAG_LOCKED,
787     &bridge_inherit_mac, 0,
788     "Inherit MAC address from the first bridge member");
789 
790 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
791     CTLFLAG_RW | CTLFLAG_LOCKED,
792     &bridge_rtable_prune_period, 0,
793     "Interval between pruning of routing table");
794 
795 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
796 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
797     CTLFLAG_RW | CTLFLAG_LOCKED,
798     &bridge_rtable_hash_size_max, 0,
799     "Maximum size of the routing hash table");
800 
801 #if BRIDGE_DELAYED_CALLBACK_DEBUG
802 static int bridge_delayed_callback_delay = 0;
803 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
804     CTLFLAG_RW | CTLFLAG_LOCKED,
805     &bridge_delayed_callback_delay, 0,
806     "Delay before calling delayed function");
807 #endif
808 
809 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
810     hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
811     &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
812 
#if BRIDGESTP
static int log_stp   = 0;   /* log STP state changes */
/*
 * net.link.bridge.log_stp: read/write integer knob backed by log_stp.
 * CTLFLAG_LOCKED is specified to match every other net.link.bridge
 * sysctl declared in this file.
 */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW | CTLFLAG_LOCKED,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
818 
/*
 * One entry of the bridge control tables (bridge_control_table32 and
 * bridge_control_table64):
 *
 *	bc_func		handler invoked with the bridge softc and an
 *			argument buffer
 *	bc_argsize	size in bytes of the argument structure the
 *			handler works on (the tables fill it with sizeof())
 *	bc_flags	bitwise OR of the BC_F_* flags
 */
819 struct bridge_control {
820 	int             (*bc_func)(struct bridge_softc *, void *);
821 	unsigned int    bc_argsize;
822 	unsigned int    bc_flags;
823 };
824 
825 #define VMNET_TAG               "com.apple.vmnet"
826 #define VMNET_LOCAL_TAG         VMNET_TAG ".local"
827 #define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
828 #define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"
829 
830 static u_int16_t vmnet_tag;
831 static u_int16_t vmnet_local_tag;
832 static u_int16_t vmnet_broadcast_tag;
833 static u_int16_t vmnet_multicast_tag;
834 
835 static u_int16_t
allocate_pf_tag(char * name)836 allocate_pf_tag(char * name)
837 {
838 	u_int16_t       tag;
839 
840 	tag = pf_tagname2tag_ext(name);
841 	BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
842 	return tag;
843 }
844 
845 static void
allocate_vmnet_pf_tags(void)846 allocate_vmnet_pf_tags(void)
847 {
848 	/* allocate tags to use with PF */
849 	if (vmnet_tag == 0) {
850 		vmnet_tag = allocate_pf_tag(VMNET_TAG);
851 	}
852 	if (vmnet_local_tag == 0) {
853 		vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
854 	}
855 	if (vmnet_broadcast_tag == 0) {
856 		vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
857 	}
858 	if (vmnet_multicast_tag == 0) {
859 		vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
860 	}
861 }
862 
863 #define BC_F_COPYIN             0x01    /* copy arguments in */
864 #define BC_F_COPYOUT            0x02    /* copy arguments out */
865 #define BC_F_SUSER              0x04    /* do super-user check */
866 
867 static const struct bridge_control bridge_control_table32[] = {
868 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
869 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
870 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
871 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
872 
873 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
874 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
875 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
876 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
877 
878 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
879 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
880 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
881 	  .bc_flags = BC_F_COPYOUT },
882 
883 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
884 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
885 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
886 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
887 
888 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
889 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
890 
891 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
892 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
893 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
894 	  .bc_flags = BC_F_COPYOUT },
895 
896 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
897 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
898 
899 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
900 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
901 
902 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
903 	  .bc_flags = BC_F_COPYOUT },
904 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
905 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
906 
907 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
908 	  .bc_flags = BC_F_COPYOUT },
909 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
910 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
911 
912 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
913 	  .bc_flags = BC_F_COPYOUT },
914 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
915 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
916 
917 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
918 	  .bc_flags = BC_F_COPYOUT },
919 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
920 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
921 
922 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
923 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
924 
925 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
926 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
927 
928 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
929 	  .bc_flags = BC_F_COPYOUT },
930 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
931 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
932 
933 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
934 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
935 
936 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
937 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
938 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
939 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
940 
941 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
942 	  .bc_flags = BC_F_COPYOUT },
943 
944 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
945 	  .bc_flags = BC_F_COPYOUT },
946 
947 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
948 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
949 
950 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
951 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
952 
953 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
954 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
955 
956 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
957 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
958 
959 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
960 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
961 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
962 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
963 
964 	{ .bc_func = bridge_ioctl_gmnelist32,
965 	  .bc_argsize = sizeof(struct ifbrmnelist32),
966 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
967 	{ .bc_func = bridge_ioctl_gifstats32,
968 	  .bc_argsize = sizeof(struct ifbrmreq32),
969 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
970 };
971 
972 static const struct bridge_control bridge_control_table64[] = {
973 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
974 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
975 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
976 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
977 
978 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
979 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
980 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
981 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
982 
983 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
984 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
985 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
986 	  .bc_flags = BC_F_COPYOUT },
987 
988 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
989 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
990 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
991 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
992 
993 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
994 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
995 
996 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
997 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
998 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
999 	  .bc_flags = BC_F_COPYOUT },
1000 
1001 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1002 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1003 
1004 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1005 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1006 
1007 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1008 	  .bc_flags = BC_F_COPYOUT },
1009 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1010 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1011 
1012 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1013 	  .bc_flags = BC_F_COPYOUT },
1014 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1015 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1016 
1017 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1018 	  .bc_flags = BC_F_COPYOUT },
1019 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1020 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1021 
1022 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1023 	  .bc_flags = BC_F_COPYOUT },
1024 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
1025 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1026 
1027 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1028 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1029 
1030 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1031 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1032 
1033 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1034 	  .bc_flags = BC_F_COPYOUT },
1035 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1036 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1037 
1038 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1039 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1040 
1041 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1042 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1043 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1044 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1045 
1046 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1047 	  .bc_flags = BC_F_COPYOUT },
1048 
1049 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1050 	  .bc_flags = BC_F_COPYOUT },
1051 
1052 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1053 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1054 
1055 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1056 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1057 
1058 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1059 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1060 
1061 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1062 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1063 
1064 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1065 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1066 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1067 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1068 
1069 	{ .bc_func = bridge_ioctl_gmnelist64,
1070 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1071 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1072 	{ .bc_func = bridge_ioctl_gifstats64,
1073 	  .bc_argsize = sizeof(struct ifbrmreq64),
1074 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1075 };
1076 
1077 static const unsigned int bridge_control_table_size =
1078     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1079 
1080 static LIST_HEAD(, bridge_softc) bridge_list =
1081     LIST_HEAD_INITIALIZER(bridge_list);
1082 
1083 #define BRIDGENAME      "bridge"
1084 #define BRIDGES_MAX     IF_MAXUNIT
1085 #define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)
1086 
1087 static struct if_clone bridge_cloner =
1088     IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
1089     0, BRIDGES_MAX);
1090 
1091 static int if_bridge_txstart = 0;
1092 SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
1093     &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
1094 
1095 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1096     &if_bridge_debug, 0, "Bridge debug flags");
1097 
1098 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
1099     CTLFLAG_RW | CTLFLAG_LOCKED,
1100     &if_bridge_log_level, 0, "Bridge log level");
1101 
1102 static int if_bridge_segmentation = 1;
1103 SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
1104     CTLFLAG_RW | CTLFLAG_LOCKED,
1105     &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1106 
1107 static int if_bridge_vmnet_pf_tagging = 1;
1108 SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1109     CTLFLAG_RW | CTLFLAG_LOCKED,
1110     &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1111 
1112 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
1113 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
1114 #define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
1115 #define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0
1116 
1117 static u_int if_bridge_tso_reduce_mss_forwarding
1118         = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
1119 static u_int if_bridge_tso_reduce_mss_tx
1120         = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1121 
1122 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1123 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1124 {
1125 	int     changed;
1126 	int     error;
1127 	u_int   new_value;
1128 
1129 	error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1130 	    &changed);
1131 	if (error == 0 && changed != 0) {
1132 		if (new_value > val_max) {
1133 			return EINVAL;
1134 		}
1135 		*val = new_value;
1136 	}
1137 	return error;
1138 }
1139 
1140 static int
1141 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1142 {
1143 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1144     BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1145 }
1146 
1147 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1148     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1149     0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1150     "Bridge tso reduce mss when forwarding");
1151 
1152 static int
1153 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1154 {
1155 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1156     BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1157 }
1158 
1159 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1160     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1161     0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1162     "Bridge tso reduce mss on transmit");
1163 
1164 
1165 #if DEBUG || DEVELOPMENT
1166 #define BRIDGE_FORCE_ONE        0x00000001
1167 #define BRIDGE_FORCE_TWO        0x00000002
1168 static u_int32_t if_bridge_force_errors = 0;
1169 SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
1170     CTLFLAG_RW | CTLFLAG_LOCKED,
1171     &if_bridge_force_errors, 0, "Bridge interface force errors");
1172 static inline bool
bridge_error_is_forced(u_int32_t flags)1173 bridge_error_is_forced(u_int32_t flags)
1174 {
1175 	return (if_bridge_force_errors & flags) != 0;
1176 }
1177 
1178 #define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
1179 	do {                                                            \
1180 	        __is_forced = bridge_error_is_forced(__flags);          \
1181 	        if (__is_forced) {                                      \
1182 	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
1183 	        }                                                       \
1184 	} while (0)
1185 #endif /* DEBUG || DEVELOPMENT */
1186 
1187 
1188 static void brlog_ether_header(struct ether_header *);
1189 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1190 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1191 static void brlog_mbuf(mbuf_t, const char *, const char *);
1192 static void brlog_link(struct bridge_softc * sc);
1193 
1194 #if BRIDGE_LOCK_DEBUG
1195 static void bridge_lock(struct bridge_softc *);
1196 static void bridge_unlock(struct bridge_softc *);
1197 static int bridge_lock2ref(struct bridge_softc *);
1198 static void bridge_unref(struct bridge_softc *);
1199 static void bridge_xlock(struct bridge_softc *);
1200 static void bridge_xdrop(struct bridge_softc *);
1201 
1202 static void
bridge_lock(struct bridge_softc * sc)1203 bridge_lock(struct bridge_softc *sc)
1204 {
1205 	void *lr_saved = __builtin_return_address(0);
1206 
1207 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1208 
1209 	_BRIDGE_LOCK(sc);
1210 
1211 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1212 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1213 }
1214 
1215 static void
bridge_unlock(struct bridge_softc * sc)1216 bridge_unlock(struct bridge_softc *sc)
1217 {
1218 	void *lr_saved = __builtin_return_address(0);
1219 
1220 	BRIDGE_LOCK_ASSERT_HELD(sc);
1221 
1222 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1223 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1224 
1225 	_BRIDGE_UNLOCK(sc);
1226 }
1227 
1228 static int
bridge_lock2ref(struct bridge_softc * sc)1229 bridge_lock2ref(struct bridge_softc *sc)
1230 {
1231 	int error = 0;
1232 	void *lr_saved = __builtin_return_address(0);
1233 
1234 	BRIDGE_LOCK_ASSERT_HELD(sc);
1235 
1236 	if (sc->sc_iflist_xcnt > 0) {
1237 		error = EBUSY;
1238 	} else {
1239 		sc->sc_iflist_ref++;
1240 	}
1241 
1242 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1243 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1244 
1245 	_BRIDGE_UNLOCK(sc);
1246 
1247 	return error;
1248 }
1249 
1250 static void
bridge_unref(struct bridge_softc * sc)1251 bridge_unref(struct bridge_softc *sc)
1252 {
1253 	void *lr_saved = __builtin_return_address(0);
1254 
1255 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1256 
1257 	_BRIDGE_LOCK(sc);
1258 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1259 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1260 
1261 	sc->sc_iflist_ref--;
1262 
1263 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1264 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1265 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1266 		_BRIDGE_UNLOCK(sc);
1267 		wakeup(&sc->sc_cv);
1268 	} else {
1269 		_BRIDGE_UNLOCK(sc);
1270 	}
1271 }
1272 
1273 static void
bridge_xlock(struct bridge_softc * sc)1274 bridge_xlock(struct bridge_softc *sc)
1275 {
1276 	void *lr_saved = __builtin_return_address(0);
1277 
1278 	BRIDGE_LOCK_ASSERT_HELD(sc);
1279 
1280 	sc->sc_iflist_xcnt++;
1281 	while (sc->sc_iflist_ref > 0) {
1282 		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1283 		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1284 
1285 		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1286 
1287 		sc->lock_lr[sc->next_lock_lr] = lr_saved;
1288 		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1289 	}
1290 }
1291 
1292 static void
bridge_xdrop(struct bridge_softc * sc)1293 bridge_xdrop(struct bridge_softc *sc)
1294 {
1295 	BRIDGE_LOCK_ASSERT_HELD(sc);
1296 
1297 	sc->sc_iflist_xcnt--;
1298 }
1299 
1300 #endif /* BRIDGE_LOCK_DEBUG */
1301 
1302 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1303 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1304 {
1305 	if (m) {
1306 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1307 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1308 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1309 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1310 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1311 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1312 		    suffix ? suffix : "");
1313 	} else {
1314 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1315 	}
1316 }
1317 
1318 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1319 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1320 {
1321 	if (m) {
1322 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1323 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1324 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1325 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1326 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1327 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1328 		    (unsigned int)mbuf_maxlen(m),
1329 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1330 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1331 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1332 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1333 			brlog_mbuf_pkthdr(m, "", suffix);
1334 		}
1335 	} else {
1336 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1337 	}
1338 }
1339 
1340 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1341 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1342 {
1343 	mbuf_t                  n;
1344 	size_t                  i, j;
1345 	size_t                  pktlen, mlen, maxlen;
1346 	unsigned char   *ptr;
1347 
1348 	pktlen = mbuf_pkthdr_len(m);
1349 
1350 	if (offset > pktlen) {
1351 		return;
1352 	}
1353 
1354 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1355 	n = m;
1356 	mlen = mbuf_len(n);
1357 	ptr = mbuf_data(n);
1358 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1359 		if (j >= mlen) {
1360 			n = mbuf_next(n);
1361 			if (n == 0) {
1362 				break;
1363 			}
1364 			ptr = mbuf_data(n);
1365 			mlen = mbuf_len(n);
1366 			j = 0;
1367 		}
1368 		if (i >= offset) {
1369 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1370 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1371 		}
1372 	}
1373 }
1374 
1375 static void
brlog_ether_header(struct ether_header * eh)1376 brlog_ether_header(struct ether_header *eh)
1377 {
1378 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1379 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1380 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1381 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1382 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1383 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1384 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1385 	    ntohs(eh->ether_type));
1386 }
1387 
/*
 * ether_ntop:
 *
 *	Format the six bytes at `ap' into `buf' (capacity `len') as
 *	colon-separated, zero-padded, lower-case hex ("xx:xx:xx:xx:xx:xx").
 *	Output is truncated by snprintf() if `buf' is too small.
 *
 *	Returns `buf'.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	unsigned int    octet[6];
	size_t          idx;

	for (idx = 0; idx < sizeof(octet) / sizeof(octet[0]); idx++) {
		octet[idx] = ap[idx];
	}
	snprintf(buf, len, "%02x:%02x:%02x:%02x:%02x:%02x",
	    octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);

	return buf;
}
1396 
1397 static void
brlog_link(struct bridge_softc * sc)1398 brlog_link(struct bridge_softc * sc)
1399 {
1400 	int i;
1401 	uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
1402 	IFNAMSIZ + ETHER_ADDR_LEN];
1403 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
1404 	const u_char * lladdr;
1405 	char lladdr_str[48];
1406 
1407 	memset(sdl, 0, sizeof(sdl_buffer));
1408 	sdl->sdl_family = AF_LINK;
1409 	sdl->sdl_nlen = strlen(sc->sc_if_xname);
1410 	sdl->sdl_alen = ETHER_ADDR_LEN;
1411 	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1412 	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1413 	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1414 	lladdr_str[0] = '\0';
1415 	for (i = 0, lladdr = CONST_LLADDR(sdl);
1416 	    i < sdl->sdl_alen;
1417 	    i++, lladdr++) {
1418 		char    byte_str[4];
1419 
1420 		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1421 		    *lladdr);
1422 		strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
1423 	}
1424 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1425 	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1426 	    " slen %d addr %s", sc->sc_if_xname,
1427 	    sdl->sdl_len, sdl->sdl_index,
1428 	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1429 	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1430 }
1431 
1432 
1433 /*
1434  * bridgeattach:
1435  *
1436  *	Pseudo-device attach routine.
1437  */
1438 __private_extern__ int
bridgeattach(int n)1439 bridgeattach(int n)
1440 {
1441 #pragma unused(n)
1442 	int error;
1443 
1444 	LIST_INIT(&bridge_list);
1445 
1446 #if BRIDGESTP
1447 	bstp_sys_init();
1448 #endif /* BRIDGESTP */
1449 
1450 	error = if_clone_attach(&bridge_cloner);
1451 	if (error != 0) {
1452 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1453 	}
1454 	return error;
1455 }
1456 
1457 
1458 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1459 bridge_ifnet_set_attrs(struct ifnet * ifp)
1460 {
1461 	errno_t         error;
1462 
1463 	error = ifnet_set_mtu(ifp, ETHERMTU);
1464 	if (error != 0) {
1465 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1466 		goto done;
1467 	}
1468 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1469 	if (error != 0) {
1470 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1471 		goto done;
1472 	}
1473 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1474 	if (error != 0) {
1475 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1476 		goto done;
1477 	}
1478 	error = ifnet_set_flags(ifp,
1479 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1480 	    0xffff);
1481 
1482 	if (error != 0) {
1483 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1484 		goto done;
1485 	}
1486 done:
1487 	return error;
1488 }
1489 
1490 /*
1491  * bridge_clone_create:
1492  *
1493  *	Create a new bridge instance.
1494  */
1495 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1496 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1497 {
1498 #pragma unused(params)
1499 	struct ifnet *ifp = NULL;
1500 	struct bridge_softc *sc = NULL;
1501 	struct bridge_softc *sc2 = NULL;
1502 	struct ifnet_init_eparams init_params;
1503 	errno_t error = 0;
1504 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1505 	int fb, retry, has_hostid;
1506 
1507 	sc = kalloc_type(struct bridge_softc, Z_WAITOK_ZERO_NOFAIL);
1508 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1509 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1510 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1511 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1512 	sc->sc_filter_flags = 0;
1513 
1514 	TAILQ_INIT(&sc->sc_iflist);
1515 
1516 	/* use the interface name as the unique id for ifp recycle */
1517 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1518 	    ifc->ifc_name, unit);
1519 	bzero(&init_params, sizeof(init_params));
1520 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1521 	init_params.len                 = sizeof(init_params);
1522 	/* Initialize our routing table. */
1523 	error = bridge_rtable_init(sc);
1524 	if (error != 0) {
1525 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1526 		goto done;
1527 	}
1528 	TAILQ_INIT(&sc->sc_spanlist);
1529 	if (if_bridge_txstart) {
1530 		init_params.start = bridge_start;
1531 	} else {
1532 		init_params.flags = IFNET_INIT_LEGACY;
1533 		init_params.output = bridge_output;
1534 	}
1535 	init_params.set_bpf_tap = bridge_set_bpf_tap;
1536 	init_params.uniqueid            = sc->sc_if_xname;
1537 	init_params.uniqueid_len        = strlen(sc->sc_if_xname);
1538 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1539 	init_params.name                = ifc->ifc_name;
1540 	init_params.unit                = unit;
1541 	init_params.family              = IFNET_FAMILY_ETHERNET;
1542 	init_params.type                = IFT_BRIDGE;
1543 	init_params.demux               = ether_demux;
1544 	init_params.add_proto           = ether_add_proto;
1545 	init_params.del_proto           = ether_del_proto;
1546 	init_params.check_multi         = ether_check_multi;
1547 	init_params.framer_extended     = ether_frameout_extended;
1548 	init_params.softc               = sc;
1549 	init_params.ioctl               = bridge_ioctl;
1550 	init_params.detach              = bridge_detach;
1551 	init_params.broadcast_addr      = etherbroadcastaddr;
1552 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1553 
1554 	error = ifnet_allocate_extended(&init_params, &ifp);
1555 	if (error != 0) {
1556 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1557 		goto done;
1558 	}
1559 	LIST_INIT(&sc->sc_mne_list);
1560 	LIST_INIT(&sc->sc_mne_list_v6);
1561 	sc->sc_ifp = ifp;
1562 	error = bridge_ifnet_set_attrs(ifp);
1563 	if (error != 0) {
1564 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1565 		    error);
1566 		goto done;
1567 	}
1568 	/*
1569 	 * Generate an ethernet address with a locally administered address.
1570 	 *
1571 	 * Since we are using random ethernet addresses for the bridge, it is
1572 	 * possible that we might have address collisions, so make sure that
1573 	 * this hardware address isn't already in use on another bridge.
1574 	 * The first try uses the "hostid" and falls back to read_frandom();
1575 	 * for "hostid", we use the MAC address of the first-encountered
1576 	 * Ethernet-type interface that is currently configured.
1577 	 */
1578 	fb = 0;
1579 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1580 	for (retry = 1; retry != 0;) {
1581 		if (fb || has_hostid == 0) {
1582 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1583 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1584 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1585 		} else {
1586 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1587 			    ETHER_ADDR_LEN);
1588 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1589 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1590 			sc->sc_defaddr[3] =     /* stir it up a bit */
1591 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1592 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1593 			/*
1594 			 * Mix in the LSB as it's actually pretty significant,
1595 			 * see rdar://14076061
1596 			 */
1597 			sc->sc_defaddr[4] =
1598 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1599 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1600 			    sc->sc_defaddr[5];
1601 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1602 		}
1603 
1604 		fb = 1;
1605 		retry = 0;
1606 		lck_mtx_lock(&bridge_list_mtx);
1607 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1608 			if (_ether_cmp(sc->sc_defaddr,
1609 			    IF_LLADDR(sc2->sc_ifp)) == 0) {
1610 				retry = 1;
1611 			}
1612 		}
1613 		lck_mtx_unlock(&bridge_list_mtx);
1614 	}
1615 
1616 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1617 
1618 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1619 		brlog_link(sc);
1620 	}
1621 	error = ifnet_attach(ifp, NULL);
1622 	if (error != 0) {
1623 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1624 		goto done;
1625 	}
1626 
1627 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1628 	    IFT_ETHER);
1629 	if (error != 0) {
1630 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1631 		    error);
1632 		goto done;
1633 	}
1634 
1635 	ifnet_set_offload(ifp,
1636 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1637 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1638 	error = bridge_set_tso(sc);
1639 	if (error != 0) {
1640 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1641 		goto done;
1642 	}
1643 #if BRIDGESTP
1644 	bstp_attach(&sc->sc_stp, &bridge_ops);
1645 #endif /* BRIDGESTP */
1646 
1647 	lck_mtx_lock(&bridge_list_mtx);
1648 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1649 	lck_mtx_unlock(&bridge_list_mtx);
1650 
1651 	/* attach as ethernet */
1652 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1653 	    NULL, NULL);
1654 
1655 done:
1656 	if (error != 0) {
1657 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1658 		/* TBD: Clean up: sc, sc_rthash etc */
1659 	}
1660 
1661 	return error;
1662 }
1663 
1664 /*
1665  * bridge_clone_destroy:
1666  *
1667  *	Destroy a bridge instance.
1668  */
1669 static int
bridge_clone_destroy(struct ifnet * ifp)1670 bridge_clone_destroy(struct ifnet *ifp)
1671 {
1672 	struct bridge_softc *sc = ifp->if_softc;
1673 	struct bridge_iflist *bif;
1674 	errno_t error;
1675 
1676 	BRIDGE_LOCK(sc);
1677 	if ((sc->sc_flags & SCF_DETACHING)) {
1678 		BRIDGE_UNLOCK(sc);
1679 		return 0;
1680 	}
1681 	sc->sc_flags |= SCF_DETACHING;
1682 
1683 	bridge_ifstop(ifp, 1);
1684 
1685 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1686 
1687 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1688 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1689 
1690 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1691 	if (error != 0) {
1692 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1693 	}
1694 
1695 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1696 		bridge_delete_member(sc, bif);
1697 	}
1698 
1699 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1700 		bridge_delete_span(sc, bif);
1701 	}
1702 	BRIDGE_UNLOCK(sc);
1703 
1704 	error = ifnet_detach(ifp);
1705 	if (error != 0) {
1706 		panic("%s (%d): ifnet_detach(%p) failed %d",
1707 		    __func__, __LINE__, ifp, error);
1708 	}
1709 	return 0;
1710 }
1711 
/*
 * DRVSPEC:
 *
 *	Shared body of the SIOC[SG]DRVSPEC{32,64} cases of bridge_ioctl().
 *	It expands in a scope that must provide:
 *	    cmd                   the ioctl command being handled
 *	    sc                    the bridge softc
 *	    error                 result variable (set on every failure)
 *	    ifd                   pointer to the caller's ifdrv32/ifdrv64
 *	    args                  union large enough for any argument struct
 *	    bridge_control_table  control table matching the caller's width
 *	    bc                    scratch pointer into that table
 *
 *	Steps: bounds-check the sub-command, check that the transfer
 *	direction matches the table entry, enforce BC_F_SUSER, validate the
 *	argument length, copy in, run the handler under the bridge lock,
 *	and copy out for BC_F_COPYOUT entries.
 *
 *	The direction check compares against the width-specific command
 *	codes that bridge_ioctl() dispatches on (SIOC[SG]DRVSPEC32/64)
 *	rather than the un-suffixed SIOC[SG]DRVSPEC, so that it is applied
 *	to both the 32-bit and the 64-bit variants of the request.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) {                \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
	bc = &bridge_control_table[ifd->ifd_cmd];                       \
                                                                        \
	if ((cmd == SIOCGDRVSPEC32 || cmd == SIOCGDRVSPEC64) &&         \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	} else if ((cmd == SIOCSDRVSPEC32 || cmd == SIOCSDRVSPEC64) &&  \
	    (bc->bc_flags & BC_F_COPYOUT) != 0) {                       \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	if (bc->bc_flags & BC_F_SUSER) {                                \
	        error = kauth_authorize_generic(kauth_cred_get(),       \
	            KAUTH_GENERIC_ISSUSER);                             \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	if (ifd->ifd_len != bc->bc_argsize ||                           \
	    ifd->ifd_len > sizeof (args)) {                             \
	        error = EINVAL;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	bzero(&args, sizeof (args));                                    \
	if (bc->bc_flags & BC_F_COPYIN) {                               \
	        error = copyin(ifd->ifd_data, &args, ifd->ifd_len);     \
	        if (error)                                              \
	                break;                                          \
	}                                                               \
                                                                        \
	BRIDGE_LOCK(sc);                                                \
	error = (*bc->bc_func)(sc, &args);                              \
	BRIDGE_UNLOCK(sc);                                              \
	if (error)                                                      \
	        break;                                                  \
                                                                        \
	if (bc->bc_flags & BC_F_COPYOUT)                                \
	        error = copyout(&args, ifd->ifd_data, ifd->ifd_len);    \
} while (0)
1758 
1759 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1760 interface_needs_input_broadcast(struct ifnet * ifp)
1761 {
1762 	/*
1763 	 * Selectively enable input broadcast only when necessary.
1764 	 * The bridge interface itself attaches a fake protocol
1765 	 * so checking for at least two protocols means that the
1766 	 * interface is being used for something besides bridging
1767 	 * and needs to see broadcast packets from other members.
1768 	 */
1769 	return if_get_protolist(ifp, NULL, 0) >= 2;
1770 }
1771 
1772 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1773 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1774 {
1775 	boolean_t       old_input_broadcast;
1776 
1777 	old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1778 	if (input_broadcast) {
1779 		bif->bif_flags |= BIFF_INPUT_BROADCAST;
1780 	} else {
1781 		bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1782 	}
1783 	return old_input_broadcast != input_broadcast;
1784 }
1785 
1786 /*
1787  * bridge_ioctl:
1788  *
1789  *	Handle a control request from the operator.
1790  */
1791 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * data)1792 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1793 {
1794 	struct bridge_softc *sc = ifp->if_softc;
1795 	struct ifreq *ifr = (struct ifreq *)data;
1796 	struct bridge_iflist *bif;
1797 	int error = 0;
1798 
1799 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1800 
1801 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1802 	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1803 	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1804 	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1805 	    (char)IOCGROUP(cmd), cmd & 0xff);
1806 
1807 	switch (cmd) {
1808 	case SIOCSIFADDR:
1809 	case SIOCAIFADDR:
1810 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1811 		break;
1812 
1813 	case SIOCGIFMEDIA32:
1814 	case SIOCGIFMEDIA64: {
1815 		struct ifmediareq *ifmr = (struct ifmediareq *)data;
1816 		user_addr_t user_addr;
1817 
1818 		user_addr = (cmd == SIOCGIFMEDIA64) ?
1819 		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
1820 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
1821 
1822 		ifmr->ifm_status = IFM_AVALID;
1823 		ifmr->ifm_mask = 0;
1824 		ifmr->ifm_count = 1;
1825 
1826 		BRIDGE_LOCK(sc);
1827 		if (!(sc->sc_flags & SCF_DETACHING) &&
1828 		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1829 			ifmr->ifm_status |= IFM_ACTIVE;
1830 			ifmr->ifm_active = ifmr->ifm_current =
1831 			    IFM_ETHER | IFM_AUTO;
1832 		} else {
1833 			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1834 		}
1835 		BRIDGE_UNLOCK(sc);
1836 
1837 		if (user_addr != USER_ADDR_NULL) {
1838 			error = copyout(&ifmr->ifm_current, user_addr,
1839 			    sizeof(int));
1840 		}
1841 		break;
1842 	}
1843 
1844 	case SIOCADDMULTI:
1845 	case SIOCDELMULTI:
1846 		break;
1847 
1848 	case SIOCSDRVSPEC32:
1849 	case SIOCGDRVSPEC32: {
1850 		union {
1851 			struct ifbreq ifbreq;
1852 			struct ifbifconf32 ifbifconf;
1853 			struct ifbareq32 ifbareq;
1854 			struct ifbaconf32 ifbaconf;
1855 			struct ifbrparam ifbrparam;
1856 			struct ifbropreq32 ifbropreq;
1857 		} args;
1858 		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1859 		const struct bridge_control *bridge_control_table =
1860 		    bridge_control_table32, *bc;
1861 
1862 		DRVSPEC;
1863 
1864 		break;
1865 	}
1866 	case SIOCSDRVSPEC64:
1867 	case SIOCGDRVSPEC64: {
1868 		union {
1869 			struct ifbreq ifbreq;
1870 			struct ifbifconf64 ifbifconf;
1871 			struct ifbareq64 ifbareq;
1872 			struct ifbaconf64 ifbaconf;
1873 			struct ifbrparam ifbrparam;
1874 			struct ifbropreq64 ifbropreq;
1875 		} args;
1876 		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1877 		const struct bridge_control *bridge_control_table =
1878 		    bridge_control_table64, *bc;
1879 
1880 		DRVSPEC;
1881 
1882 		break;
1883 	}
1884 
1885 	case SIOCSIFFLAGS:
1886 		if (!(ifp->if_flags & IFF_UP) &&
1887 		    (ifp->if_flags & IFF_RUNNING)) {
1888 			/*
1889 			 * If interface is marked down and it is running,
1890 			 * then stop and disable it.
1891 			 */
1892 			BRIDGE_LOCK(sc);
1893 			bridge_ifstop(ifp, 1);
1894 			BRIDGE_UNLOCK(sc);
1895 		} else if ((ifp->if_flags & IFF_UP) &&
1896 		    !(ifp->if_flags & IFF_RUNNING)) {
1897 			/*
1898 			 * If interface is marked up and it is stopped, then
1899 			 * start it.
1900 			 */
1901 			BRIDGE_LOCK(sc);
1902 			error = bridge_init(ifp);
1903 			BRIDGE_UNLOCK(sc);
1904 		}
1905 		break;
1906 
1907 	case SIOCSIFLLADDR:
1908 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1909 		    ifr->ifr_addr.sa_len);
1910 		if (error != 0) {
1911 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1912 			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1913 			    error);
1914 		}
1915 		break;
1916 
1917 	case SIOCSIFMTU:
1918 		if (ifr->ifr_mtu < 576) {
1919 			error = EINVAL;
1920 			break;
1921 		}
1922 		BRIDGE_LOCK(sc);
1923 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
1924 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1925 			BRIDGE_UNLOCK(sc);
1926 			break;
1927 		}
1928 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1929 			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1930 				BRIDGE_LOG(LOG_NOTICE, 0,
1931 				    "%s invalid MTU: %u(%s) != %d",
1932 				    sc->sc_ifp->if_xname,
1933 				    bif->bif_ifp->if_mtu,
1934 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
1935 				error = EINVAL;
1936 				break;
1937 			}
1938 		}
1939 		if (!error) {
1940 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1941 		}
1942 		BRIDGE_UNLOCK(sc);
1943 		break;
1944 
1945 	default:
1946 		error = ether_ioctl(ifp, cmd, data);
1947 		if (error != 0 && error != EOPNOTSUPP) {
1948 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1949 			    "ifp %s cmd 0x%08lx "
1950 			    "(%c%c [%lu] %c %lu) failed error: %d",
1951 			    ifp->if_xname, cmd,
1952 			    (cmd & IOC_IN) ? 'I' : ' ',
1953 			    (cmd & IOC_OUT) ? 'O' : ' ',
1954 			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
1955 			    cmd & 0xff, error);
1956 		}
1957 		break;
1958 	}
1959 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1960 
1961 	return error;
1962 }
1963 
1964 #if HAS_IF_CAP
1965 /*
1966  * bridge_mutecaps:
1967  *
1968  *	Clear or restore unwanted capabilities on the member interface
1969  */
1970 static void
bridge_mutecaps(struct bridge_softc * sc)1971 bridge_mutecaps(struct bridge_softc *sc)
1972 {
1973 	struct bridge_iflist *bif;
1974 	int enabled, mask;
1975 
1976 	/* Initial bitmask of capabilities to test */
1977 	mask = BRIDGE_IFCAPS_MASK;
1978 
1979 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1980 		/* Every member must support it or its disabled */
1981 		mask &= bif->bif_savedcaps;
1982 	}
1983 
1984 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1985 		enabled = bif->bif_ifp->if_capenable;
1986 		enabled &= ~BRIDGE_IFCAPS_STRIP;
1987 		/* strip off mask bits and enable them again if allowed */
1988 		enabled &= ~BRIDGE_IFCAPS_MASK;
1989 		enabled |= mask;
1990 
1991 		bridge_set_ifcap(sc, bif, enabled);
1992 	}
1993 }
1994 
1995 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)1996 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1997 {
1998 	struct ifnet *ifp = bif->bif_ifp;
1999 	struct ifreq ifr;
2000 	int error;
2001 
2002 	bzero(&ifr, sizeof(ifr));
2003 	ifr.ifr_reqcap = set;
2004 
2005 	if (ifp->if_capenable != set) {
2006 		IFF_LOCKGIANT(ifp);
2007 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2008 		IFF_UNLOCKGIANT(ifp);
2009 		if (error) {
2010 			BRIDGE_LOG(LOG_NOTICE, 0,
2011 			    "%s error setting interface capabilities on %s",
2012 			    sc->sc_ifp->if_xname, ifp->if_xname);
2013 		}
2014 	}
2015 }
2016 #endif /* HAS_IF_CAP */
2017 
2018 static errno_t
bridge_set_tso(struct bridge_softc * sc)2019 bridge_set_tso(struct bridge_softc *sc)
2020 {
2021 	struct bridge_iflist *bif;
2022 	u_int32_t tso_v4_mtu;
2023 	u_int32_t tso_v6_mtu;
2024 	ifnet_offload_t offload;
2025 	errno_t error = 0;
2026 
2027 	/* By default, support TSO */
2028 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2029 	tso_v4_mtu = IP_MAXPACKET;
2030 	tso_v6_mtu = IP_MAXPACKET;
2031 
2032 	/* Use the lowest common denominator of the members */
2033 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2034 		ifnet_t ifp = bif->bif_ifp;
2035 
2036 		if (ifp == NULL) {
2037 			continue;
2038 		}
2039 
2040 		if (offload & IFNET_TSO_IPV4) {
2041 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2042 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2043 					tso_v4_mtu = ifp->if_tso_v4_mtu;
2044 				}
2045 			} else {
2046 				offload &= ~IFNET_TSO_IPV4;
2047 				tso_v4_mtu = 0;
2048 			}
2049 		}
2050 		if (offload & IFNET_TSO_IPV6) {
2051 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2052 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2053 					tso_v6_mtu = ifp->if_tso_v6_mtu;
2054 				}
2055 			} else {
2056 				offload &= ~IFNET_TSO_IPV6;
2057 				tso_v6_mtu = 0;
2058 			}
2059 		}
2060 	}
2061 
2062 	if (offload != sc->sc_ifp->if_hwassist) {
2063 		error = ifnet_set_offload(sc->sc_ifp, offload);
2064 		if (error != 0) {
2065 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2066 			    "ifnet_set_offload(%s, 0x%x) failed %d",
2067 			    sc->sc_ifp->if_xname, offload, error);
2068 			goto done;
2069 		}
2070 		/*
2071 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2072 		 * as large as the interface MTU
2073 		 */
2074 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2075 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2076 				tso_v4_mtu = sc->sc_ifp->if_mtu;
2077 			}
2078 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2079 			    tso_v4_mtu);
2080 			if (error != 0) {
2081 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2082 				    "ifnet_set_tso_mtu(%s, "
2083 				    "AF_INET, %u) failed %d",
2084 				    sc->sc_ifp->if_xname,
2085 				    tso_v4_mtu, error);
2086 				goto done;
2087 			}
2088 		}
2089 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2090 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2091 				tso_v6_mtu = sc->sc_ifp->if_mtu;
2092 			}
2093 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2094 			    tso_v6_mtu);
2095 			if (error != 0) {
2096 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2097 				    "ifnet_set_tso_mtu(%s, "
2098 				    "AF_INET6, %u) failed %d",
2099 				    sc->sc_ifp->if_xname,
2100 				    tso_v6_mtu, error);
2101 				goto done;
2102 			}
2103 		}
2104 	}
2105 done:
2106 	return error;
2107 }
2108 
2109 /*
2110  * bridge_lookup_member:
2111  *
2112  *	Lookup a bridge member interface.
2113  */
2114 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)2115 bridge_lookup_member(struct bridge_softc *sc, const char *name)
2116 {
2117 	struct bridge_iflist *bif;
2118 	struct ifnet *ifp;
2119 
2120 	BRIDGE_LOCK_ASSERT_HELD(sc);
2121 
2122 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2123 		ifp = bif->bif_ifp;
2124 		if (strcmp(ifp->if_xname, name) == 0) {
2125 			return bif;
2126 		}
2127 	}
2128 
2129 	return NULL;
2130 }
2131 
2132 /*
2133  * bridge_lookup_member_if:
2134  *
2135  *	Lookup a bridge member interface by ifnet*.
2136  */
2137 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2138 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2139 {
2140 	struct bridge_iflist *bif;
2141 
2142 	BRIDGE_LOCK_ASSERT_HELD(sc);
2143 
2144 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2145 		if (bif->bif_ifp == member_ifp) {
2146 			return bif;
2147 		}
2148 	}
2149 
2150 	return NULL;
2151 }
2152 
2153 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2154 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2155     mbuf_t *data, char **frame_ptr)
2156 {
2157 #pragma unused(protocol)
2158 	errno_t error = 0;
2159 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2160 	struct bridge_softc *sc = bif->bif_sc;
2161 	int included = 0;
2162 	size_t frmlen = 0;
2163 	mbuf_t m = *data;
2164 
2165 	if ((m->m_flags & M_PROTO1)) {
2166 		goto out;
2167 	}
2168 
2169 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2170 	    *frame_ptr <= (char *)mbuf_data(m)) {
2171 		included = 1;
2172 		frmlen = (char *)mbuf_data(m) - *frame_ptr;
2173 	}
2174 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2175 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2176 	    "frmlen %lu", sc->sc_ifp->if_xname,
2177 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2178 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
2179 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2180 	    included ? "inside" : "outside", frmlen);
2181 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2182 		brlog_mbuf(m, "bridge_iff_input[", "");
2183 		brlog_ether_header((struct ether_header *)
2184 		    (void *)*frame_ptr);
2185 		brlog_mbuf_data(m, 0, 20);
2186 	}
2187 	if (included == 0) {
2188 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2189 		goto out;
2190 	}
2191 
2192 	/* Move data pointer to start of frame to the link layer header */
2193 	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
2194 	    mbuf_len(m) + frmlen);
2195 	(void) mbuf_pkthdr_adjustlen(m, frmlen);
2196 
2197 	/* make sure we can access the ethernet header */
2198 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2199 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2200 		    "short frame %lu < %lu",
2201 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2202 		goto out;
2203 	}
2204 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2205 		error = mbuf_pullup(data, sizeof(struct ether_header));
2206 		if (error != 0) {
2207 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2208 			    "mbuf_pullup(%lu) failed %d",
2209 			    sizeof(struct ether_header),
2210 			    error);
2211 			error = EJUSTRETURN;
2212 			goto out;
2213 		}
2214 		if (m != *data) {
2215 			m = *data;
2216 			*frame_ptr = mbuf_data(m);
2217 		}
2218 	}
2219 
2220 	error = bridge_input(ifp, data);
2221 
2222 	/* Adjust packet back to original */
2223 	if (error == 0) {
2224 		/* bridge_input might have modified *data */
2225 		if (*data != m) {
2226 			m = *data;
2227 			*frame_ptr = mbuf_data(m);
2228 		}
2229 		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
2230 		    mbuf_len(m) - frmlen);
2231 		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
2232 	}
2233 
2234 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2235 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2236 		brlog_mbuf(m, "bridge_iff_input]", "");
2237 	}
2238 
2239 out:
2240 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2241 
2242 	return error;
2243 }
2244 
2245 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2246 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2247     mbuf_t *data)
2248 {
2249 #pragma unused(protocol)
2250 	errno_t error = 0;
2251 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2252 	struct bridge_softc *sc = bif->bif_sc;
2253 	mbuf_t m = *data;
2254 
2255 	if ((m->m_flags & M_PROTO1)) {
2256 		goto out;
2257 	}
2258 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2259 	    "%s from %s m 0x%llx data 0x%llx",
2260 	    sc->sc_ifp->if_xname, ifp->if_xname,
2261 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2262 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2263 
2264 	error = bridge_member_output(sc, ifp, data);
2265 	if (error != 0 && error != EJUSTRETURN) {
2266 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2267 		    "bridge_member_output failed error %d",
2268 		    error);
2269 	}
2270 out:
2271 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2272 
2273 	return error;
2274 }
2275 
2276 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2277 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2278     const struct kev_msg *event_msg)
2279 {
2280 #pragma unused(protocol)
2281 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2282 	struct bridge_softc *sc = bif->bif_sc;
2283 
2284 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2285 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2286 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2287 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2288 		    "%s event_code %u - %s",
2289 		    ifp->if_xname, event_msg->event_code,
2290 		    dlil_kev_dl_code_str(event_msg->event_code));
2291 
2292 		switch (event_msg->event_code) {
2293 		case KEV_DL_LINK_OFF:
2294 		case KEV_DL_LINK_ON: {
2295 			bridge_iflinkevent(ifp);
2296 #if BRIDGESTP
2297 			bstp_linkstate(ifp, event_msg->event_code);
2298 #endif /* BRIDGESTP */
2299 			break;
2300 		}
2301 		case KEV_DL_SIFFLAGS: {
2302 			if ((ifp->if_flags & IFF_UP) == 0) {
2303 				break;
2304 			}
2305 			if ((bif->bif_flags & BIFF_PROMISC) == 0) {
2306 				errno_t error;
2307 
2308 				error = ifnet_set_promiscuous(ifp, 1);
2309 				if (error != 0) {
2310 					BRIDGE_LOG(LOG_NOTICE, 0,
2311 					    "ifnet_set_promiscuous (%s)"
2312 					    " failed %d", ifp->if_xname,
2313 					    error);
2314 				} else {
2315 					bif->bif_flags |= BIFF_PROMISC;
2316 				}
2317 			}
2318 			if ((bif->bif_flags & BIFF_WIFI_INFRA) != 0 &&
2319 			    (bif->bif_flags & BIFF_ALL_MULTI) == 0) {
2320 				errno_t error;
2321 
2322 				error = if_allmulti(ifp, 1);
2323 				if (error != 0) {
2324 					BRIDGE_LOG(LOG_NOTICE, 0,
2325 					    "if_allmulti (%s)"
2326 					    " failed %d", ifp->if_xname,
2327 					    error);
2328 				} else {
2329 					bif->bif_flags |= BIFF_ALL_MULTI;
2330 #ifdef XNU_PLATFORM_AppleTVOS
2331 					ip6_forwarding = 1;
2332 #endif /* XNU_PLATFORM_AppleTVOS */
2333 				}
2334 			}
2335 			break;
2336 		}
2337 		case KEV_DL_IFCAP_CHANGED: {
2338 			BRIDGE_LOCK(sc);
2339 			bridge_set_tso(sc);
2340 			BRIDGE_UNLOCK(sc);
2341 			break;
2342 		}
2343 		case KEV_DL_PROTO_DETACHED:
2344 		case KEV_DL_PROTO_ATTACHED: {
2345 			bridge_proto_attach_changed(ifp);
2346 			break;
2347 		}
2348 		default:
2349 			break;
2350 		}
2351 	}
2352 }
2353 
2354 /*
2355  * bridge_iff_detached:
2356  *
2357  *      Called when our interface filter has been detached from a
2358  *      member interface.
2359  */
2360 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2361 bridge_iff_detached(void *cookie, ifnet_t ifp)
2362 {
2363 #pragma unused(cookie)
2364 	struct bridge_iflist *bif;
2365 	struct bridge_softc *sc = ifp->if_bridge;
2366 
2367 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2368 
2369 	/* Check if the interface is a bridge member */
2370 	if (sc != NULL) {
2371 		BRIDGE_LOCK(sc);
2372 		bif = bridge_lookup_member_if(sc, ifp);
2373 		if (bif != NULL) {
2374 			bridge_delete_member(sc, bif);
2375 		}
2376 		BRIDGE_UNLOCK(sc);
2377 		return;
2378 	}
2379 	/* Check if the interface is a span port */
2380 	lck_mtx_lock(&bridge_list_mtx);
2381 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2382 		BRIDGE_LOCK(sc);
2383 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2384 		if (ifp == bif->bif_ifp) {
2385 			bridge_delete_span(sc, bif);
2386 			break;
2387 		}
2388 		BRIDGE_UNLOCK(sc);
2389 	}
2390 	lck_mtx_unlock(&bridge_list_mtx);
2391 }
2392 
2393 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2394 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2395     char *header)
2396 {
2397 #pragma unused(protocol, packet, header)
2398 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2399 	    ifp->if_xname);
2400 	return 0;
2401 }
2402 
2403 static int
bridge_attach_protocol(struct ifnet * ifp)2404 bridge_attach_protocol(struct ifnet *ifp)
2405 {
2406 	int     error;
2407 	struct ifnet_attach_proto_param reg;
2408 
2409 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2410 	bzero(&reg, sizeof(reg));
2411 	reg.input = bridge_proto_input;
2412 
2413 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2414 	if (error) {
2415 		BRIDGE_LOG(LOG_NOTICE, 0,
2416 		    "ifnet_attach_protocol(%s) failed, %d",
2417 		    ifp->if_xname, error);
2418 	}
2419 
2420 	return error;
2421 }
2422 
2423 static int
bridge_detach_protocol(struct ifnet * ifp)2424 bridge_detach_protocol(struct ifnet *ifp)
2425 {
2426 	int     error;
2427 
2428 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2429 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2430 	if (error) {
2431 		BRIDGE_LOG(LOG_NOTICE, 0,
2432 		    "ifnet_detach_protocol(%s) failed, %d",
2433 		    ifp->if_xname, error);
2434 	}
2435 
2436 	return error;
2437 }
2438 
2439 /*
2440  * bridge_delete_member:
2441  *
2442  *	Delete the specified member interface.
2443  */
2444 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2445 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2446 {
2447 #if SKYWALK
2448 	boolean_t add_netagent = FALSE;
2449 #endif /* SKYWALK */
2450 	uint32_t    bif_flags;
2451 	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2452 	int lladdr_changed = 0, error;
2453 	uint8_t eaddr[ETHER_ADDR_LEN];
2454 	u_int32_t event_code = 0;
2455 
2456 	BRIDGE_LOCK_ASSERT_HELD(sc);
2457 	VERIFY(ifs != NULL);
2458 
2459 	/*
2460 	 * Remove the member from the list first so it cannot be found anymore
2461 	 * when we release the bridge lock below
2462 	 */
2463 	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2464 		BRIDGE_XLOCK(sc);
2465 		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2466 		BRIDGE_XDROP(sc);
2467 	}
2468 	if (sc->sc_mac_nat_bif != NULL) {
2469 		if (bif == sc->sc_mac_nat_bif) {
2470 			bridge_mac_nat_disable(sc);
2471 		} else {
2472 			bridge_mac_nat_flush_entries(sc, bif);
2473 		}
2474 	}
2475 #if BRIDGESTP
2476 	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2477 		bstp_disable(&bif->bif_stp);
2478 	}
2479 #endif /* BRIDGESTP */
2480 
2481 	/*
2482 	 * If removing the interface that gave the bridge its mac address, set
2483 	 * the mac address of the bridge to the address of the next member, or
2484 	 * to its default address if no members are left.
2485 	 */
2486 	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2487 		ifnet_release(sc->sc_ifaddr);
2488 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2489 			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2490 			sc->sc_ifaddr = NULL;
2491 		} else {
2492 			struct ifnet *fif =
2493 			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2494 			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2495 			sc->sc_ifaddr = fif;
2496 			ifnet_reference(fif);   /* for sc_ifaddr */
2497 		}
2498 		lladdr_changed = 1;
2499 	}
2500 
2501 #if HAS_IF_CAP
2502 	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
2503 #endif /* HAS_IF_CAP */
2504 
2505 	error = bridge_set_tso(sc);
2506 	if (error != 0) {
2507 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2508 	}
2509 
2510 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2511 
2512 	KASSERT(bif->bif_addrcnt == 0,
2513 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2514 
2515 	/*
2516 	 * Update link status of the bridge based on its remaining members
2517 	 */
2518 	event_code = bridge_updatelinkstatus(sc);
2519 	bif_flags = bif->bif_flags;
2520 	BRIDGE_UNLOCK(sc);
2521 
2522 	/* only perform these steps if the interface is still attached */
2523 	if (ifnet_is_attached(ifs, 1)) {
2524 #if SKYWALK
2525 		add_netagent = (bif_flags & BIFF_NETAGENT_REMOVED) != 0;
2526 
2527 		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2528 			ifnet_detach_flowswitch_nexus(ifs);
2529 		}
2530 #endif /* SKYWALK */
2531 		/* disable promiscuous mode */
2532 		if ((bif_flags & BIFF_PROMISC) != 0) {
2533 			(void) ifnet_set_promiscuous(ifs, 0);
2534 		}
2535 		/* disable all multi */
2536 		if ((bif_flags & BIFF_ALL_MULTI) != 0) {
2537 			(void)if_allmulti(ifs, 0);
2538 		}
2539 #if HAS_IF_CAP
2540 		/* re-enable any interface capabilities */
2541 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2542 #endif
2543 		/* detach bridge "protocol" */
2544 		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2545 			(void)bridge_detach_protocol(ifs);
2546 		}
2547 		/* detach interface filter */
2548 		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2549 			iflt_detach(bif->bif_iff_ref);
2550 		}
2551 		ifnet_decr_iorefcnt(ifs);
2552 	}
2553 
2554 	if (lladdr_changed &&
2555 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2556 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2557 	}
2558 
2559 	if (event_code != 0) {
2560 		bridge_link_event(bifp, event_code);
2561 	}
2562 
2563 #if BRIDGESTP
2564 	bstp_destroy(&bif->bif_stp);    /* prepare to free */
2565 #endif /* BRIDGESTP */
2566 
2567 	kfree_type(struct bridge_iflist, bif);
2568 	ifs->if_bridge = NULL;
2569 #if SKYWALK
2570 	if (add_netagent && ifnet_is_attached(ifs, 1)) {
2571 		(void)ifnet_add_netagent(ifs);
2572 		ifnet_decr_iorefcnt(ifs);
2573 	}
2574 #endif /* SKYWALK */
2575 
2576 	ifnet_release(ifs);
2577 
2578 	BRIDGE_LOCK(sc);
2579 }
2580 
2581 /*
2582  * bridge_delete_span:
2583  *
2584  *	Delete the specified span interface.
2585  */
2586 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2587 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2588 {
2589 	BRIDGE_LOCK_ASSERT_HELD(sc);
2590 
2591 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2592 	    ("%s: not a span interface", __func__));
2593 
2594 	ifnet_release(bif->bif_ifp);
2595 
2596 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2597 	kfree_type(struct bridge_iflist, bif);
2598 }
2599 
2600 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)2601 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2602 {
2603 	struct ifbreq *req = arg;
2604 	struct bridge_iflist *bif = NULL;
2605 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2606 	int error = 0, lladdr_changed = 0;
2607 	uint8_t eaddr[ETHER_ADDR_LEN];
2608 	struct iff_filter iff;
2609 	u_int32_t event_code = 0;
2610 	boolean_t input_broadcast;
2611 	boolean_t wifi_infra = FALSE;
2612 	int media_active;
2613 
2614 	ifs = ifunit(req->ifbr_ifsname);
2615 	if (ifs == NULL) {
2616 		return ENOENT;
2617 	}
2618 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2619 		return EINVAL;
2620 	}
2621 
2622 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
2623 		return EINVAL;
2624 	}
2625 
2626 	/* If it's in the span list, it can't be a member. */
2627 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2628 		if (ifs == bif->bif_ifp) {
2629 			return EBUSY;
2630 		}
2631 	}
2632 
2633 	if (ifs->if_bridge == sc) {
2634 		return EEXIST;
2635 	}
2636 
2637 	if (ifs->if_bridge != NULL) {
2638 		return EBUSY;
2639 	}
2640 
2641 	switch (ifs->if_type) {
2642 	case IFT_ETHER:
2643 		if (strcmp(ifs->if_name, "en") == 0 &&
2644 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2645 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2646 			/* XXX is there a better way to identify Wi-Fi STA? */
2647 			wifi_infra = TRUE;
2648 		}
2649 		break;
2650 	case IFT_L2VLAN:
2651 	case IFT_IEEE8023ADLAG:
2652 		break;
2653 	case IFT_GIF:
2654 	/* currently not supported */
2655 	/* FALLTHRU */
2656 	default:
2657 		return EINVAL;
2658 	}
2659 
2660 	/* fail to add the interface if the MTU doesn't match */
2661 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2662 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2663 		    sc->sc_ifp->if_xname,
2664 		    ifs->if_xname);
2665 		return EINVAL;
2666 	}
2667 
2668 	/* there's already an interface that's doing MAC NAT */
2669 	if (wifi_infra && sc->sc_mac_nat_bif != NULL) {
2670 		return EBUSY;
2671 	}
2672 
2673 	/* prevent the interface from detaching while we add the member */
2674 	if (!ifnet_is_attached(ifs, 1)) {
2675 		return ENXIO;
2676 	}
2677 
2678 	/* allocate a new member */
2679 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2680 	bif->bif_ifp = ifs;
2681 	ifnet_reference(ifs);
2682 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2683 #if HAS_IF_CAP
2684 	bif->bif_savedcaps = ifs->if_capenable;
2685 #endif /* HAS_IF_CAP */
2686 	bif->bif_sc = sc;
2687 	if (wifi_infra) {
2688 		(void)bridge_mac_nat_enable(sc, bif);
2689 	}
2690 
2691 	if (IFNET_IS_VMNET(ifs)) {
2692 		allocate_vmnet_pf_tags();
2693 	}
2694 	/* Allow the first Ethernet member to define the MTU */
2695 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2696 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2697 	}
2698 
2699 	/*
2700 	 * Assign the interface's MAC address to the bridge if it's the first
2701 	 * member and the MAC address of the bridge has not been changed from
2702 	 * the default (randomly) generated one.
2703 	 */
2704 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2705 	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2706 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2707 		sc->sc_ifaddr = ifs;
2708 		ifnet_reference(ifs);   /* for sc_ifaddr */
2709 		lladdr_changed = 1;
2710 	}
2711 
2712 	ifs->if_bridge = sc;
2713 #if BRIDGESTP
2714 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2715 #endif /* BRIDGESTP */
2716 
2717 #if HAS_IF_CAP
2718 	/* Set interface capabilities to the intersection set of all members */
2719 	bridge_mutecaps(sc);
2720 #endif /* HAS_IF_CAP */
2721 
2722 
2723 	/*
2724 	 * Respect lock ordering with DLIL lock for the following operations
2725 	 */
2726 	BRIDGE_UNLOCK(sc);
2727 
2728 	/* enable promiscuous mode */
2729 	error = ifnet_set_promiscuous(ifs, 1);
2730 	switch (error) {
2731 	case 0:
2732 		bif->bif_flags |= BIFF_PROMISC;
2733 		break;
2734 	case ENETDOWN:
2735 	case EPWROFF:
2736 		BRIDGE_LOG(LOG_NOTICE, 0,
2737 		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2738 		    ifs->if_xname, error);
2739 		/* Ignore error when device is not up */
2740 		error = 0;
2741 		break;
2742 	default:
2743 		BRIDGE_LOG(LOG_NOTICE, 0,
2744 		    "ifnet_set_promiscuous(%s) failed %d",
2745 		    ifs->if_xname, error);
2746 		BRIDGE_LOCK(sc);
2747 		goto out;
2748 	}
2749 	if (wifi_infra) {
2750 		int this_error;
2751 
2752 		/* Wi-Fi doesn't really support promiscuous, set allmulti */
2753 		bif->bif_flags |= BIFF_WIFI_INFRA;
2754 		this_error = if_allmulti(ifs, 1);
2755 		if (this_error == 0) {
2756 			bif->bif_flags |= BIFF_ALL_MULTI;
2757 #ifdef XNU_PLATFORM_AppleTVOS
2758 			ip6_forwarding = 1;
2759 #endif /* XNU_PLATFORM_AppleTVOS */
2760 		} else {
2761 			BRIDGE_LOG(LOG_NOTICE, 0,
2762 			    "if_allmulti(%s) failed %d, ignoring",
2763 			    ifs->if_xname, this_error);
2764 		}
2765 	}
2766 #if SKYWALK
2767 	/* ensure that the flowswitch is present for native interface */
2768 	if (SKYWALK_NATIVE(ifs)) {
2769 		if (ifnet_attach_flowswitch_nexus(ifs)) {
2770 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2771 		}
2772 	}
2773 	/* remove the netagent on the flowswitch (rdar://75050182) */
2774 	if (if_is_fsw_netagent_enabled()) {
2775 		(void)ifnet_remove_netagent(ifs);
2776 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2777 	}
2778 #endif /* SKYWALK */
2779 
2780 	/*
2781 	 * install an interface filter
2782 	 */
2783 	memset(&iff, 0, sizeof(struct iff_filter));
2784 	iff.iff_cookie = bif;
2785 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2786 	iff.iff_input = bridge_iff_input;
2787 	iff.iff_output = bridge_iff_output;
2788 	iff.iff_event = bridge_iff_event;
2789 	iff.iff_detached = bridge_iff_detached;
2790 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2791 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2792 	if (error != 0) {
2793 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2794 		BRIDGE_LOCK(sc);
2795 		goto out;
2796 	}
2797 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
2798 
2799 	/*
2800 	 * install a dummy "bridge" protocol
2801 	 */
2802 	if ((error = bridge_attach_protocol(ifs)) != 0) {
2803 		if (error != 0) {
2804 			BRIDGE_LOG(LOG_NOTICE, 0,
2805 			    "bridge_attach_protocol failed %d", error);
2806 			BRIDGE_LOCK(sc);
2807 			goto out;
2808 		}
2809 	}
2810 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
2811 
2812 	if (lladdr_changed &&
2813 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2814 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2815 	}
2816 
2817 	media_active = interface_media_active(ifs);
2818 
2819 	/*
2820 	 * No failures past this point. Add the member to the list.
2821 	 */
2822 	BRIDGE_LOCK(sc);
2823 	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2824 	BRIDGE_XLOCK(sc);
2825 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2826 	BRIDGE_XDROP(sc);
2827 
2828 	/* cache the member link status */
2829 	if (media_active != 0) {
2830 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2831 	} else {
2832 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2833 	}
2834 
2835 	/* the new member may change the link status of the bridge interface */
2836 	event_code = bridge_updatelinkstatus(sc);
2837 
2838 	/* check whether we need input broadcast or not */
2839 	input_broadcast = interface_needs_input_broadcast(ifs);
2840 	bif_set_input_broadcast(bif, input_broadcast);
2841 	BRIDGE_UNLOCK(sc);
2842 
2843 	if (event_code != 0) {
2844 		bridge_link_event(bifp, event_code);
2845 	}
2846 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2847 	    "%s input broadcast %s", ifs->if_xname,
2848 	    input_broadcast ? "ENABLED" : "DISABLED");
2849 
2850 	BRIDGE_LOCK(sc);
2851 	bridge_set_tso(sc);
2852 
2853 out:
2854 	/* allow the interface to detach */
2855 	ifnet_decr_iorefcnt(ifs);
2856 
2857 	if (error != 0) {
2858 		if (bif != NULL) {
2859 			bridge_delete_member(sc, bif);
2860 		}
2861 	} else if (IFNET_IS_VMNET(ifs)) {
2862 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2863 	}
2864 
2865 	return error;
2866 }
2867 
2868 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2869 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2870 {
2871 	struct ifbreq *req = arg;
2872 	struct bridge_iflist *bif;
2873 
2874 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2875 	if (bif == NULL) {
2876 		return ENOENT;
2877 	}
2878 
2879 	bridge_delete_member(sc, bif);
2880 
2881 	return 0;
2882 }
2883 
/*
 * bridge_ioctl_purge:
 *
 *	No-op handler: ignores both arguments and always returns 0.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)

	return 0;
}
2890 
2891 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2892 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2893 {
2894 	struct ifbreq *req = arg;
2895 	struct bridge_iflist *bif;
2896 
2897 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2898 	if (bif == NULL) {
2899 		return ENOENT;
2900 	}
2901 
2902 	struct bstp_port *bp;
2903 
2904 	bp = &bif->bif_stp;
2905 	req->ifbr_state = bp->bp_state;
2906 	req->ifbr_priority = bp->bp_priority;
2907 	req->ifbr_path_cost = bp->bp_path_cost;
2908 	req->ifbr_proto = bp->bp_protover;
2909 	req->ifbr_role = bp->bp_role;
2910 	req->ifbr_stpflags = bp->bp_flags;
2911 	req->ifbr_ifsflags = bif->bif_ifflags;
2912 
2913 	/* Copy STP state options as flags */
2914 	if (bp->bp_operedge) {
2915 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2916 	}
2917 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2918 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2919 	}
2920 	if (bp->bp_ptp_link) {
2921 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2922 	}
2923 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2924 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2925 	}
2926 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2927 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
2928 	}
2929 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
2930 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
2931 	}
2932 
2933 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
2934 	req->ifbr_addrcnt = bif->bif_addrcnt;
2935 	req->ifbr_addrmax = bif->bif_addrmax;
2936 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
2937 
2938 	return 0;
2939 }
2940 
2941 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)2942 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
2943 {
2944 	struct ifbreq *req = arg;
2945 	struct bridge_iflist *bif;
2946 #if BRIDGESTP
2947 	struct bstp_port *bp;
2948 	int error;
2949 #endif /* BRIDGESTP */
2950 
2951 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2952 	if (bif == NULL) {
2953 		return ENOENT;
2954 	}
2955 
2956 	if (req->ifbr_ifsflags & IFBIF_SPAN) {
2957 		/* SPAN is readonly */
2958 		return EINVAL;
2959 	}
2960 #define _EXCLUSIVE_FLAGS        (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
2961 	if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
2962 		/* can't specify both MAC-NAT and checksum offload */
2963 		return EINVAL;
2964 	}
2965 	if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
2966 		errno_t error;
2967 
2968 		error = bridge_mac_nat_enable(sc, bif);
2969 		if (error != 0) {
2970 			return error;
2971 		}
2972 	} else if (sc->sc_mac_nat_bif == bif) {
2973 		bridge_mac_nat_disable(sc);
2974 	}
2975 
2976 
2977 #if BRIDGESTP
2978 	if (req->ifbr_ifsflags & IFBIF_STP) {
2979 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
2980 			error = bstp_enable(&bif->bif_stp);
2981 			if (error) {
2982 				return error;
2983 			}
2984 		}
2985 	} else {
2986 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2987 			bstp_disable(&bif->bif_stp);
2988 		}
2989 	}
2990 
2991 	/* Pass on STP flags */
2992 	bp = &bif->bif_stp;
2993 	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
2994 	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
2995 	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
2996 	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
2997 #else /* !BRIDGESTP */
2998 	if (req->ifbr_ifsflags & IFBIF_STP) {
2999 		return EOPNOTSUPP;
3000 	}
3001 #endif /* !BRIDGESTP */
3002 
3003 	/* Save the bits relating to the bridge */
3004 	bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
3005 
3006 
3007 	return 0;
3008 }
3009 
3010 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)3011 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
3012 {
3013 	struct ifbrparam *param = arg;
3014 
3015 	sc->sc_brtmax = param->ifbrp_csize;
3016 	bridge_rttrim(sc);
3017 	return 0;
3018 }
3019 
3020 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)3021 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
3022 {
3023 	struct ifbrparam *param = arg;
3024 
3025 	param->ifbrp_csize = sc->sc_brtmax;
3026 
3027 	return 0;
3028 }
3029 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of the bridge_ioctl_gifs32/64 handlers.  The expanding
 *	function must have `sc', `bifc' and `error' in scope.
 *
 *	If bifc->ifbic_len is 0, the size needed for all member and span
 *	entries is stored in ifbic_len and the enclosing function returns 0.
 *	Otherwise one ifbreq per member and per span interface is built in
 *	a temporary buffer (as many as fit in the caller's length) and
 *	copied out to bifc->ifbic_req; ifbic_len is updated to the number
 *	of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and the copyout
 *	and is held again when the macro completes.  If the temporary
 *	buffer cannot be allocated, `error' is set to ENOMEM.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
	        count++;                                                \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
	        count++;                                                \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* never write through a failed allocation */                   \
	if (outbuf == NULL && buflen != 0) {                            \
	        error = ENOMEM;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	        /* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq);               \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        /* don't report a previous member's fields for a span */ \
	        bzero(&breq, sizeof (breq));                            \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);      \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
3092 
3093 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * arg)3094 bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
3095 {
3096 	struct ifbifconf64 *bifc = arg;
3097 	int error = 0;
3098 
3099 	BRIDGE_IOCTL_GIFS;
3100 
3101 	return error;
3102 }
3103 
3104 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * arg)3105 bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
3106 {
3107 	struct ifbifconf32 *bifc = arg;
3108 	int error = 0;
3109 
3110 	BRIDGE_IOCTL_GIFS;
3111 
3112 	return error;
3113 }
3114 
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of the bridge_ioctl_rts32/64 handlers.  The expanding
 *	function must have `sc', `bac', `bareq' and `error' in scope.  The
 *	macro returns from the enclosing function.
 *
 *	If bac->ifbac_len is 0 the enclosing function returns 0 at once.
 *	Otherwise one `bareq' record per entry of sc_rtlist is built in a
 *	temporary buffer (as many as fit in the caller's length) and copied
 *	out to bac->ifbac_req; ifbac_len is updated to the number of bytes
 *	produced.  For IFBAF_DYNAMIC entries ifba_expire is the time left
 *	until brt_expire relative to net_uptime(), or 0 if that time has
 *	already passed; for all other entries it is 0.
 *
 *	The bridge lock is dropped around the allocation and the copyout
 *	and is held again on return.  If the temporary buffer cannot be
 *	allocated, ENOMEM is returned and ifbac_len is set to 0.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);            \
	BRIDGE_LOCK(sc);                                                    \
                                                                            \
	count = 0;                                                          \
	/* never write through a failed allocation */                       \
	if (outbuf == NULL) {                                               \
	        if (buflen != 0)                                            \
	                error = ENOMEM;                                     \
	        goto out;                                                   \
	}                                                                   \
	buf = outbuf;                                                       \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	        /* reset so an expired entry can't inherit a stale value */ \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire)                          \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);    \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3169 
3170 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * arg)3171 bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3172 {
3173 	struct ifbaconf64 *bac = arg;
3174 	struct ifbareq64 bareq;
3175 	int error = 0;
3176 
3177 	BRIDGE_IOCTL_RTS;
3178 	return error;
3179 }
3180 
3181 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * arg)3182 bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3183 {
3184 	struct ifbaconf32 *bac = arg;
3185 	struct ifbareq32 bareq;
3186 	int error = 0;
3187 
3188 	BRIDGE_IOCTL_RTS;
3189 	return error;
3190 }
3191 
3192 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3193 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3194 {
3195 	struct ifbareq32 *req = arg;
3196 	struct bridge_iflist *bif;
3197 	int error;
3198 
3199 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3200 	if (bif == NULL) {
3201 		return ENOENT;
3202 	}
3203 
3204 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3205 	    req->ifba_flags);
3206 
3207 	return error;
3208 }
3209 
3210 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3211 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3212 {
3213 	struct ifbareq64 *req = arg;
3214 	struct bridge_iflist *bif;
3215 	int error;
3216 
3217 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3218 	if (bif == NULL) {
3219 		return ENOENT;
3220 	}
3221 
3222 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3223 	    req->ifba_flags);
3224 
3225 	return error;
3226 }
3227 
3228 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3229 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3230 {
3231 	struct ifbrparam *param = arg;
3232 
3233 	sc->sc_brttimeout = param->ifbrp_ctime;
3234 	return 0;
3235 }
3236 
3237 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3238 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3239 {
3240 	struct ifbrparam *param = arg;
3241 
3242 	param->ifbrp_ctime = sc->sc_brttimeout;
3243 	return 0;
3244 }
3245 
3246 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3247 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3248 {
3249 	struct ifbareq32 *req = arg;
3250 
3251 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3252 }
3253 
3254 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3255 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3256 {
3257 	struct ifbareq64 *req = arg;
3258 
3259 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3260 }
3261 
3262 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3263 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3264 {
3265 	struct ifbreq *req = arg;
3266 
3267 	bridge_rtflush(sc, req->ifbr_ifsflags);
3268 	return 0;
3269 }
3270 
3271 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3272 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3273 {
3274 	struct ifbrparam *param = arg;
3275 	struct bstp_state *bs = &sc->sc_stp;
3276 
3277 	param->ifbrp_prio = bs->bs_bridge_priority;
3278 	return 0;
3279 }
3280 
3281 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)3282 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3283 {
3284 #if BRIDGESTP
3285 	struct ifbrparam *param = arg;
3286 
3287 	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3288 #else /* !BRIDGESTP */
3289 #pragma unused(sc, arg)
3290 	return EOPNOTSUPP;
3291 #endif /* !BRIDGESTP */
3292 }
3293 
3294 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3295 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3296 {
3297 	struct ifbrparam *param = arg;
3298 	struct bstp_state *bs = &sc->sc_stp;
3299 
3300 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3301 	return 0;
3302 }
3303 
3304 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)3305 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3306 {
3307 #if BRIDGESTP
3308 	struct ifbrparam *param = arg;
3309 
3310 	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3311 #else /* !BRIDGESTP */
3312 #pragma unused(sc, arg)
3313 	return EOPNOTSUPP;
3314 #endif /* !BRIDGESTP */
3315 }
3316 
3317 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3318 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3319 {
3320 	struct ifbrparam *param;
3321 	struct bstp_state *bs;
3322 
3323 	param = arg;
3324 	bs = &sc->sc_stp;
3325 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3326 	return 0;
3327 }
3328 
3329 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)3330 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3331 {
3332 #if BRIDGESTP
3333 	struct ifbrparam *param = arg;
3334 
3335 	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3336 #else /* !BRIDGESTP */
3337 #pragma unused(sc, arg)
3338 	return EOPNOTSUPP;
3339 #endif /* !BRIDGESTP */
3340 }
3341 
3342 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3343 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3344 {
3345 	struct ifbrparam *param;
3346 	struct bstp_state *bs;
3347 
3348 	param = arg;
3349 	bs = &sc->sc_stp;
3350 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3351 	return 0;
3352 }
3353 
3354 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)3355 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3356 {
3357 #if BRIDGESTP
3358 	struct ifbrparam *param = arg;
3359 
3360 	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3361 #else /* !BRIDGESTP */
3362 #pragma unused(sc, arg)
3363 	return EOPNOTSUPP;
3364 #endif /* !BRIDGESTP */
3365 }
3366 
3367 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * arg)3368 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
3369 {
3370 #if BRIDGESTP
3371 	struct ifbreq *req = arg;
3372 	struct bridge_iflist *bif;
3373 
3374 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3375 	if (bif == NULL) {
3376 		return ENOENT;
3377 	}
3378 
3379 	return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3380 #else /* !BRIDGESTP */
3381 #pragma unused(sc, arg)
3382 	return EOPNOTSUPP;
3383 #endif /* !BRIDGESTP */
3384 }
3385 
3386 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * arg)3387 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
3388 {
3389 #if BRIDGESTP
3390 	struct ifbreq *req = arg;
3391 	struct bridge_iflist *bif;
3392 
3393 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3394 	if (bif == NULL) {
3395 		return ENOENT;
3396 	}
3397 
3398 	return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3399 #else /* !BRIDGESTP */
3400 #pragma unused(sc, arg)
3401 	return EOPNOTSUPP;
3402 #endif /* !BRIDGESTP */
3403 }
3404 
3405 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3406 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3407 {
3408 	struct ifbrparam *param = arg;
3409 
3410 	param->ifbrp_filter = sc->sc_filter_flags;
3411 
3412 	return 0;
3413 }
3414 
3415 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3416 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3417 {
3418 	struct ifbrparam *param = arg;
3419 
3420 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3421 		return EINVAL;
3422 	}
3423 
3424 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3425 		return EINVAL;
3426 	}
3427 
3428 	sc->sc_filter_flags = param->ifbrp_filter;
3429 
3430 	return 0;
3431 }
3432 
3433 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3434 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3435 {
3436 	struct ifbreq *req = arg;
3437 	struct bridge_iflist *bif;
3438 
3439 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3440 	if (bif == NULL) {
3441 		return ENOENT;
3442 	}
3443 
3444 	bif->bif_addrmax = req->ifbr_addrmax;
3445 	return 0;
3446 }
3447 
3448 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3449 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3450 {
3451 	struct ifbreq *req = arg;
3452 	struct bridge_iflist *bif = NULL;
3453 	struct ifnet *ifs;
3454 
3455 	ifs = ifunit(req->ifbr_ifsname);
3456 	if (ifs == NULL) {
3457 		return ENOENT;
3458 	}
3459 
3460 	if (IFNET_IS_INTCOPROC(ifs) || IFNET_IS_MANAGEMENT(ifs)) {
3461 		return EINVAL;
3462 	}
3463 
3464 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3465 	if (ifs == bif->bif_ifp) {
3466 		return EBUSY;
3467 	}
3468 
3469 	if (ifs->if_bridge != NULL) {
3470 		return EBUSY;
3471 	}
3472 
3473 	switch (ifs->if_type) {
3474 	case IFT_ETHER:
3475 	case IFT_L2VLAN:
3476 	case IFT_IEEE8023ADLAG:
3477 		break;
3478 	case IFT_GIF:
3479 	/* currently not supported */
3480 	/* FALLTHRU */
3481 	default:
3482 		return EINVAL;
3483 	}
3484 
3485 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3486 
3487 	bif->bif_ifp = ifs;
3488 	bif->bif_ifflags = IFBIF_SPAN;
3489 
3490 	ifnet_reference(bif->bif_ifp);
3491 
3492 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3493 
3494 	return 0;
3495 }
3496 
3497 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3498 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3499 {
3500 	struct ifbreq *req = arg;
3501 	struct bridge_iflist *bif;
3502 	struct ifnet *ifs;
3503 
3504 	ifs = ifunit(req->ifbr_ifsname);
3505 	if (ifs == NULL) {
3506 		return ENOENT;
3507 	}
3508 
3509 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3510 	if (ifs == bif->bif_ifp) {
3511 		break;
3512 	}
3513 
3514 	if (bif == NULL) {
3515 		return ENOENT;
3516 	}
3517 
3518 	bridge_delete_span(sc, bif);
3519 
3520 	return 0;
3521 }
3522 
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of the 32-bit and 64-bit bridge_ioctl_gbparam handlers.
 *	The expanding function must define `sc' (the bridge softc) and
 *	`req' (pointer to its ifbropreq variant); the macro copies the
 *	fields of sc->sc_stp into `req'.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *stp = &sc->sc_stp;                           \
	struct bstp_port *rport = stp->bs_root_port;                    \
                                                                        \
	/* the three timers are reported shifted right by 8 bits */     \
	req->ifbop_maxage = stp->bs_bridge_max_age >> 8;                \
	req->ifbop_hellotime = stp->bs_bridge_htime >> 8;               \
	req->ifbop_fwddelay = stp->bs_bridge_fdelay >> 8;               \
                                                                        \
	/* root port is the interface index, or 0 when there is none */ \
	req->ifbop_root_port = (rport == NULL) ?                        \
	    0 : rport->bp_ifp->if_index;                                \
                                                                        \
	req->ifbop_holdcount = stp->bs_txholdcount;                     \
	req->ifbop_priority = stp->bs_bridge_priority;                  \
	req->ifbop_protocol = stp->bs_protover;                         \
	req->ifbop_root_path_cost = stp->bs_root_pv.pv_cost;            \
	req->ifbop_bridgeid = stp->bs_bridge_pv.pv_dbridge_id;          \
	req->ifbop_designated_root = stp->bs_root_pv.pv_root_id;        \
	req->ifbop_designated_bridge = stp->bs_root_pv.pv_dbridge_id;   \
	req->ifbop_last_tc_time.tv_sec = stp->bs_last_tc_time.tv_sec;   \
	req->ifbop_last_tc_time.tv_usec = stp->bs_last_tc_time.tv_usec; \
} while (0)
3547 
3548 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * arg)3549 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3550 {
3551 	struct ifbropreq32 *req = arg;
3552 
3553 	BRIDGE_IOCTL_GBPARAM;
3554 	return 0;
3555 }
3556 
3557 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * arg)3558 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3559 {
3560 	struct ifbropreq64 *req = arg;
3561 
3562 	BRIDGE_IOCTL_GBPARAM;
3563 	return 0;
3564 }
3565 
3566 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3567 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3568 {
3569 	struct ifbrparam *param = arg;
3570 
3571 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3572 	return 0;
3573 }
3574 
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of the 32-bit and 64-bit bridge_ioctl_gifsstp handlers.
 *	The expanding function must define `sc' (the bridge softc),
 *	`bifstp' (pointer to its ifbpstpconf variant) and `error' (int).
 *
 *	When bifstp->ifbpstp_len is 0, only the buffer size needed for all
 *	members with IFBIF_STP set is reported. Otherwise one
 *	struct ifbpstpreq per such member is built in a temporary kernel
 *	buffer, as many as fit in the user-supplied length, and copied out
 *	to bifstp->ifbpstp_req; ifbpstp_len is updated to the number of
 *	bytes produced.
 *
 *	The macro always returns from the expanding function: 0 or the
 *	copyout() error, or ENOMEM when the temporary buffer cannot be
 *	allocated. The bridge lock is dropped around the allocation and
 *	the copyout and is held again when the macro returns.
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0)                \
	                count++;                                        \
	}                                                               \
                                                                        \
	buflen = sizeof (bpreq) * count;                                \
	if (bifstp->ifbpstp_len == 0) {                                 \
	        bifstp->ifbpstp_len = buflen;                           \
	        return (0);                                             \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/*                                                              \
	 * The allocation is not Z_NOFAIL: bail out instead of letting  \
	 * the memcpy() below write through a NULL pointer.             \
	 */                                                             \
	if (outbuf == NULL && buflen != 0) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	/*                                                              \
	 * The member list may have changed while the lock was dropped; \
	 * `len' never exceeds the allocated size, so the loop below    \
	 * cannot overrun outbuf.                                       \
	 */                                                             \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (bpreq))                               \
	                break;                                          \
                                                                        \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0)                \
	                continue;                                       \
                                                                        \
	        bp = &bif->bif_stp;                                     \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
                                                                        \
	        memcpy(buf, &bpreq, sizeof (bpreq));                    \
	        count++;                                                \
	        buf += sizeof (bpreq);                                  \
	        len -= sizeof (bpreq);                                  \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3630 
3631 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * arg)3632 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3633 {
3634 	struct ifbpstpconf32 *bifstp = arg;
3635 	int error = 0;
3636 
3637 	BRIDGE_IOCTL_GIFSSTP;
3638 	return error;
3639 }
3640 
3641 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * arg)3642 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3643 {
3644 	struct ifbpstpconf64 *bifstp = arg;
3645 	int error = 0;
3646 
3647 	BRIDGE_IOCTL_GIFSSTP;
3648 	return error;
3649 }
3650 
3651 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * arg)3652 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3653 {
3654 #if BRIDGESTP
3655 	struct ifbrparam *param = arg;
3656 
3657 	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3658 #else /* !BRIDGESTP */
3659 #pragma unused(sc, arg)
3660 	return EOPNOTSUPP;
3661 #endif /* !BRIDGESTP */
3662 }
3663 
3664 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * arg)3665 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3666 {
3667 #if BRIDGESTP
3668 	struct ifbrparam *param = arg;
3669 
3670 	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3671 #else /* !BRIDGESTP */
3672 #pragma unused(sc, arg)
3673 	return EOPNOTSUPP;
3674 #endif /* !BRIDGESTP */
3675 }
3676 
3677 
3678 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3679 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3680 {
3681 	struct ifbrhostfilter *req = arg;
3682 	struct bridge_iflist *bif;
3683 
3684 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3685 	if (bif == NULL) {
3686 		return ENOENT;
3687 	}
3688 
3689 	bzero(req, sizeof(struct ifbrhostfilter));
3690 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3691 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3692 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3693 		    ETHER_ADDR_LEN);
3694 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3695 	}
3696 	return 0;
3697 }
3698 
3699 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3700 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3701 {
3702 	struct ifbrhostfilter *req = arg;
3703 	struct bridge_iflist *bif;
3704 
3705 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3706 	if (bif == NULL) {
3707 		return ENOENT;
3708 	}
3709 
3710 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3711 		bif->bif_flags |= BIFF_HOST_FILTER;
3712 
3713 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3714 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3715 			    ETHER_ADDR_LEN);
3716 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3717 			    ETHER_ADDR_LEN) != 0) {
3718 				bif->bif_flags |= BIFF_HF_HWSRC;
3719 			} else {
3720 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3721 			}
3722 		}
3723 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3724 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3725 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3726 				bif->bif_flags |= BIFF_HF_IPSRC;
3727 			} else {
3728 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3729 			}
3730 		}
3731 	} else {
3732 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3733 		    BIFF_HF_IPSRC);
3734 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3735 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3736 	}
3737 
3738 	return 0;
3739 }
3740 
3741 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3742 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3743     unsigned int * count_p, char *buf, unsigned int *len_p)
3744 {
3745 	unsigned int            count = *count_p;
3746 	struct ifbrmne          ifbmne;
3747 	unsigned int            len = *len_p;
3748 	struct mac_nat_entry    *mne;
3749 	unsigned long           now;
3750 
3751 	bzero(&ifbmne, sizeof(ifbmne));
3752 	LIST_FOREACH(mne, list, mne_list) {
3753 		if (len < sizeof(ifbmne)) {
3754 			break;
3755 		}
3756 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3757 		    "%s", mne->mne_bif->bif_ifp->if_xname);
3758 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3759 		    sizeof(ifbmne.ifbmne_mac));
3760 		now = (unsigned long) net_uptime();
3761 		if (now < mne->mne_expire) {
3762 			ifbmne.ifbmne_expire = mne->mne_expire - now;
3763 		} else {
3764 			ifbmne.ifbmne_expire = 0;
3765 		}
3766 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3767 			ifbmne.ifbmne_af = AF_INET6;
3768 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3769 		} else {
3770 			ifbmne.ifbmne_af = AF_INET;
3771 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
3772 		}
3773 		memcpy(buf, &ifbmne, sizeof(ifbmne));
3774 		count++;
3775 		buf += sizeof(ifbmne);
3776 		len -= sizeof(ifbmne);
3777 	}
3778 	*count_p = count;
3779 	*len_p = len;
3780 	return buf;
3781 }
3782 
3783 /*
3784  * bridge_ioctl_gmnelist()
3785  *   Perform the get mac_nat_entry list ioctl.
3786  *
3787  * Note:
3788  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3789  *   field size/layout except for the last field ifbml_buf, the user-supplied
3790  *   buffer pointer. That is passed in separately via the 'user_addr'
3791  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3792  */
3793 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3794 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3795     user_addr_t user_addr)
3796 {
3797 	unsigned int            count;
3798 	char                    *buf;
3799 	int                     error = 0;
3800 	char                    *outbuf = NULL;
3801 	struct mac_nat_entry    *mne;
3802 	unsigned int            buflen;
3803 	unsigned int            len;
3804 
3805 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
3806 	count = 0;
3807 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3808 		count++;
3809 	}
3810 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3811 		count++;
3812 	}
3813 	buflen = sizeof(struct ifbrmne) * count;
3814 	if (buflen == 0 || mnl->ifbml_len == 0) {
3815 		mnl->ifbml_len = buflen;
3816 		return error;
3817 	}
3818 	BRIDGE_UNLOCK(sc);
3819 	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3820 	BRIDGE_LOCK(sc);
3821 	count = 0;
3822 	buf = outbuf;
3823 	len = min(mnl->ifbml_len, buflen);
3824 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3825 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3826 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
3827 	BRIDGE_UNLOCK(sc);
3828 	error = copyout(outbuf, user_addr, mnl->ifbml_len);
3829 	kfree_data(outbuf, buflen);
3830 	BRIDGE_LOCK(sc);
3831 	return error;
3832 }
3833 
3834 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3835 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3836 {
3837 	struct ifbrmnelist64 *mnl = arg;
3838 
3839 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3840 }
3841 
3842 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3843 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3844 {
3845 	struct ifbrmnelist32 *mnl = arg;
3846 
3847 	return bridge_ioctl_gmnelist(sc, arg,
3848 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
3849 }
3850 
3851 /*
3852  * bridge_ioctl_gifstats()
3853  *   Return per-member stats.
3854  *
3855  * Note:
3856  *   The ifbrmreq32 and ifbrmreq64 structures have the same
3857  *   field size/layout except for the last field brmr_buf, the user-supplied
3858  *   buffer pointer. That is passed in separately via the 'user_addr'
3859  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3860  */
3861 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3862 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3863     user_addr_t user_addr)
3864 {
3865 	struct bridge_iflist    *bif;
3866 	int                     error = 0;
3867 	unsigned int            buflen;
3868 
3869 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3870 	if (bif == NULL) {
3871 		error = ENOENT;
3872 		goto done;
3873 	}
3874 
3875 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3876 	if (buflen == 0 || mreq->brmr_len == 0) {
3877 		mreq->brmr_len = buflen;
3878 		goto done;
3879 	}
3880 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3881 		error = ENOBUFS;
3882 		goto done;
3883 	}
3884 	mreq->brmr_len = buflen;
3885 	error = copyout(&bif->bif_stats, user_addr, buflen);
3886 done:
3887 	return error;
3888 }
3889 
3890 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3891 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3892 {
3893 	struct ifbrmreq32 *mreq = arg;
3894 
3895 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3896 }
3897 
3898 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3899 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3900 {
3901 	struct ifbrmreq64 *mreq = arg;
3902 
3903 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3904 }
3905 
3906 /*
3907  * bridge_proto_attach_changed
3908  *
3909  *	Called when protocol attachment on the interface changes.
3910  */
3911 static void
bridge_proto_attach_changed(struct ifnet * ifp)3912 bridge_proto_attach_changed(struct ifnet *ifp)
3913 {
3914 	boolean_t changed = FALSE;
3915 	struct bridge_iflist *bif;
3916 	boolean_t input_broadcast;
3917 	struct bridge_softc *sc = ifp->if_bridge;
3918 
3919 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3920 	if (sc == NULL) {
3921 		return;
3922 	}
3923 	input_broadcast = interface_needs_input_broadcast(ifp);
3924 	BRIDGE_LOCK(sc);
3925 	bif = bridge_lookup_member_if(sc, ifp);
3926 	if (bif != NULL) {
3927 		changed = bif_set_input_broadcast(bif, input_broadcast);
3928 	}
3929 	BRIDGE_UNLOCK(sc);
3930 	if (changed) {
3931 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3932 		    "%s input broadcast %s", ifp->if_xname,
3933 		    input_broadcast ? "ENABLED" : "DISABLED");
3934 	}
3935 	return;
3936 }
3937 
3938 /*
3939  * interface_media_active:
3940  *
3941  *	Tells if an interface media is active.
3942  */
3943 static int
interface_media_active(struct ifnet * ifp)3944 interface_media_active(struct ifnet *ifp)
3945 {
3946 	struct ifmediareq   ifmr;
3947 	int status = 0;
3948 
3949 	bzero(&ifmr, sizeof(ifmr));
3950 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
3951 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
3952 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
3953 		}
3954 	}
3955 
3956 	return status;
3957 }
3958 
3959 /*
3960  * bridge_updatelinkstatus:
3961  *
3962  *      Update the media active status of the bridge based on the
3963  *	media active status of its member.
3964  *	If changed, return the corresponding onf/off link event.
3965  */
3966 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)3967 bridge_updatelinkstatus(struct bridge_softc *sc)
3968 {
3969 	struct bridge_iflist *bif;
3970 	int active_member = 0;
3971 	u_int32_t event_code = 0;
3972 
3973 	BRIDGE_LOCK_ASSERT_HELD(sc);
3974 
3975 	/*
3976 	 * Find out if we have an active interface
3977 	 */
3978 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
3979 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
3980 			active_member = 1;
3981 			break;
3982 		}
3983 	}
3984 
3985 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3986 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
3987 		event_code = KEV_DL_LINK_ON;
3988 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3989 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
3990 		event_code = KEV_DL_LINK_OFF;
3991 	}
3992 
3993 	return event_code;
3994 }
3995 
3996 /*
3997  * bridge_iflinkevent:
3998  */
3999 static void
bridge_iflinkevent(struct ifnet * ifp)4000 bridge_iflinkevent(struct ifnet *ifp)
4001 {
4002 	struct bridge_softc *sc = ifp->if_bridge;
4003 	struct bridge_iflist *bif;
4004 	u_int32_t event_code = 0;
4005 	int media_active;
4006 
4007 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
4008 
4009 	/* Check if the interface is a bridge member */
4010 	if (sc == NULL) {
4011 		return;
4012 	}
4013 
4014 	media_active = interface_media_active(ifp);
4015 	BRIDGE_LOCK(sc);
4016 	bif = bridge_lookup_member_if(sc, ifp);
4017 	if (bif != NULL) {
4018 		if (media_active) {
4019 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
4020 		} else {
4021 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
4022 		}
4023 		if (sc->sc_mac_nat_bif != NULL) {
4024 			bridge_mac_nat_flush_entries(sc, bif);
4025 		}
4026 
4027 		event_code = bridge_updatelinkstatus(sc);
4028 	}
4029 	BRIDGE_UNLOCK(sc);
4030 
4031 	if (event_code != 0) {
4032 		bridge_link_event(sc->sc_ifp, event_code);
4033 	}
4034 }
4035 
4036 /*
4037  * bridge_delayed_callback:
4038  *
4039  *	Makes a delayed call
4040  */
4041 static void
bridge_delayed_callback(void * param,__unused void * param2)4042 bridge_delayed_callback(void *param, __unused void *param2)
4043 {
4044 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
4045 	struct bridge_softc *sc = call->bdc_sc;
4046 
4047 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4048 	if (bridge_delayed_callback_delay > 0) {
4049 		struct timespec ts;
4050 
4051 		ts.tv_sec = bridge_delayed_callback_delay;
4052 		ts.tv_nsec = 0;
4053 
4054 		BRIDGE_LOG(LOG_NOTICE, 0,
4055 		    "sleeping for %d seconds",
4056 		    bridge_delayed_callback_delay);
4057 
4058 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4059 		    __func__, &ts);
4060 
4061 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4062 	}
4063 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4064 
4065 	BRIDGE_LOCK(sc);
4066 
4067 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4068 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4069 	    "%s call 0x%llx flags 0x%x",
4070 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4071 	    call->bdc_flags);
4072 }
4073 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4074 
4075 	if (call->bdc_flags & BDCF_CANCELLING) {
4076 		wakeup(call);
4077 	} else {
4078 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
4079 			(*call->bdc_func)(sc);
4080 		}
4081 	}
4082 	call->bdc_flags &= ~BDCF_OUTSTANDING;
4083 	BRIDGE_UNLOCK(sc);
4084 }
4085 
4086 /*
4087  * bridge_schedule_delayed_call:
4088  *
4089  *	Schedule a function to be called on a separate thread
4090  *      The actual call may be scheduled to run at a given time or ASAP.
4091  */
4092 static void
4093 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4094 {
4095 	uint64_t deadline = 0;
4096 	struct bridge_softc *sc = call->bdc_sc;
4097 
4098 	BRIDGE_LOCK_ASSERT_HELD(sc);
4099 
4100 	if ((sc->sc_flags & SCF_DETACHING) ||
4101 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4102 		return;
4103 	}
4104 
4105 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4106 		nanoseconds_to_absolutetime(
4107 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4108 			call->bdc_ts.tv_nsec, &deadline);
4109 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
4110 	}
4111 
4112 	call->bdc_flags = BDCF_OUTSTANDING;
4113 
4114 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4115 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4116 	    "%s call 0x%llx flags 0x%x",
4117 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4118 	    call->bdc_flags);
4119 }
4120 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4121 
4122 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4123 		thread_call_func_delayed(
4124 			(thread_call_func_t)bridge_delayed_callback,
4125 			call, deadline);
4126 	} else {
4127 		if (call->bdc_thread_call == NULL) {
4128 			call->bdc_thread_call = thread_call_allocate(
4129 				(thread_call_func_t)bridge_delayed_callback,
4130 				call);
4131 		}
4132 		thread_call_enter(call->bdc_thread_call);
4133 	}
4134 }
4135 
4136 /*
4137  * bridge_cancel_delayed_call:
4138  *
4139  *	Cancel a queued or running delayed call.
4140  *	If call is running, does not return until the call is done to
4141  *	prevent race condition with the brigde interface getting destroyed
4142  */
4143 static void
4144 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4145 {
4146 	boolean_t result;
4147 	struct bridge_softc *sc = call->bdc_sc;
4148 
4149 	/*
4150 	 * The call was never scheduled
4151 	 */
4152 	if (sc == NULL) {
4153 		return;
4154 	}
4155 
4156 	BRIDGE_LOCK_ASSERT_HELD(sc);
4157 
4158 	call->bdc_flags |= BDCF_CANCELLING;
4159 
4160 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4161 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4162 		    "%s call 0x%llx flags 0x%x",
4163 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4164 		    call->bdc_flags);
4165 		result = thread_call_func_cancel(
4166 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4167 
4168 		if (result) {
4169 			/*
4170 			 * We managed to dequeue the delayed call
4171 			 */
4172 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4173 		} else {
4174 			/*
4175 			 * Wait for delayed call do be done running
4176 			 */
4177 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4178 		}
4179 	}
4180 	call->bdc_flags &= ~BDCF_CANCELLING;
4181 }
4182 
4183 /*
4184  * bridge_cleanup_delayed_call:
4185  *
4186  *	Dispose resource allocated for a delayed call
4187  *	Assume the delayed call is not queued or running .
4188  */
4189 static void
4190 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4191 {
4192 	boolean_t result;
4193 	struct bridge_softc *sc = call->bdc_sc;
4194 
4195 	/*
4196 	 * The call was never scheduled
4197 	 */
4198 	if (sc == NULL) {
4199 		return;
4200 	}
4201 
4202 	BRIDGE_LOCK_ASSERT_HELD(sc);
4203 
4204 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4205 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4206 
4207 	if (call->bdc_thread_call != NULL) {
4208 		result = thread_call_free(call->bdc_thread_call);
4209 		if (result == FALSE) {
4210 			panic("%s thread_call_free() failed for call %p",
4211 			    __func__, call);
4212 		}
4213 		call->bdc_thread_call = NULL;
4214 	}
4215 }
4216 
4217 /*
4218  * bridge_init:
4219  *
4220  *	Initialize a bridge interface.
4221  */
4222 static int
4223 bridge_init(struct ifnet *ifp)
4224 {
4225 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4226 	errno_t error;
4227 
4228 	BRIDGE_LOCK_ASSERT_HELD(sc);
4229 
4230 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4231 		return 0;
4232 	}
4233 
4234 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4235 
4236 	/*
4237 	 * Calling bridge_aging_timer() is OK as there are no entries to
4238 	 * age so we're just going to arm the timer
4239 	 */
4240 	bridge_aging_timer(sc);
4241 #if BRIDGESTP
4242 	if (error == 0) {
4243 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4244 	}
4245 #endif /* BRIDGESTP */
4246 	return error;
4247 }
4248 
4249 /*
4250  * bridge_ifstop:
4251  *
4252  *	Stop the bridge interface.
4253  */
4254 static void
4255 bridge_ifstop(struct ifnet *ifp, int disable)
4256 {
4257 #pragma unused(disable)
4258 	struct bridge_softc *sc = ifp->if_softc;
4259 
4260 	BRIDGE_LOCK_ASSERT_HELD(sc);
4261 
4262 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4263 		return;
4264 	}
4265 
4266 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4267 
4268 #if BRIDGESTP
4269 	bstp_stop(&sc->sc_stp);
4270 #endif /* BRIDGESTP */
4271 
4272 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4273 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4274 }
4275 
4276 /*
4277  * bridge_compute_cksum:
4278  *
4279  *	If the packet has checksum flags, compare the hardware checksum
4280  *	capabilities of the source and destination interfaces. If they
4281  *	are the same, there's nothing to do. If they are different,
4282  *	finalize the checksum so that it can be sent on the destination
4283  *	interface.
4284  */
4285 static void
4286 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4287 {
4288 	uint32_t csum_flags;
4289 	uint16_t dst_hw_csum;
4290 	uint32_t did_sw = 0;
4291 	struct ether_header *eh;
4292 	uint16_t src_hw_csum;
4293 
4294 	if (src_if == dst_if) {
4295 		return;
4296 	}
4297 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4298 	if (csum_flags == 0) {
4299 		/* no checksum offload */
4300 		return;
4301 	}
4302 
4303 	/*
4304 	 * if destination/source differ in checksum offload
4305 	 * capabilities, finalize/compute the checksum
4306 	 */
4307 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4308 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4309 	if (dst_hw_csum == src_hw_csum) {
4310 		return;
4311 	}
4312 	eh = mtod(m, struct ether_header *);
4313 	switch (ntohs(eh->ether_type)) {
4314 	case ETHERTYPE_IP:
4315 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4316 		break;
4317 	case ETHERTYPE_IPV6:
4318 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4319 		break;
4320 	}
4321 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4322 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4323 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4324 	    m->m_pkthdr.csum_flags);
4325 }
4326 
4327 static errno_t
4328 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4329 {
4330 	struct flowadv  adv = { .code = FADV_SUCCESS };
4331 	errno_t         error;
4332 
4333 	error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4334 	if (error == 0) {
4335 		if (adv.code == FADV_FLOW_CONTROLLED) {
4336 			error = EQFULL;
4337 		} else if (adv.code == FADV_SUSPENDED) {
4338 			error = EQSUSPENDED;
4339 		}
4340 	}
4341 	return error;
4342 }
4343 
4344 static int
4345 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4346     bool *is_fragmented)
4347 {
4348 	int newoff;
4349 
4350 	*is_fragmented = false;
4351 	while (1) {
4352 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4353 		if (newoff < 0) {
4354 			return off;
4355 		} else if (newoff < off) {
4356 			return -1;    /* invalid */
4357 		} else if (newoff == off) {
4358 			return newoff;
4359 		}
4360 		off = newoff;
4361 		proto = *nxtp;
4362 		if (proto == IPPROTO_FRAGMENT) {
4363 			*is_fragmented = true;
4364 		}
4365 	}
4366 }
4367 
4368 static int
4369 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4370     ip_packet_info_t info_p, struct bripstats * stats_p)
4371 {
4372 	int             error = 0;
4373 	u_int           hlen;
4374 	u_int           ip_hlen;
4375 	u_int           ip_pay_len;
4376 	struct mbuf *   m0 = *mp;
4377 	int             off;
4378 	int             opt_len = 0;
4379 	int             proto = 0;
4380 
4381 	bzero(info_p, sizeof(*info_p));
4382 	if (is_ipv4) {
4383 		struct ip *     ip;
4384 		u_int           ip_total_len;
4385 
4386 		/* IPv4 */
4387 		hlen = mac_hlen + sizeof(struct ip);
4388 		if (m0->m_pkthdr.len < hlen) {
4389 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4390 			    "Short IP packet %d < %d",
4391 			    m0->m_pkthdr.len, hlen);
4392 			error = _EBADIP;
4393 			stats_p->bips_bad_ip++;
4394 			goto done;
4395 		}
4396 		if (m0->m_len < hlen) {
4397 			*mp = m0 = m_pullup(m0, hlen);
4398 			if (m0 == NULL) {
4399 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4400 				    "m_pullup failed hlen %d",
4401 				    hlen);
4402 				error = ENOBUFS;
4403 				stats_p->bips_bad_ip++;
4404 				goto done;
4405 			}
4406 		}
4407 		ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4408 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4409 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4410 			    "bad IP version");
4411 			error = _EBADIP;
4412 			stats_p->bips_bad_ip++;
4413 			goto done;
4414 		}
4415 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4416 		if (ip_hlen < sizeof(struct ip)) {
4417 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4418 			    "bad IP header length %d < %d",
4419 			    ip_hlen,
4420 			    (int)sizeof(struct ip));
4421 			error = _EBADIP;
4422 			stats_p->bips_bad_ip++;
4423 			goto done;
4424 		}
4425 		hlen = mac_hlen + ip_hlen;
4426 		if (m0->m_len < hlen) {
4427 			*mp = m0 = m_pullup(m0, hlen);
4428 			if (m0 == NULL) {
4429 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4430 				    "m_pullup failed hlen %d",
4431 				    hlen);
4432 				error = ENOBUFS;
4433 				stats_p->bips_bad_ip++;
4434 				goto done;
4435 			}
4436 		}
4437 
4438 		ip_total_len = ntohs(ip->ip_len);
4439 		if (ip_total_len < ip_hlen) {
4440 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4441 			    "IP total len %d < header len %d",
4442 			    ip_total_len, ip_hlen);
4443 			error = _EBADIP;
4444 			stats_p->bips_bad_ip++;
4445 			goto done;
4446 		}
4447 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4448 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4449 			    "invalid IP payload length %d > %d",
4450 			    ip_total_len,
4451 			    (m0->m_pkthdr.len - mac_hlen));
4452 			error = _EBADIP;
4453 			stats_p->bips_bad_ip++;
4454 			goto done;
4455 		}
4456 		ip_pay_len = ip_total_len - ip_hlen;
4457 		info_p->ip_proto = ip->ip_p;
4458 		info_p->ip_hdr.ip = ip;
4459 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4460 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4461 			info_p->ip_is_fragmented = true;
4462 		}
4463 		stats_p->bips_ip++;
4464 	} else {
4465 		struct ip6_hdr *ip6;
4466 
4467 		/* IPv6 */
4468 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4469 		if (m0->m_pkthdr.len < hlen) {
4470 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4471 			    "short IPv6 packet %d < %d",
4472 			    m0->m_pkthdr.len, hlen);
4473 			error = _EBADIPV6;
4474 			stats_p->bips_bad_ip6++;
4475 			goto done;
4476 		}
4477 		if (m0->m_len < hlen) {
4478 			*mp = m0 = m_pullup(m0, hlen);
4479 			if (m0 == NULL) {
4480 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4481 				    "m_pullup failed hlen %d",
4482 				    hlen);
4483 				error = ENOBUFS;
4484 				stats_p->bips_bad_ip6++;
4485 				goto done;
4486 			}
4487 		}
4488 		ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4489 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4490 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4491 			    "bad IPv6 version");
4492 			error = _EBADIPV6;
4493 			stats_p->bips_bad_ip6++;
4494 			goto done;
4495 		}
4496 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4497 		    &info_p->ip_is_fragmented);
4498 		if (off < 0 || m0->m_pkthdr.len < off) {
4499 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4500 			    "ip6_lasthdr() returned %d",
4501 			    off);
4502 			error = _EBADIPV6;
4503 			stats_p->bips_bad_ip6++;
4504 			goto done;
4505 		}
4506 		ip_hlen = sizeof(*ip6);
4507 		opt_len = off - mac_hlen - ip_hlen;
4508 		if (opt_len < 0) {
4509 			error = _EBADIPV6;
4510 			stats_p->bips_bad_ip6++;
4511 			goto done;
4512 		}
4513 		info_p->ip_proto = proto;
4514 		info_p->ip_hdr.ip6 = ip6;
4515 		ip_pay_len = ntohs(ip6->ip6_plen);
4516 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4517 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4518 			    "invalid IPv6 payload length %d > %d",
4519 			    ip_pay_len,
4520 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4521 			error = _EBADIPV6;
4522 			stats_p->bips_bad_ip6++;
4523 			goto done;
4524 		}
4525 		stats_p->bips_ip6++;
4526 	}
4527 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4528 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4529 	    is_ipv4 ? '4' : '6',
4530 	    proto, ip_hlen, ip_pay_len, opt_len,
4531 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4532 	info_p->ip_hlen = ip_hlen;
4533 	info_p->ip_pay_len = ip_pay_len;
4534 	info_p->ip_opt_len = opt_len;
4535 	info_p->ip_is_ipv4 = is_ipv4;
4536 done:
4537 	return error;
4538 }
4539 
4540 static int
4541 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4542     ip_packet_info_t info_p, struct bripstats * stats_p)
4543 {
4544 	int             error;
4545 	u_int           hlen;
4546 
4547 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4548 	if (error != 0) {
4549 		goto done;
4550 	}
4551 	if (info_p->ip_proto != IPPROTO_TCP) {
4552 		/* not a TCP frame, not an error, just a bad guess */
4553 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4554 		    "non-TCP (%d) IPv%c frame %d bytes",
4555 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4556 		    (*mp)->m_pkthdr.len);
4557 		goto done;
4558 	}
4559 	if (info_p->ip_is_fragmented) {
4560 		/* both TSO and IP fragmentation don't make sense */
4561 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4562 		    "fragmented TSO packet?");
4563 		stats_p->bips_bad_tcp++;
4564 		error = _EBADTCP;
4565 		goto done;
4566 	}
4567 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4568 	    info_p->ip_opt_len;
4569 	if ((*mp)->m_len < hlen) {
4570 		*mp = m_pullup(*mp, hlen);
4571 		if (*mp == NULL) {
4572 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4573 			    "m_pullup %d failed",
4574 			    hlen);
4575 			stats_p->bips_bad_tcp++;
4576 			error = _EBADTCP;
4577 			goto done;
4578 		}
4579 	}
4580 	info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4581 	    info_p->ip_hlen + info_p->ip_opt_len;
4582 done:
4583 	return error;
4584 }
4585 
4586 static inline void
4587 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4588 {
4589 	if (proto == IPPROTO_TCP) {
4590 		stats_p->brcs_tcp_checksum++;
4591 	} else {
4592 		stats_p->brcs_udp_checksum++;
4593 	}
4594 	return;
4595 }
4596 
4597 static bool
4598 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4599 {
4600 	uint16_t        ether_type;
4601 	bool            is_ip = TRUE;
4602 
4603 	ether_type = ntohs(eh->ether_type);
4604 	switch (ether_type) {
4605 	case ETHERTYPE_IP:
4606 		*is_ipv4 = TRUE;
4607 		break;
4608 	case ETHERTYPE_IPV6:
4609 		*is_ipv4 = FALSE;
4610 		break;
4611 	default:
4612 		is_ip = FALSE;
4613 		break;
4614 	}
4615 	return is_ip;
4616 }
4617 
4618 static errno_t
4619 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4620 {
4621 	struct brcsumstats *csum_stats_p;
4622 	struct ether_header     *eh;
4623 	errno_t         error = 0;
4624 	ip_packet_info  info;
4625 	bool            is_ipv4;
4626 	struct mbuf *   m;
4627 	u_int           mac_hlen = sizeof(struct ether_header);
4628 	uint16_t        sum;
4629 	bool            valid;
4630 
4631 	eh = mtod(*mp, struct ether_header *);
4632 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4633 		goto done;
4634 	}
4635 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4636 	    &stats_p->brms_out_ip);
4637 	m = *mp;
4638 	if (error != 0) {
4639 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4640 		    "bridge_get_ip_proto failed %d",
4641 		    error);
4642 		goto done;
4643 	}
4644 	if (is_ipv4) {
4645 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4646 			/* hardware offloaded IP header checksum */
4647 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4648 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4649 			    "IP checksum HW %svalid",
4650 			    valid ? "" : "in");
4651 			if (!valid) {
4652 				stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4653 				error = _EBADIPCHECKSUM;
4654 				goto done;
4655 			}
4656 			stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4657 		} else {
4658 			/* verify */
4659 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4660 			valid = (sum == 0);
4661 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4662 			    "IP checksum SW %svalid",
4663 			    valid ? "" : "in");
4664 			if (!valid) {
4665 				stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4666 				error = _EBADIPCHECKSUM;
4667 				goto done;
4668 			}
4669 			stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4670 		}
4671 	}
4672 	if (info.ip_is_fragmented) {
4673 		/* can't verify checksum on fragmented packets */
4674 		goto done;
4675 	}
4676 	switch (info.ip_proto) {
4677 	case IPPROTO_TCP:
4678 		stats_p->brms_out_ip.bips_tcp++;
4679 		break;
4680 	case IPPROTO_UDP:
4681 		stats_p->brms_out_ip.bips_udp++;
4682 		break;
4683 	default:
4684 		goto done;
4685 	}
4686 	/* check for hardware offloaded UDP/TCP checksum */
4687 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4688 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4689 		/* checksum verified by hardware */
4690 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4691 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4692 		    "IPv%c %s checksum HW 0x%x %svalid",
4693 		    is_ipv4 ? '4' : '6',
4694 		    (info.ip_proto == IPPROTO_TCP)
4695 		    ? "TCP" : "UDP",
4696 		    m->m_pkthdr.csum_data,
4697 		    valid ? "" : "in" );
4698 		if (!valid) {
4699 			/* bad checksum */
4700 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4701 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4702 			    : _EBADTCPCHECKSUM;
4703 		} else {
4704 			/* good checksum */
4705 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4706 		}
4707 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4708 		goto done;
4709 	}
4710 	m->m_data += mac_hlen;
4711 	m->m_len -= mac_hlen;
4712 	m->m_pkthdr.len -= mac_hlen;
4713 	if (is_ipv4) {
4714 		sum = inet_cksum(m, info.ip_proto,
4715 		    info.ip_hlen,
4716 		    info.ip_pay_len);
4717 	} else {
4718 		sum = inet6_cksum(m, info.ip_proto,
4719 		    info.ip_hlen + info.ip_opt_len,
4720 		    info.ip_pay_len - info.ip_opt_len);
4721 	}
4722 	valid = (sum == 0);
4723 	if (valid) {
4724 		csum_stats_p = &stats_p->brms_out_cksum_good;
4725 	} else {
4726 		csum_stats_p = &stats_p->brms_out_cksum_bad;
4727 		error = (info.ip_proto == IPPROTO_TCP)
4728 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4729 	}
4730 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4731 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4732 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4733 	    is_ipv4 ? '4' : '6',
4734 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4735 	    valid ? "" : "in",
4736 	    sum, info.ip_hlen, info.ip_pay_len);
4737 	m->m_data -= mac_hlen;
4738 	m->m_len += mac_hlen;
4739 	m->m_pkthdr.len += mac_hlen;
4740 done:
4741 	return error;
4742 }
4743 
4744 static errno_t
4745 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4746     struct ifbrmstats * stats_p)
4747 {
4748 	uint16_t *      csum_p;
4749 	errno_t         error = 0;
4750 	u_int           hlen;
4751 	struct mbuf *   m0 = *mp;
4752 	u_int           mac_hlen = sizeof(struct ether_header);
4753 	u_int           pkt_hdr_len;
4754 	struct tcphdr * tcp;
4755 	u_int           tcp_hlen;
4756 	struct udphdr * udp;
4757 
4758 	if (info_p->ip_is_ipv4) {
4759 		/* compute IP header checksum */
4760 		info_p->ip_hdr.ip->ip_sum = 0;
4761 		info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4762 		    info_p->ip_hlen);
4763 		stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4764 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4765 		    "IPv4 checksum 0x%x",
4766 		    ntohs(info_p->ip_hdr.ip->ip_sum));
4767 	}
4768 	if (info_p->ip_is_fragmented) {
4769 		/* can't compute checksum on fragmented packets */
4770 		goto done;
4771 	}
4772 	pkt_hdr_len = m0->m_pkthdr.len;
4773 	switch (info_p->ip_proto) {
4774 	case IPPROTO_TCP:
4775 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4776 		    + sizeof(struct tcphdr);
4777 		if (m0->m_len < hlen) {
4778 			*mp = m0 = m_pullup(m0, hlen);
4779 			if (m0 == NULL) {
4780 				stats_p->brms_in_ip.bips_bad_tcp++;
4781 				error = _EBADTCP;
4782 				goto done;
4783 			}
4784 		}
4785 		tcp = (struct tcphdr *)(void *)
4786 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4787 		    + info_p->ip_opt_len);
4788 		tcp_hlen = tcp->th_off << 2;
4789 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4790 		if (hlen > pkt_hdr_len) {
4791 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4792 			    "bad tcp header length %u",
4793 			    tcp_hlen);
4794 			stats_p->brms_in_ip.bips_bad_tcp++;
4795 			error = _EBADTCP;
4796 			goto done;
4797 		}
4798 		csum_p = &tcp->th_sum;
4799 		stats_p->brms_in_ip.bips_tcp++;
4800 		break;
4801 	case IPPROTO_UDP:
4802 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4803 		if (m0->m_len < hlen) {
4804 			*mp = m0 = m_pullup(m0, hlen);
4805 			if (m0 == NULL) {
4806 				stats_p->brms_in_ip.bips_bad_udp++;
4807 				error = ENOBUFS;
4808 				goto done;
4809 			}
4810 		}
4811 		udp = (struct udphdr *)(void *)
4812 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4813 		    + info_p->ip_opt_len);
4814 		csum_p = &udp->uh_sum;
4815 		stats_p->brms_in_ip.bips_udp++;
4816 		break;
4817 	default:
4818 		/* not TCP or UDP */
4819 		goto done;
4820 	}
4821 	*csum_p = 0;
4822 	m0->m_data += mac_hlen;
4823 	m0->m_len -= mac_hlen;
4824 	m0->m_pkthdr.len -= mac_hlen;
4825 	if (info_p->ip_is_ipv4) {
4826 		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4827 		    info_p->ip_pay_len);
4828 	} else {
4829 		*csum_p = inet6_cksum(m0, info_p->ip_proto,
4830 		    info_p->ip_hlen + info_p->ip_opt_len,
4831 		    info_p->ip_pay_len - info_p->ip_opt_len);
4832 	}
4833 	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4834 		/* RFC 1122 4.1.3.4 */
4835 		*csum_p = 0xffff;
4836 	}
4837 	m0->m_data -= mac_hlen;
4838 	m0->m_len += mac_hlen;
4839 	m0->m_pkthdr.len += mac_hlen;
4840 	proto_csum_stats_increment(info_p->ip_proto,
4841 	    &stats_p->brms_in_computed_cksum);
4842 
4843 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4844 	    "IPv%c %s set checksum 0x%x",
4845 	    info_p->ip_is_ipv4 ? '4' : '6',
4846 	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4847 	    ntohs(*csum_p));
4848 done:
4849 	return error;
4850 }
4851 
4852 static errno_t
4853 bridge_send(struct ifnet *src_ifp,
4854     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4855 {
4856 	switch (cksum_op) {
4857 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4858 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4859 		break;
4860 	case CHECKSUM_OPERATION_FINALIZE:
4861 		/* the checksum might not be correct, finalize now */
4862 		bridge_finalize_cksum(dst_ifp, m);
4863 		break;
4864 	case CHECKSUM_OPERATION_COMPUTE:
4865 		bridge_compute_cksum(src_ifp, dst_ifp, m);
4866 		break;
4867 	default:
4868 		break;
4869 	}
4870 #if HAS_IF_CAP
4871 	/*
4872 	 * If underlying interface can not do VLAN tag insertion itself
4873 	 * then attach a packet tag that holds it.
4874 	 */
4875 	if ((m->m_flags & M_VLANTAG) &&
4876 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4877 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4878 		if (m == NULL) {
4879 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4880 			    "%s: unable to prepend VLAN header",
4881 			    dst_ifp->if_xname);
4882 			(void) ifnet_stat_increment_out(dst_ifp,
4883 			    0, 0, 1);
4884 			return 0;
4885 		}
4886 		m->m_flags &= ~M_VLANTAG;
4887 	}
4888 #endif /* HAS_IF_CAP */
4889 	return bridge_transmit(dst_ifp, m);
4890 }
4891 
4892 static errno_t
4893 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4894 {
4895 	errno_t                 error;
4896 	u_int                   mac_hlen;
4897 
4898 	mac_hlen = sizeof(struct ether_header);
4899 
4900 #if HAS_IF_CAP
4901 	/*
4902 	 * If underlying interface can not do VLAN tag insertion itself
4903 	 * then attach a packet tag that holds it.
4904 	 */
4905 	if ((m->m_flags & M_VLANTAG) &&
4906 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4907 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4908 		if (m == NULL) {
4909 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4910 			    "%s: unable to prepend VLAN header",
4911 			    dst_ifp->if_xname);
4912 			(void) ifnet_stat_increment_out(dst_ifp,
4913 			    0, 0, 1);
4914 			error = ENOBUFS;
4915 			goto done;
4916 		}
4917 		m->m_flags &= ~M_VLANTAG;
4918 		mac_hlen += ETHER_VLAN_ENCAP_LEN;
4919 	}
4920 #endif /* HAS_IF_CAP */
4921 	error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4922 	return error;
4923 }
4924 
4925 /*
4926  * tso_hwassist:
4927  * - determine whether the destination interface supports TSO offload
4928  * - if the packet is already marked for offload and the hardware supports
4929  *   it, just allow the packet to continue on
4930  * - if not, parse the packet headers to verify that this is a large TCP
4931  *   packet requiring segmentation; if the hardware doesn't support it
4932  *   set need_sw_tso; otherwise, mark the packet for TSO offload
4933  */
4934 static int
4935 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
4936     bool * need_sw_tso, bool * is_large_tcp)
4937 {
4938 	int             error = 0;
4939 	u_int32_t       if_csum;
4940 	u_int32_t       if_tso;
4941 	u_int32_t       mbuf_tso;
4942 	bool            supports_cksum = false;
4943 
4944 	*need_sw_tso = false;
4945 	*is_large_tcp = false;
4946 	if (is_ipv4) {
4947 		/*
4948 		 * Enable both TCP and IP offload if the hardware supports it.
4949 		 * If the hardware doesn't support TCP offload, supports_cksum
4950 		 * will be false so we won't set either offload.
4951 		 */
4952 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
4953 		supports_cksum = (if_csum & CSUM_TCP) != 0;
4954 		if_tso = IFNET_TSO_IPV4;
4955 		mbuf_tso = CSUM_TSO_IPV4;
4956 	} else {
4957 		supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
4958 		if_csum = CSUM_TCPIPV6;
4959 		if_tso = IFNET_TSO_IPV6;
4960 		mbuf_tso = CSUM_TSO_IPV6;
4961 	}
4962 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4963 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
4964 	    ifp->if_xname, supports_cksum ? "" : " not",
4965 	    ifp->if_hwassist, if_csum);
4966 	if ((ifp->if_hwassist & if_tso) != 0 &&
4967 	    ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
4968 		/* hardware TSO, mbuf already marked */
4969 	} else {
4970 		/* verify that this is a large TCP frame */
4971 		uint32_t                csum_flags;
4972 		ip_packet_info          info;
4973 		int                     mss;
4974 		struct bripstats        stats;
4975 		struct tcphdr *         tcp;
4976 
4977 		error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
4978 		    &info, &stats);
4979 		if (error != 0) {
4980 			/* bad packet */
4981 			goto done;
4982 		}
4983 		if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
4984 		    ifp->if_mtu) {
4985 			/* not actually a large packet */
4986 			goto done;
4987 		}
4988 		if (info.ip_proto_hdr == NULL) {
4989 			/* not a TCP packet */
4990 			goto done;
4991 		}
4992 		if ((ifp->if_hwassist & if_tso) == 0) {
4993 			/* hardware does not support TSO, enable sw tso */
4994 			*need_sw_tso = if_bridge_segmentation != 0;
4995 			goto done;
4996 		}
4997 		/* use hardware TSO */
4998 		(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
4999 		tcp = (struct tcphdr *)info.ip_proto_hdr;
5000 		mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
5001 		    - (tcp->th_off << 2) - if_bridge_tso_reduce_mss_tx;
5002 		assert(mss > 0);
5003 		csum_flags = mbuf_tso;
5004 		if (supports_cksum) {
5005 			csum_flags |= if_csum;
5006 		}
5007 		(*mp)->m_pkthdr.tso_segsz = mss;
5008 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
5009 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
5010 		*is_large_tcp = true;
5011 	}
5012 done:
5013 	return error;
5014 }
5015 
5016 /*
5017  * bridge_enqueue:
5018  *
5019  *	Enqueue a packet on a bridge member interface.
5020  *
5021  */
5022 static errno_t
5023 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
5024     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
5025 {
5026 	errno_t         error = 0;
5027 	int             len;
5028 
5029 	VERIFY(dst_ifp != NULL);
5030 
5031 	/*
5032 	 * We may be sending a fragment so traverse the mbuf
5033 	 *
5034 	 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
5035 	 */
5036 	for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
5037 		bool            need_sw_tso = false;
5038 		bool            is_ipv4 = false;
5039 		bool            is_large_pkt;
5040 		errno_t         _error = 0;
5041 
5042 		len = m->m_pkthdr.len;
5043 		m->m_flags |= M_PROTO1; /* set to avoid loops */
5044 		next_m = m->m_nextpkt;
5045 		m->m_nextpkt = NULL;
5046 		/*
5047 		 * Need to segment the packet if it is a large frame
5048 		 * and the destination interface does not support TSO.
5049 		 *
5050 		 * Note that with trailers, it's possible for a packet to
5051 		 * be large but not actually require segmentation.
5052 		 */
5053 		is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
5054 		if (is_large_pkt) {
5055 			struct ether_header     *eh;
5056 			bool                    is_large_tcp = false;
5057 
5058 			eh = mtod(m, struct ether_header *);
5059 			if (ether_header_type_is_ip(eh, &is_ipv4)) {
5060 				_error = tso_hwassist(&m, is_ipv4,
5061 				    dst_ifp, sizeof(struct ether_header),
5062 				    &need_sw_tso, &is_large_tcp);
5063 				if (is_large_tcp) {
5064 					cksum_op = CHECKSUM_OPERATION_NONE;
5065 				}
5066 			} else {
5067 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5068 				    "large non IP packet");
5069 			}
5070 		}
5071 		if (_error != 0) {
5072 			if (m != NULL) {
5073 				m_freem(m);
5074 			}
5075 		} else if (need_sw_tso) {
5076 			_error = bridge_send_tso(dst_ifp, m, is_ipv4);
5077 		} else {
5078 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5079 			    "%s bridge_send(%s) len %d op %d",
5080 			    bridge_ifp->if_xname,
5081 			    dst_ifp->if_xname,
5082 			    len, cksum_op);
5083 			_error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
5084 		}
5085 
5086 		/* Preserve first error value */
5087 		if (error == 0 && _error != 0) {
5088 			error = _error;
5089 		}
5090 		if (_error == 0) {
5091 			(void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
5092 		} else {
5093 			(void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
5094 		}
5095 	}
5096 
5097 	return error;
5098 }
5099 
5100 #if HAS_BRIDGE_DUMMYNET
5101 /*
5102  * bridge_dummynet:
5103  *
5104  *	Receive a queued packet from dummynet and pass it on to the output
5105  *	interface.
5106  *
5107  *	The mbuf has the Ethernet header already attached.
5108  */
5109 static void
5110 bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
5111 {
5112 	struct bridge_softc *sc;
5113 
5114 	sc = ifp->if_bridge;
5115 
5116 	/*
5117 	 * The packet didn't originate from a member interface. This should only
5118 	 * ever happen if a member interface is removed while packets are
5119 	 * queued for it.
5120 	 */
5121 	if (sc == NULL) {
5122 		m_freem(m);
5123 		return;
5124 	}
5125 
5126 	if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
5127 		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
5128 			return;
5129 		}
5130 		if (m == NULL) {
5131 			return;
5132 		}
5133 	}
5134 	(void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
5135 }
5136 
5137 #endif /* HAS_BRIDGE_DUMMYNET */
5138 
5139 /*
5140  * bridge_member_output:
5141  *
5142  *	Send output from a bridge member interface.  This
5143  *	performs the bridging function for locally originated
5144  *	packets.
5145  *
5146  *	The mbuf has the Ethernet header already attached.
5147  */
5148 static errno_t
5149 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5150 {
5151 	ifnet_t bridge_ifp;
5152 	struct ether_header *eh;
5153 	struct ifnet *dst_if;
5154 	uint16_t vlan;
5155 	struct bridge_iflist *mac_nat_bif;
5156 	ifnet_t mac_nat_ifp;
5157 	mbuf_t m = *data;
5158 
5159 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5160 	    "ifp %s", ifp->if_xname);
5161 	if (m->m_len < ETHER_HDR_LEN) {
5162 		m = m_pullup(m, ETHER_HDR_LEN);
5163 		if (m == NULL) {
5164 			*data = NULL;
5165 			return EJUSTRETURN;
5166 		}
5167 	}
5168 
5169 	eh = mtod(m, struct ether_header *);
5170 	vlan = VLANTAGOF(m);
5171 
5172 	BRIDGE_LOCK(sc);
5173 	mac_nat_bif = sc->sc_mac_nat_bif;
5174 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5175 	if (mac_nat_ifp == ifp) {
5176 		/* record the IP address used by the MAC NAT interface */
5177 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5178 		m = *data;
5179 		if (m == NULL) {
5180 			/* packet was deallocated */
5181 			BRIDGE_UNLOCK(sc);
5182 			return EJUSTRETURN;
5183 		}
5184 	}
5185 	bridge_ifp = sc->sc_ifp;
5186 
5187 	/*
5188 	 * APPLE MODIFICATION
5189 	 * If the packet is an 802.1X ethertype, then only send on the
5190 	 * original output interface.
5191 	 */
5192 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5193 		dst_if = ifp;
5194 		goto sendunicast;
5195 	}
5196 
5197 	/*
5198 	 * If bridge is down, but the original output interface is up,
5199 	 * go ahead and send out that interface.  Otherwise, the packet
5200 	 * is dropped below.
5201 	 */
5202 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5203 		dst_if = ifp;
5204 		goto sendunicast;
5205 	}
5206 
5207 	/*
5208 	 * If the packet is a multicast, or we don't know a better way to
5209 	 * get there, send to all interfaces.
5210 	 */
5211 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5212 		dst_if = NULL;
5213 	} else {
5214 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5215 	}
5216 	if (dst_if == NULL) {
5217 		struct bridge_iflist *bif;
5218 		struct mbuf *mc;
5219 		errno_t error;
5220 
5221 
5222 		bridge_span(sc, m);
5223 
5224 		BRIDGE_LOCK2REF(sc, error);
5225 		if (error != 0) {
5226 			m_freem(m);
5227 			return EJUSTRETURN;
5228 		}
5229 
5230 		/*
5231 		 * Duplicate and send the packet across all member interfaces
5232 		 * except the originating interface.
5233 		 */
5234 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5235 			dst_if = bif->bif_ifp;
5236 			if (dst_if == ifp) {
5237 				/* skip the originating interface */
5238 				continue;
5239 			}
5240 			/* skip interface with inactive link status */
5241 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5242 				continue;
5243 			}
5244 #if 0
5245 			if (dst_if->if_type == IFT_GIF) {
5246 				continue;
5247 			}
5248 #endif
5249 			/* skip interface that isn't running */
5250 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5251 				continue;
5252 			}
5253 			/*
5254 			 * If the interface is participating in spanning
5255 			 * tree, make sure the port is in a state that
5256 			 * allows forwarding.
5257 			 */
5258 			if ((bif->bif_ifflags & IFBIF_STP) &&
5259 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5260 				continue;
5261 			}
5262 			/*
5263 			 * If the destination is the MAC NAT interface,
5264 			 * skip sending the packet. The packet can't be sent
5265 			 * if the source MAC is incorrect.
5266 			 */
5267 			if (dst_if == mac_nat_ifp) {
5268 				continue;
5269 			}
5270 
5271 			/* make a deep copy to send on this member interface */
5272 			mc = m_dup(m, M_DONTWAIT);
5273 			if (mc == NULL) {
5274 				(void)ifnet_stat_increment_out(bridge_ifp,
5275 				    0, 0, 1);
5276 				continue;
5277 			}
5278 			(void)bridge_enqueue(bridge_ifp, ifp, dst_if,
5279 			    mc, CHECKSUM_OPERATION_COMPUTE);
5280 		}
5281 		BRIDGE_UNREF(sc);
5282 
5283 		if ((ifp->if_flags & IFF_RUNNING) == 0) {
5284 			m_freem(m);
5285 			return EJUSTRETURN;
5286 		}
5287 		/* allow packet to continue on the originating interface */
5288 		return 0;
5289 	}
5290 
5291 sendunicast:
5292 	/*
5293 	 * XXX Spanning tree consideration here?
5294 	 */
5295 
5296 	bridge_span(sc, m);
5297 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5298 		m_freem(m);
5299 		BRIDGE_UNLOCK(sc);
5300 		return EJUSTRETURN;
5301 	}
5302 
5303 	BRIDGE_UNLOCK(sc);
5304 	if (dst_if == ifp) {
5305 		/* allow packet to continue on the originating interface */
5306 		return 0;
5307 	}
5308 	if (dst_if != mac_nat_ifp) {
5309 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5310 		    CHECKSUM_OPERATION_COMPUTE);
5311 	} else {
5312 		/*
5313 		 * This is not the original output interface
5314 		 * and the destination is the MAC NAT interface.
5315 		 * Drop the packet because the packet can't be sent
5316 		 * if the source MAC is incorrect.
5317 		 */
5318 		m_freem(m);
5319 	}
5320 	return EJUSTRETURN;
5321 }
5322 
5323 /*
5324  * Output callback.
5325  *
5326  * This routine is called externally from above only when if_bridge_txstart
5327  * is disabled; otherwise it is called internally by bridge_start().
5328  */
5329 static int
5330 bridge_output(struct ifnet *ifp, struct mbuf *m)
5331 {
5332 	struct bridge_softc *sc = ifnet_softc(ifp);
5333 	struct ether_header *eh;
5334 	struct ifnet *dst_if = NULL;
5335 	int error = 0;
5336 
5337 	eh = mtod(m, struct ether_header *);
5338 
5339 	BRIDGE_LOCK(sc);
5340 
5341 	if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5342 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5343 	}
5344 
5345 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5346 
5347 #if NBPFILTER > 0
5348 	if (sc->sc_bpf_output) {
5349 		bridge_bpf_output(ifp, m);
5350 	}
5351 #endif
5352 
5353 	if (dst_if == NULL) {
5354 		/* callee will unlock */
5355 		bridge_broadcast(sc, NULL, m, 0);
5356 	} else {
5357 		ifnet_t bridge_ifp;
5358 
5359 		bridge_ifp = sc->sc_ifp;
5360 		BRIDGE_UNLOCK(sc);
5361 
5362 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5363 		    CHECKSUM_OPERATION_FINALIZE);
5364 	}
5365 
5366 	return error;
5367 }
5368 
5369 static void
5370 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5371 {
5372 	struct ether_header *eh;
5373 	bool is_ipv4;
5374 	uint32_t sw_csum, hwcap;
5375 	uint32_t did_sw;
5376 	uint32_t csum_flags;
5377 
5378 	eh = mtod(m, struct ether_header *);
5379 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5380 		return;
5381 	}
5382 
5383 	/* do in software what the hardware cannot */
5384 	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5385 	csum_flags = m->m_pkthdr.csum_flags;
5386 	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5387 	sw_csum &= IF_HWASSIST_CSUM_MASK;
5388 
5389 	if (is_ipv4) {
5390 		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5391 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5392 			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5393 				uint16_t start =
5394 				    sizeof(*eh) + sizeof(struct ip);
5395 				uint16_t ulpoff =
5396 				    m->m_pkthdr.csum_data & 0xffff;
5397 				m->m_pkthdr.csum_flags |=
5398 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5399 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5400 				m->m_pkthdr.csum_tx_start = start;
5401 			} else {
5402 				sw_csum |= (CSUM_DELAY_DATA &
5403 				    m->m_pkthdr.csum_flags);
5404 			}
5405 		}
5406 		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5407 	} else {
5408 		if ((hwcap & CSUM_PARTIAL) &&
5409 		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5410 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5411 			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5412 				uint16_t start =
5413 				    sizeof(*eh) + sizeof(struct ip6_hdr);
5414 				uint16_t ulpoff =
5415 				    m->m_pkthdr.csum_data & 0xffff;
5416 				m->m_pkthdr.csum_flags |=
5417 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5418 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5419 				m->m_pkthdr.csum_tx_start = start;
5420 			} else {
5421 				sw_csum |= (CSUM_DELAY_IPV6_DATA &
5422 				    m->m_pkthdr.csum_flags);
5423 			}
5424 		}
5425 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5426 	}
5427 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5428 	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5429 	    ifp->if_xname, csum_flags, hwcap, sw_csum,
5430 	    did_sw, m->m_pkthdr.csum_flags);
5431 }
5432 
/*
 * bridge_start:
 *
 *	Start output on a bridge: drain the interface's send queue,
 *	passing each dequeued packet to bridge_output().
 *
 * This routine is invoked by the start worker thread; because we never call
 * it directly, there is no need to deploy any serialization mechanism other
 * than what's already used by the worker thread, i.e. this is already single
 * threaded.
 *
 * This routine is called only when if_bridge_txstart is enabled.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *m;

	/* keep going until ifnet_dequeue() reports a non-zero result */
	while (ifnet_dequeue(ifp, &m) == 0) {
		(void) bridge_output(ifp, m);
	}
}
5458 
5459 /*
5460  * bridge_forward:
5461  *
5462  *	The forwarding function of the bridge.
5463  *
5464  *	NOTE: Releases the lock on return.
5465  */
5466 static void
5467 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5468     struct mbuf *m)
5469 {
5470 	struct bridge_iflist *dbif;
5471 	ifnet_t bridge_ifp;
5472 	struct ifnet *src_if, *dst_if;
5473 	struct ether_header *eh;
5474 	uint16_t vlan;
5475 	uint8_t *dst;
5476 	int error;
5477 	struct mac_nat_record mnr;
5478 	bool translate_mac = FALSE;
5479 	uint32_t sc_filter_flags = 0;
5480 
5481 	BRIDGE_LOCK_ASSERT_HELD(sc);
5482 
5483 	bridge_ifp = sc->sc_ifp;
5484 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5485 	    "%s m 0x%llx", bridge_ifp->if_xname,
5486 	    (uint64_t)VM_KERNEL_ADDRPERM(m));
5487 
5488 	src_if = m->m_pkthdr.rcvif;
5489 	if (src_if != sbif->bif_ifp) {
5490 		const char *    src_if_name;
5491 
5492 		src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5493 		BRIDGE_LOG(LOG_NOTICE, 0,
5494 		    "src_if %s != bif_ifp %s",
5495 		    src_if_name, sbif->bif_ifp->if_xname);
5496 		goto drop;
5497 	}
5498 
5499 	(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5500 	vlan = VLANTAGOF(m);
5501 
5502 
5503 	if ((sbif->bif_ifflags & IFBIF_STP) &&
5504 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5505 		goto drop;
5506 	}
5507 
5508 	eh = mtod(m, struct ether_header *);
5509 	dst = eh->ether_dhost;
5510 
5511 	/* If the interface is learning, record the address. */
5512 	if (sbif->bif_ifflags & IFBIF_LEARNING) {
5513 		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5514 		    sbif, 0, IFBAF_DYNAMIC);
5515 		/*
5516 		 * If the interface has addresses limits then deny any source
5517 		 * that is not in the cache.
5518 		 */
5519 		if (error && sbif->bif_addrmax) {
5520 			goto drop;
5521 		}
5522 	}
5523 
5524 	if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5525 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5526 		goto drop;
5527 	}
5528 
5529 	/*
5530 	 * At this point, the port either doesn't participate
5531 	 * in spanning tree or it is in the forwarding state.
5532 	 */
5533 
5534 	/*
5535 	 * If the packet is unicast, destined for someone on
5536 	 * "this" side of the bridge, drop it.
5537 	 */
5538 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5539 		/* unicast */
5540 		dst_if = bridge_rtlookup(sc, dst, vlan);
5541 		if (src_if == dst_if) {
5542 			goto drop;
5543 		}
5544 	} else {
5545 		/* broadcast/multicast */
5546 
5547 		/*
5548 		 * Check if its a reserved multicast address, any address
5549 		 * listed in 802.1D section 7.12.6 may not be forwarded by the
5550 		 * bridge.
5551 		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5552 		 */
5553 		if (dst[0] == 0x01 && dst[1] == 0x80 &&
5554 		    dst[2] == 0xc2 && dst[3] == 0x00 &&
5555 		    dst[4] == 0x00 && dst[5] <= 0x0f) {
5556 			goto drop;
5557 		}
5558 
5559 
5560 		/* ...forward it to all interfaces. */
5561 		atomic_add_64(&bridge_ifp->if_imcasts, 1);
5562 		dst_if = NULL;
5563 	}
5564 
5565 	/*
5566 	 * If we have a destination interface which is a member of our bridge,
5567 	 * OR this is a unicast packet, push it through the bpf(4) machinery.
5568 	 * For broadcast or multicast packets, don't bother because it will
5569 	 * be reinjected into ether_input. We do this before we pass the packets
5570 	 * through the pfil(9) framework, as it is possible that pfil(9) will
5571 	 * drop the packet, or possibly modify it, making it difficult to debug
5572 	 * firewall issues on the bridge.
5573 	 */
5574 #if NBPFILTER > 0
5575 	if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5576 	    dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5577 		m->m_pkthdr.rcvif = bridge_ifp;
5578 		BRIDGE_BPF_MTAP_INPUT(sc, m);
5579 	}
5580 #endif /* NBPFILTER */
5581 
5582 	if (dst_if == NULL) {
5583 		/* bridge_broadcast will unlock */
5584 		bridge_broadcast(sc, sbif, m, 1);
5585 		return;
5586 	}
5587 
5588 	/*
5589 	 * Unicast.
5590 	 */
5591 	/*
5592 	 * At this point, we're dealing with a unicast frame
5593 	 * going to a different interface.
5594 	 */
5595 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5596 		goto drop;
5597 	}
5598 
5599 	dbif = bridge_lookup_member_if(sc, dst_if);
5600 	if (dbif == NULL) {
5601 		/* Not a member of the bridge (anymore?) */
5602 		goto drop;
5603 	}
5604 
5605 	/* Private segments can not talk to each other */
5606 	if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5607 		goto drop;
5608 	}
5609 
5610 	if ((dbif->bif_ifflags & IFBIF_STP) &&
5611 	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5612 		goto drop;
5613 	}
5614 
5615 #if HAS_DHCPRA_MASK
5616 	/* APPLE MODIFICATION <rdar:6985737> */
5617 	if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5618 		m = ip_xdhcpra_output(dst_if, m);
5619 		if (!m) {
5620 			++bridge_ifp.if_xdhcpra;
5621 			BRIDGE_UNLOCK(sc);
5622 			return;
5623 		}
5624 	}
5625 #endif /* HAS_DHCPRA_MASK */
5626 
5627 	if (dbif == sc->sc_mac_nat_bif) {
5628 		/* determine how to translate the packet */
5629 		translate_mac
5630 		        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5631 		if (m == NULL) {
5632 			/* packet was deallocated */
5633 			BRIDGE_UNLOCK(sc);
5634 			return;
5635 		}
5636 	} else if (bif_has_checksum_offload(dbif) &&
5637 	    !bif_has_checksum_offload(sbif)) {
5638 		/*
5639 		 * If the destination interface has checksum offload enabled,
5640 		 * verify the checksum now, unless the source interface also has
5641 		 * checksum offload enabled. The checksum in that case has
5642 		 * already just been computed and verifying it is unnecessary.
5643 		 */
5644 		error = bridge_verify_checksum(&m, &dbif->bif_stats);
5645 		if (error != 0) {
5646 			BRIDGE_UNLOCK(sc);
5647 			if (m != NULL) {
5648 				m_freem(m);
5649 			}
5650 			return;
5651 		}
5652 	}
5653 
5654 	sc_filter_flags = sc->sc_filter_flags;
5655 
5656 	BRIDGE_UNLOCK(sc);
5657 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5658 		if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5659 			return;
5660 		}
5661 		if (m == NULL) {
5662 			return;
5663 		}
5664 	}
5665 
5666 	/* if we need to, translate the MAC address */
5667 	if (translate_mac) {
5668 		bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5669 	}
5670 	/*
5671 	 * We're forwarding an inbound packet in which the checksum must
5672 	 * already have been computed and if required, verified.
5673 	 */
5674 	if (m != NULL) {
5675 		(void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5676 		    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5677 	}
5678 	return;
5679 
5680 drop:
5681 	BRIDGE_UNLOCK(sc);
5682 	m_freem(m);
5683 }
5684 
5685 static void
5686 inject_input_packet(ifnet_t ifp, mbuf_t m)
5687 {
5688 	mbuf_pkthdr_setrcvif(m, ifp);
5689 	mbuf_pkthdr_setheader(m, mbuf_data(m));
5690 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5691 	    mbuf_len(m) - ETHER_HDR_LEN);
5692 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5693 	m->m_flags |= M_PROTO1; /* set to avoid loops */
5694 	dlil_input_packet_list(ifp, m);
5695 	return;
5696 }
5697 
5698 static bool
5699 in_addr_is_ours(struct in_addr ip)
5700 {
5701 	struct in_ifaddr *ia;
5702 	bool             ours = false;
5703 
5704 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5705 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5706 		if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5707 			ours = true;
5708 			break;
5709 		}
5710 	}
5711 	lck_rw_done(&in_ifaddr_rwlock);
5712 	return ours;
5713 }
5714 
5715 static bool
5716 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5717 {
5718 	struct in6_ifaddr       *ia6;
5719 	bool                    ours = false;
5720 
5721 	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5722 		struct in6_addr         dst_ip;
5723 
5724 		/* need to embed scope ID for comparison */
5725 		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5726 		dst_ip.s6_addr16[1] = htons(ifscope);
5727 		ip6_p = &dst_ip;
5728 	}
5729 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5730 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5731 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5732 		    ia6->ia_addr.sin6_scope_id, ifscope)) {
5733 			ours = true;
5734 			break;
5735 		}
5736 	}
5737 	lck_rw_done(&in6_ifaddr_rwlock);
5738 	return ours;
5739 }
5740 
5741 static void
5742 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5743     bpf_packet_func bpf_input_func)
5744 {
5745 	size_t                  byte_count;
5746 	struct ether_header     *eh;
5747 	errno_t                 error;
5748 	bool                    is_ipv4;
5749 	int                     len;
5750 	u_int                   mac_hlen;
5751 	int                     pkt_count;
5752 
5753 	/* segment large packets before sending them up */
5754 	if (if_bridge_segmentation == 0) {
5755 		goto done;
5756 	}
5757 	len = m->m_pkthdr.len;
5758 	if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5759 		goto done;
5760 	}
5761 	eh = mtod(m, struct ether_header *);
5762 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5763 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5764 		    "large non IPv4/IPv6 packet");
5765 		goto done;
5766 	}
5767 
5768 	/*
5769 	 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5770 	 *
5771 	 * If gso_tcp() returns success (0), the packet(s) are
5772 	 * ready to be passed up. If the destination is a local IP address,
5773 	 * the packet will be passed up as a large, single packet.
5774 	 *
5775 	 * If gso_tcp() returns an error, the packet has already
5776 	 * been freed.
5777 	 */
5778 	mac_hlen = sizeof(*eh);
5779 	error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5780 	if (error != 0) {
5781 		return;
5782 	}
5783 
5784 done:
5785 	pkt_count = 0;
5786 	byte_count = 0;
5787 	for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5788 		/* Mark the packet as arriving on the bridge interface */
5789 		mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5790 		mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5791 		if (bpf_input_func != NULL) {
5792 			(*bpf_input_func)(bridge_ifp, scan);
5793 		}
5794 		mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5795 		    mbuf_len(scan) - ETHER_HDR_LEN);
5796 		mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5797 		byte_count += mbuf_pkthdr_len(scan);
5798 		pkt_count++;
5799 	}
5800 	(void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5801 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5802 	    "%s %d packet(s) %ld bytes",
5803 	    bridge_ifp->if_xname, pkt_count, byte_count);
5804 	dlil_input_packet_list(bridge_ifp, m);
5805 	return;
5806 }
5807 
5808 static bool
5809 is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5810 {
5811 	bool    ours;
5812 
5813 	if (info_p->ip_is_ipv4) {
5814 		struct in_addr  dst_ip;
5815 
5816 		bcopy(&info_p->ip_hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
5817 		ours = in_addr_is_ours(dst_ip);
5818 	} else {
5819 		ours = in6_addr_is_ours(&info_p->ip_hdr.ip6->ip6_dst, ifscope);
5820 	}
5821 	return ours;
5822 }
5823 
5824 static inline errno_t
5825 bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
5826     const u_char * ether_dhost, mbuf_t *mp,
5827     bool is_broadcast, bool is_ip, bool is_ipv4,
5828     ip_packet_info * info_p, struct bripstats * stats_p,
5829     bool *info_initialized)
5830 {
5831 	errno_t         error = 0;
5832 	bool            is_local = false;
5833 	struct pf_mtag *pf_mtag;
5834 	u_int16_t       tag = vmnet_tag;
5835 
5836 	*info_initialized = false;
5837 	if (is_broadcast) {
5838 		if (_ether_cmp(ether_dhost, etherbroadcastaddr) == 0) {
5839 			tag = vmnet_broadcast_tag;
5840 		} else {
5841 			tag = vmnet_multicast_tag;
5842 		}
5843 	} else if (is_ip) {
5844 		unsigned int    mac_hlen = sizeof(struct ether_header);
5845 
5846 		bzero(stats_p, sizeof(*stats_p));
5847 		*info_initialized = true;
5848 		error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
5849 		    stats_p);
5850 		if (error != 0) {
5851 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
5852 			    "%s(%s) bridge_get_ip_proto failed %d",
5853 			    bridge_ifp->if_xname,
5854 			    ifp->if_xname, error);
5855 			if (*mp == NULL) {
5856 				return EJUSTRETURN;
5857 			}
5858 		} else {
5859 			is_local = is_our_ip(info_p, bridge_ifp->if_index);
5860 			if (is_local) {
5861 				tag = vmnet_local_tag;
5862 			}
5863 		}
5864 	}
5865 	pf_mtag = pf_get_mtag(*mp);
5866 	if (pf_mtag != NULL) {
5867 		pf_mtag->pftag_tag = tag;
5868 	}
5869 #if DEBUG || DEVELOPMENT
5870 	{
5871 		bool forced;
5872 
5873 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
5874 		if (forced) {
5875 			m_freem(*mp);
5876 			*mp = NULL;
5877 			error = EJUSTRETURN;
5878 			goto done;
5879 		}
5880 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
5881 		if (forced) {
5882 			error = _EBADIP;
5883 			goto done;
5884 		}
5885 	}
5886 done:
5887 #endif /* DEBUG || DEVELOPMENT */
5888 	return error;
5889 }
5890 
5891 static void
5892 bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
5893 {
5894 	dst_p->bips_ip += src_p->bips_ip;
5895 	dst_p->bips_ip6 += src_p->bips_ip6;
5896 	dst_p->bips_udp += src_p->bips_udp;
5897 	dst_p->bips_tcp += src_p->bips_tcp;
5898 
5899 	dst_p->bips_bad_ip += src_p->bips_bad_ip;
5900 	dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
5901 	dst_p->bips_bad_udp += src_p->bips_bad_udp;
5902 	dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
5903 }
5904 
5905 static void
5906 bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
5907 {
5908 	struct bridge_iflist *bif;
5909 	struct bridge_softc *sc = ifp->if_bridge;
5910 
5911 	BRIDGE_LOCK(sc);
5912 	bif = bridge_lookup_member_if(sc, ifp);
5913 	if (bif == NULL) {
5914 		goto done;
5915 	}
5916 	if (!bif_has_checksum_offload(bif)) {
5917 		goto done;
5918 	}
5919 	bripstats_apply(&bif->bif_stats.brms_in_ip, stats_p);
5920 
5921 done:
5922 	BRIDGE_UNLOCK(sc);
5923 	return;
5924 }
5925 
5926 /*
5927  * bridge_input:
5928  *
5929  *	Filter input from a member interface.  Queue the packet for
5930  *	bridging if it is not for us.
5931  */
5932 errno_t
5933 bridge_input(struct ifnet *ifp, mbuf_t *data)
5934 {
5935 	struct bridge_softc *sc = ifp->if_bridge;
5936 	struct bridge_iflist *bif, *bif2;
5937 	struct ether_header eh_in;
5938 	bool is_ip = false;
5939 	bool is_ipv4 = false;
5940 	ifnet_t bridge_ifp;
5941 	struct mbuf *mc, *mc2;
5942 	unsigned int mac_hlen = sizeof(struct ether_header);
5943 	uint16_t vlan;
5944 	errno_t error;
5945 	ip_packet_info info;
5946 	struct bripstats stats;
5947 	bool info_initialized = false;
5948 	errno_t ip_packet_error = 0;
5949 	bool is_broadcast;
5950 	bool is_ip_broadcast = false;
5951 	bool is_ifp_mac = false;
5952 	mbuf_t m = *data;
5953 	uint32_t sc_filter_flags = 0;
5954 
5955 	bridge_ifp = sc->sc_ifp;
5956 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5957 	    "%s from %s m 0x%llx data 0x%llx",
5958 	    bridge_ifp->if_xname, ifp->if_xname,
5959 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
5960 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
5961 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
5962 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5963 		    "%s not running passing along",
5964 		    bridge_ifp->if_xname);
5965 		return 0;
5966 	}
5967 
5968 	vlan = VLANTAGOF(m);
5969 
5970 #ifdef IFF_MONITOR
5971 	/*
5972 	 * Implement support for bridge monitoring. If this flag has been
5973 	 * set on this interface, discard the packet once we push it through
5974 	 * the bpf(4) machinery, but before we do, increment the byte and
5975 	 * packet counters associated with this interface.
5976 	 */
5977 	if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
5978 		m->m_pkthdr.rcvif = bridge_ifp;
5979 		BRIDGE_BPF_MTAP_INPUT(sc, m);
5980 		(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5981 		*data = NULL;
5982 		m_freem(m);
5983 		return EJUSTRETURN;
5984 	}
5985 #endif /* IFF_MONITOR */
5986 
5987 	is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
5988 
5989 	/*
5990 	 * Need to clear the promiscuous flag otherwise it will be
5991 	 * dropped by DLIL after processing filters
5992 	 */
5993 	if ((mbuf_flags(m) & MBUF_PROMISC)) {
5994 		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
5995 	}
5996 
5997 	/* copy the ethernet header */
5998 	eh_in = *(mtod(m, struct ether_header *));
5999 
6000 	is_ip = ether_header_type_is_ip(&eh_in, &is_ipv4);
6001 
6002 	if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
6003 		/* tag packets coming from VMNET interfaces */
6004 		ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
6005 		    eh_in.ether_dhost, data, is_broadcast, is_ip, is_ipv4,
6006 		    &info, &stats, &info_initialized);
6007 		m = *data;
6008 		if (m == NULL) {
6009 			bridge_bripstats_apply(ifp, &stats);
6010 			return EJUSTRETURN;
6011 		}
6012 	}
6013 
6014 	sc_filter_flags = sc->sc_filter_flags;
6015 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6016 		error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
6017 		m = *data;
6018 		if (error != 0 || m == NULL) {
6019 			return EJUSTRETURN;
6020 		}
6021 	}
6022 
6023 	BRIDGE_LOCK(sc);
6024 	bif = bridge_lookup_member_if(sc, ifp);
6025 	if (bif == NULL) {
6026 		BRIDGE_UNLOCK(sc);
6027 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6028 		    "%s bridge_lookup_member_if failed",
6029 		    bridge_ifp->if_xname);
6030 		return 0;
6031 	}
6032 	if (is_ip && bif_has_checksum_offload(bif)) {
6033 		if (info_initialized) {
6034 			bripstats_apply(&bif->bif_stats.brms_in_ip, &stats);
6035 		} else {
6036 			error = bridge_get_ip_proto(data, mac_hlen, is_ipv4,
6037 			    &info, &bif->bif_stats.brms_in_ip);
6038 			if (error != 0) {
6039 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6040 				    "%s(%s) bridge_get_ip_proto failed %d",
6041 				    bridge_ifp->if_xname,
6042 				    bif->bif_ifp->if_xname, error);
6043 				ip_packet_error = error;
6044 			}
6045 		}
6046 		if (ip_packet_error == 0) {
6047 			/* need to compute IP/UDP/TCP/checksums */
6048 			error = bridge_offload_checksum(data, &info,
6049 			    &bif->bif_stats);
6050 			if (error != 0) {
6051 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6052 				    "%s(%s) bridge_offload_checksum failed %d",
6053 				    bridge_ifp->if_xname,
6054 				    bif->bif_ifp->if_xname, error);
6055 				ip_packet_error = error;
6056 			}
6057 		}
6058 		if (ip_packet_error != 0) {
6059 			BRIDGE_UNLOCK(sc);
6060 			if (*data != NULL) {
6061 				m_freem(*data);
6062 				*data = NULL;
6063 			}
6064 			return EJUSTRETURN;
6065 		}
6066 		m = *data;
6067 	}
6068 
6069 	if (bif->bif_flags & BIFF_HOST_FILTER) {
6070 		error = bridge_host_filter(bif, data);
6071 		if (error != 0) {
6072 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6073 			    "%s bridge_host_filter failed",
6074 			    bif->bif_ifp->if_xname);
6075 			BRIDGE_UNLOCK(sc);
6076 			return EJUSTRETURN;
6077 		}
6078 		m = *data;
6079 	}
6080 
6081 	if (!is_broadcast &&
6082 	    _ether_cmp(eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
6083 		/* the packet is unicast to the interface's MAC address */
6084 		if (is_ip && sc->sc_mac_nat_bif == bif) {
6085 			/* doing MAC-NAT, check if destination is IP broadcast */
6086 			is_ip_broadcast = is_broadcast_ip_packet(data);
6087 			if (*data == NULL) {
6088 				BRIDGE_UNLOCK(sc);
6089 				return EJUSTRETURN;
6090 			}
6091 			m = *data;
6092 		}
6093 		if (!is_ip_broadcast) {
6094 			is_ifp_mac = TRUE;
6095 		}
6096 	}
6097 
6098 	bridge_span(sc, m);
6099 
6100 	if (is_broadcast || is_ip_broadcast) {
6101 		if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
6102 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6103 			    " multicast: "
6104 			    "%02x:%02x:%02x:%02x:%02x:%02x",
6105 			    eh_in.ether_dhost[0], eh_in.ether_dhost[1],
6106 			    eh_in.ether_dhost[2], eh_in.ether_dhost[3],
6107 			    eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
6108 		}
6109 		/* Tap off 802.1D packets; they do not get forwarded. */
6110 		if (is_broadcast &&
6111 		    _ether_cmp(eh_in.ether_dhost, bstp_etheraddr) == 0) {
6112 #if BRIDGESTP
6113 			m = bstp_input(&bif->bif_stp, ifp, m);
6114 #else /* !BRIDGESTP */
6115 			m_freem(m);
6116 			m = NULL;
6117 #endif /* !BRIDGESTP */
6118 			if (m == NULL) {
6119 				BRIDGE_UNLOCK(sc);
6120 				return EJUSTRETURN;
6121 			}
6122 		}
6123 
6124 		if ((bif->bif_ifflags & IFBIF_STP) &&
6125 		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6126 			BRIDGE_UNLOCK(sc);
6127 			return 0;
6128 		}
6129 
6130 		/*
6131 		 * Make a deep copy of the packet and enqueue the copy
6132 		 * for bridge processing.
6133 		 */
6134 		mc = m_dup(m, M_DONTWAIT);
6135 		if (mc == NULL) {
6136 			BRIDGE_UNLOCK(sc);
6137 			return 0;
6138 		}
6139 
6140 		/*
6141 		 * Perform the bridge forwarding function with the copy.
6142 		 *
6143 		 * Note that bridge_forward calls BRIDGE_UNLOCK
6144 		 */
6145 		if (is_ip_broadcast) {
6146 			struct ether_header *eh;
6147 
6148 			/* make the copy look like it is actually broadcast */
6149 			mc->m_flags |= M_BCAST;
6150 			eh = mtod(mc, struct ether_header *);
6151 			bcopy(etherbroadcastaddr, eh->ether_dhost,
6152 			    ETHER_ADDR_LEN);
6153 		}
6154 		bridge_forward(sc, bif, mc);
6155 
6156 		/*
6157 		 * Reinject the mbuf as arriving on the bridge so we have a
6158 		 * chance at claiming multicast packets. We can not loop back
6159 		 * here from ether_input as a bridge is never a member of a
6160 		 * bridge.
6161 		 */
6162 		VERIFY(bridge_ifp->if_bridge == NULL);
6163 		mc2 = m_dup(m, M_DONTWAIT);
6164 		if (mc2 != NULL) {
6165 			/* Keep the layer3 header aligned */
6166 			int i = min(mc2->m_pkthdr.len, max_protohdr);
6167 			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
6168 		}
6169 		if (mc2 != NULL) {
6170 			/* mark packet as arriving on the bridge */
6171 			mc2->m_pkthdr.rcvif = bridge_ifp;
6172 			mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
6173 			BRIDGE_BPF_MTAP_INPUT(sc, mc2);
6174 			(void) mbuf_setdata(mc2,
6175 			    (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
6176 			    mbuf_len(mc2) - ETHER_HDR_LEN);
6177 			(void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
6178 			(void) ifnet_stat_increment_in(bridge_ifp, 1,
6179 			    mbuf_pkthdr_len(mc2), 0);
6180 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6181 			    "%s mcast for us", bridge_ifp->if_xname);
6182 			dlil_input_packet_list(bridge_ifp, mc2);
6183 		}
6184 
6185 		/* Return the original packet for local processing. */
6186 		return 0;
6187 	}
6188 
6189 	if ((bif->bif_ifflags & IFBIF_STP) &&
6190 	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6191 		BRIDGE_UNLOCK(sc);
6192 		return 0;
6193 	}
6194 
6195 #ifdef DEV_CARP
6196 #define CARP_CHECK_WE_ARE_DST(iface) \
6197 	((iface)->if_carp &&\
6198 	        carp_forus((iface)->if_carp, eh_in.ether_dhost))
6199 #define CARP_CHECK_WE_ARE_SRC(iface) \
6200 	((iface)->if_carp &&\
6201 	        carp_forus((iface)->if_carp, eh_in.ether_shost))
6202 #else
6203 #define CARP_CHECK_WE_ARE_DST(iface) 0
6204 #define CARP_CHECK_WE_ARE_SRC(iface) 0
6205 #endif
6206 
6207 #define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)
6208 
6209 #define PFIL_PHYS(sc, ifp, m)
6210 
6211 #define GRAB_OUR_PACKETS(iface)                                         \
6212 	if ((iface)->if_type == IFT_GIF)                                \
6213 	        continue;                                               \
6214 	/* It is destined for us. */                                    \
6215 	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 ||   \
6216 	    CARP_CHECK_WE_ARE_DST((iface))) {                           \
6217 	        if ((iface)->if_type == IFT_BRIDGE) {                   \
6218 	                BRIDGE_BPF_MTAP_INPUT(sc, m);                   \
6219 	/* Filter on the physical interface. */         \
6220 	                PFIL_PHYS(sc, iface, m);                        \
6221 	        } else {                                                \
6222 	                bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0);      \
6223 	        }                                                       \
6224 	        if (bif->bif_ifflags & IFBIF_LEARNING) {                \
6225 	                error = bridge_rtupdate(sc, eh_in.ether_shost,  \
6226 	                    vlan, bif, 0, IFBAF_DYNAMIC);               \
6227 	                if (error && bif->bif_addrmax) {                \
6228 	                        BRIDGE_UNLOCK(sc);                      \
6229 	                        m_freem(m);                             \
6230 	                        return (EJUSTRETURN);                   \
6231 	                }                                               \
6232 	        }                                                       \
6233 	        BRIDGE_UNLOCK(sc);                                      \
6234 	        inject_input_packet(iface, m);                          \
6235 	        return (EJUSTRETURN);                                   \
6236 	}                                                               \
6237                                                                         \
6238 	/* We just received a packet that we sent out. */               \
6239 	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 ||   \
6240 	    CARP_CHECK_WE_ARE_SRC((iface))) {                           \
6241 	        BRIDGE_UNLOCK(sc);                                      \
6242 	        m_freem(m);                                             \
6243 	        return (EJUSTRETURN);                                   \
6244 	}
6245 
6246 	/*
6247 	 * Unicast.
6248 	 */
6249 
6250 	/* handle MAC-NAT if enabled */
6251 	if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
6252 		ifnet_t dst_if;
6253 		boolean_t is_input = FALSE;
6254 
6255 		dst_if = bridge_mac_nat_input(sc, data, &is_input);
6256 		m = *data;
6257 		if (dst_if == ifp) {
6258 			/* our input packet */
6259 		} else if (dst_if != NULL || m == NULL) {
6260 			BRIDGE_UNLOCK(sc);
6261 			if (dst_if != NULL) {
6262 				ASSERT(m != NULL);
6263 				if (is_input) {
6264 					inject_input_packet(dst_if, m);
6265 				} else {
6266 					(void)bridge_enqueue(bridge_ifp, NULL,
6267 					    dst_if, m,
6268 					    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
6269 				}
6270 			}
6271 			return EJUSTRETURN;
6272 		}
6273 	}
6274 
6275 	/*
6276 	 * If the packet is for the bridge, pass it up for local processing.
6277 	 */
6278 	if (_ether_cmp(eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
6279 	    CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
6280 		bpf_packet_func     bpf_input_func = sc->sc_bpf_input;
6281 
6282 		/*
6283 		 * If the interface is learning, and the source
6284 		 * address is valid and not multicast, record
6285 		 * the address.
6286 		 */
6287 		if (bif->bif_ifflags & IFBIF_LEARNING) {
6288 			(void) bridge_rtupdate(sc, eh_in.ether_shost,
6289 			    vlan, bif, 0, IFBAF_DYNAMIC);
6290 		}
6291 		BRIDGE_UNLOCK(sc);
6292 
6293 		bridge_interface_input(bridge_ifp, m, bpf_input_func);
6294 		return EJUSTRETURN;
6295 	}
6296 
6297 	/*
6298 	 * if the destination of the packet is for the MAC address of
6299 	 * the member interface itself, then we don't need to forward
6300 	 * it -- just pass it back.  Note that it'll likely just be
6301 	 * dropped by the stack, but if something else is bound to
6302 	 * the interface directly (for example, the wireless stats
6303 	 * protocol -- although that actually uses BPF right now),
6304 	 * then it will consume the packet
6305 	 *
6306 	 * ALSO, note that we do this check AFTER checking for the
6307 	 * bridge's own MAC address, because the bridge may be
6308 	 * using the SAME MAC address as one of its interfaces
6309 	 */
6310 	if (is_ifp_mac) {
6311 
6312 #ifdef VERY_VERY_VERY_DIAGNOSTIC
6313 		BRIDGE_LOG(LOG_NOTICE, 0,
6314 		    "not forwarding packet bound for member interface");
6315 #endif
6316 
6317 		BRIDGE_UNLOCK(sc);
6318 		return 0;
6319 	}
6320 
6321 	/* Now check the remaining bridge members. */
6322 	TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
6323 		if (bif2->bif_ifp != ifp) {
6324 			GRAB_OUR_PACKETS(bif2->bif_ifp);
6325 		}
6326 	}
6327 
6328 #undef CARP_CHECK_WE_ARE_DST
6329 #undef CARP_CHECK_WE_ARE_SRC
6330 #undef GRAB_OUR_PACKETS
6331 
6332 	/*
6333 	 * Perform the bridge forwarding function.
6334 	 *
6335 	 * Note that bridge_forward calls BRIDGE_UNLOCK
6336 	 */
6337 	bridge_forward(sc, bif, m);
6338 
6339 	return EJUSTRETURN;
6340 }
6341 
6342 /*
6343  * bridge_broadcast:
6344  *
6345  *	Send a frame to all interfaces that are members of
6346  *	the bridge, except for the one on which the packet
6347  *	arrived.
6348  *
6349  *	NOTE: Releases the lock on return.
6350  */
6351 static void
6352 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6353     struct mbuf *m, int runfilt)
6354 {
6355 	ifnet_t bridge_ifp;
6356 	struct bridge_iflist *dbif;
6357 	struct ifnet * src_if;
6358 	struct mbuf *mc;
6359 	struct mbuf *mc_in;
6360 	struct ifnet *dst_if;
6361 	int error = 0, used = 0;
6362 	boolean_t bridge_if_out;
6363 	ChecksumOperation cksum_op;
6364 	struct mac_nat_record mnr;
6365 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6366 	boolean_t translate_mac = FALSE;
6367 	uint32_t sc_filter_flags = 0;
6368 
6369 	bridge_ifp = sc->sc_ifp;
6370 	if (sbif != NULL) {
6371 		bridge_if_out = FALSE;
6372 		src_if = sbif->bif_ifp;
6373 		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6374 		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6375 			/* get the translation record while holding the lock */
6376 			translate_mac
6377 			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6378 			if (m == NULL) {
6379 				/* packet was deallocated */
6380 				BRIDGE_UNLOCK(sc);
6381 				return;
6382 			}
6383 		}
6384 	} else {
6385 		/*
6386 		 * sbif is NULL when the bridge interface calls
6387 		 * bridge_broadcast().
6388 		 */
6389 		bridge_if_out = TRUE;
6390 		cksum_op = CHECKSUM_OPERATION_FINALIZE;
6391 		sbif = NULL;
6392 		src_if = NULL;
6393 	}
6394 
6395 	BRIDGE_LOCK2REF(sc, error);
6396 	if (error) {
6397 		m_freem(m);
6398 		return;
6399 	}
6400 
6401 	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6402 		dst_if = dbif->bif_ifp;
6403 		if (dst_if == src_if) {
6404 			/* skip the interface that the packet came in on */
6405 			continue;
6406 		}
6407 
6408 		/* Private segments can not talk to each other */
6409 		if (sbif != NULL &&
6410 		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6411 			continue;
6412 		}
6413 
6414 		if ((dbif->bif_ifflags & IFBIF_STP) &&
6415 		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6416 			continue;
6417 		}
6418 
6419 		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6420 		    (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
6421 			continue;
6422 		}
6423 
6424 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6425 			continue;
6426 		}
6427 
6428 		if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
6429 			continue;
6430 		}
6431 
6432 		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6433 			mc = m;
6434 			used = 1;
6435 		} else {
6436 			mc = m_dup(m, M_DONTWAIT);
6437 			if (mc == NULL) {
6438 				(void) ifnet_stat_increment_out(bridge_ifp,
6439 				    0, 0, 1);
6440 				continue;
6441 			}
6442 		}
6443 
6444 		/*
6445 		 * If broadcast input is enabled, do so only if this
6446 		 * is an input packet.
6447 		 */
6448 		if (!bridge_if_out &&
6449 		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6450 			mc_in = m_dup(mc, M_DONTWAIT);
6451 			/* this could fail, but we continue anyways */
6452 		} else {
6453 			mc_in = NULL;
6454 		}
6455 
6456 		/* out */
6457 		if (translate_mac && mac_nat_bif == dbif) {
6458 			/* translate the packet without holding the lock */
6459 			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6460 		}
6461 
6462 		sc_filter_flags = sc->sc_filter_flags;
6463 		if (runfilt &&
6464 		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6465 			if (used == 0) {
6466 				/* Keep the layer3 header aligned */
6467 				int i = min(mc->m_pkthdr.len, max_protohdr);
6468 				mc = m_copyup(mc, i, ETHER_ALIGN);
6469 				if (mc == NULL) {
6470 					(void) ifnet_stat_increment_out(
6471 						sc->sc_ifp, 0, 0, 1);
6472 					if (mc_in != NULL) {
6473 						m_freem(mc_in);
6474 						mc_in = NULL;
6475 					}
6476 					continue;
6477 				}
6478 			}
6479 			if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
6480 				if (mc_in != NULL) {
6481 					m_freem(mc_in);
6482 					mc_in = NULL;
6483 				}
6484 				continue;
6485 			}
6486 			if (mc == NULL) {
6487 				if (mc_in != NULL) {
6488 					m_freem(mc_in);
6489 					mc_in = NULL;
6490 				}
6491 				continue;
6492 			}
6493 		}
6494 
6495 		if (mc != NULL) {
6496 			/* verify checksum if necessary */
6497 			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6498 			    !bif_has_checksum_offload(sbif)) {
6499 				error = bridge_verify_checksum(&mc,
6500 				    &dbif->bif_stats);
6501 				if (error != 0) {
6502 					if (mc != NULL) {
6503 						m_freem(mc);
6504 					}
6505 					mc = NULL;
6506 				}
6507 			}
6508 			if (mc != NULL) {
6509 				(void) bridge_enqueue(bridge_ifp,
6510 				    NULL, dst_if, mc, cksum_op);
6511 			}
6512 		}
6513 
6514 		/* in */
6515 		if (mc_in == NULL) {
6516 			continue;
6517 		}
6518 		bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
6519 		mbuf_pkthdr_setrcvif(mc_in, dst_if);
6520 		mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
6521 		mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
6522 		    mbuf_len(mc_in) - ETHER_HDR_LEN);
6523 		mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
6524 		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6525 		dlil_input_packet_list(dst_if, mc_in);
6526 	}
6527 	if (used == 0) {
6528 		m_freem(m);
6529 	}
6530 
6531 
6532 	BRIDGE_UNREF(sc);
6533 }
6534 
6535 /*
6536  * bridge_span:
6537  *
6538  *	Duplicate a packet out one or more interfaces that are in span mode,
6539  *	the original mbuf is unmodified.
6540  */
6541 static void
6542 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6543 {
6544 	struct bridge_iflist *bif;
6545 	struct ifnet *dst_if;
6546 	struct mbuf *mc;
6547 
6548 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6549 		return;
6550 	}
6551 
6552 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6553 		dst_if = bif->bif_ifp;
6554 
6555 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6556 			continue;
6557 		}
6558 
6559 		mc = m_copypacket(m, M_DONTWAIT);
6560 		if (mc == NULL) {
6561 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6562 			continue;
6563 		}
6564 
6565 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6566 		    CHECKSUM_OPERATION_NONE);
6567 	}
6568 }
6569 
6570 
6571 /*
6572  * bridge_rtupdate:
6573  *
6574  *	Add a bridge routing entry.
6575  */
6576 static int
6577 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6578     struct bridge_iflist *bif, int setflags, uint8_t flags)
6579 {
6580 	struct bridge_rtnode *brt;
6581 	int error;
6582 
6583 	BRIDGE_LOCK_ASSERT_HELD(sc);
6584 
6585 	/* Check the source address is valid and not multicast. */
6586 	if (ETHER_IS_MULTICAST(dst) ||
6587 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6588 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6589 		return EINVAL;
6590 	}
6591 
6592 
6593 	/* 802.1p frames map to vlan 1 */
6594 	if (vlan == 0) {
6595 		vlan = 1;
6596 	}
6597 
6598 	/*
6599 	 * A route for this destination might already exist.  If so,
6600 	 * update it, otherwise create a new one.
6601 	 */
6602 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6603 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6604 			sc->sc_brtexceeded++;
6605 			return ENOSPC;
6606 		}
6607 		/* Check per interface address limits (if enabled) */
6608 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6609 			bif->bif_addrexceeded++;
6610 			return ENOSPC;
6611 		}
6612 
6613 		/*
6614 		 * Allocate a new bridge forwarding node, and
6615 		 * initialize the expiration time and Ethernet
6616 		 * address.
6617 		 */
6618 		brt = zalloc_noblock(bridge_rtnode_pool);
6619 		if (brt == NULL) {
6620 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6621 			    "zalloc_nolock failed");
6622 			return ENOMEM;
6623 		}
6624 		bzero(brt, sizeof(struct bridge_rtnode));
6625 
6626 		if (bif->bif_ifflags & IFBIF_STICKY) {
6627 			brt->brt_flags = IFBAF_STICKY;
6628 		} else {
6629 			brt->brt_flags = IFBAF_DYNAMIC;
6630 		}
6631 
6632 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6633 		brt->brt_vlan = vlan;
6634 
6635 
6636 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6637 			zfree(bridge_rtnode_pool, brt);
6638 			return error;
6639 		}
6640 		brt->brt_dst = bif;
6641 		bif->bif_addrcnt++;
6642 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6643 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6644 		    "on %s count %u hashsize %u",
6645 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6646 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6647 		    sc->sc_rthash_size);
6648 	}
6649 
6650 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6651 	    brt->brt_dst != bif) {
6652 		brt->brt_dst->bif_addrcnt--;
6653 		brt->brt_dst = bif;
6654 		brt->brt_dst->bif_addrcnt++;
6655 	}
6656 
6657 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6658 		unsigned long now;
6659 
6660 		now = (unsigned long) net_uptime();
6661 		brt->brt_expire = now + sc->sc_brttimeout;
6662 	}
6663 	if (setflags) {
6664 		brt->brt_flags = flags;
6665 	}
6666 
6667 
6668 	return 0;
6669 }
6670 
6671 /*
6672  * bridge_rtlookup:
6673  *
6674  *	Lookup the destination interface for an address.
6675  */
6676 static struct ifnet *
6677 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6678 {
6679 	struct bridge_rtnode *brt;
6680 
6681 	BRIDGE_LOCK_ASSERT_HELD(sc);
6682 
6683 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6684 		return NULL;
6685 	}
6686 
6687 	return brt->brt_ifp;
6688 }
6689 
6690 /*
6691  * bridge_rttrim:
6692  *
6693  *	Trim the routine table so that we have a number
6694  *	of routing entries less than or equal to the
6695  *	maximum number.
6696  */
6697 static void
6698 bridge_rttrim(struct bridge_softc *sc)
6699 {
6700 	struct bridge_rtnode *brt, *nbrt;
6701 
6702 	BRIDGE_LOCK_ASSERT_HELD(sc);
6703 
6704 	/* Make sure we actually need to do this. */
6705 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6706 		return;
6707 	}
6708 
6709 	/* Force an aging cycle; this might trim enough addresses. */
6710 	bridge_rtage(sc);
6711 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6712 		return;
6713 	}
6714 
6715 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6716 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6717 			bridge_rtnode_destroy(sc, brt);
6718 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6719 				return;
6720 			}
6721 		}
6722 	}
6723 }
6724 
6725 /*
6726  * bridge_aging_timer:
6727  *
6728  *	Aging periodic timer for the bridge routing table.
6729  */
6730 static void
6731 bridge_aging_timer(struct bridge_softc *sc)
6732 {
6733 	BRIDGE_LOCK_ASSERT_HELD(sc);
6734 
6735 	bridge_rtage(sc);
6736 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6737 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6738 		sc->sc_aging_timer.bdc_sc = sc;
6739 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6740 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6741 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6742 	}
6743 }
6744 
6745 /*
6746  * bridge_rtage:
6747  *
6748  *	Perform an aging cycle.
6749  */
6750 static void
6751 bridge_rtage(struct bridge_softc *sc)
6752 {
6753 	struct bridge_rtnode *brt, *nbrt;
6754 	unsigned long now;
6755 
6756 	BRIDGE_LOCK_ASSERT_HELD(sc);
6757 
6758 	now = (unsigned long) net_uptime();
6759 
6760 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6761 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6762 			if (now >= brt->brt_expire) {
6763 				bridge_rtnode_destroy(sc, brt);
6764 			}
6765 		}
6766 	}
6767 	if (sc->sc_mac_nat_bif != NULL) {
6768 		bridge_mac_nat_age_entries(sc, now);
6769 	}
6770 }
6771 
6772 /*
6773  * bridge_rtflush:
6774  *
6775  *	Remove all dynamic addresses from the bridge.
6776  */
6777 static void
6778 bridge_rtflush(struct bridge_softc *sc, int full)
6779 {
6780 	struct bridge_rtnode *brt, *nbrt;
6781 
6782 	BRIDGE_LOCK_ASSERT_HELD(sc);
6783 
6784 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6785 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6786 			bridge_rtnode_destroy(sc, brt);
6787 		}
6788 	}
6789 }
6790 
6791 /*
6792  * bridge_rtdaddr:
6793  *
6794  *	Remove an address from the table.
6795  */
6796 static int
6797 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6798 {
6799 	struct bridge_rtnode *brt;
6800 	int found = 0;
6801 
6802 	BRIDGE_LOCK_ASSERT_HELD(sc);
6803 
6804 	/*
6805 	 * If vlan is zero then we want to delete for all vlans so the lookup
6806 	 * may return more than one.
6807 	 */
6808 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6809 		bridge_rtnode_destroy(sc, brt);
6810 		found = 1;
6811 	}
6812 
6813 	return found ? 0 : ENOENT;
6814 }
6815 
6816 /*
6817  * bridge_rtdelete:
6818  *
6819  *	Delete routes to a specific member interface.
6820  */
6821 static void
6822 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6823 {
6824 	struct bridge_rtnode *brt, *nbrt;
6825 
6826 	BRIDGE_LOCK_ASSERT_HELD(sc);
6827 
6828 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6829 		if (brt->brt_ifp == ifp && (full ||
6830 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6831 			bridge_rtnode_destroy(sc, brt);
6832 		}
6833 	}
6834 }
6835 
6836 /*
6837  * bridge_rtable_init:
6838  *
6839  *	Initialize the route table for this bridge.
6840  */
6841 static int
6842 bridge_rtable_init(struct bridge_softc *sc)
6843 {
6844 	u_int32_t i;
6845 
6846 	sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6847 	    BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6848 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6849 
6850 	for (i = 0; i < sc->sc_rthash_size; i++) {
6851 		LIST_INIT(&sc->sc_rthash[i]);
6852 	}
6853 
6854 	sc->sc_rthash_key = RandomULong();
6855 
6856 	LIST_INIT(&sc->sc_rtlist);
6857 
6858 	return 0;
6859 }
6860 
6861 /*
6862  * bridge_rthash_delayed_resize:
6863  *
6864  *	Resize the routing table hash on a delayed thread call.
6865  */
6866 static void
6867 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6868 {
6869 	u_int32_t new_rthash_size = 0;
6870 	u_int32_t old_rthash_size = 0;
6871 	struct _bridge_rtnode_list *new_rthash = NULL;
6872 	struct _bridge_rtnode_list *old_rthash = NULL;
6873 	u_int32_t i;
6874 	struct bridge_rtnode *brt;
6875 	int error = 0;
6876 
6877 	BRIDGE_LOCK_ASSERT_HELD(sc);
6878 
6879 	/*
6880 	 * Four entries per hash bucket is our ideal load factor
6881 	 */
6882 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6883 		goto out;
6884 	}
6885 
6886 	/*
6887 	 * Doubling the number of hash buckets may be too simplistic
6888 	 * especially when facing a spike of new entries
6889 	 */
6890 	new_rthash_size = sc->sc_rthash_size * 2;
6891 
6892 	sc->sc_flags |= SCF_RESIZING;
6893 	BRIDGE_UNLOCK(sc);
6894 
6895 	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6896 	    Z_WAITOK | Z_ZERO);
6897 
6898 	BRIDGE_LOCK(sc);
6899 	sc->sc_flags &= ~SCF_RESIZING;
6900 
6901 	if (new_rthash == NULL) {
6902 		error = ENOMEM;
6903 		goto out;
6904 	}
6905 	if ((sc->sc_flags & SCF_DETACHING)) {
6906 		error = ENODEV;
6907 		goto out;
6908 	}
6909 	/*
6910 	 * Fail safe from here on
6911 	 */
6912 	old_rthash = sc->sc_rthash;
6913 	old_rthash_size = sc->sc_rthash_size;
6914 	sc->sc_rthash = new_rthash;
6915 	sc->sc_rthash_size = new_rthash_size;
6916 
6917 	/*
6918 	 * Get a new key to force entries to be shuffled around to reduce
6919 	 * the likelihood they will land in the same buckets
6920 	 */
6921 	sc->sc_rthash_key = RandomULong();
6922 
6923 	for (i = 0; i < sc->sc_rthash_size; i++) {
6924 		LIST_INIT(&sc->sc_rthash[i]);
6925 	}
6926 
6927 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6928 		LIST_REMOVE(brt, brt_hash);
6929 		(void) bridge_rtnode_hash(sc, brt);
6930 	}
6931 out:
6932 	if (error == 0) {
6933 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6934 		    "%s new size %u",
6935 		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
6936 		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
6937 	} else {
6938 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
6939 		    "%s failed %d", sc->sc_ifp->if_xname, error);
6940 		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
6941 	}
6942 }
6943 
6944 /*
6945  * Resize the number of hash buckets based on the load factor
6946  * Currently only grow
6947  * Failing to resize the hash table is not fatal
6948  */
6949 static void
6950 bridge_rthash_resize(struct bridge_softc *sc)
6951 {
6952 	BRIDGE_LOCK_ASSERT_HELD(sc);
6953 
6954 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6955 		return;
6956 	}
6957 
6958 	/*
6959 	 * Four entries per hash bucket is our ideal load factor
6960 	 */
6961 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6962 		return;
6963 	}
6964 	/*
6965 	 * Hard limit on the size of the routing hash table
6966 	 */
6967 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6968 		return;
6969 	}
6970 
6971 	sc->sc_resize_call.bdc_sc = sc;
6972 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6973 	bridge_schedule_delayed_call(&sc->sc_resize_call);
6974 }
6975 
6976 /*
6977  * bridge_rtable_fini:
6978  *
6979  *	Deconstruct the route table for this bridge.
6980  */
6981 static void
6982 bridge_rtable_fini(struct bridge_softc *sc)
6983 {
6984 	KASSERT(sc->sc_brtcnt == 0,
6985 	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
6986 	kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
6987 	    sc->sc_rthash);
6988 	sc->sc_rthash = NULL;
6989 	sc->sc_rthash_size = 0;
6990 }
6991 
6992 /*
6993  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6994  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6995  */
6996 #define mix(a, b, c)                                                    \
6997 do {                                                                    \
6998 	a -= b; a -= c; a ^= (c >> 13);                                 \
6999 	b -= c; b -= a; b ^= (a << 8);                                  \
7000 	c -= a; c -= b; c ^= (b >> 13);                                 \
7001 	a -= b; a -= c; a ^= (c >> 12);                                 \
7002 	b -= c; b -= a; b ^= (a << 16);                                 \
7003 	c -= a; c -= b; c ^= (b >> 5);                                  \
7004 	a -= b; a -= c; a ^= (c >> 3);                                  \
7005 	b -= c; b -= a; b ^= (a << 10);                                 \
7006 	c -= a; c -= b; c ^= (b >> 15);                                 \
7007 } while ( /*CONSTCOND*/ 0)
7008 
7009 static __inline uint32_t
7010 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
7011 {
7012 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
7013 
7014 	b += addr[5] << 8;
7015 	b += addr[4];
7016 	a += addr[3] << 24;
7017 	a += addr[2] << 16;
7018 	a += addr[1] << 8;
7019 	a += addr[0];
7020 
7021 	mix(a, b, c);
7022 
7023 	return c & BRIDGE_RTHASH_MASK(sc);
7024 }
7025 
7026 #undef mix
7027 
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns zero when
 *	they are equal, otherwise the difference a[i] - b[i] at the first
 *	position where they differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;
	int delta;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		delta = (int)a[idx] - (int)b[idx];
		if (delta != 0) {
			return delta;
		}
	}

	return 0;
}
7039 
7040 /*
7041  * bridge_rtnode_lookup:
7042  *
7043  *	Look up a bridge route node for the specified destination. Compare the
7044  *	vlan id or if zero then just return the first match.
7045  */
7046 static struct bridge_rtnode *
7047 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
7048     uint16_t vlan)
7049 {
7050 	struct bridge_rtnode *brt;
7051 	uint32_t hash;
7052 	int dir;
7053 
7054 	BRIDGE_LOCK_ASSERT_HELD(sc);
7055 
7056 	hash = bridge_rthash(sc, addr);
7057 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7058 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7059 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7060 			return brt;
7061 		}
7062 		if (dir > 0) {
7063 			return NULL;
7064 		}
7065 	}
7066 
7067 	return NULL;
7068 }
7069 
7070 /*
7071  * bridge_rtnode_hash:
7072  *
7073  *	Insert the specified bridge node into the route hash table.
7074  *	This is used when adding a new node or to rehash when resizing
7075  *	the hash table
7076  */
7077 static int
7078 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7079 {
7080 	struct bridge_rtnode *lbrt;
7081 	uint32_t hash;
7082 	int dir;
7083 
7084 	BRIDGE_LOCK_ASSERT_HELD(sc);
7085 
7086 	hash = bridge_rthash(sc, brt->brt_addr);
7087 
7088 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7089 	if (lbrt == NULL) {
7090 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7091 		goto out;
7092 	}
7093 
7094 	do {
7095 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7096 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7097 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7098 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7099 			    sc->sc_ifp->if_xname,
7100 			    brt->brt_addr[0], brt->brt_addr[1],
7101 			    brt->brt_addr[2], brt->brt_addr[3],
7102 			    brt->brt_addr[4], brt->brt_addr[5]);
7103 			return EEXIST;
7104 		}
7105 		if (dir > 0) {
7106 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7107 			goto out;
7108 		}
7109 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7110 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7111 			goto out;
7112 		}
7113 		lbrt = LIST_NEXT(lbrt, brt_hash);
7114 	} while (lbrt != NULL);
7115 
7116 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7117 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7118 	    sc->sc_ifp->if_xname,
7119 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7120 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7121 out:
7122 	return 0;
7123 }
7124 
7125 /*
7126  * bridge_rtnode_insert:
7127  *
7128  *	Insert the specified bridge node into the route table.  We
7129  *	assume the entry is not already in the table.
7130  */
7131 static int
7132 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7133 {
7134 	int error;
7135 
7136 	error = bridge_rtnode_hash(sc, brt);
7137 	if (error != 0) {
7138 		return error;
7139 	}
7140 
7141 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7142 	sc->sc_brtcnt++;
7143 
7144 	bridge_rthash_resize(sc);
7145 
7146 	return 0;
7147 }
7148 
7149 /*
7150  * bridge_rtnode_destroy:
7151  *
7152  *	Destroy a bridge rtnode.
7153  */
7154 static void
7155 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7156 {
7157 	BRIDGE_LOCK_ASSERT_HELD(sc);
7158 
7159 	LIST_REMOVE(brt, brt_hash);
7160 
7161 	LIST_REMOVE(brt, brt_list);
7162 	sc->sc_brtcnt--;
7163 	brt->brt_dst->bif_addrcnt--;
7164 	zfree(bridge_rtnode_pool, brt);
7165 }
7166 
7167 #if BRIDGESTP
7168 /*
7169  * bridge_rtable_expire:
7170  *
7171  *	Set the expiry time for all routes on an interface.
7172  */
7173 static void
7174 bridge_rtable_expire(struct ifnet *ifp, int age)
7175 {
7176 	struct bridge_softc *sc = ifp->if_bridge;
7177 	struct bridge_rtnode *brt;
7178 
7179 	BRIDGE_LOCK(sc);
7180 
7181 	/*
7182 	 * If the age is zero then flush, otherwise set all the expiry times to
7183 	 * age for the interface
7184 	 */
7185 	if (age == 0) {
7186 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7187 	} else {
7188 		unsigned long now;
7189 
7190 		now = (unsigned long) net_uptime();
7191 
7192 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7193 			/* Cap the expiry time to 'age' */
7194 			if (brt->brt_ifp == ifp &&
7195 			    brt->brt_expire > now + age &&
7196 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7197 				brt->brt_expire = now + age;
7198 			}
7199 		}
7200 	}
7201 	BRIDGE_UNLOCK(sc);
7202 }
7203 
7204 /*
7205  * bridge_state_change:
7206  *
7207  *	Callback from the bridgestp code when a port changes states.
7208  */
7209 static void
7210 bridge_state_change(struct ifnet *ifp, int state)
7211 {
7212 	struct bridge_softc *sc = ifp->if_bridge;
7213 	static const char *stpstates[] = {
7214 		"disabled",
7215 		"listening",
7216 		"learning",
7217 		"forwarding",
7218 		"blocking",
7219 		"discarding"
7220 	};
7221 
7222 	if (log_stp) {
7223 		log(LOG_NOTICE, "%s: state changed to %s on %s",
7224 		    sc->sc_ifp->if_xname,
7225 		    stpstates[state], ifp->if_xname);
7226 	}
7227 }
7228 #endif /* BRIDGESTP */
7229 
7230 /*
7231  * bridge_set_bpf_tap:
7232  *
7233  *	Sets ups the BPF callbacks.
7234  */
7235 static errno_t
7236 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7237 {
7238 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7239 
7240 	/* TBD locking */
7241 	if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7242 		return ENODEV;
7243 	}
7244 	switch (mode) {
7245 	case BPF_TAP_DISABLE:
7246 		sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7247 		break;
7248 
7249 	case BPF_TAP_INPUT:
7250 		sc->sc_bpf_input = bpf_callback;
7251 		break;
7252 
7253 	case BPF_TAP_OUTPUT:
7254 		sc->sc_bpf_output = bpf_callback;
7255 		break;
7256 
7257 	case BPF_TAP_INPUT_OUTPUT:
7258 		sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7259 		break;
7260 
7261 	default:
7262 		break;
7263 	}
7264 
7265 	return 0;
7266 }
7267 
7268 /*
7269  * bridge_detach:
7270  *
7271  *	Callback when interface has been detached.
7272  */
7273 static void
7274 bridge_detach(ifnet_t ifp)
7275 {
7276 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7277 
7278 #if BRIDGESTP
7279 	bstp_detach(&sc->sc_stp);
7280 #endif /* BRIDGESTP */
7281 
7282 	/* Tear down the routing table. */
7283 	bridge_rtable_fini(sc);
7284 
7285 	lck_mtx_lock(&bridge_list_mtx);
7286 	LIST_REMOVE(sc, sc_list);
7287 	lck_mtx_unlock(&bridge_list_mtx);
7288 
7289 	ifnet_release(ifp);
7290 
7291 	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7292 	kfree_type(struct bridge_softc, sc);
7293 }
7294 
7295 /*
7296  * bridge_bpf_input:
7297  *
7298  *	Invoke the input BPF callback if enabled
7299  */
7300 static errno_t
7301 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7302 {
7303 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7304 	bpf_packet_func     input_func = sc->sc_bpf_input;
7305 
7306 	if (input_func != NULL) {
7307 		if (mbuf_pkthdr_rcvif(m) != ifp) {
7308 			BRIDGE_LOG(LOG_NOTICE, 0,
7309 			    "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7310 			    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7311 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7312 		}
7313 		(*input_func)(ifp, m);
7314 	}
7315 	return 0;
7316 }
7317 
7318 /*
7319  * bridge_bpf_output:
7320  *
7321  *	Invoke the output BPF callback if enabled
7322  */
7323 static errno_t
7324 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7325 {
7326 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7327 	bpf_packet_func     output_func = sc->sc_bpf_output;
7328 
7329 	if (output_func != NULL) {
7330 		(*output_func)(ifp, m);
7331 	}
7332 	return 0;
7333 }
7334 
7335 /*
7336  * bridge_link_event:
7337  *
7338  *	Report a data link event on an interface
7339  */
7340 static void
7341 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7342 {
7343 	struct event {
7344 		u_int32_t ifnet_family;
7345 		u_int32_t unit;
7346 		char if_name[IFNAMSIZ];
7347 	};
7348 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7349 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7350 	struct event *data = (struct event *)(header + 1);
7351 
7352 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7353 	    "%s event_code %u - %s", ifp->if_xname,
7354 	    event_code, dlil_kev_dl_code_str(event_code));
7355 	header->total_size   = sizeof(message);
7356 	header->vendor_code  = KEV_VENDOR_APPLE;
7357 	header->kev_class    = KEV_NETWORK_CLASS;
7358 	header->kev_subclass = KEV_DL_SUBCLASS;
7359 	header->event_code   = event_code;
7360 	data->ifnet_family   = ifnet_family(ifp);
7361 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7362 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7363 	ifnet_event(ifp, header);
7364 }
7365 
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter drop: bump the bridge_hostfilter_stats counter
 *	named 'reason', emit a debug log line of the form
 *	"<func>.<line> <reason>", and set the variable 'error' of the
 *	enclosing function to EINVAL.  The caller must have an 'error'
 *	variable in scope and must terminate the invocation with ';'.
 */
#define BRIDGE_HF_DROP(reason, func, line) do {                         \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d " #reason, func, line);               \
	        error = EINVAL;                                         \
} while (0)
7372 
7373 /*
7374  * Make sure this is a DHCP or Bootp request that match the host filter
7375  */
7376 static int
7377 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7378 {
7379 	int error = EINVAL;
7380 	struct dhcp dhcp;
7381 
7382 	/*
7383 	 * Note: We use the dhcp structure because bootp structure definition
7384 	 * is larger and some vendors do not pad the request
7385 	 */
7386 	error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7387 	if (error != 0) {
7388 		BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7389 		goto done;
7390 	}
7391 	if (dhcp.dp_op != BOOTREQUEST) {
7392 		BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7393 		goto done;
7394 	}
7395 	/*
7396 	 * The hardware address must be an exact match
7397 	 */
7398 	if (dhcp.dp_htype != ARPHRD_ETHER) {
7399 		BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7400 		goto done;
7401 	}
7402 	if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7403 		BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7404 		goto done;
7405 	}
7406 	if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7407 	    ETHER_ADDR_LEN) != 0) {
7408 		BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7409 		goto done;
7410 	}
7411 	/*
7412 	 * Client address must match the host address or be not specified
7413 	 */
7414 	if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7415 	    dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7416 		BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7417 		goto done;
7418 	}
7419 	error = 0;
7420 done:
7421 	return error;
7422 }
7423 
7424 static int
7425 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7426 {
7427 	int error = EINVAL;
7428 	struct ether_header *eh;
7429 	static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7430 	mbuf_t m = *data;
7431 
7432 	eh = mtod(m, struct ether_header *);
7433 
7434 	/*
7435 	 * Restrict the source hardware address
7436 	 */
7437 	if ((bif->bif_flags & BIFF_HF_HWSRC) == 0 ||
7438 	    bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7439 	    ETHER_ADDR_LEN) != 0) {
7440 		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7441 		goto done;
7442 	}
7443 
7444 	/*
7445 	 * Restrict Ethernet protocols to ARP and IP
7446 	 */
7447 	if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7448 		struct ether_arp *ea;
7449 		size_t minlen = sizeof(struct ether_header) +
7450 		    sizeof(struct ether_arp);
7451 
7452 		/*
7453 		 * Make the Ethernet and ARP headers contiguous
7454 		 */
7455 		if (mbuf_pkthdr_len(m) < minlen) {
7456 			BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7457 			goto done;
7458 		}
7459 		if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7460 			BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7461 			    __func__, __LINE__);
7462 			goto done;
7463 		}
7464 		m = *data;
7465 
7466 		/*
7467 		 * Verify this is an ethernet/ip arp
7468 		 */
7469 		eh = mtod(m, struct ether_header *);
7470 		ea = (struct ether_arp *)(eh + 1);
7471 		if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7472 			BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7473 			    __func__, __LINE__);
7474 			goto done;
7475 		}
7476 		if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7477 			BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7478 			    __func__, __LINE__);
7479 			goto done;
7480 		}
7481 		/*
7482 		 * Verify the address lengths are correct
7483 		 */
7484 		if (ea->arp_hln != ETHER_ADDR_LEN) {
7485 			BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7486 			goto done;
7487 		}
7488 		if (ea->arp_pln != sizeof(struct in_addr)) {
7489 			BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7490 			    __func__, __LINE__);
7491 			goto done;
7492 		}
7493 
7494 		/*
7495 		 * Allow only ARP request or ARP reply
7496 		 */
7497 		if (ea->arp_op != htons(ARPOP_REQUEST) &&
7498 		    ea->arp_op != htons(ARPOP_REPLY)) {
7499 			BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7500 			goto done;
7501 		}
7502 		/*
7503 		 * Verify source hardware address matches
7504 		 */
7505 		if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7506 		    ETHER_ADDR_LEN) != 0) {
7507 			BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7508 			goto done;
7509 		}
7510 		/*
7511 		 * Verify source protocol address:
7512 		 * May be null for an ARP probe
7513 		 */
7514 		if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7515 		    sizeof(struct in_addr)) != 0 &&
7516 		    bcmp(ea->arp_spa, &inaddr_any,
7517 		    sizeof(struct in_addr)) != 0) {
7518 			BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7519 			goto done;
7520 		}
7521 		bridge_hostfilter_stats.brhf_arp_ok += 1;
7522 		error = 0;
7523 	} else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7524 		size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7525 		struct ip iphdr;
7526 		size_t offset;
7527 
7528 		/*
7529 		 * Make the Ethernet and IP headers contiguous
7530 		 */
7531 		if (mbuf_pkthdr_len(m) < minlen) {
7532 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7533 			goto done;
7534 		}
7535 		offset = sizeof(struct ether_header);
7536 		error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7537 		if (error != 0) {
7538 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7539 			goto done;
7540 		}
7541 		/*
7542 		 * Verify the source IP address
7543 		 */
7544 		if (iphdr.ip_p == IPPROTO_UDP) {
7545 			struct udphdr udp;
7546 
7547 			minlen += sizeof(struct udphdr);
7548 			if (mbuf_pkthdr_len(m) < minlen) {
7549 				BRIDGE_HF_DROP(brhf_ip_too_small,
7550 				    __func__, __LINE__);
7551 				goto done;
7552 			}
7553 
7554 			/*
7555 			 * Allow all zero addresses for DHCP requests
7556 			 */
7557 			if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7558 			    iphdr.ip_src.s_addr != INADDR_ANY) {
7559 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7560 				    __func__, __LINE__);
7561 				goto done;
7562 			}
7563 			offset = sizeof(struct ether_header) +
7564 			    (IP_VHL_HL(iphdr.ip_vhl) << 2);
7565 			error = mbuf_copydata(m, offset,
7566 			    sizeof(struct udphdr), &udp);
7567 			if (error != 0) {
7568 				BRIDGE_HF_DROP(brhf_ip_too_small,
7569 				    __func__, __LINE__);
7570 				goto done;
7571 			}
7572 			/*
7573 			 * Either it's a Bootp/DHCP packet that we like or
7574 			 * it's a UDP packet from the host IP as source address
7575 			 */
7576 			if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7577 			    udp.uh_dport == htons(IPPORT_BOOTPS)) {
7578 				minlen += sizeof(struct dhcp);
7579 				if (mbuf_pkthdr_len(m) < minlen) {
7580 					BRIDGE_HF_DROP(brhf_ip_too_small,
7581 					    __func__, __LINE__);
7582 					goto done;
7583 				}
7584 				offset += sizeof(struct udphdr);
7585 				error = bridge_dhcp_filter(bif, m, offset);
7586 				if (error != 0) {
7587 					goto done;
7588 				}
7589 			} else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7590 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7591 				    __func__, __LINE__);
7592 				goto done;
7593 			}
7594 		} else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr ||
7595 		    bif->bif_hf_ipsrc.s_addr == INADDR_ANY) {
7596 			BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7597 			goto done;
7598 		}
7599 		/*
7600 		 * Allow only boring IP protocols
7601 		 */
7602 		if (iphdr.ip_p != IPPROTO_TCP &&
7603 		    iphdr.ip_p != IPPROTO_UDP &&
7604 		    iphdr.ip_p != IPPROTO_ICMP &&
7605 		    iphdr.ip_p != IPPROTO_ESP &&
7606 		    iphdr.ip_p != IPPROTO_AH &&
7607 		    iphdr.ip_p != IPPROTO_GRE) {
7608 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7609 			goto done;
7610 		}
7611 		bridge_hostfilter_stats.brhf_ip_ok += 1;
7612 		error = 0;
7613 	} else {
7614 		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7615 		goto done;
7616 	}
7617 done:
7618 	if (error != 0) {
7619 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7620 			if (m) {
7621 				brlog_mbuf_data(m, 0,
7622 				    sizeof(struct ether_header) +
7623 				    sizeof(struct ip));
7624 			}
7625 		}
7626 
7627 		if (m != NULL) {
7628 			m_freem(m);
7629 		}
7630 	}
7631 	return error;
7632 }
7633 
7634 /*
7635  * MAC NAT
7636  */
7637 
7638 static errno_t
7639 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7640 {
7641 	errno_t         error = 0;
7642 
7643 	BRIDGE_LOCK_ASSERT_HELD(sc);
7644 
7645 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7646 		error = EINVAL;
7647 		goto done;
7648 	}
7649 	if (sc->sc_mac_nat_bif != NULL) {
7650 		if (sc->sc_mac_nat_bif != bif) {
7651 			error = EBUSY;
7652 		}
7653 		goto done;
7654 	}
7655 	sc->sc_mac_nat_bif = bif;
7656 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7657 	bridge_mac_nat_populate_entries(sc);
7658 
7659 done:
7660 	return error;
7661 }
7662 
7663 static void
7664 bridge_mac_nat_disable(struct bridge_softc *sc)
7665 {
7666 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7667 
7668 	assert(mac_nat_bif != NULL);
7669 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7670 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7671 	sc->sc_mac_nat_bif = NULL;
7672 	return;
7673 }
7674 
7675 static void
7676 mac_nat_entry_print2(struct mac_nat_entry *mne,
7677     char *ifname, const char *msg1, const char *msg2)
7678 {
7679 	int             af;
7680 	char            etopbuf[24];
7681 	char            ntopbuf[MAX_IPv6_STR_LEN];
7682 	const char      *space;
7683 
7684 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7685 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7686 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7687 	if (msg2 == NULL) {
7688 		msg2 = "";
7689 		space = "";
7690 	} else {
7691 		space = " ";
7692 	}
7693 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7694 	    "%s %s%s%s %p (%s, %s, %s)",
7695 	    ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7696 	    ntopbuf, etopbuf);
7697 }
7698 
7699 static void
7700 mac_nat_entry_print(struct mac_nat_entry *mne,
7701     char *ifname, const char *msg)
7702 {
7703 	mac_nat_entry_print2(mne, ifname, msg, NULL);
7704 }
7705 
7706 static struct mac_nat_entry *
7707 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7708 {
7709 	struct mac_nat_entry    *mne;
7710 	struct mac_nat_entry    *ret_mne = NULL;
7711 
7712 	if (af == AF_INET) {
7713 		in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7714 
7715 		LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7716 			if (mne->mne_ip.s_addr == s_addr) {
7717 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7718 					mac_nat_entry_print(mne, sc->sc_if_xname,
7719 					    "found");
7720 				}
7721 				ret_mne = mne;
7722 				break;
7723 			}
7724 		}
7725 	} else {
7726 		const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7727 
7728 		LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7729 			if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7730 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7731 					mac_nat_entry_print(mne, sc->sc_if_xname,
7732 					    "found");
7733 				}
7734 				ret_mne = mne;
7735 				break;
7736 			}
7737 		}
7738 	}
7739 	return ret_mne;
7740 }
7741 
7742 static void
7743 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7744     struct mac_nat_entry *mne, const char *reason)
7745 {
7746 	LIST_REMOVE(mne, mne_list);
7747 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7748 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7749 	}
7750 	zfree(bridge_mne_pool, mne);
7751 	sc->sc_mne_count--;
7752 }
7753 
7754 static struct mac_nat_entry *
7755 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7756     struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7757 {
7758 	struct mac_nat_entry_list *list;
7759 	struct mac_nat_entry *mne;
7760 
7761 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7762 		sc->sc_mne_allocation_failures++;
7763 		return NULL;
7764 	}
7765 	mne = zalloc_noblock(bridge_mne_pool);
7766 	if (mne == NULL) {
7767 		sc->sc_mne_allocation_failures++;
7768 		return NULL;
7769 	}
7770 	sc->sc_mne_count++;
7771 	bzero(mne, sizeof(*mne));
7772 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7773 	mne->mne_bif = bif;
7774 	if (af == AF_INET) {
7775 		bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7776 		list = &sc->sc_mne_list;
7777 	} else {
7778 		bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7779 		mne->mne_flags |= MNE_FLAGS_IPV6;
7780 		list = &sc->sc_mne_list_v6;
7781 	}
7782 	LIST_INSERT_HEAD(list, mne, mne_list);
7783 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7784 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7785 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7786 	}
7787 	return mne;
7788 }
7789 
7790 static struct mac_nat_entry *
7791 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7792     struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7793 {
7794 	struct mac_nat_entry *mne;
7795 
7796 	mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7797 	if (mne != NULL) {
7798 		struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7799 
7800 		if (mne->mne_bif == mac_nat_bif) {
7801 			/* the MAC NAT interface takes precedence */
7802 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7803 				if (mne->mne_bif != bif) {
7804 					mac_nat_entry_print2(mne,
7805 					    sc->sc_if_xname, "reject",
7806 					    bif->bif_ifp->if_xname);
7807 				}
7808 			}
7809 		} else if (mne->mne_bif != bif) {
7810 			const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7811 
7812 			mne->mne_bif = bif;
7813 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7814 				mac_nat_entry_print2(mne,
7815 				    sc->sc_if_xname, "replaced",
7816 				    old_if);
7817 			}
7818 			bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7819 		}
7820 		mne->mne_expire = (unsigned long)net_uptime() +
7821 		    sc->sc_brttimeout;
7822 	} else {
7823 		mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7824 	}
7825 	return mne;
7826 }
7827 
7828 static void
7829 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7830     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7831 {
7832 	struct mac_nat_entry *mne;
7833 	struct mac_nat_entry *tmne;
7834 
7835 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7836 		if (bif != NULL && mne->mne_bif != bif) {
7837 			continue;
7838 		}
7839 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7840 	}
7841 }
7842 
7843 /*
7844  * bridge_mac_nat_flush_entries:
7845  *
7846  * Flush MAC NAT entries for the specified member. Flush all entries if
7847  * the member is the one that requires MAC NAT, otherwise just flush the
7848  * ones for the specified member.
7849  */
7850 static void
7851 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7852 {
7853 	struct bridge_iflist *flush_bif;
7854 
7855 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7856 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7857 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7858 }
7859 
7860 static void
7861 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7862 {
7863 	errno_t                 error;
7864 	ifnet_t                 ifp;
7865 	ifaddr_t                *list;
7866 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
7867 
7868 	assert(mac_nat_bif != NULL);
7869 	ifp = mac_nat_bif->bif_ifp;
7870 	error = ifnet_get_address_list(ifp, &list);
7871 	if (error != 0) {
7872 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7873 		    "ifnet_get_address_list(%s) failed %d",
7874 		    ifp->if_xname, error);
7875 		return;
7876 	}
7877 	for (ifaddr_t *scan = list; *scan != NULL; scan++) {
7878 		sa_family_t     af;
7879 		void            *ip;
7880 
7881 		union {
7882 			struct sockaddr         sa;
7883 			struct sockaddr_in      sin;
7884 			struct sockaddr_in6     sin6;
7885 		} u;
7886 		af = ifaddr_address_family(*scan);
7887 		switch (af) {
7888 		case AF_INET:
7889 		case AF_INET6:
7890 			error = ifaddr_address(*scan, &u.sa, sizeof(u));
7891 			if (error != 0) {
7892 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7893 				    "ifaddr_address failed %d",
7894 				    error);
7895 				break;
7896 			}
7897 			if (af == AF_INET) {
7898 				ip = (void *)&u.sin.sin_addr;
7899 			} else {
7900 				if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
7901 					/* remove scope ID */
7902 					u.sin6.sin6_addr.s6_addr16[1] = 0;
7903 				}
7904 				ip = (void *)&u.sin6.sin6_addr;
7905 			}
7906 			bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
7907 			    (uint8_t *)IF_LLADDR(ifp));
7908 			break;
7909 		default:
7910 			break;
7911 		}
7912 	}
7913 	ifnet_free_address_list(list);
7914 	return;
7915 }
7916 
7917 static void
7918 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7919     struct mac_nat_entry_list *list, unsigned long now)
7920 {
7921 	struct mac_nat_entry *mne;
7922 	struct mac_nat_entry *tmne;
7923 
7924 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7925 		if (now >= mne->mne_expire) {
7926 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7927 		}
7928 	}
7929 }
7930 
7931 static void
7932 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7933 {
7934 	if (sc->sc_mac_nat_bif == NULL) {
7935 		return;
7936 	}
7937 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7938 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7939 }
7940 
7941 static const char *
7942 get_in_out_string(boolean_t is_output)
7943 {
7944 	return is_output ? "OUT" : "IN";
7945 }
7946 
7947 /*
7948  * is_valid_arp_packet:
7949  *	Verify that this is a valid ARP packet.
7950  *
7951  *	Returns TRUE if the packet is valid, FALSE otherwise.
7952  */
7953 static boolean_t
7954 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
7955     struct ether_header **eh_p, struct ether_arp **ea_p)
7956 {
7957 	struct ether_arp *ea;
7958 	struct ether_header *eh;
7959 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7960 	boolean_t is_valid = FALSE;
7961 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7962 
7963 	if (mbuf_pkthdr_len(*data) < minlen) {
7964 		BRIDGE_LOG(LOG_DEBUG, flags,
7965 		    "ARP %s short frame %lu < %lu",
7966 		    get_in_out_string(is_output),
7967 		    mbuf_pkthdr_len(*data), minlen);
7968 		goto done;
7969 	}
7970 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7971 		BRIDGE_LOG(LOG_DEBUG, flags,
7972 		    "ARP %s size %lu mbuf_pullup fail",
7973 		    get_in_out_string(is_output),
7974 		    minlen);
7975 		*data = NULL;
7976 		goto done;
7977 	}
7978 
7979 	/* validate ARP packet */
7980 	eh = mtod(*data, struct ether_header *);
7981 	ea = (struct ether_arp *)(eh + 1);
7982 	if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
7983 		BRIDGE_LOG(LOG_DEBUG, flags,
7984 		    "ARP %s htype not ethernet",
7985 		    get_in_out_string(is_output));
7986 		goto done;
7987 	}
7988 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7989 		BRIDGE_LOG(LOG_DEBUG, flags,
7990 		    "ARP %s hlen not ethernet",
7991 		    get_in_out_string(is_output));
7992 		goto done;
7993 	}
7994 	if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
7995 		BRIDGE_LOG(LOG_DEBUG, flags,
7996 		    "ARP %s ptype not IP",
7997 		    get_in_out_string(is_output));
7998 		goto done;
7999 	}
8000 	if (ea->arp_pln != sizeof(struct in_addr)) {
8001 		BRIDGE_LOG(LOG_DEBUG, flags,
8002 		    "ARP %s plen not IP",
8003 		    get_in_out_string(is_output));
8004 		goto done;
8005 	}
8006 	is_valid = TRUE;
8007 	*ea_p = ea;
8008 	*eh_p = eh;
8009 done:
8010 	return is_valid;
8011 }
8012 
8013 static struct mac_nat_entry *
8014 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
8015 {
8016 	struct ether_arp        *ea;
8017 	struct ether_header     *eh;
8018 	struct mac_nat_entry    *mne = NULL;
8019 	u_short                 op;
8020 	struct in_addr          tpa;
8021 
8022 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
8023 		goto done;
8024 	}
8025 	op = ntohs(ea->arp_op);
8026 	switch (op) {
8027 	case ARPOP_REQUEST:
8028 	case ARPOP_REPLY:
8029 		/* only care about REQUEST and REPLY */
8030 		break;
8031 	default:
8032 		goto done;
8033 	}
8034 
8035 	/* check the target IP address for a NAT entry */
8036 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
8037 	if (tpa.s_addr != 0) {
8038 		mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
8039 	}
8040 	if (mne != NULL) {
8041 		if (op == ARPOP_REPLY) {
8042 			/* translate the MAC address */
8043 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8044 				char    mac_src[24];
8045 				char    mac_dst[24];
8046 
8047 				ether_ntop(mac_src, sizeof(mac_src),
8048 				    ea->arp_tha);
8049 				ether_ntop(mac_dst, sizeof(mac_dst),
8050 				    mne->mne_mac);
8051 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8052 				    "%s %s ARP %s -> %s",
8053 				    sc->sc_if_xname,
8054 				    mne->mne_bif->bif_ifp->if_xname,
8055 				    mac_src, mac_dst);
8056 			}
8057 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
8058 		}
8059 	} else {
8060 		/* handle conflicting ARP (sender matches mne) */
8061 		struct in_addr spa;
8062 
8063 		bcopy(ea->arp_spa, &spa, sizeof(spa));
8064 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
8065 			/* check the source IP for a NAT entry */
8066 			mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
8067 		}
8068 	}
8069 
8070 done:
8071 	return mne;
8072 }
8073 
8074 static boolean_t
8075 bridge_mac_nat_arp_output(struct bridge_softc *sc,
8076     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8077 {
8078 	struct ether_arp        *ea;
8079 	struct ether_header     *eh;
8080 	struct in_addr          ip;
8081 	struct mac_nat_entry    *mne = NULL;
8082 	u_short                 op;
8083 	boolean_t               translate = FALSE;
8084 
8085 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
8086 		goto done;
8087 	}
8088 	op = ntohs(ea->arp_op);
8089 	switch (op) {
8090 	case ARPOP_REQUEST:
8091 	case ARPOP_REPLY:
8092 		/* only care about REQUEST and REPLY */
8093 		break;
8094 	default:
8095 		goto done;
8096 	}
8097 
8098 	bcopy(ea->arp_spa, &ip, sizeof(ip));
8099 	if (ip.s_addr == 0) {
8100 		goto done;
8101 	}
8102 	/* XXX validate IP address: no multicast/broadcast */
8103 	mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
8104 	if (mnr != NULL && mne != NULL) {
8105 		/* record the offset to do the replacement */
8106 		translate = TRUE;
8107 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8108 	}
8109 
8110 done:
8111 	return translate;
8112 }
8113 
8114 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
8115 	                         + sizeof(struct ip))
8116 static struct ether_header *
8117 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8118 {
8119 	struct ether_header     *eh = NULL;
8120 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8121 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
8122 
8123 	if (mbuf_pkthdr_len(*data) < minlen) {
8124 		BRIDGE_LOG(LOG_DEBUG, flags,
8125 		    "IP %s short frame %lu < %lu",
8126 		    get_in_out_string(is_output),
8127 		    mbuf_pkthdr_len(*data), minlen);
8128 		goto done;
8129 	}
8130 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8131 		BRIDGE_LOG(LOG_DEBUG, flags,
8132 		    "IP %s size %lu mbuf_pullup fail",
8133 		    get_in_out_string(is_output),
8134 		    minlen);
8135 		*data = NULL;
8136 		goto done;
8137 	}
8138 	eh = mtod(*data, struct ether_header *);
8139 done:
8140 	return eh;
8141 }
8142 
8143 static bool
8144 is_broadcast_ip_packet(mbuf_t *data)
8145 {
8146 	struct ether_header     *eh;
8147 	uint16_t                ether_type;
8148 	bool                    is_broadcast = FALSE;
8149 
8150 	eh = mtod(*data, struct ether_header *);
8151 	ether_type = ntohs(eh->ether_type);
8152 	switch (ether_type) {
8153 	case ETHERTYPE_IP:
8154 		eh = get_ether_ip_header(data, FALSE);
8155 		if (eh != NULL) {
8156 			struct in_addr  dst;
8157 			struct ip       *iphdr;
8158 
8159 			iphdr = (struct ip *)(void *)(eh + 1);
8160 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8161 			is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8162 		}
8163 		break;
8164 	default:
8165 		break;
8166 	}
8167 	return is_broadcast;
8168 }
8169 
8170 static struct mac_nat_entry *
8171 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8172 {
8173 	struct in_addr          dst;
8174 	struct ether_header     *eh;
8175 	struct ip               *iphdr;
8176 	struct mac_nat_entry    *mne = NULL;
8177 
8178 	eh = get_ether_ip_header(data, FALSE);
8179 	if (eh == NULL) {
8180 		goto done;
8181 	}
8182 	iphdr = (struct ip *)(void *)(eh + 1);
8183 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8184 	/* XXX validate IP address */
8185 	if (dst.s_addr == 0) {
8186 		goto done;
8187 	}
8188 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
8189 done:
8190 	return mne;
8191 }
8192 
8193 static void
8194 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8195     struct bridge_iflist *bif, mbuf_t m,
8196     uint8_t ip_header_len, struct mac_nat_record *mnr)
8197 {
8198 	uint16_t        dp_flags;
8199 	errno_t         error;
8200 	size_t          offset;
8201 	struct udphdr   udphdr;
8202 
8203 	/* copy the UDP header */
8204 	offset = sizeof(struct ether_header) + ip_header_len;
8205 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8206 	if (error != 0) {
8207 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8208 		    "mbuf_copydata udphdr failed %d",
8209 		    error);
8210 		return;
8211 	}
8212 	if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8213 	    ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8214 		/* not a BOOTP/DHCP packet */
8215 		return;
8216 	}
8217 	/* check whether the broadcast bit is already set */
8218 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8219 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8220 	if (error != 0) {
8221 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8222 		    "mbuf_copydata dp_flags failed %d",
8223 		    error);
8224 		return;
8225 	}
8226 	if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8227 		/* it's already set, nothing to do */
8228 		return;
8229 	}
8230 	/* broadcast bit needs to be set */
8231 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8232 	mnr->mnr_ip_header_len = ip_header_len;
8233 	if (udphdr.uh_sum != 0) {
8234 		uint16_t        delta;
8235 
8236 		/* adjust checksum to take modified dp_flags into account */
8237 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8238 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8239 	}
8240 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8241 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8242 	    sc->sc_if_xname,
8243 	    bif->bif_ifp->if_xname,
8244 	    ntohs(mnr->mnr_ip_dhcp_flags),
8245 	    ntohs(mnr->mnr_ip_udp_csum));
8246 	return;
8247 }
8248 
8249 static boolean_t
8250 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8251     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8252 {
8253 #pragma unused(mnr)
8254 	struct ether_header     *eh;
8255 	struct in_addr          ip;
8256 	struct ip               *iphdr;
8257 	uint8_t                 ip_header_len;
8258 	struct mac_nat_entry    *mne = NULL;
8259 	boolean_t               translate = FALSE;
8260 
8261 	eh = get_ether_ip_header(data, TRUE);
8262 	if (eh == NULL) {
8263 		goto done;
8264 	}
8265 	iphdr = (struct ip *)(void *)(eh + 1);
8266 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8267 	if (ip_header_len < sizeof(ip)) {
8268 		/* bogus IP header */
8269 		goto done;
8270 	}
8271 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8272 	/* XXX validate the source address */
8273 	if (ip.s_addr != 0) {
8274 		mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
8275 		    eh->ether_shost);
8276 	}
8277 	if (mnr != NULL) {
8278 		if (iphdr->ip_p == IPPROTO_UDP) {
8279 			/* handle DHCP must broadcast */
8280 			bridge_mac_nat_udp_output(sc, bif, *data,
8281 			    ip_header_len, mnr);
8282 		}
8283 		translate = TRUE;
8284 	}
8285 done:
8286 	return translate;
8287 }
8288 
8289 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8290 	                         + sizeof(struct ip6_hdr))
8291 static struct ether_header *
8292 get_ether_ipv6_header(mbuf_t *data, size_t plen, boolean_t is_output)
8293 {
8294 	struct ether_header     *eh = NULL;
8295 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8296 	size_t          minlen = ETHER_IPV6_HEADER_LEN + plen;
8297 
8298 	if (mbuf_pkthdr_len(*data) < minlen) {
8299 		BRIDGE_LOG(LOG_DEBUG, flags,
8300 		    "IP %s short frame %lu < %lu",
8301 		    get_in_out_string(is_output),
8302 		    mbuf_pkthdr_len(*data), minlen);
8303 		goto done;
8304 	}
8305 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8306 		BRIDGE_LOG(LOG_DEBUG, flags,
8307 		    "IP %s size %lu mbuf_pullup fail",
8308 		    get_in_out_string(is_output),
8309 		    minlen);
8310 		*data = NULL;
8311 		goto done;
8312 	}
8313 	eh = mtod(*data, struct ether_header *);
8314 done:
8315 	return eh;
8316 }
8317 
8318 #include <netinet/icmp6.h>
8319 #include <netinet6/nd6.h>
8320 
8321 #define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8322 
8323 static void
8324 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc,
8325     struct bridge_iflist *bif,
8326     mbuf_t *data, struct ip6_hdr *ip6h,
8327     struct in6_addr *saddrp,
8328     struct mac_nat_record *mnr)
8329 {
8330 	struct ether_header *eh;
8331 	struct icmp6_hdr *icmp6;
8332 	uint8_t         icmp6_type;
8333 	uint32_t        icmp6len;
8334 	int             lladdrlen = 0;
8335 	char            *lladdr = NULL;
8336 	unsigned int    off = sizeof(*ip6h);
8337 
8338 	icmp6len = (u_int32_t)ntohs(ip6h->ip6_plen);
8339 	if (icmp6len < sizeof(*icmp6)) {
8340 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8341 		    "short IPv6 payload length %d < %lu",
8342 		    icmp6len, sizeof(*icmp6));
8343 		return;
8344 	}
8345 
8346 	/* pullup IP6 header + ICMPv6 header */
8347 	eh = get_ether_ipv6_header(data, sizeof(*icmp6), TRUE);
8348 	if (eh == NULL) {
8349 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8350 		    "failed to pullup icmp6 header");
8351 		return;
8352 	}
8353 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8354 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8355 	icmp6_type = icmp6->icmp6_type;
8356 	switch (icmp6_type) {
8357 	case ND_NEIGHBOR_SOLICIT:
8358 	case ND_NEIGHBOR_ADVERT:
8359 	case ND_ROUTER_ADVERT:
8360 	case ND_ROUTER_SOLICIT:
8361 		break;
8362 	default:
8363 		return;
8364 	}
8365 
8366 	/* pullup IP6 header + payload */
8367 	eh = get_ether_ipv6_header(data, icmp6len, TRUE);
8368 	if (eh == NULL) {
8369 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8370 		    "failed to pullup icmp6 + payload");
8371 		return;
8372 	}
8373 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8374 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8375 	switch (icmp6_type) {
8376 	case ND_NEIGHBOR_SOLICIT: {
8377 		struct nd_neighbor_solicit *nd_ns;
8378 		union nd_opts ndopts;
8379 		boolean_t is_dad_probe;
8380 		struct in6_addr taddr;
8381 
8382 		if (icmp6len < sizeof(*nd_ns)) {
8383 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8384 			    "short nd_ns %d < %lu",
8385 			    icmp6len, sizeof(*nd_ns));
8386 			return;
8387 		}
8388 
8389 		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8390 		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8391 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8392 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8393 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8394 			    "invalid target ignored");
8395 			return;
8396 		}
8397 		/* parse options */
8398 		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8399 		if (nd6_options(&ndopts) < 0) {
8400 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8401 			    "invalid ND6 NS option");
8402 			return;
8403 		}
8404 		if (ndopts.nd_opts_src_lladdr != NULL) {
8405 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8406 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8407 		}
8408 		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8409 		if (lladdr != NULL) {
8410 			if (is_dad_probe) {
8411 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8412 				    "bad ND6 DAD packet");
8413 				return;
8414 			}
8415 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8416 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8417 				    "source lladdrlen %d != %lu",
8418 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8419 				return;
8420 			}
8421 		}
8422 		if (is_dad_probe) {
8423 			/* node is trying use taddr, create an mne for taddr */
8424 			*saddrp = taddr;
8425 		}
8426 		break;
8427 	}
8428 	case ND_NEIGHBOR_ADVERT: {
8429 		struct nd_neighbor_advert *nd_na;
8430 		union nd_opts ndopts;
8431 		struct in6_addr taddr;
8432 
8433 
8434 		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8435 
8436 		if (icmp6len < sizeof(*nd_na)) {
8437 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8438 			    "short nd_na %d < %lu",
8439 			    icmp6len, sizeof(*nd_na));
8440 			return;
8441 		}
8442 
8443 		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8444 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8445 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8446 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8447 			    "invalid target ignored");
8448 			return;
8449 		}
8450 		/* parse options */
8451 		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8452 		if (nd6_options(&ndopts) < 0) {
8453 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8454 			    "invalid ND6 NA option");
8455 			return;
8456 		}
8457 		if (ndopts.nd_opts_tgt_lladdr == NULL) {
8458 			/* target linklayer, nothing to do */
8459 			return;
8460 		}
8461 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
8462 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
8463 		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8464 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8465 			    "target lladdrlen %d != %lu",
8466 			    lladdrlen, ETHER_ND_LLADDR_LEN);
8467 			return;
8468 		}
8469 		break;
8470 	}
8471 	case ND_ROUTER_ADVERT:
8472 	case ND_ROUTER_SOLICIT: {
8473 		union nd_opts ndopts;
8474 		uint32_t type_length;
8475 		const char *description;
8476 
8477 		if (icmp6_type == ND_ROUTER_ADVERT) {
8478 			type_length = sizeof(struct nd_router_advert);
8479 			description = "RA";
8480 		} else {
8481 			type_length = sizeof(struct nd_router_solicit);
8482 			description = "RS";
8483 		}
8484 		if (icmp6len < type_length) {
8485 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8486 			    "short ND6 %s %d < %d",
8487 			    description, icmp6len, type_length);
8488 			return;
8489 		}
8490 		/* parse options */
8491 		nd6_option_init(((uint8_t *)icmp6) + type_length,
8492 		    icmp6len - type_length, &ndopts);
8493 		if (nd6_options(&ndopts) < 0) {
8494 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8495 			    "invalid ND6 %s option", description);
8496 			return;
8497 		}
8498 		if (ndopts.nd_opts_src_lladdr != NULL) {
8499 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8500 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8501 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8502 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8503 				    "source lladdrlen %d != %lu",
8504 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8505 				return;
8506 			}
8507 		}
8508 		break;
8509 	}
8510 	default:
8511 		break;
8512 	}
8513 	if (lladdr != NULL) {
8514 		mnr->mnr_ip6_lladdr_offset = (uint16_t)
8515 		    ((uintptr_t)lladdr - (uintptr_t)eh);
8516 		mnr->mnr_ip6_icmp6_len = icmp6len;
8517 		mnr->mnr_ip6_icmp6_type = icmp6_type;
8518 		mnr->mnr_ip6_header_len = off;
8519 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8520 			const char *str;
8521 
8522 			switch (mnr->mnr_ip6_icmp6_type) {
8523 			case ND_ROUTER_ADVERT:
8524 				str = "ROUTER ADVERT";
8525 				break;
8526 			case ND_ROUTER_SOLICIT:
8527 				str = "ROUTER SOLICIT";
8528 				break;
8529 			case ND_NEIGHBOR_ADVERT:
8530 				str = "NEIGHBOR ADVERT";
8531 				break;
8532 			case ND_NEIGHBOR_SOLICIT:
8533 				str = "NEIGHBOR SOLICIT";
8534 				break;
8535 			default:
8536 				str = "";
8537 				break;
8538 			}
8539 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8540 			    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8541 			    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8542 			    mnr->mnr_ip6_header_len,
8543 			    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8544 		}
8545 	}
8546 }
8547 
8548 static struct mac_nat_entry *
8549 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8550 {
8551 	struct in6_addr         dst;
8552 	struct ether_header     *eh;
8553 	struct ip6_hdr          *ip6h;
8554 	struct mac_nat_entry    *mne = NULL;
8555 
8556 	eh = get_ether_ipv6_header(data, 0, FALSE);
8557 	if (eh == NULL) {
8558 		goto done;
8559 	}
8560 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8561 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8562 	/* XXX validate IPv6 address */
8563 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8564 		goto done;
8565 	}
8566 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8567 
8568 done:
8569 	return mne;
8570 }
8571 
8572 static boolean_t
8573 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8574     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8575 {
8576 	struct ether_header     *eh;
8577 	ether_addr_t            ether_shost;
8578 	struct ip6_hdr          *ip6h;
8579 	struct in6_addr         saddr;
8580 	boolean_t               translate;
8581 
8582 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8583 	eh = get_ether_ipv6_header(data, 0, TRUE);
8584 	if (eh == NULL) {
8585 		translate = FALSE;
8586 		goto done;
8587 	}
8588 	bcopy(eh->ether_shost, &ether_shost, sizeof(ether_shost));
8589 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8590 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8591 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8592 		bridge_mac_nat_icmpv6_output(sc, bif, data, ip6h, &saddr, mnr);
8593 	}
8594 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8595 		goto done;
8596 	}
8597 	(void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8598 	    ether_shost.octet);
8599 
8600 done:
8601 	return translate;
8602 }
8603 
8604 /*
8605  * bridge_mac_nat_input:
8606  * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8607  * This interface is the "external" interface with respect to NAT.
8608  * The interface is only capable of receiving a single MAC address
8609  * (e.g. a Wi-Fi STA interface).
8610  *
8611  * When a packet arrives on the external interface, look up the destination
8612  * IP address in the mac_nat_entry table. If there is a match, *is_input
8613  * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8614  * is set to FALSE and translate the MAC address if necessary.
8615  *
8616  * Returns:
8617  * The internal interface to direct the packet to, or NULL if the packet
8618  * should not be redirected.
8619  *
8620  * *data may be updated to point at a different mbuf chain, or set to NULL
8621  * if the chain was deallocated during processing.
8622  */
8623 static ifnet_t
8624 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8625     boolean_t *is_input)
8626 {
8627 	ifnet_t                 dst_if = NULL;
8628 	struct ether_header     *eh;
8629 	uint16_t                ether_type;
8630 	boolean_t               is_unicast;
8631 	mbuf_t                  m = *data;
8632 	struct mac_nat_entry    *mne = NULL;
8633 
8634 	BRIDGE_LOCK_ASSERT_HELD(sc);
8635 	*is_input = FALSE;
8636 	assert(sc->sc_mac_nat_bif != NULL);
8637 	is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8638 	eh = mtod(m, struct ether_header *);
8639 	ether_type = ntohs(eh->ether_type);
8640 	switch (ether_type) {
8641 	case ETHERTYPE_ARP:
8642 		mne = bridge_mac_nat_arp_input(sc, data);
8643 		break;
8644 	case ETHERTYPE_IP:
8645 		if (is_unicast) {
8646 			mne = bridge_mac_nat_ip_input(sc, data);
8647 		}
8648 		break;
8649 	case ETHERTYPE_IPV6:
8650 		if (is_unicast) {
8651 			mne = bridge_mac_nat_ipv6_input(sc, data);
8652 		}
8653 		break;
8654 	default:
8655 		break;
8656 	}
8657 	if (mne != NULL) {
8658 		if (is_unicast) {
8659 			if (m != *data) {
8660 				/* it may have changed */
8661 				eh = mtod(*data, struct ether_header *);
8662 			}
8663 			bcopy(mne->mne_mac, eh->ether_dhost,
8664 			    sizeof(eh->ether_dhost));
8665 		}
8666 		dst_if = mne->mne_bif->bif_ifp;
8667 		*is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8668 	}
8669 	return dst_if;
8670 }
8671 
8672 /*
8673  * bridge_mac_nat_output:
8674  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8675  * from the interface 'bif'.
8676  *
8677  * Create a mac_nat_entry containing the source IP address and MAC address
8678  * from the packet. Populate a mac_nat_record with information detailing
8679  * how to translate the packet. Translation takes place later when
8680  * the bridge lock is no longer held.
8681  *
8682  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8683  * interface is generating an output packet. No translation is required in this
8684  * case, we just record the IP address used to prevent another bif from
8685  * claiming our IP address.
8686  *
8687  * Returns:
8688  * TRUE if the packet should be translated (*mnr updated as well),
8689  * FALSE otherwise.
8690  *
8691  * *data may be updated to point at a different mbuf chain or NULL if
8692  * the chain was deallocated during processing.
8693  */
8694 
8695 static boolean_t
8696 bridge_mac_nat_output(struct bridge_softc *sc,
8697     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8698 {
8699 	struct ether_header     *eh;
8700 	uint16_t                ether_type;
8701 	boolean_t               translate = FALSE;
8702 
8703 	BRIDGE_LOCK_ASSERT_HELD(sc);
8704 	assert(sc->sc_mac_nat_bif != NULL);
8705 
8706 	eh = mtod(*data, struct ether_header *);
8707 	ether_type = ntohs(eh->ether_type);
8708 	if (mnr != NULL) {
8709 		bzero(mnr, sizeof(*mnr));
8710 		mnr->mnr_ether_type = ether_type;
8711 	}
8712 	switch (ether_type) {
8713 	case ETHERTYPE_ARP:
8714 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8715 		break;
8716 	case ETHERTYPE_IP:
8717 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8718 		break;
8719 	case ETHERTYPE_IPV6:
8720 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8721 		break;
8722 	default:
8723 		break;
8724 	}
8725 	return translate;
8726 }
8727 
8728 static void
8729 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8730     const caddr_t eaddr)
8731 {
8732 	errno_t                 error;
8733 
8734 	if (mnr->mnr_arp_offset == 0) {
8735 		return;
8736 	}
8737 	/* replace the source hardware address */
8738 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8739 	    ETHER_ADDR_LEN, eaddr,
8740 	    MBUF_DONTWAIT);
8741 	if (error != 0) {
8742 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8743 		    "mbuf_copyback failed");
8744 		m_freem(*data);
8745 		*data = NULL;
8746 	}
8747 	return;
8748 }
8749 
8750 static void
8751 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8752 {
8753 	errno_t         error;
8754 	size_t          offset;
8755 
8756 	if (mnr->mnr_ip_header_len == 0) {
8757 		return;
8758 	}
8759 	/* update the UDP checksum */
8760 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8761 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8762 	    sizeof(mnr->mnr_ip_udp_csum),
8763 	    &mnr->mnr_ip_udp_csum,
8764 	    MBUF_DONTWAIT);
8765 	if (error != 0) {
8766 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8767 		    "mbuf_copyback uh_sum failed");
8768 		m_freem(*data);
8769 		*data = NULL;
8770 	}
8771 	/* update the DHCP must broadcast flag */
8772 	offset += sizeof(struct udphdr);
8773 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8774 	    sizeof(mnr->mnr_ip_dhcp_flags),
8775 	    &mnr->mnr_ip_dhcp_flags,
8776 	    MBUF_DONTWAIT);
8777 	if (error != 0) {
8778 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8779 		    "mbuf_copyback dp_flags failed");
8780 		m_freem(*data);
8781 		*data = NULL;
8782 	}
8783 }
8784 
8785 static void
8786 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8787     const caddr_t eaddr)
8788 {
8789 	uint16_t        cksum;
8790 	errno_t         error;
8791 	mbuf_t          m = *data;
8792 
8793 	if (mnr->mnr_ip6_header_len == 0) {
8794 		return;
8795 	}
8796 	switch (mnr->mnr_ip6_icmp6_type) {
8797 	case ND_ROUTER_ADVERT:
8798 	case ND_ROUTER_SOLICIT:
8799 	case ND_NEIGHBOR_SOLICIT:
8800 	case ND_NEIGHBOR_ADVERT:
8801 		if (mnr->mnr_ip6_lladdr_offset == 0) {
8802 			/* nothing to do */
8803 			return;
8804 		}
8805 		break;
8806 	default:
8807 		return;
8808 	}
8809 
8810 	/*
8811 	 * replace the lladdr
8812 	 */
8813 	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8814 	    ETHER_ADDR_LEN, eaddr,
8815 	    MBUF_DONTWAIT);
8816 	if (error != 0) {
8817 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8818 		    "mbuf_copyback lladdr failed");
8819 		m_freem(m);
8820 		*data = NULL;
8821 		return;
8822 	}
8823 
8824 	/*
8825 	 * recompute the icmp6 checksum
8826 	 */
8827 
8828 	/* skip past the ethernet header */
8829 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8830 	    mbuf_len(m) - ETHER_HDR_LEN);
8831 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8832 
8833 #define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
8834 	/* set the checksum to zero */
8835 	cksum = 0;
8836 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8837 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8838 	if (error != 0) {
8839 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8840 		    "mbuf_copyback cksum=0 failed");
8841 		m_freem(m);
8842 		*data = NULL;
8843 		return;
8844 	}
8845 	/* compute and set the new checksum */
8846 	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8847 	    mnr->mnr_ip6_icmp6_len);
8848 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8849 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8850 	if (error != 0) {
8851 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8852 		    "mbuf_copyback cksum failed");
8853 		m_freem(m);
8854 		*data = NULL;
8855 		return;
8856 	}
8857 	/* restore the ethernet header */
8858 	mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
8859 	    mbuf_len(m) + ETHER_HDR_LEN);
8860 	mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
8861 	return;
8862 }
8863 
8864 static void
8865 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8866     const caddr_t eaddr)
8867 {
8868 	struct ether_header     *eh;
8869 
8870 	/* replace the source ethernet address with the single MAC */
8871 	eh = mtod(*data, struct ether_header *);
8872 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8873 	switch (mnr->mnr_ether_type) {
8874 	case ETHERTYPE_ARP:
8875 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
8876 		break;
8877 
8878 	case ETHERTYPE_IP:
8879 		bridge_mac_nat_ip_translate(data, mnr);
8880 		break;
8881 
8882 	case ETHERTYPE_IPV6:
8883 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8884 		break;
8885 
8886 	default:
8887 		break;
8888 	}
8889 	return;
8890 }
8891 
8892 /*
8893  * bridge packet filtering
8894  */
8895 
8896 /*
8897  * Perform basic checks on header size since
8898  * pfil assumes ip_input has already processed
8899  * it for it.  Cut-and-pasted from ip_input.c.
8900  * Given how simple the IPv6 version is,
8901  * does the IPv4 version really need to be
8902  * this complicated?
8903  *
8904  * XXX Should we update ipstat here, or not?
8905  * XXX Right now we update ipstat but not
8906  * XXX csum_counter.
8907  */
8908 static int
8909 bridge_ip_checkbasic(struct mbuf **mp)
8910 {
8911 	struct mbuf *m = *mp;
8912 	struct ip *ip;
8913 	int len, hlen;
8914 	u_short sum;
8915 
8916 	if (*mp == NULL) {
8917 		return -1;
8918 	}
8919 
8920 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8921 		/* max_linkhdr is already rounded up to nearest 4-byte */
8922 		if ((m = m_copyup(m, sizeof(struct ip),
8923 		    max_linkhdr)) == NULL) {
8924 			/* XXXJRT new stat, please */
8925 			ipstat.ips_toosmall++;
8926 			goto bad;
8927 		}
8928 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8929 		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8930 			ipstat.ips_toosmall++;
8931 			goto bad;
8932 		}
8933 	}
8934 	ip = mtod(m, struct ip *);
8935 	if (ip == NULL) {
8936 		goto bad;
8937 	}
8938 
8939 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8940 		ipstat.ips_badvers++;
8941 		goto bad;
8942 	}
8943 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8944 	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
8945 		ipstat.ips_badhlen++;
8946 		goto bad;
8947 	}
8948 	if (hlen > m->m_len) {
8949 		if ((m = m_pullup(m, hlen)) == 0) {
8950 			ipstat.ips_badhlen++;
8951 			goto bad;
8952 		}
8953 		ip = mtod(m, struct ip *);
8954 		if (ip == NULL) {
8955 			goto bad;
8956 		}
8957 	}
8958 
8959 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8960 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8961 	} else {
8962 		if (hlen == sizeof(struct ip)) {
8963 			sum = in_cksum_hdr(ip);
8964 		} else {
8965 			sum = in_cksum(m, hlen);
8966 		}
8967 	}
8968 	if (sum) {
8969 		ipstat.ips_badsum++;
8970 		goto bad;
8971 	}
8972 
8973 	/* Retrieve the packet length. */
8974 	len = ntohs(ip->ip_len);
8975 
8976 	/*
8977 	 * Check for additional length bogosity
8978 	 */
8979 	if (len < hlen) {
8980 		ipstat.ips_badlen++;
8981 		goto bad;
8982 	}
8983 
8984 	/*
8985 	 * Check that the amount of data in the buffers
8986 	 * is as at least much as the IP header would have us expect.
8987 	 * Drop packet if shorter than we expect.
8988 	 */
8989 	if (m->m_pkthdr.len < len) {
8990 		ipstat.ips_tooshort++;
8991 		goto bad;
8992 	}
8993 
8994 	/* Checks out, proceed */
8995 	*mp = m;
8996 	return 0;
8997 
8998 bad:
8999 	*mp = m;
9000 	return -1;
9001 }
9002 
9003 /*
9004  * Same as above, but for IPv6.
9005  * Cut-and-pasted from ip6_input.c.
9006  * XXX Should we update ip6stat, or not?
9007  */
9008 static int
9009 bridge_ip6_checkbasic(struct mbuf **mp)
9010 {
9011 	struct mbuf *m = *mp;
9012 	struct ip6_hdr *ip6;
9013 
9014 	/*
9015 	 * If the IPv6 header is not aligned, slurp it up into a new
9016 	 * mbuf with space for link headers, in the event we forward
9017 	 * it.  Otherwise, if it is aligned, make sure the entire base
9018 	 * IPv6 header is in the first mbuf of the chain.
9019 	 */
9020 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
9021 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9022 		/* max_linkhdr is already rounded up to nearest 4-byte */
9023 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
9024 		    max_linkhdr)) == NULL) {
9025 			/* XXXJRT new stat, please */
9026 			ip6stat.ip6s_toosmall++;
9027 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9028 			goto bad;
9029 		}
9030 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
9031 		struct ifnet *inifp = m->m_pkthdr.rcvif;
9032 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
9033 			ip6stat.ip6s_toosmall++;
9034 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
9035 			goto bad;
9036 		}
9037 	}
9038 
9039 	ip6 = mtod(m, struct ip6_hdr *);
9040 
9041 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
9042 		ip6stat.ip6s_badvers++;
9043 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
9044 		goto bad;
9045 	}
9046 
9047 	/* Checks out, proceed */
9048 	*mp = m;
9049 	return 0;
9050 
9051 bad:
9052 	*mp = m;
9053 	return -1;
9054 }
9055 
9056 /*
9057  * the PF routines expect to be called from ip_input, so we
9058  * need to do and undo here some of the same processing.
9059  *
9060  * XXX : this is heavily inspired on bridge_pfil()
9061  */
9062 static int
9063 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
9064     int input)
9065 {
9066 	/*
9067 	 * XXX : mpetit : heavily inspired by bridge_pfil()
9068 	 */
9069 
9070 	int snap, error, i, hlen;
9071 	struct ether_header *eh1, eh2;
9072 	struct ip *ip;
9073 	struct llc llc1;
9074 	u_int16_t ether_type;
9075 
9076 	snap = 0;
9077 	error = -1;     /* Default error if not error == 0 */
9078 
9079 	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
9080 		return 0; /* filtering is disabled */
9081 	}
9082 	i = min((*mp)->m_pkthdr.len, max_protohdr);
9083 	if ((*mp)->m_len < i) {
9084 		*mp = m_pullup(*mp, i);
9085 		if (*mp == NULL) {
9086 			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9087 			return -1;
9088 		}
9089 	}
9090 
9091 	eh1 = mtod(*mp, struct ether_header *);
9092 	ether_type = ntohs(eh1->ether_type);
9093 
9094 	/*
9095 	 * Check for SNAP/LLC.
9096 	 */
9097 	if (ether_type < ETHERMTU) {
9098 		struct llc *llc2 = (struct llc *)(eh1 + 1);
9099 
9100 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9101 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
9102 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
9103 		    llc2->llc_control == LLC_UI) {
9104 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
9105 			snap = 1;
9106 		}
9107 	}
9108 
9109 	/*
9110 	 * If we're trying to filter bridge traffic, don't look at anything
9111 	 * other than IP and ARP traffic.  If the filter doesn't understand
9112 	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
9113 	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9114 	 * but of course we don't have an AppleTalk filter to begin with.
9115 	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9116 	 * ARP traffic.)
9117 	 */
9118 	switch (ether_type) {
9119 	case ETHERTYPE_ARP:
9120 	case ETHERTYPE_REVARP:
9121 		return 0;         /* Automatically pass */
9122 
9123 	case ETHERTYPE_IP:
9124 	case ETHERTYPE_IPV6:
9125 		break;
9126 	default:
9127 		/*
9128 		 * Check to see if the user wants to pass non-ip
9129 		 * packets, these will not be checked by pf and
9130 		 * passed unconditionally so the default is to drop.
9131 		 */
9132 		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9133 			goto bad;
9134 		}
9135 		break;
9136 	}
9137 
9138 	/* Strip off the Ethernet header and keep a copy. */
9139 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9140 	m_adj(*mp, ETHER_HDR_LEN);
9141 
9142 	/* Strip off snap header, if present */
9143 	if (snap) {
9144 		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9145 		m_adj(*mp, sizeof(struct llc));
9146 	}
9147 
9148 	/*
9149 	 * Check the IP header for alignment and errors
9150 	 */
9151 	switch (ether_type) {
9152 	case ETHERTYPE_IP:
9153 		error = bridge_ip_checkbasic(mp);
9154 		break;
9155 	case ETHERTYPE_IPV6:
9156 		error = bridge_ip6_checkbasic(mp);
9157 		break;
9158 	default:
9159 		error = 0;
9160 		break;
9161 	}
9162 	if (error) {
9163 		goto bad;
9164 	}
9165 
9166 	error = 0;
9167 
9168 	/*
9169 	 * Run the packet through pf rules
9170 	 */
9171 	switch (ether_type) {
9172 	case ETHERTYPE_IP:
9173 		/*
9174 		 * before calling the firewall, swap fields the same as
9175 		 * IP does. here we assume the header is contiguous
9176 		 */
9177 		ip = mtod(*mp, struct ip *);
9178 
9179 		ip->ip_len = ntohs(ip->ip_len);
9180 		ip->ip_off = ntohs(ip->ip_off);
9181 
9182 		if (ifp != NULL) {
9183 			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9184 		}
9185 
9186 		if (*mp == NULL || error != 0) { /* filter may consume */
9187 			break;
9188 		}
9189 
9190 		/* Recalculate the ip checksum and restore byte ordering */
9191 		ip = mtod(*mp, struct ip *);
9192 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9193 		if (hlen < (int)sizeof(struct ip)) {
9194 			goto bad;
9195 		}
9196 		if (hlen > (*mp)->m_len) {
9197 			if ((*mp = m_pullup(*mp, hlen)) == 0) {
9198 				goto bad;
9199 			}
9200 			ip = mtod(*mp, struct ip *);
9201 			if (ip == NULL) {
9202 				goto bad;
9203 			}
9204 		}
9205 		ip->ip_len = htons(ip->ip_len);
9206 		ip->ip_off = htons(ip->ip_off);
9207 		ip->ip_sum = 0;
9208 		if (hlen == sizeof(struct ip)) {
9209 			ip->ip_sum = in_cksum_hdr(ip);
9210 		} else {
9211 			ip->ip_sum = in_cksum(*mp, hlen);
9212 		}
9213 		break;
9214 
9215 	case ETHERTYPE_IPV6:
9216 		if (ifp != NULL) {
9217 			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9218 		}
9219 
9220 		if (*mp == NULL || error != 0) { /* filter may consume */
9221 			break;
9222 		}
9223 		break;
9224 	default:
9225 		error = 0;
9226 		break;
9227 	}
9228 
9229 	if (*mp == NULL) {
9230 		return error;
9231 	}
9232 	if (error != 0) {
9233 		goto bad;
9234 	}
9235 
9236 	error = -1;
9237 
9238 	/*
9239 	 * Finally, put everything back the way it was and return
9240 	 */
9241 	if (snap) {
9242 		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9243 		if (*mp == NULL) {
9244 			return error;
9245 		}
9246 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9247 	}
9248 
9249 	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9250 	if (*mp == NULL) {
9251 		return error;
9252 	}
9253 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9254 
9255 	return 0;
9256 
9257 bad:
9258 	m_freem(*mp);
9259 	*mp = NULL;
9260 	return error;
9261 }
9262 
9263 /*
9264  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9265  * All rights reserved.
9266  *
9267  * Redistribution and use in source and binary forms, with or without
9268  * modification, are permitted provided that the following conditions
9269  * are met:
9270  *   1. Redistributions of source code must retain the above copyright
9271  *      notice, this list of conditions and the following disclaimer.
9272  *   2. Redistributions in binary form must reproduce the above copyright
9273  *      notice, this list of conditions and the following disclaimer in the
9274  *      documentation and/or other materials provided with the distribution.
9275  *
9276  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9277  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9278  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9279  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9280  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9281  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9282  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9283  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9284  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9285  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9286  * SUCH DAMAGE.
9287  */
9288 
9289 /*
9290  * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9291  *
9292  * Create a queue of packets/segments which fit the given mss + hdr_len.
9293  * m0 points to mbuf chain to be segmented.
9294  * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
9295  * into segments of length MSS bytes and then copy the first hdr_len bytes
9296  * from m0 at the top of each segment.
9297  * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9298  * in each segment after the first hdr_len bytes
9299  *
9300  * Return the new queue with the segments on success, NULL on failure.
9301  * (the mbuf queue is freed in this case).
9302  * nsegs contains the number of segments generated.
9303  */
9304 
9305 static struct mbuf *
9306 m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
9307     char * hdr2_buf, int hdr2_len)
9308 {
9309 	int off = 0, n, firstlen;
9310 	struct mbuf **mnext, *mseg;
9311 	int total_len = m0->m_pkthdr.len;
9312 
9313 	/*
9314 	 * Segmentation useless
9315 	 */
9316 	if (total_len <= hdr_len + mss) {
9317 		return m0;
9318 	}
9319 
9320 	if (hdr2_buf == NULL || hdr2_len <= 0) {
9321 		hdr2_buf = NULL;
9322 		hdr2_len = 0;
9323 	}
9324 
9325 	off = hdr_len + mss;
9326 	firstlen = mss; /* first segment stored in the original mbuf */
9327 
9328 	mnext = &(m0->m_nextpkt); /* pointer to next packet */
9329 
9330 	for (n = 1; off < total_len; off += mss, n++) {
9331 		struct mbuf *m;
9332 		/*
9333 		 * Copy the header from the original packet
9334 		 * and create a new mbuf chain
9335 		 */
9336 		if (MHLEN < hdr_len) {
9337 			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9338 		} else {
9339 			m = m_gethdr(M_NOWAIT, MT_DATA);
9340 		}
9341 
9342 		if (m == NULL) {
9343 #ifdef GSO_DEBUG
9344 			D("MGETHDR error\n");
9345 #endif
9346 			goto err;
9347 		}
9348 
9349 		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9350 
9351 		m->m_len = hdr_len;
9352 		/*
9353 		 * if the optional header is present, copy it
9354 		 */
9355 		if (hdr2_buf != NULL) {
9356 			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9357 		}
9358 
9359 		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9360 		if (off + mss >= total_len) {           /* last segment */
9361 			mss = total_len - off;
9362 		}
9363 		/*
9364 		 * Copy the payload from original packet
9365 		 */
9366 		mseg = m_copym(m0, off, mss, M_NOWAIT);
9367 		if (mseg == NULL) {
9368 			m_freem(m);
9369 #ifdef GSO_DEBUG
9370 			D("m_copym error\n");
9371 #endif
9372 			goto err;
9373 		}
9374 		m_cat(m, mseg);
9375 
9376 		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9377 		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9378 		/*
9379 		 * Copy the checksum flags and data (in_cksum() need this)
9380 		 */
9381 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9382 		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9383 		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9384 
9385 		*mnext = m;
9386 		mnext = &(m->m_nextpkt);
9387 	}
9388 
9389 	/*
9390 	 * Update first segment.
9391 	 * If the optional header is present, is necessary
9392 	 * to insert it into the first segment.
9393 	 */
9394 	if (hdr2_buf == NULL) {
9395 		m_adj(m0, hdr_len + firstlen - total_len);
9396 		m0->m_pkthdr.len = hdr_len + firstlen;
9397 	} else {
9398 		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9399 		if (mseg == NULL) {
9400 #ifdef GSO_DEBUG
9401 			D("m_copym error\n");
9402 #endif
9403 			goto err;
9404 		}
9405 		m_adj(m0, hdr_len - total_len);
9406 		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9407 		m_cat(m0, mseg);
9408 		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9409 	}
9410 
9411 	if (nsegs != NULL) {
9412 		*nsegs = n;
9413 	}
9414 	return m0;
9415 err:
9416 	while (m0 != NULL) {
9417 		mseg = m0->m_nextpkt;
9418 		m0->m_nextpkt = NULL;
9419 		m_freem(m0);
9420 		m0 = mseg;
9421 	}
9422 	return NULL;
9423 }
9424 
9425 /*
9426  * Wrappers of IPv4 checksum functions
9427  */
9428 static inline void
9429 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9430 {
9431 	m->m_data += mac_hlen;
9432 	m->m_len -= mac_hlen;
9433 	m->m_pkthdr.len -= mac_hlen;
9434 #if __FreeBSD_version < 1000000
9435 	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9436 #endif
9437 
9438 	in_delayed_cksum(m);
9439 
9440 #if __FreeBSD_version < 1000000
9441 	ip->ip_len = htons(ip->ip_len);
9442 #endif
9443 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9444 	m->m_len += mac_hlen;
9445 	m->m_pkthdr.len += mac_hlen;
9446 	m->m_data -= mac_hlen;
9447 }
9448 
9449 static inline void
9450 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9451 {
9452 	m->m_data += mac_hlen;
9453 
9454 	ip->ip_sum = in_cksum(m, ip_hlen);
9455 
9456 	m->m_pkthdr.csum_flags &= ~CSUM_IP;
9457 	m->m_data -= mac_hlen;
9458 }
9459 
9460 /*
9461  * Structure that contains the state during the TCP segmentation
9462  */
9463 struct gso_ip_tcp_state {
9464 	void    (*update)
9465 	(struct gso_ip_tcp_state*, struct mbuf*);
9466 	void    (*internal)
9467 	(struct gso_ip_tcp_state*, struct mbuf*);
9468 	union iphdr hdr;
9469 	struct tcphdr *tcp;
9470 	int mac_hlen;
9471 	int ip_hlen;
9472 	int tcp_hlen;
9473 	int hlen;
9474 	int pay_len;
9475 	int sw_csum;
9476 	uint32_t tcp_seq;
9477 	uint16_t ip_id;
9478 	boolean_t is_tx;
9479 };
9480 
9481 /*
9482  * Update the pointers to TCP and IPv4 headers
9483  */
9484 static inline void
9485 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9486 {
9487 	state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9488 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9489 	state->pay_len = m->m_pkthdr.len - state->hlen;
9490 }
9491 
9492 /*
9493  * Set properly the TCP and IPv4 headers
9494  */
9495 static inline void
9496 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9497 {
9498 	/*
9499 	 * Update IP header
9500 	 */
9501 	state->hdr.ip->ip_id = htons((state->ip_id)++);
9502 	state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9503 	/*
9504 	 * TCP Checksum
9505 	 */
9506 	state->tcp->th_sum = 0;
9507 	state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9508 	    state->hdr.ip->ip_dst.s_addr,
9509 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9510 	/*
9511 	 * Checksum HW not supported (TCP)
9512 	 */
9513 	if (state->sw_csum & CSUM_DELAY_DATA) {
9514 		gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9515 	}
9516 
9517 	state->tcp_seq += state->pay_len;
9518 	/*
9519 	 * IP Checksum
9520 	 */
9521 	state->hdr.ip->ip_sum = 0;
9522 	/*
9523 	 * Checksum HW not supported (IP)
9524 	 */
9525 	if (state->sw_csum & CSUM_IP) {
9526 		gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9527 	}
9528 }
9529 
9530 
9531 /*
9532  * Updates the pointers to TCP and IPv6 headers
9533  */
9534 static inline void
9535 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9536 {
9537 	state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9538 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9539 	state->pay_len = m->m_pkthdr.len - state->hlen;
9540 }
9541 
9542 /*
9543  * Sets properly the TCP and IPv6 headers
9544  */
9545 static inline void
9546 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9547 {
9548 	state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9549 	    state->mac_hlen - state->ip_hlen);
9550 	/*
9551 	 * TCP Checksum
9552 	 */
9553 	state->tcp->th_sum = 0;
9554 	state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9555 	    &state->hdr.ip6->ip6_dst,
9556 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9557 	/*
9558 	 * Checksum HW not supported (TCP)
9559 	 */
9560 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9561 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9562 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9563 	}
9564 	state->tcp_seq += state->pay_len;
9565 }
9566 
9567 /*
9568  * Init the state during the TCP segmentation
9569  */
9570 static void
9571 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9572     bool is_ipv4, int mac_hlen, int ip_hlen,
9573     void * ip_hdr, struct tcphdr * tcp_hdr)
9574 {
9575 #pragma unused(ifp)
9576 
9577 	state->hdr.ptr = ip_hdr;
9578 	state->tcp = tcp_hdr;
9579 	if (is_ipv4) {
9580 		state->ip_id = ntohs(state->hdr.ip->ip_id);
9581 		state->update = gso_ipv4_tcp_update;
9582 		state->internal = gso_ipv4_tcp_internal;
9583 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9584 	} else {
9585 		state->update = gso_ipv6_tcp_update;
9586 		state->internal = gso_ipv6_tcp_internal;
9587 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9588 	}
9589 	state->mac_hlen = mac_hlen;
9590 	state->ip_hlen = ip_hlen;
9591 	state->tcp_hlen = state->tcp->th_off << 2;
9592 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9593 	state->tcp_seq = ntohl(state->tcp->th_seq);
9594 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9595 	return;
9596 }
9597 
9598 /*
9599  * GSO on TCP/IP (v4 or v6)
9600  *
9601  * If is_tx is TRUE, segmented packets are transmitted after they are
9602  * segmented.
9603  *
9604  * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9605  */
9606 static int
9607 gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9608     boolean_t is_tx)
9609 {
9610 	struct mbuf *m, *m_tx;
9611 	int error = 0;
9612 	int mss = 0;
9613 	int nsegs = 0;
9614 	struct mbuf *m0 = *mp;
9615 #ifdef GSO_STATS
9616 	int total_len = m0->m_pkthdr.len;
9617 #endif /* GSO_STATS */
9618 
9619 #if 1
9620 	u_int reduce_mss;
9621 
9622 	reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
9623 	    : if_bridge_tso_reduce_mss_forwarding;
9624 	mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen - reduce_mss;
9625 	assert(mss > 0);
9626 #else
9627 	if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9628 		mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9629 	} else {
9630 		mss = m0->m_pkthdr.tso_segsz;
9631 	}
9632 #endif
9633 
9634 	*mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
9635 	if (m0 == NULL) {
9636 		return ENOBUFS; /* XXX ok? */
9637 	}
9638 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9639 	    "%s %s mss %d nsegs %d",
9640 	    ifp->if_xname,
9641 	    is_tx ? "TX" : "RX",
9642 	    mss, nsegs);
9643 	/*
9644 	 * XXX-ste: can this happen?
9645 	 */
9646 	if (m0->m_nextpkt == NULL) {
9647 #ifdef GSO_DEBUG
9648 		D("only 1 segment");
9649 #endif
9650 		if (is_tx) {
9651 			error = bridge_transmit(ifp, m0);
9652 		}
9653 		return error;
9654 	}
9655 #ifdef GSO_STATS
9656 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9657 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9658 	GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9659 #endif /* GSO_STATS */
9660 
9661 	/* first pkt */
9662 	m = m0;
9663 
9664 	state->update(state, m);
9665 
9666 	do {
9667 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9668 
9669 		state->internal(state, m);
9670 		m_tx = m;
9671 		m = m->m_nextpkt;
9672 		if (is_tx) {
9673 			m_tx->m_nextpkt = NULL;
9674 			if ((error = bridge_transmit(ifp, m_tx)) != 0) {
9675 				/*
9676 				 * XXX: If a segment can not be sent, discard the following
9677 				 * segments and propagate the error to the upper levels.
9678 				 * In this way the TCP retransmits all the initial packet.
9679 				 */
9680 #ifdef GSO_DEBUG
9681 				D("if_transmit error\n");
9682 #endif
9683 				goto err;
9684 			}
9685 		}
9686 		state->update(state, m);
9687 
9688 		state->tcp->th_flags &= ~TH_CWR;
9689 		state->tcp->th_seq = htonl(state->tcp_seq);
9690 	} while (m->m_nextpkt);
9691 
9692 	/* last pkt */
9693 	state->internal(state, m);
9694 
9695 	if (is_tx) {
9696 		error = bridge_transmit(ifp, m);
9697 #ifdef GSO_DEBUG
9698 		if (error) {
9699 			D("last if_transmit error\n");
9700 			D("error - type = %d \n", error);
9701 		}
9702 #endif
9703 	}
9704 #ifdef GSO_STATS
9705 	if (!error) {
9706 		GSOSTAT_INC(tcp.gsos_segmented);
9707 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9708 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9709 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9710 	}
9711 #endif /* GSO_STATS */
9712 	return error;
9713 
9714 err:
9715 #ifdef GSO_DEBUG
9716 	D("error - type = %d \n", error);
9717 #endif
9718 	while (m != NULL) {
9719 		m_tx = m->m_nextpkt;
9720 		m->m_nextpkt = NULL;
9721 		m_freem(m);
9722 		m = m_tx;
9723 	}
9724 	return error;
9725 }
9726 
9727 /*
9728  * GSO for TCP/IPv[46]
9729  */
9730 static int
9731 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9732     boolean_t is_tx)
9733 {
9734 	int error;
9735 	ip_packet_info  info;
9736 	uint32_t csum_flags;
9737 	struct gso_ip_tcp_state state;
9738 	struct bripstats stats; /* XXX ignored */
9739 	struct tcphdr *tcp;
9740 
9741 	if (!is_tx && ipforwarding == 0) {
9742 		/* no need to segment if the packet will not be forwarded */
9743 		return 0;
9744 	}
9745 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9746 	if (error != 0) {
9747 		if (*mp != NULL) {
9748 			m_freem(*mp);
9749 			*mp = NULL;
9750 		}
9751 		return error;
9752 	}
9753 	if (info.ip_proto_hdr == NULL) {
9754 		/* not a TCP packet */
9755 		return 0;
9756 	}
9757 	tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9758 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9759 	    info.ip_hlen, info.ip_hdr.ptr, tcp);
9760 	if (is_ipv4) {
9761 		csum_flags = CSUM_DELAY_DATA; /* XXX */
9762 		if (!is_tx) {
9763 			/* if RX to our local IP address, don't segment */
9764 			struct in_addr  dst_ip;
9765 
9766 			bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9767 			if (in_addr_is_ours(dst_ip)) {
9768 				return 0;
9769 			}
9770 		}
9771 	} else {
9772 		csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9773 		if (!is_tx) {
9774 			/* if RX to our local IP address, don't segment */
9775 			if (in6_addr_is_ours(&state.hdr.ip6->ip6_dst,
9776 			    ifp->if_index)) {
9777 				/* local IP address, no need to segment */
9778 				return 0;
9779 			}
9780 		}
9781 	}
9782 	(*mp)->m_pkthdr.csum_flags = csum_flags;
9783 	(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9784 	return gso_ip_tcp(ifp, mp, &state, is_tx);
9785 }
9786