xref: /xnu-8792.81.2/bsd/net/if_bridge.c (revision 19c3b8c28c31cb8130e034cfb5df6bf9ba342d90)
1 /*
2  * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
66  * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
101  *	  consider heterogeneous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/zalloc.h>
131 
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142 
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162 
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166 
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169 
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172 
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176 
177 #include <os/log.h>
178 
179 /*
180  * if_bridge_debug, BR_DBGF_*
181  * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182  *   to enable additional logs for the corresponding bridge function
183  * - "sysctl net.link.bridge.debug" controls the value of
184  *   'if_bridge_debug'
185  */
186 static uint32_t if_bridge_debug = 0;
/*
 * BR_DBGF_*: bit values for 'if_bridge_debug'.  The BRIDGE_LOG macros
 * hand their debug-flag argument to BRIDGE_DBGF_ENABLED(), which tests
 * it against 'if_bridge_debug'.
 */
187 #define BR_DBGF_LIFECYCLE       0x0001
188 #define BR_DBGF_INPUT           0x0002
189 #define BR_DBGF_OUTPUT          0x0004
190 #define BR_DBGF_RT_TABLE        0x0008
191 #define BR_DBGF_DELAYED_CALL    0x0010
192 #define BR_DBGF_IOCTL           0x0020
193 #define BR_DBGF_MBUF            0x0040
194 #define BR_DBGF_MCAST           0x0080
195 #define BR_DBGF_HOSTFILTER      0x0100
196 #define BR_DBGF_CHECKSUM        0x0200
197 #define BR_DBGF_MAC_NAT         0x0400
198 
199 /*
200  * if_bridge_log_level
201  * - 'if_bridge_log_level' ensures that by default important logs are
202  *   logged regardless of if_bridge_debug by comparing the log level
203  *   in BRIDGE_LOG to if_bridge_log_level
204  * - use "sysctl net.link.bridge.log_level" controls the value of
205  *   'if_bridge_log_level'
206  * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
207  *   logs must use LOG_NOTICE to ensure they appear by default
208  */
209 static int if_bridge_log_level = LOG_NOTICE;
210 
/*
 * BRIDGE_DBGF_ENABLED
 * - non-zero when any bit of __flag is set in 'if_bridge_debug'
 * - __flag is parenthesized so that an expression argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole instead of
 *   being split by operator precedence
 */
211 #define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
212 
213 /*
214  * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
215  * - macros to generate the specified log conditionally based on
216  *   the specified log level and debug flags
217  * - BRIDGE_LOG_SIMPLE does not include the function name in the log
 * - a message is emitted when __level <= if_bridge_log_level, or when
 *   BRIDGE_DBGF_ENABLED(__dbgf) is true
 * - __string must be a string literal (BRIDGE_LOG pastes it after "%s: ")
 * - __level is parenthesized so an expression argument keeps its meaning
 *   inside the comparison
218  */
219 #define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
220 	do {                                                            \
221 	        if ((__level) <= if_bridge_log_level ||                 \
222 	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
223 	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
224 	                       __func__, ## __VA_ARGS__);       \
225 	        }                                                       \
226 	} while (0)
227 #define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
228 	do {                                                    \
229 	        if ((__level) <= if_bridge_log_level ||         \
230 	            BRIDGE_DBGF_ENABLED(__dbgf)) {                      \
231 	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
232 	        }                                                               \
233 	} while (0)
234 
/*
 * Raw accessors for the per-bridge mutex (sc_mtx): lock, unlock, and
 * assertions that the mutex is / is not owned.
 */
235 #define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
236 #define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
237 #define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
238 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
239 #define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
240 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
241 
/*
 * BRIDGE_LOCK_DEBUG selects how the BRIDGE_* locking operations are
 * implemented: as calls to out-of-line helpers (bridge_lock() etc.) when
 * non-zero, or as the inline macro bodies in the #else branch.
 */
242 #define BRIDGE_LOCK_DEBUG      1
243 #if BRIDGE_LOCK_DEBUG
244 
/* number of slots in the lock_lr[] / unlock_lr[] arrays of the softc */
245 #define BR_LCKDBG_MAX                   4
246 
247 #define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
248 #define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
249 #define BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
250 #define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
251 #define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
252 #define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)
253 
254 #else /* !BRIDGE_LOCK_DEBUG */
255 
256 #define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
257 #define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
/*
 * BRIDGE_LOCK2REF: called with the mutex held.  Sets _err to EBUSY when
 * an exclusive holder is active (sc_iflist_xcnt > 0); otherwise takes a
 * reference (sc_iflist_ref++) and sets _err to 0.  The mutex is dropped
 * in both cases.
 */
258 #define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
259 	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
260 	if ((_sc)->sc_iflist_xcnt > 0)                                  \
261 	        (_err) = EBUSY;                                         \
262 	else {                                                          \
263 	        (_sc)->sc_iflist_ref++;                                 \
264 	        (_err) = 0;                                             \
265 	}                                                               \
266 	_BRIDGE_UNLOCK(_sc);                                            \
267 } while (0)
/*
 * BRIDGE_UNREF: takes the mutex, drops one reference, releases the mutex,
 * and wakes sleepers on sc_cv when the last reference went away while an
 * exclusive request is pending.
 */
268 #define BRIDGE_UNREF(_sc)               do {                            \
269 	_BRIDGE_LOCK(_sc);                                              \
270 	(_sc)->sc_iflist_ref--;                                         \
271 	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0))	{ \
272 	        _BRIDGE_UNLOCK(_sc);                                    \
273 	        wakeup(&(_sc)->sc_cv);                                  \
274 	} else                                                          \
275 	        _BRIDGE_UNLOCK(_sc);                                    \
276 } while (0)
/*
 * BRIDGE_XLOCK: called with the mutex held.  Registers an exclusive
 * request (sc_iflist_xcnt++) and sleeps on sc_cv, using sc_mtx as the
 * msleep mutex, until sc_iflist_ref reaches zero.
 */
277 #define BRIDGE_XLOCK(_sc)               do {                            \
278 	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
279 	(_sc)->sc_iflist_xcnt++;                                        \
280 	while ((_sc)->sc_iflist_ref > 0)                                \
281 	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
282 	            "BRIDGE_XLOCK", NULL);                              \
283 } while (0)
/* BRIDGE_XDROP: called with the mutex held; withdraws an exclusive request. */
284 #define BRIDGE_XDROP(_sc)               do {                            \
285 	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
286 	(_sc)->sc_iflist_xcnt--;                                        \
287 } while (0)
288 
289 #endif /* BRIDGE_LOCK_DEBUG */
290 
291 #if NBPFILTER > 0
292 #define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
293 	if (sc->sc_bpf_input != NULL)                                   \
294 	        bridge_bpf_input(sc->sc_ifp, m, __func__, __LINE__)
295 #else /* NBPFILTER */
296 #define BRIDGE_BPF_MTAP_INPUT(ifp, m)
297 #endif /* NBPFILTER */
298 
299 /*
300  * Initial size of the route hash table.  Must be a power of two.
301  */
302 #ifndef BRIDGE_RTHASH_SIZE
303 #define BRIDGE_RTHASH_SIZE              16
304 #endif
305 
306 /*
307  * Maximum size of the routing hash table
308  */
309 #define BRIDGE_RTHASH_SIZE_MAX          2048
310 
/*
 * Mask derived from the bridge's current hash table size
 * (sc_rthash_size - 1); this is a contiguous bit mask only while
 * sc_rthash_size is a power of two.
 */
311 #define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)
312 
313 /*
314  * Maximum number of addresses to cache.
315  */
316 #ifndef BRIDGE_RTABLE_MAX
317 #define BRIDGE_RTABLE_MAX               100
318 #endif
319 
320 
321 /*
322  * Timeout (in seconds) for entries learned dynamically.
323  */
324 #ifndef BRIDGE_RTABLE_TIMEOUT
325 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
326 #endif
327 
328 /*
329  * Number of seconds between walks of the route list.
330  */
331 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
332 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
333 #endif
334 
335 /*
336  * Number of MAC NAT entries
337  * - sized based on 16 clients (including MAC NAT interface)
338  *   each with 4 addresses
339  */
340 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
341 #define BRIDGE_MAC_NAT_ENTRY_MAX        64
342 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
343 
344 /*
345  * List of capabilities to possibly mask on the member interface.
346  */
347 #define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
348 /*
349  * List of capabilities to disable on the member interface.
350  */
351 #define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
352 
353 /*
354  * Bridge interface list entry.
355  */
356 struct bridge_iflist {
357 	TAILQ_ENTRY(bridge_iflist) bif_next;    /* tail queue linkage */
358 	struct ifnet            *bif_ifp;       /* member if */
359 	struct bstp_port        bif_stp;        /* STP state */
360 	uint32_t                bif_ifflags;    /* member if flags (IFBIF_*) */
361 	int                     bif_savedcaps;  /* saved capabilities */
362 	uint32_t                bif_addrmax;    /* max # of addresses */
363 	uint32_t                bif_addrcnt;    /* cur. # of addresses */
364 	uint32_t                bif_addrexceeded; /* # of address violations */
365 
366 	interface_filter_t      bif_iff_ref;    /* interface filter reference */
367 	struct bridge_softc     *bif_sc;        /* bridge softc pointer */
368 	uint32_t                bif_flags;      /* presumably BIFF_* flags -- TODO confirm */
369 
370 	/* host filter */
371 	struct in_addr          bif_hf_ipsrc;   /* source IP (see BIFF_HF_IPSRC) */
372 	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN]; /* source MAC (see BIFF_HF_HWSRC) */
373 
374 	struct ifbrmstats       bif_stats;      /* per-member statistics */
375 };
376 
/*
 * bif_ifflags_are_set
 * - returns true only when every bit in 'flags' is also set in the
 *   member's bif_ifflags
 * - a 'flags' value of 0 therefore always yields true
 */
377 static inline bool
378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 	return (bif->bif_ifflags & flags) == flags;
381 }
382 
/*
 * bif_has_checksum_offload
 * - returns true when IFBIF_CHECKSUM_OFFLOAD is set in the member's
 *   bif_ifflags
 */
383 static inline bool
384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388 
389 /*
 * fake errors to make the code clearer
 * - every name below expands to EJUSTRETURN, so the names document the
 *   reason at the point of use but are indistinguishable at run time
 */
390 #define _EBADIP                 EJUSTRETURN
391 #define _EBADIPCHECKSUM         EJUSTRETURN
392 #define _EBADIPV6               EJUSTRETURN
393 #define _EBADUDP                EJUSTRETURN
394 #define _EBADTCP                EJUSTRETURN
395 #define _EBADUDPCHECKSUM        EJUSTRETURN
396 #define _EBADTCPCHECKSUM        EJUSTRETURN
397 
398 #define BIFF_PROMISC            0x01    /* promiscuous mode set */
399 #define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
400 #define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
401 #define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
402 #define BIFF_HOST_FILTER        0x10    /* host filter enabled */
403 #define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
404 #define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
405 #define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
406 #define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
407 #if SKYWALK
408 #define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
409 #define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
410 #endif /* SKYWALK */
411 
412 /*
413  * mac_nat_entry
414  * - translates between an IP address and MAC address on a specific
415  *   bridge interface member
416  */
417 struct mac_nat_entry {
418 	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
419 	struct bridge_iflist    *mne_bif;       /* originating interface */
420 	unsigned long           mne_expire;     /* expiration time */
	/*
	 * presumably MNE_FLAGS_IPV6 in mne_flags tells which union member
	 * is valid -- TODO confirm against the users of this struct
	 */
421 	union {
422 		struct in_addr  mneu_ip;        /* originating IPv4 address */
423 		struct in6_addr mneu_ip6;       /* originating IPv6 address */
424 	} mne_u;
425 	uint8_t                 mne_mac[ETHER_ADDR_LEN]; /* MAC address paired with the IP above */
426 	uint8_t                 mne_flags;      /* presumably MNE_FLAGS_* -- TODO confirm */
427 	uint8_t                 mne_reserved;
428 };
/* shorthand accessors for the members of mne_u */
429 #define mne_ip  mne_u.mneu_ip
430 #define mne_ip6 mne_u.mneu_ip6
431 
432 #define MNE_FLAGS_IPV6          0x01    /* IPv6 address */
433 
434 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
435 
436 /*
437  * mac_nat_record
438  * - used by bridge_mac_nat_output() to convey the translation that needs
439  *   to take place in bridge_mac_nat_translate
440  * - holds enough information so that the translation can be done later without
441  *   holding the bridge lock
442  */
443 struct mac_nat_record {
444 	uint16_t                mnr_ether_type;
	/*
	 * per-protocol translation details (ARP / IP / IPv6 variants);
	 * presumably selected by mnr_ether_type -- TODO confirm
	 */
445 	union {
446 		uint16_t        mnru_arp_offset;
447 		struct {
448 			uint16_t mnruip_dhcp_flags;
449 			uint16_t mnruip_udp_csum;
450 			uint8_t  mnruip_header_len;
451 		} mnru_ip;
452 		struct {
453 			uint16_t mnruip6_icmp6_len;
454 			uint16_t mnruip6_lladdr_offset;
455 			uint8_t mnruip6_icmp6_type;
456 			uint8_t mnruip6_header_len;
457 		} mnru_ip6;
458 	} mnr_u;
459 };
460 
/* shorthand accessors for the members of mnr_u */
461 #define mnr_arp_offset  mnr_u.mnru_arp_offset
462 
463 #define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
464 #define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
465 #define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum
466 
467 #define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
468 #define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
469 #define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
470 #define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
471 
472 /*
473  * Bridge route node.
474  */
475 struct bridge_rtnode {
476 	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
477 	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
478 	struct bridge_iflist    *brt_dst;       /* destination if */
479 	unsigned long           brt_expire;     /* expiration time */
480 	uint8_t                 brt_flags;      /* address flags */
481 	uint8_t                 brt_addr[ETHER_ADDR_LEN]; /* MAC address of this entry */
482 	uint16_t                brt_vlan;       /* vlan id */
483 
484 };
/* shorthand: the ifnet of the destination member; dereferences brt_dst */
485 #define brt_ifp                 brt_dst->bif_ifp
486 
487 /*
488  * Bridge delayed function call context
489  */
/* signature of a delayed function: receives the bridge softc */
490 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
491 
492 struct bridge_delayed_call {
493 	struct bridge_softc     *bdc_sc;        /* bridge softc pointer */
494 	bridge_delayed_func_t   bdc_func; /* Function to call */
495 	struct timespec         bdc_ts; /* Time to call */
496 	u_int32_t               bdc_flags;      /* presumably BDCF_* flags -- TODO confirm */
497 	thread_call_t           bdc_thread_call; /* thread call handle */
498 };
499 
500 #define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
501 #define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
502 
503 /*
504  * Software state for each bridge.
505  */
506 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
507 
508 struct bridge_softc {
509 	struct ifnet            *sc_ifp;        /* make this an interface */
510 	u_int32_t               sc_flags;       /* presumably SCF_* flags -- TODO confirm */
511 	LIST_ENTRY(bridge_softc) sc_list;       /* list linkage */
512 	decl_lck_mtx_data(, sc_mtx);            /* mutex used by _BRIDGE_LOCK/_BRIDGE_UNLOCK */
513 	struct _bridge_rtnode_list *sc_rthash;  /* our forwarding table */
514 	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
515 	uint32_t                sc_rthash_key;  /* key for hash */
516 	uint32_t                sc_rthash_size; /* size of the hash table */
517 	struct bridge_delayed_call sc_aging_timer;
518 	struct bridge_delayed_call sc_resize_call;
519 	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
520 	struct bstp_state       sc_stp;         /* STP state */
521 	bpf_packet_func         sc_bpf_input;   /* input tap; NULL-checked by BRIDGE_BPF_MTAP_INPUT */
522 	bpf_packet_func         sc_bpf_output;
523 	void                    *sc_cv;         /* its address is the msleep/wakeup channel of BRIDGE_XLOCK/BRIDGE_UNREF */
524 	uint32_t                sc_brtmax;      /* max # of addresses */
525 	uint32_t                sc_brtcnt;      /* cur. # of addresses */
526 	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
527 	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist */
528 	uint32_t                sc_iflist_xcnt; /* exclusive-access count for sc_iflist */
529 	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
530 	uint32_t                sc_brtexceeded; /* # of cache drops */
531 	uint32_t                sc_filter_flags; /* ipf and flags */
532 	struct ifnet            *sc_ifaddr;     /* member mac copied from */
533 	u_char                  sc_defaddr[6];  /* Default MAC address */
534 	char                    sc_if_xname[IFNAMSIZ];
535 
536 	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
537 	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
538 	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
539 	uint32_t                sc_mne_max;      /* max # of entries */
540 	uint32_t                sc_mne_count;    /* cur. # of entries */
541 	uint32_t                sc_mne_allocation_failures;
542 #if BRIDGE_LOCK_DEBUG
543 	/*
544 	 * Locking and unlocking calling history
545 	 */
546 	void                    *lock_lr[BR_LCKDBG_MAX];
547 	int                     next_lock_lr;
548 	void                    *unlock_lr[BR_LCKDBG_MAX];
549 	int                     next_unlock_lr;
550 #endif /* BRIDGE_LOCK_DEBUG */
551 };
552 
553 #define SCF_DETACHING            0x01
554 #define SCF_RESIZING             0x02
555 #define SCF_MEDIA_ACTIVE         0x04
556 
/*
 * ChecksumOperation
 * - checksum handling requested for a packet; passed as the last argument
 *   of bridge_enqueue()
 * - NOTE(review): the exact meaning of each value is defined by the code
 *   that consumes it, which is not visible here
 */
557 typedef enum {
558 	CHECKSUM_OPERATION_NONE = 0,
559 	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
560 	CHECKSUM_OPERATION_FINALIZE = 2,
561 	CHECKSUM_OPERATION_COMPUTE = 3,
562 } ChecksumOperation;
563 
/*
 * union iphdr
 * - a single pointer to an IP header that can be read as an IPv4 header,
 *   an IPv6 header, or an untyped pointer
 */
564 union iphdr {
565 	struct ip *ip;
566 	struct ip6_hdr *ip6;
567 	void * ptr;
568 };
569 
/*
 * ip_packet_info
 * - summary of an IP packet: header and payload lengths, protocol,
 *   address family, fragmentation state, and pointers to the IP and
 *   protocol headers
 */
570 typedef struct {
571 	u_int           ip_hlen;        /* IP header length */
572 	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
573 	u_int           ip_opt_len;     /* IPv6 options headers length */
574 	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
575 	bool            ip_is_ipv4;     /* true for IPv4; presumably false means IPv6 -- TODO confirm */
576 	bool            ip_is_fragmented;
577 	union iphdr     ip_hdr;         /* pointer to IP header */
578 	void *          ip_proto_hdr;   /* ptr to protocol header (TCP) */
579 } ip_packet_info, *ip_packet_info_t;
580 
581 struct bridge_hostfilter_stats bridge_hostfilter_stats;
582 
/*
 * Lock group and attributes for if_bridge, plus the mutex
 * 'bridge_list_mtx' declared with them.
 * NOTE(review): LCK_ATTR_DEBUG is passed only when BRIDGE_LOCK_DEBUG is
 * off -- confirm that this selection is intended and not inverted.
 */
583 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
584 #if BRIDGE_LOCK_DEBUG
585 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
586 #else
587 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
588 #endif
589 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
590 
591 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
592 
593 static ZONE_DEFINE(bridge_rtnode_pool, "bridge_rtnode",
594     sizeof(struct bridge_rtnode), ZC_NONE);
595 static ZONE_DEFINE(bridge_mne_pool, "bridge_mac_nat_entry",
596     sizeof(struct mac_nat_entry), ZC_NONE);
597 
598 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
599 static int      bridge_clone_destroy(struct ifnet *);
600 
601 static errno_t  bridge_ioctl(struct ifnet *, u_long, void *);
602 #if HAS_IF_CAP
603 static void     bridge_mutecaps(struct bridge_softc *);
604 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
605     int);
606 #endif
607 static errno_t bridge_set_tso(struct bridge_softc *);
608 static void     bridge_proto_attach_changed(struct ifnet *);
609 static int      bridge_init(struct ifnet *);
610 #if HAS_BRIDGE_DUMMYNET
611 static void     bridge_dummynet(struct mbuf *, struct ifnet *);
612 #endif
613 static void     bridge_ifstop(struct ifnet *, int);
614 static int      bridge_output(struct ifnet *, struct mbuf *);
615 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
616 static void     bridge_start(struct ifnet *);
617 static errno_t  bridge_input(struct ifnet *, mbuf_t *);
618 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
619     mbuf_t *, char **);
620 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
621     mbuf_t *);
622 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
623     mbuf_t *m);
624 
625 static int      bridge_enqueue(ifnet_t, struct ifnet *,
626     struct ifnet *, struct mbuf *, ChecksumOperation);
627 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
628 
629 static void     bridge_forward(struct bridge_softc *, struct bridge_iflist *,
630     struct mbuf *);
631 
632 static void     bridge_aging_timer(struct bridge_softc *sc);
633 
634 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
635     struct mbuf *, int);
636 static void     bridge_span(struct bridge_softc *, struct mbuf *);
637 
638 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
639     uint16_t, struct bridge_iflist *, int, uint8_t);
640 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
641     uint16_t);
642 static void     bridge_rttrim(struct bridge_softc *);
643 static void     bridge_rtage(struct bridge_softc *);
644 static void     bridge_rtflush(struct bridge_softc *, int);
645 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
646     uint16_t);
647 
648 static int      bridge_rtable_init(struct bridge_softc *);
649 static void     bridge_rtable_fini(struct bridge_softc *);
650 
651 static void     bridge_rthash_resize(struct bridge_softc *);
652 
653 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
654 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
655     const uint8_t *, uint16_t);
656 static int      bridge_rtnode_hash(struct bridge_softc *,
657     struct bridge_rtnode *);
658 static int      bridge_rtnode_insert(struct bridge_softc *,
659     struct bridge_rtnode *);
660 static void     bridge_rtnode_destroy(struct bridge_softc *,
661     struct bridge_rtnode *);
662 #if BRIDGESTP
663 static void     bridge_rtable_expire(struct ifnet *, int);
664 static void     bridge_state_change(struct ifnet *, int);
665 #endif /* BRIDGESTP */
666 
667 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
668     const char *name);
669 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
670     struct ifnet *ifp);
671 static void     bridge_delete_member(struct bridge_softc *,
672     struct bridge_iflist *);
673 static void     bridge_delete_span(struct bridge_softc *,
674     struct bridge_iflist *);
675 
676 static int      bridge_ioctl_add(struct bridge_softc *, void *);
677 static int      bridge_ioctl_del(struct bridge_softc *, void *);
678 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
679 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
680 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
681 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
682 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *);
683 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *);
684 static int      bridge_ioctl_rts32(struct bridge_softc *, void *);
685 static int      bridge_ioctl_rts64(struct bridge_softc *, void *);
686 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *);
687 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *);
688 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
689 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
690 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *);
691 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *);
692 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
693 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
694 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
695 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
696 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
697 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
698 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
699 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
700 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
701 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
702 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
703 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
704 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
705 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
706 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *);
707 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *);
708 static int      bridge_ioctl_grte(struct bridge_softc *, void *);
709 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
710 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
711 static int      bridge_ioctl_sproto(struct bridge_softc *, void *);
712 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *);
713 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *);
714 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *);
715 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *);
716 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
717 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *);
718 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
719 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
720 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *);
721 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *);
722 
723 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
724 static int bridge_ip_checkbasic(struct mbuf **);
725 static int bridge_ip6_checkbasic(struct mbuf **);
726 
727 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
728 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
729 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
730 
731 static void bridge_detach(ifnet_t);
732 static void bridge_link_event(struct ifnet *, u_int32_t);
733 static void bridge_iflinkevent(struct ifnet *);
734 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
735 static int interface_media_active(struct ifnet *);
736 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
737 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
738 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
739 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
740 
741 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
742     struct bridge_iflist *);
743 static void bridge_mac_nat_disable(struct bridge_softc *sc);
744 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
745 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
746 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
747     struct bridge_iflist *);
748 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
749     boolean_t *);
750 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
751     struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
752 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
753     const caddr_t);
754 static bool is_broadcast_ip_packet(mbuf_t *);
755 static bool in_addr_is_ours(const struct in_addr);
756 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
757 
758 #define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)
759 
760 static int
761 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
762     boolean_t is_tx);
763 
764 /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
765 #define VLANTAGOF(_m)   0
766 
767 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
768 { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
769 
770 static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
771 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
772 
773 #if BRIDGESTP
774 static struct bstp_cb_ops bridge_ops = {
775 	.bcb_state = bridge_state_change,
776 	.bcb_rtage = bridge_rtable_expire
777 };
778 #endif /* BRIDGESTP */
779 
780 SYSCTL_DECL(_net_link);
781 SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
782     "Bridge");
783 
784 static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
785 SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
786     CTLFLAG_RW | CTLFLAG_LOCKED,
787     &bridge_inherit_mac, 0,
788     "Inherit MAC address from the first bridge member");
789 
790 SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
791     CTLFLAG_RW | CTLFLAG_LOCKED,
792     &bridge_rtable_prune_period, 0,
793     "Interval between pruning of routing table");
794 
795 static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
796 SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
797     CTLFLAG_RW | CTLFLAG_LOCKED,
798     &bridge_rtable_hash_size_max, 0,
799     "Maximum size of the routing hash table");
800 
801 #if BRIDGE_DELAYED_CALLBACK_DEBUG
802 static int bridge_delayed_callback_delay = 0;
803 SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
804     CTLFLAG_RW | CTLFLAG_LOCKED,
805     &bridge_delayed_callback_delay, 0,
806     "Delay before calling delayed function");
807 #endif
808 
809 SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
810     hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
811     &bridge_hostfilter_stats, bridge_hostfilter_stats, "");
812 
813 #if BRIDGESTP
814 static int log_stp   = 0;   /* log STP state changes */
815 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
816     &log_stp, 0, "Log STP state changes");
817 #endif /* BRIDGESTP */
818 
819 struct bridge_control {
820 	int             (*bc_func)(struct bridge_softc *, void *);
821 	unsigned int    bc_argsize;
822 	unsigned int    bc_flags;
823 };
824 
825 #define VMNET_TAG               "com.apple.vmnet"
826 #define VMNET_LOCAL_TAG         VMNET_TAG ".local"
827 #define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
828 #define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"
829 
830 static u_int16_t vmnet_tag;
831 static u_int16_t vmnet_local_tag;
832 static u_int16_t vmnet_broadcast_tag;
833 static u_int16_t vmnet_multicast_tag;
834 
835 static u_int16_t
allocate_pf_tag(char * name)836 allocate_pf_tag(char * name)
837 {
838 	u_int16_t       tag;
839 
840 	tag = pf_tagname2tag_ext(name);
841 	BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
842 	return tag;
843 }
844 
845 static void
allocate_vmnet_pf_tags(void)846 allocate_vmnet_pf_tags(void)
847 {
848 	/* allocate tags to use with PF */
849 	if (vmnet_tag == 0) {
850 		vmnet_tag = allocate_pf_tag(VMNET_TAG);
851 	}
852 	if (vmnet_local_tag == 0) {
853 		vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
854 	}
855 	if (vmnet_broadcast_tag == 0) {
856 		vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
857 	}
858 	if (vmnet_multicast_tag == 0) {
859 		vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
860 	}
861 }
862 
863 #define BC_F_COPYIN             0x01    /* copy arguments in */
864 #define BC_F_COPYOUT            0x02    /* copy arguments out */
865 #define BC_F_SUSER              0x04    /* do super-user check */
866 
867 static const struct bridge_control bridge_control_table32[] = {
868 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
869 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
870 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
871 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
872 
873 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
874 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
875 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
876 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
877 
878 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
879 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
880 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
881 	  .bc_flags = BC_F_COPYOUT },
882 
883 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
884 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
885 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
886 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
887 
888 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
889 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
890 
891 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
892 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
893 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
894 	  .bc_flags = BC_F_COPYOUT },
895 
896 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
897 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
898 
899 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
900 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
901 
902 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
903 	  .bc_flags = BC_F_COPYOUT },
904 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
905 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
906 
907 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
908 	  .bc_flags = BC_F_COPYOUT },
909 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
910 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
911 
912 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
913 	  .bc_flags = BC_F_COPYOUT },
914 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
915 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
916 
917 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
918 	  .bc_flags = BC_F_COPYOUT },
919 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
920 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
921 
922 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
923 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
924 
925 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
926 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
927 
928 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
929 	  .bc_flags = BC_F_COPYOUT },
930 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
931 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
932 
933 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
934 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
935 
936 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
937 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
938 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
939 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
940 
941 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
942 	  .bc_flags = BC_F_COPYOUT },
943 
944 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
945 	  .bc_flags = BC_F_COPYOUT },
946 
947 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
948 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
949 
950 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
951 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
952 
953 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
954 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
955 
956 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
957 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
958 
959 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
960 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
961 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
962 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
963 
964 	{ .bc_func = bridge_ioctl_gmnelist32,
965 	  .bc_argsize = sizeof(struct ifbrmnelist32),
966 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
967 	{ .bc_func = bridge_ioctl_gifstats32,
968 	  .bc_argsize = sizeof(struct ifbrmreq32),
969 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
970 };
971 
972 static const struct bridge_control bridge_control_table64[] = {
973 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
974 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
975 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
976 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
977 
978 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
979 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
980 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
981 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
982 
983 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
984 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
985 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
986 	  .bc_flags = BC_F_COPYOUT },
987 
988 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
989 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
990 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
991 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
992 
993 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
994 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
995 
996 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
997 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
998 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
999 	  .bc_flags = BC_F_COPYOUT },
1000 
1001 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1002 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1003 
1004 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1005 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1006 
1007 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1008 	  .bc_flags = BC_F_COPYOUT },
1009 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1010 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1011 
1012 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1013 	  .bc_flags = BC_F_COPYOUT },
1014 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1015 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1016 
1017 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1018 	  .bc_flags = BC_F_COPYOUT },
1019 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1020 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1021 
1022 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1023 	  .bc_flags = BC_F_COPYOUT },
1024 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
1025 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1026 
1027 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1028 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1029 
1030 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1031 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1032 
1033 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1034 	  .bc_flags = BC_F_COPYOUT },
1035 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1036 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1037 
1038 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1039 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1040 
1041 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1042 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1043 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1044 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1045 
1046 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1047 	  .bc_flags = BC_F_COPYOUT },
1048 
1049 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1050 	  .bc_flags = BC_F_COPYOUT },
1051 
1052 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1053 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1054 
1055 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1056 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1057 
1058 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1059 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1060 
1061 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1062 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1063 
1064 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1065 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1066 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1067 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1068 
1069 	{ .bc_func = bridge_ioctl_gmnelist64,
1070 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1071 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1072 	{ .bc_func = bridge_ioctl_gifstats64,
1073 	  .bc_argsize = sizeof(struct ifbrmreq64),
1074 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1075 };
1076 
1077 static const unsigned int bridge_control_table_size =
1078     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1079 
1080 static LIST_HEAD(, bridge_softc) bridge_list =
1081     LIST_HEAD_INITIALIZER(bridge_list);
1082 
1083 #define BRIDGENAME      "bridge"
1084 #define BRIDGES_MAX     IF_MAXUNIT
1085 #define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)
1086 
1087 static struct if_clone bridge_cloner =
1088     IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
1089     0, BRIDGES_MAX, BRIDGE_ZONE_MAX_ELEM, sizeof(struct bridge_softc));
1090 
1091 static int if_bridge_txstart = 0;
1092 SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
1093     &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");
1094 
1095 SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
1096     &if_bridge_debug, 0, "Bridge debug flags");
1097 
1098 SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
1099     CTLFLAG_RW | CTLFLAG_LOCKED,
1100     &if_bridge_log_level, 0, "Bridge log level");
1101 
1102 static int if_bridge_segmentation = 1;
1103 SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
1104     CTLFLAG_RW | CTLFLAG_LOCKED,
1105     &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1106 
1107 static int if_bridge_vmnet_pf_tagging = 1;
1108 SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1109     CTLFLAG_RW | CTLFLAG_LOCKED,
1110     &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1111 
1112 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX            256
1113 #define BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT        110
1114 #define BRIDGE_TSO_REDUCE_MSS_TX_MAX                    256
1115 #define BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT                0
1116 
1117 static u_int if_bridge_tso_reduce_mss_forwarding
1118         = BRIDGE_TSO_REDUCE_MSS_FORWARDING_DEFAULT;
1119 static u_int if_bridge_tso_reduce_mss_tx
1120         = BRIDGE_TSO_REDUCE_MSS_TX_DEFAULT;
1121 
1122 static int
bridge_tso_reduce_mss(struct sysctl_req * req,u_int * val,u_int val_max)1123 bridge_tso_reduce_mss(struct sysctl_req *req, u_int * val, u_int val_max)
1124 {
1125 	int     changed;
1126 	int     error;
1127 	u_int   new_value;
1128 
1129 	error = sysctl_io_number(req, *val, sizeof(*val), &new_value,
1130 	    &changed);
1131 	if (error == 0 && changed != 0) {
1132 		if (new_value > val_max) {
1133 			return EINVAL;
1134 		}
1135 		*val = new_value;
1136 	}
1137 	return error;
1138 }
1139 
1140 static int
1141 bridge_tso_reduce_mss_forwarding_sysctl SYSCTL_HANDLER_ARGS
1142 {
1143 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_forwarding,
1144     BRIDGE_TSO_REDUCE_MSS_FORWARDING_MAX);
1145 }
1146 
1147 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_forwarding,
1148     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1149     0, 0, bridge_tso_reduce_mss_forwarding_sysctl, "IU",
1150     "Bridge tso reduce mss when forwarding");
1151 
1152 static int
1153 bridge_tso_reduce_mss_tx_sysctl SYSCTL_HANDLER_ARGS
1154 {
1155 	return bridge_tso_reduce_mss(req, &if_bridge_tso_reduce_mss_tx,
1156     BRIDGE_TSO_REDUCE_MSS_TX_MAX);
1157 }
1158 
1159 SYSCTL_PROC(_net_link_bridge, OID_AUTO, tso_reduce_mss_tx,
1160     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1161     0, 0, bridge_tso_reduce_mss_tx_sysctl, "IU",
1162     "Bridge tso reduce mss on transmit");
1163 
1164 
1165 #if DEBUG || DEVELOPMENT
1166 #define BRIDGE_FORCE_ONE        0x00000001
1167 #define BRIDGE_FORCE_TWO        0x00000002
1168 static u_int32_t if_bridge_force_errors = 0;
1169 SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
1170     CTLFLAG_RW | CTLFLAG_LOCKED,
1171     &if_bridge_force_errors, 0, "Bridge interface force errors");
1172 static inline bool
bridge_error_is_forced(u_int32_t flags)1173 bridge_error_is_forced(u_int32_t flags)
1174 {
1175 	return (if_bridge_force_errors & flags) != 0;
1176 }
1177 
1178 #define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
1179 	do {                                                            \
1180 	        __is_forced = bridge_error_is_forced(__flags);          \
1181 	        if (__is_forced) {                                      \
1182 	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
1183 	        }                                                       \
1184 	} while (0)
1185 #endif /* DEBUG || DEVELOPMENT */
1186 
1187 
1188 static void brlog_ether_header(struct ether_header *);
1189 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1190 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1191 static void brlog_mbuf(mbuf_t, const char *, const char *);
1192 static void brlog_link(struct bridge_softc * sc);
1193 
1194 #if BRIDGE_LOCK_DEBUG
1195 static void bridge_lock(struct bridge_softc *);
1196 static void bridge_unlock(struct bridge_softc *);
1197 static int bridge_lock2ref(struct bridge_softc *);
1198 static void bridge_unref(struct bridge_softc *);
1199 static void bridge_xlock(struct bridge_softc *);
1200 static void bridge_xdrop(struct bridge_softc *);
1201 
1202 static void
bridge_lock(struct bridge_softc * sc)1203 bridge_lock(struct bridge_softc *sc)
1204 {
1205 	void *lr_saved = __builtin_return_address(0);
1206 
1207 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1208 
1209 	_BRIDGE_LOCK(sc);
1210 
1211 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1212 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1213 }
1214 
1215 static void
bridge_unlock(struct bridge_softc * sc)1216 bridge_unlock(struct bridge_softc *sc)
1217 {
1218 	void *lr_saved = __builtin_return_address(0);
1219 
1220 	BRIDGE_LOCK_ASSERT_HELD(sc);
1221 
1222 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1223 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1224 
1225 	_BRIDGE_UNLOCK(sc);
1226 }
1227 
1228 static int
bridge_lock2ref(struct bridge_softc * sc)1229 bridge_lock2ref(struct bridge_softc *sc)
1230 {
1231 	int error = 0;
1232 	void *lr_saved = __builtin_return_address(0);
1233 
1234 	BRIDGE_LOCK_ASSERT_HELD(sc);
1235 
1236 	if (sc->sc_iflist_xcnt > 0) {
1237 		error = EBUSY;
1238 	} else {
1239 		sc->sc_iflist_ref++;
1240 	}
1241 
1242 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1243 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1244 
1245 	_BRIDGE_UNLOCK(sc);
1246 
1247 	return error;
1248 }
1249 
1250 static void
bridge_unref(struct bridge_softc * sc)1251 bridge_unref(struct bridge_softc *sc)
1252 {
1253 	void *lr_saved = __builtin_return_address(0);
1254 
1255 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1256 
1257 	_BRIDGE_LOCK(sc);
1258 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1259 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1260 
1261 	sc->sc_iflist_ref--;
1262 
1263 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1264 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1265 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1266 		_BRIDGE_UNLOCK(sc);
1267 		wakeup(&sc->sc_cv);
1268 	} else {
1269 		_BRIDGE_UNLOCK(sc);
1270 	}
1271 }
1272 
1273 static void
bridge_xlock(struct bridge_softc * sc)1274 bridge_xlock(struct bridge_softc *sc)
1275 {
1276 	void *lr_saved = __builtin_return_address(0);
1277 
1278 	BRIDGE_LOCK_ASSERT_HELD(sc);
1279 
1280 	sc->sc_iflist_xcnt++;
1281 	while (sc->sc_iflist_ref > 0) {
1282 		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1283 		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1284 
1285 		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);
1286 
1287 		sc->lock_lr[sc->next_lock_lr] = lr_saved;
1288 		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1289 	}
1290 }
1291 
1292 static void
bridge_xdrop(struct bridge_softc * sc)1293 bridge_xdrop(struct bridge_softc *sc)
1294 {
1295 	BRIDGE_LOCK_ASSERT_HELD(sc);
1296 
1297 	sc->sc_iflist_xcnt--;
1298 }
1299 
1300 #endif /* BRIDGE_LOCK_DEBUG */
1301 
1302 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1303 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1304 {
1305 	if (m) {
1306 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1307 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1308 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1309 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1310 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1311 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1312 		    suffix ? suffix : "");
1313 	} else {
1314 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1315 	}
1316 }
1317 
1318 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1319 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1320 {
1321 	if (m) {
1322 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1323 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1324 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1325 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1326 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1327 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1328 		    (unsigned int)mbuf_maxlen(m),
1329 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1330 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1331 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1332 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1333 			brlog_mbuf_pkthdr(m, "", suffix);
1334 		}
1335 	} else {
1336 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1337 	}
1338 }
1339 
1340 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1341 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1342 {
1343 	mbuf_t                  n;
1344 	size_t                  i, j;
1345 	size_t                  pktlen, mlen, maxlen;
1346 	unsigned char   *ptr;
1347 
1348 	pktlen = mbuf_pkthdr_len(m);
1349 
1350 	if (offset > pktlen) {
1351 		return;
1352 	}
1353 
1354 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1355 	n = m;
1356 	mlen = mbuf_len(n);
1357 	ptr = mbuf_data(n);
1358 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1359 		if (j >= mlen) {
1360 			n = mbuf_next(n);
1361 			if (n == 0) {
1362 				break;
1363 			}
1364 			ptr = mbuf_data(n);
1365 			mlen = mbuf_len(n);
1366 			j = 0;
1367 		}
1368 		if (i >= offset) {
1369 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1370 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1371 		}
1372 	}
1373 }
1374 
1375 static void
brlog_ether_header(struct ether_header * eh)1376 brlog_ether_header(struct ether_header *eh)
1377 {
1378 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1379 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1380 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1381 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1382 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1383 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1384 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1385 	    ntohs(eh->ether_type));
1386 }
1387 
/*
 * ether_ntop:
 *
 *	Format the six bytes at `ap' as a lower-case, colon-separated,
 *	zero-padded hex string ("xx:xx:xx:xx:xx:xx") into `buf', which is
 *	`len' bytes long; the output is truncated by snprintf() if it does
 *	not fit.  Returns `buf'.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	static const char mac_fmt[] = "%02x:%02x:%02x:%02x:%02x:%02x";

	(void)snprintf(buf, len, mac_fmt,
	    ap[0], ap[1], ap[2], ap[3], ap[4], ap[5]);
	return buf;
}
1396 
1397 static void
brlog_link(struct bridge_softc * sc)1398 brlog_link(struct bridge_softc * sc)
1399 {
1400 	int i;
1401 	uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
1402 	IFNAMSIZ + ETHER_ADDR_LEN];
1403 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
1404 	const u_char * lladdr;
1405 	char lladdr_str[48];
1406 
1407 	memset(sdl, 0, sizeof(sdl_buffer));
1408 	sdl->sdl_family = AF_LINK;
1409 	sdl->sdl_nlen = strlen(sc->sc_if_xname);
1410 	sdl->sdl_alen = ETHER_ADDR_LEN;
1411 	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
1412 	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
1413 	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
1414 	lladdr_str[0] = '\0';
1415 	for (i = 0, lladdr = CONST_LLADDR(sdl);
1416 	    i < sdl->sdl_alen;
1417 	    i++, lladdr++) {
1418 		char    byte_str[4];
1419 
1420 		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
1421 		    *lladdr);
1422 		strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
1423 	}
1424 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1425 	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
1426 	    " slen %d addr %s", sc->sc_if_xname,
1427 	    sdl->sdl_len, sdl->sdl_index,
1428 	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
1429 	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
1430 }
1431 
1432 
1433 /*
1434  * bridgeattach:
1435  *
1436  *	Pseudo-device attach routine.
1437  */
1438 __private_extern__ int
bridgeattach(int n)1439 bridgeattach(int n)
1440 {
1441 #pragma unused(n)
1442 	int error;
1443 
1444 	LIST_INIT(&bridge_list);
1445 
1446 #if BRIDGESTP
1447 	bstp_sys_init();
1448 #endif /* BRIDGESTP */
1449 
1450 	error = if_clone_attach(&bridge_cloner);
1451 	if (error != 0) {
1452 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1453 	}
1454 	return error;
1455 }
1456 
1457 
1458 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1459 bridge_ifnet_set_attrs(struct ifnet * ifp)
1460 {
1461 	errno_t         error;
1462 
1463 	error = ifnet_set_mtu(ifp, ETHERMTU);
1464 	if (error != 0) {
1465 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1466 		goto done;
1467 	}
1468 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1469 	if (error != 0) {
1470 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1471 		goto done;
1472 	}
1473 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1474 	if (error != 0) {
1475 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1476 		goto done;
1477 	}
1478 	error = ifnet_set_flags(ifp,
1479 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1480 	    0xffff);
1481 
1482 	if (error != 0) {
1483 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1484 		goto done;
1485 	}
1486 done:
1487 	return error;
1488 }
1489 
1490 /*
1491  * bridge_clone_create:
1492  *
1493  *	Create a new bridge instance.
1494  */
1495 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1496 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1497 {
1498 #pragma unused(params)
1499 	struct ifnet *ifp = NULL;
1500 	struct bridge_softc *sc = NULL;
1501 	struct bridge_softc *sc2 = NULL;
1502 	struct ifnet_init_eparams init_params;
1503 	errno_t error = 0;
1504 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1505 	int fb, retry, has_hostid;
1506 
1507 	sc =  if_clone_softc_allocate(&bridge_cloner);
1508 	if (sc == NULL) {
1509 		error = ENOMEM;
1510 		goto done;
1511 	}
1512 
1513 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1514 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1515 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1516 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1517 	sc->sc_filter_flags = 0;
1518 
1519 	TAILQ_INIT(&sc->sc_iflist);
1520 
1521 	/* use the interface name as the unique id for ifp recycle */
1522 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1523 	    ifc->ifc_name, unit);
1524 	bzero(&init_params, sizeof(init_params));
1525 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1526 	init_params.len                 = sizeof(init_params);
1527 	/* Initialize our routing table. */
1528 	error = bridge_rtable_init(sc);
1529 	if (error != 0) {
1530 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1531 		goto done;
1532 	}
1533 	TAILQ_INIT(&sc->sc_spanlist);
1534 	if (if_bridge_txstart) {
1535 		init_params.start = bridge_start;
1536 	} else {
1537 		init_params.flags = IFNET_INIT_LEGACY;
1538 		init_params.output = bridge_output;
1539 	}
1540 	init_params.set_bpf_tap = bridge_set_bpf_tap;
1541 	init_params.uniqueid            = sc->sc_if_xname;
1542 	init_params.uniqueid_len        = strlen(sc->sc_if_xname);
1543 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1544 	init_params.name                = ifc->ifc_name;
1545 	init_params.unit                = unit;
1546 	init_params.family              = IFNET_FAMILY_ETHERNET;
1547 	init_params.type                = IFT_BRIDGE;
1548 	init_params.demux               = ether_demux;
1549 	init_params.add_proto           = ether_add_proto;
1550 	init_params.del_proto           = ether_del_proto;
1551 	init_params.check_multi         = ether_check_multi;
1552 	init_params.framer_extended     = ether_frameout_extended;
1553 	init_params.softc               = sc;
1554 	init_params.ioctl               = bridge_ioctl;
1555 	init_params.detach              = bridge_detach;
1556 	init_params.broadcast_addr      = etherbroadcastaddr;
1557 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1558 
1559 	error = ifnet_allocate_extended(&init_params, &ifp);
1560 	if (error != 0) {
1561 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1562 		goto done;
1563 	}
1564 	LIST_INIT(&sc->sc_mne_list);
1565 	LIST_INIT(&sc->sc_mne_list_v6);
1566 	sc->sc_ifp = ifp;
1567 	error = bridge_ifnet_set_attrs(ifp);
1568 	if (error != 0) {
1569 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1570 		    error);
1571 		goto done;
1572 	}
1573 	/*
1574 	 * Generate an ethernet address with a locally administered address.
1575 	 *
1576 	 * Since we are using random ethernet addresses for the bridge, it is
1577 	 * possible that we might have address collisions, so make sure that
1578 	 * this hardware address isn't already in use on another bridge.
1579 	 * The first try uses the "hostid" and falls back to read_frandom();
1580 	 * for "hostid", we use the MAC address of the first-encountered
1581 	 * Ethernet-type interface that is currently configured.
1582 	 */
1583 	fb = 0;
1584 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1585 	for (retry = 1; retry != 0;) {
1586 		if (fb || has_hostid == 0) {
1587 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1588 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1589 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1590 		} else {
1591 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1592 			    ETHER_ADDR_LEN);
1593 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1594 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1595 			sc->sc_defaddr[3] =     /* stir it up a bit */
1596 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1597 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1598 			/*
1599 			 * Mix in the LSB as it's actually pretty significant,
1600 			 * see rdar://14076061
1601 			 */
1602 			sc->sc_defaddr[4] =
1603 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1604 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1605 			    sc->sc_defaddr[5];
1606 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1607 		}
1608 
1609 		fb = 1;
1610 		retry = 0;
1611 		lck_mtx_lock(&bridge_list_mtx);
1612 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1613 			if (_ether_cmp(sc->sc_defaddr,
1614 			    IF_LLADDR(sc2->sc_ifp)) == 0) {
1615 				retry = 1;
1616 			}
1617 		}
1618 		lck_mtx_unlock(&bridge_list_mtx);
1619 	}
1620 
1621 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1622 
1623 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1624 		brlog_link(sc);
1625 	}
1626 	error = ifnet_attach(ifp, NULL);
1627 	if (error != 0) {
1628 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1629 		goto done;
1630 	}
1631 
1632 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1633 	    IFT_ETHER);
1634 	if (error != 0) {
1635 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1636 		    error);
1637 		goto done;
1638 	}
1639 
1640 	ifnet_set_offload(ifp,
1641 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1642 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1643 	error = bridge_set_tso(sc);
1644 	if (error != 0) {
1645 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1646 		goto done;
1647 	}
1648 #if BRIDGESTP
1649 	bstp_attach(&sc->sc_stp, &bridge_ops);
1650 #endif /* BRIDGESTP */
1651 
1652 	lck_mtx_lock(&bridge_list_mtx);
1653 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1654 	lck_mtx_unlock(&bridge_list_mtx);
1655 
1656 	/* attach as ethernet */
1657 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1658 	    NULL, NULL);
1659 
1660 done:
1661 	if (error != 0) {
1662 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1663 		/* TBD: Clean up: sc, sc_rthash etc */
1664 	}
1665 
1666 	return error;
1667 }
1668 
1669 /*
1670  * bridge_clone_destroy:
1671  *
1672  *	Destroy a bridge instance.
1673  */
1674 static int
bridge_clone_destroy(struct ifnet * ifp)1675 bridge_clone_destroy(struct ifnet *ifp)
1676 {
1677 	struct bridge_softc *sc = ifp->if_softc;
1678 	struct bridge_iflist *bif;
1679 	errno_t error;
1680 
1681 	BRIDGE_LOCK(sc);
1682 	if ((sc->sc_flags & SCF_DETACHING)) {
1683 		BRIDGE_UNLOCK(sc);
1684 		return 0;
1685 	}
1686 	sc->sc_flags |= SCF_DETACHING;
1687 
1688 	bridge_ifstop(ifp, 1);
1689 
1690 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1691 
1692 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1693 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1694 
1695 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1696 	if (error != 0) {
1697 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1698 	}
1699 
1700 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1701 		bridge_delete_member(sc, bif);
1702 	}
1703 
1704 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1705 		bridge_delete_span(sc, bif);
1706 	}
1707 	BRIDGE_UNLOCK(sc);
1708 
1709 	error = ifnet_detach(ifp);
1710 	if (error != 0) {
1711 		panic("%s (%d): ifnet_detach(%p) failed %d",
1712 		    __func__, __LINE__, ifp, error);
1713 	}
1714 	return 0;
1715 }
1716 
/*
 * DRVSPEC:
 *
 *	Shared body of the SIOC[SG]DRVSPEC{32,64} cases of bridge_ioctl().
 *	It expands in a scope that must provide `ifd', `bc', `args',
 *	`bridge_control_table', `cmd', `sc' and `error'.
 *
 *	Steps: bounds-check the sub-command, reject a get/set direction
 *	that does not match BC_F_COPYOUT, enforce the superuser check when
 *	BC_F_SUSER is set, require the user length to equal the handler's
 *	argument size (and fit in `args'), copy the argument in, run the
 *	handler with the bridge lock held, and copy the result out.
 *	On any failure `error' is set and the remaining steps are skipped.
 *
 *	NOTE(review): `cmd' is compared against the unsuffixed
 *	SIOCGDRVSPEC/SIOCSDRVSPEC while the callers switch on the 32/64
 *	variants -- confirm the values alias as intended.
 */
#define DRVSPEC do { \
	if (ifd->ifd_cmd >= bridge_control_table_size) { \
		error = EINVAL; \
		break; \
	} \
	bc = &bridge_control_table[ifd->ifd_cmd]; \
	\
	if ((cmd == SIOCGDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) || \
	    (cmd == SIOCSDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) { \
		error = EINVAL; \
		break; \
	} \
	\
	if ((bc->bc_flags & BC_F_SUSER) != 0) { \
		error = kauth_authorize_generic(kauth_cred_get(), \
		    KAUTH_GENERIC_ISSUSER); \
		if (error != 0) { \
			break; \
		} \
	} \
	\
	if (ifd->ifd_len != bc->bc_argsize || \
	    ifd->ifd_len > sizeof(args)) { \
		error = EINVAL; \
		break; \
	} \
	\
	bzero(&args, sizeof(args)); \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) { \
		error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
		if (error != 0) { \
			break; \
		} \
	} \
	\
	BRIDGE_LOCK(sc); \
	error = (*bc->bc_func)(sc, &args); \
	BRIDGE_UNLOCK(sc); \
	if (error != 0) { \
		break; \
	} \
	\
	if ((bc->bc_flags & BC_F_COPYOUT) != 0) { \
		error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
	} \
} while (0)
1763 
1764 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1765 interface_needs_input_broadcast(struct ifnet * ifp)
1766 {
1767 	/*
1768 	 * Selectively enable input broadcast only when necessary.
1769 	 * The bridge interface itself attaches a fake protocol
1770 	 * so checking for at least two protocols means that the
1771 	 * interface is being used for something besides bridging
1772 	 * and needs to see broadcast packets from other members.
1773 	 */
1774 	return if_get_protolist(ifp, NULL, 0) >= 2;
1775 }
1776 
1777 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1778 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1779 {
1780 	boolean_t       old_input_broadcast;
1781 
1782 	old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1783 	if (input_broadcast) {
1784 		bif->bif_flags |= BIFF_INPUT_BROADCAST;
1785 	} else {
1786 		bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1787 	}
1788 	return old_input_broadcast != input_broadcast;
1789 }
1790 
1791 /*
1792  * bridge_ioctl:
1793  *
1794  *	Handle a control request from the operator.
1795  */
1796 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * data)1797 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1798 {
1799 	struct bridge_softc *sc = ifp->if_softc;
1800 	struct ifreq *ifr = (struct ifreq *)data;
1801 	struct bridge_iflist *bif;
1802 	int error = 0;
1803 
1804 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1805 
1806 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1807 	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1808 	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1809 	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1810 	    (char)IOCGROUP(cmd), cmd & 0xff);
1811 
1812 	switch (cmd) {
1813 	case SIOCSIFADDR:
1814 	case SIOCAIFADDR:
1815 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1816 		break;
1817 
1818 	case SIOCGIFMEDIA32:
1819 	case SIOCGIFMEDIA64: {
1820 		struct ifmediareq *ifmr = (struct ifmediareq *)data;
1821 		user_addr_t user_addr;
1822 
1823 		user_addr = (cmd == SIOCGIFMEDIA64) ?
1824 		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
1825 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
1826 
1827 		ifmr->ifm_status = IFM_AVALID;
1828 		ifmr->ifm_mask = 0;
1829 		ifmr->ifm_count = 1;
1830 
1831 		BRIDGE_LOCK(sc);
1832 		if (!(sc->sc_flags & SCF_DETACHING) &&
1833 		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1834 			ifmr->ifm_status |= IFM_ACTIVE;
1835 			ifmr->ifm_active = ifmr->ifm_current =
1836 			    IFM_ETHER | IFM_AUTO;
1837 		} else {
1838 			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1839 		}
1840 		BRIDGE_UNLOCK(sc);
1841 
1842 		if (user_addr != USER_ADDR_NULL) {
1843 			error = copyout(&ifmr->ifm_current, user_addr,
1844 			    sizeof(int));
1845 		}
1846 		break;
1847 	}
1848 
1849 	case SIOCADDMULTI:
1850 	case SIOCDELMULTI:
1851 		break;
1852 
1853 	case SIOCSDRVSPEC32:
1854 	case SIOCGDRVSPEC32: {
1855 		union {
1856 			struct ifbreq ifbreq;
1857 			struct ifbifconf32 ifbifconf;
1858 			struct ifbareq32 ifbareq;
1859 			struct ifbaconf32 ifbaconf;
1860 			struct ifbrparam ifbrparam;
1861 			struct ifbropreq32 ifbropreq;
1862 		} args;
1863 		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1864 		const struct bridge_control *bridge_control_table =
1865 		    bridge_control_table32, *bc;
1866 
1867 		DRVSPEC;
1868 
1869 		break;
1870 	}
1871 	case SIOCSDRVSPEC64:
1872 	case SIOCGDRVSPEC64: {
1873 		union {
1874 			struct ifbreq ifbreq;
1875 			struct ifbifconf64 ifbifconf;
1876 			struct ifbareq64 ifbareq;
1877 			struct ifbaconf64 ifbaconf;
1878 			struct ifbrparam ifbrparam;
1879 			struct ifbropreq64 ifbropreq;
1880 		} args;
1881 		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1882 		const struct bridge_control *bridge_control_table =
1883 		    bridge_control_table64, *bc;
1884 
1885 		DRVSPEC;
1886 
1887 		break;
1888 	}
1889 
1890 	case SIOCSIFFLAGS:
1891 		if (!(ifp->if_flags & IFF_UP) &&
1892 		    (ifp->if_flags & IFF_RUNNING)) {
1893 			/*
1894 			 * If interface is marked down and it is running,
1895 			 * then stop and disable it.
1896 			 */
1897 			BRIDGE_LOCK(sc);
1898 			bridge_ifstop(ifp, 1);
1899 			BRIDGE_UNLOCK(sc);
1900 		} else if ((ifp->if_flags & IFF_UP) &&
1901 		    !(ifp->if_flags & IFF_RUNNING)) {
1902 			/*
1903 			 * If interface is marked up and it is stopped, then
1904 			 * start it.
1905 			 */
1906 			BRIDGE_LOCK(sc);
1907 			error = bridge_init(ifp);
1908 			BRIDGE_UNLOCK(sc);
1909 		}
1910 		break;
1911 
1912 	case SIOCSIFLLADDR:
1913 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1914 		    ifr->ifr_addr.sa_len);
1915 		if (error != 0) {
1916 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1917 			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1918 			    error);
1919 		}
1920 		break;
1921 
1922 	case SIOCSIFMTU:
1923 		if (ifr->ifr_mtu < 576) {
1924 			error = EINVAL;
1925 			break;
1926 		}
1927 		BRIDGE_LOCK(sc);
1928 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
1929 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1930 			BRIDGE_UNLOCK(sc);
1931 			break;
1932 		}
1933 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1934 			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1935 				BRIDGE_LOG(LOG_NOTICE, 0,
1936 				    "%s invalid MTU: %u(%s) != %d",
1937 				    sc->sc_ifp->if_xname,
1938 				    bif->bif_ifp->if_mtu,
1939 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
1940 				error = EINVAL;
1941 				break;
1942 			}
1943 		}
1944 		if (!error) {
1945 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1946 		}
1947 		BRIDGE_UNLOCK(sc);
1948 		break;
1949 
1950 	default:
1951 		error = ether_ioctl(ifp, cmd, data);
1952 		if (error != 0 && error != EOPNOTSUPP) {
1953 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1954 			    "ifp %s cmd 0x%08lx "
1955 			    "(%c%c [%lu] %c %lu) failed error: %d",
1956 			    ifp->if_xname, cmd,
1957 			    (cmd & IOC_IN) ? 'I' : ' ',
1958 			    (cmd & IOC_OUT) ? 'O' : ' ',
1959 			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
1960 			    cmd & 0xff, error);
1961 		}
1962 		break;
1963 	}
1964 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1965 
1966 	return error;
1967 }
1968 
1969 #if HAS_IF_CAP
1970 /*
1971  * bridge_mutecaps:
1972  *
1973  *	Clear or restore unwanted capabilities on the member interface
1974  */
1975 static void
bridge_mutecaps(struct bridge_softc * sc)1976 bridge_mutecaps(struct bridge_softc *sc)
1977 {
1978 	struct bridge_iflist *bif;
1979 	int enabled, mask;
1980 
1981 	/* Initial bitmask of capabilities to test */
1982 	mask = BRIDGE_IFCAPS_MASK;
1983 
1984 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1985 		/* Every member must support it or its disabled */
1986 		mask &= bif->bif_savedcaps;
1987 	}
1988 
1989 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1990 		enabled = bif->bif_ifp->if_capenable;
1991 		enabled &= ~BRIDGE_IFCAPS_STRIP;
1992 		/* strip off mask bits and enable them again if allowed */
1993 		enabled &= ~BRIDGE_IFCAPS_MASK;
1994 		enabled |= mask;
1995 
1996 		bridge_set_ifcap(sc, bif, enabled);
1997 	}
1998 }
1999 
2000 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)2001 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
2002 {
2003 	struct ifnet *ifp = bif->bif_ifp;
2004 	struct ifreq ifr;
2005 	int error;
2006 
2007 	bzero(&ifr, sizeof(ifr));
2008 	ifr.ifr_reqcap = set;
2009 
2010 	if (ifp->if_capenable != set) {
2011 		IFF_LOCKGIANT(ifp);
2012 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
2013 		IFF_UNLOCKGIANT(ifp);
2014 		if (error) {
2015 			BRIDGE_LOG(LOG_NOTICE, 0,
2016 			    "%s error setting interface capabilities on %s",
2017 			    sc->sc_ifp->if_xname, ifp->if_xname);
2018 		}
2019 	}
2020 }
2021 #endif /* HAS_IF_CAP */
2022 
2023 static errno_t
bridge_set_tso(struct bridge_softc * sc)2024 bridge_set_tso(struct bridge_softc *sc)
2025 {
2026 	struct bridge_iflist *bif;
2027 	u_int32_t tso_v4_mtu;
2028 	u_int32_t tso_v6_mtu;
2029 	ifnet_offload_t offload;
2030 	errno_t error = 0;
2031 
2032 	/* By default, support TSO */
2033 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
2034 	tso_v4_mtu = IP_MAXPACKET;
2035 	tso_v6_mtu = IP_MAXPACKET;
2036 
2037 	/* Use the lowest common denominator of the members */
2038 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2039 		ifnet_t ifp = bif->bif_ifp;
2040 
2041 		if (ifp == NULL) {
2042 			continue;
2043 		}
2044 
2045 		if (offload & IFNET_TSO_IPV4) {
2046 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
2047 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
2048 					tso_v4_mtu = ifp->if_tso_v4_mtu;
2049 				}
2050 			} else {
2051 				offload &= ~IFNET_TSO_IPV4;
2052 				tso_v4_mtu = 0;
2053 			}
2054 		}
2055 		if (offload & IFNET_TSO_IPV6) {
2056 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2057 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2058 					tso_v6_mtu = ifp->if_tso_v6_mtu;
2059 				}
2060 			} else {
2061 				offload &= ~IFNET_TSO_IPV6;
2062 				tso_v6_mtu = 0;
2063 			}
2064 		}
2065 	}
2066 
2067 	if (offload != sc->sc_ifp->if_hwassist) {
2068 		error = ifnet_set_offload(sc->sc_ifp, offload);
2069 		if (error != 0) {
2070 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2071 			    "ifnet_set_offload(%s, 0x%x) failed %d",
2072 			    sc->sc_ifp->if_xname, offload, error);
2073 			goto done;
2074 		}
2075 		/*
2076 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2077 		 * as large as the interface MTU
2078 		 */
2079 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2080 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2081 				tso_v4_mtu = sc->sc_ifp->if_mtu;
2082 			}
2083 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2084 			    tso_v4_mtu);
2085 			if (error != 0) {
2086 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2087 				    "ifnet_set_tso_mtu(%s, "
2088 				    "AF_INET, %u) failed %d",
2089 				    sc->sc_ifp->if_xname,
2090 				    tso_v4_mtu, error);
2091 				goto done;
2092 			}
2093 		}
2094 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2095 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2096 				tso_v6_mtu = sc->sc_ifp->if_mtu;
2097 			}
2098 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2099 			    tso_v6_mtu);
2100 			if (error != 0) {
2101 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2102 				    "ifnet_set_tso_mtu(%s, "
2103 				    "AF_INET6, %u) failed %d",
2104 				    sc->sc_ifp->if_xname,
2105 				    tso_v6_mtu, error);
2106 				goto done;
2107 			}
2108 		}
2109 	}
2110 done:
2111 	return error;
2112 }
2113 
2114 /*
2115  * bridge_lookup_member:
2116  *
2117  *	Lookup a bridge member interface.
2118  */
2119 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)2120 bridge_lookup_member(struct bridge_softc *sc, const char *name)
2121 {
2122 	struct bridge_iflist *bif;
2123 	struct ifnet *ifp;
2124 
2125 	BRIDGE_LOCK_ASSERT_HELD(sc);
2126 
2127 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2128 		ifp = bif->bif_ifp;
2129 		if (strcmp(ifp->if_xname, name) == 0) {
2130 			return bif;
2131 		}
2132 	}
2133 
2134 	return NULL;
2135 }
2136 
2137 /*
2138  * bridge_lookup_member_if:
2139  *
2140  *	Lookup a bridge member interface by ifnet*.
2141  */
2142 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2143 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2144 {
2145 	struct bridge_iflist *bif;
2146 
2147 	BRIDGE_LOCK_ASSERT_HELD(sc);
2148 
2149 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2150 		if (bif->bif_ifp == member_ifp) {
2151 			return bif;
2152 		}
2153 	}
2154 
2155 	return NULL;
2156 }
2157 
2158 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2159 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2160     mbuf_t *data, char **frame_ptr)
2161 {
2162 #pragma unused(protocol)
2163 	errno_t error = 0;
2164 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2165 	struct bridge_softc *sc = bif->bif_sc;
2166 	int included = 0;
2167 	size_t frmlen = 0;
2168 	mbuf_t m = *data;
2169 
2170 	if ((m->m_flags & M_PROTO1)) {
2171 		goto out;
2172 	}
2173 
2174 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2175 	    *frame_ptr <= (char *)mbuf_data(m)) {
2176 		included = 1;
2177 		frmlen = (char *)mbuf_data(m) - *frame_ptr;
2178 	}
2179 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2180 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2181 	    "frmlen %lu", sc->sc_ifp->if_xname,
2182 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2183 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
2184 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2185 	    included ? "inside" : "outside", frmlen);
2186 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2187 		brlog_mbuf(m, "bridge_iff_input[", "");
2188 		brlog_ether_header((struct ether_header *)
2189 		    (void *)*frame_ptr);
2190 		brlog_mbuf_data(m, 0, 20);
2191 	}
2192 	if (included == 0) {
2193 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2194 		goto out;
2195 	}
2196 
2197 	/* Move data pointer to start of frame to the link layer header */
2198 	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
2199 	    mbuf_len(m) + frmlen);
2200 	(void) mbuf_pkthdr_adjustlen(m, frmlen);
2201 
2202 	/* make sure we can access the ethernet header */
2203 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2204 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2205 		    "short frame %lu < %lu",
2206 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2207 		goto out;
2208 	}
2209 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2210 		error = mbuf_pullup(data, sizeof(struct ether_header));
2211 		if (error != 0) {
2212 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2213 			    "mbuf_pullup(%lu) failed %d",
2214 			    sizeof(struct ether_header),
2215 			    error);
2216 			error = EJUSTRETURN;
2217 			goto out;
2218 		}
2219 		if (m != *data) {
2220 			m = *data;
2221 			*frame_ptr = mbuf_data(m);
2222 		}
2223 	}
2224 
2225 	error = bridge_input(ifp, data);
2226 
2227 	/* Adjust packet back to original */
2228 	if (error == 0) {
2229 		/* bridge_input might have modified *data */
2230 		if (*data != m) {
2231 			m = *data;
2232 			*frame_ptr = mbuf_data(m);
2233 		}
2234 		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
2235 		    mbuf_len(m) - frmlen);
2236 		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
2237 	}
2238 
2239 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2240 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2241 		brlog_mbuf(m, "bridge_iff_input]", "");
2242 	}
2243 
2244 out:
2245 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2246 
2247 	return error;
2248 }
2249 
2250 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2251 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2252     mbuf_t *data)
2253 {
2254 #pragma unused(protocol)
2255 	errno_t error = 0;
2256 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2257 	struct bridge_softc *sc = bif->bif_sc;
2258 	mbuf_t m = *data;
2259 
2260 	if ((m->m_flags & M_PROTO1)) {
2261 		goto out;
2262 	}
2263 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2264 	    "%s from %s m 0x%llx data 0x%llx",
2265 	    sc->sc_ifp->if_xname, ifp->if_xname,
2266 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2267 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2268 
2269 	error = bridge_member_output(sc, ifp, data);
2270 	if (error != 0 && error != EJUSTRETURN) {
2271 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2272 		    "bridge_member_output failed error %d",
2273 		    error);
2274 	}
2275 out:
2276 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2277 
2278 	return error;
2279 }
2280 
2281 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2282 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2283     const struct kev_msg *event_msg)
2284 {
2285 #pragma unused(protocol)
2286 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2287 	struct bridge_softc *sc = bif->bif_sc;
2288 
2289 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2290 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2291 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2292 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2293 		    "%s event_code %u - %s",
2294 		    ifp->if_xname, event_msg->event_code,
2295 		    dlil_kev_dl_code_str(event_msg->event_code));
2296 
2297 		switch (event_msg->event_code) {
2298 		case KEV_DL_LINK_OFF:
2299 		case KEV_DL_LINK_ON: {
2300 			bridge_iflinkevent(ifp);
2301 #if BRIDGESTP
2302 			bstp_linkstate(ifp, event_msg->event_code);
2303 #endif /* BRIDGESTP */
2304 			break;
2305 		}
2306 		case KEV_DL_SIFFLAGS: {
2307 			if ((bif->bif_flags & BIFF_PROMISC) == 0 &&
2308 			    (ifp->if_flags & IFF_UP)) {
2309 				errno_t error;
2310 
2311 				error = ifnet_set_promiscuous(ifp, 1);
2312 				if (error != 0) {
2313 					BRIDGE_LOG(LOG_NOTICE, 0,
2314 					    "ifnet_set_promiscuous (%s)"
2315 					    " failed %d", ifp->if_xname,
2316 					    error);
2317 				} else {
2318 					bif->bif_flags |= BIFF_PROMISC;
2319 				}
2320 			}
2321 			break;
2322 		}
2323 		case KEV_DL_IFCAP_CHANGED: {
2324 			BRIDGE_LOCK(sc);
2325 			bridge_set_tso(sc);
2326 			BRIDGE_UNLOCK(sc);
2327 			break;
2328 		}
2329 		case KEV_DL_PROTO_DETACHED:
2330 		case KEV_DL_PROTO_ATTACHED: {
2331 			bridge_proto_attach_changed(ifp);
2332 			break;
2333 		}
2334 		default:
2335 			break;
2336 		}
2337 	}
2338 }
2339 
2340 /*
2341  * bridge_iff_detached:
2342  *
2343  *      Called when our interface filter has been detached from a
2344  *      member interface.
2345  */
2346 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2347 bridge_iff_detached(void *cookie, ifnet_t ifp)
2348 {
2349 #pragma unused(cookie)
2350 	struct bridge_iflist *bif;
2351 	struct bridge_softc *sc = ifp->if_bridge;
2352 
2353 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2354 
2355 	/* Check if the interface is a bridge member */
2356 	if (sc != NULL) {
2357 		BRIDGE_LOCK(sc);
2358 		bif = bridge_lookup_member_if(sc, ifp);
2359 		if (bif != NULL) {
2360 			bridge_delete_member(sc, bif);
2361 		}
2362 		BRIDGE_UNLOCK(sc);
2363 		return;
2364 	}
2365 	/* Check if the interface is a span port */
2366 	lck_mtx_lock(&bridge_list_mtx);
2367 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2368 		BRIDGE_LOCK(sc);
2369 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2370 		if (ifp == bif->bif_ifp) {
2371 			bridge_delete_span(sc, bif);
2372 			break;
2373 		}
2374 		BRIDGE_UNLOCK(sc);
2375 	}
2376 	lck_mtx_unlock(&bridge_list_mtx);
2377 }
2378 
2379 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2380 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2381     char *header)
2382 {
2383 #pragma unused(protocol, packet, header)
2384 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2385 	    ifp->if_xname);
2386 	return 0;
2387 }
2388 
2389 static int
bridge_attach_protocol(struct ifnet * ifp)2390 bridge_attach_protocol(struct ifnet *ifp)
2391 {
2392 	int     error;
2393 	struct ifnet_attach_proto_param reg;
2394 
2395 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2396 	bzero(&reg, sizeof(reg));
2397 	reg.input = bridge_proto_input;
2398 
2399 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2400 	if (error) {
2401 		BRIDGE_LOG(LOG_NOTICE, 0,
2402 		    "ifnet_attach_protocol(%s) failed, %d",
2403 		    ifp->if_xname, error);
2404 	}
2405 
2406 	return error;
2407 }
2408 
2409 static int
bridge_detach_protocol(struct ifnet * ifp)2410 bridge_detach_protocol(struct ifnet *ifp)
2411 {
2412 	int     error;
2413 
2414 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2415 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2416 	if (error) {
2417 		BRIDGE_LOG(LOG_NOTICE, 0,
2418 		    "ifnet_detach_protocol(%s) failed, %d",
2419 		    ifp->if_xname, error);
2420 	}
2421 
2422 	return error;
2423 }
2424 
2425 /*
2426  * bridge_delete_member:
2427  *
2428  *	Delete the specified member interface.
2429  */
2430 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2431 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2432 {
2433 	uint32_t    bif_flags;
2434 	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2435 	int lladdr_changed = 0, error;
2436 	uint8_t eaddr[ETHER_ADDR_LEN];
2437 	u_int32_t event_code = 0;
2438 
2439 	BRIDGE_LOCK_ASSERT_HELD(sc);
2440 	VERIFY(ifs != NULL);
2441 
2442 	/*
2443 	 * Remove the member from the list first so it cannot be found anymore
2444 	 * when we release the bridge lock below
2445 	 */
2446 	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2447 		BRIDGE_XLOCK(sc);
2448 		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2449 		BRIDGE_XDROP(sc);
2450 	}
2451 	if (sc->sc_mac_nat_bif != NULL) {
2452 		if (bif == sc->sc_mac_nat_bif) {
2453 			bridge_mac_nat_disable(sc);
2454 		} else {
2455 			bridge_mac_nat_flush_entries(sc, bif);
2456 		}
2457 	}
2458 #if BRIDGESTP
2459 	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2460 		bstp_disable(&bif->bif_stp);
2461 	}
2462 #endif /* BRIDGESTP */
2463 
2464 	/*
2465 	 * If removing the interface that gave the bridge its mac address, set
2466 	 * the mac address of the bridge to the address of the next member, or
2467 	 * to its default address if no members are left.
2468 	 */
2469 	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2470 		ifnet_release(sc->sc_ifaddr);
2471 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2472 			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2473 			sc->sc_ifaddr = NULL;
2474 		} else {
2475 			struct ifnet *fif =
2476 			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2477 			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2478 			sc->sc_ifaddr = fif;
2479 			ifnet_reference(fif);   /* for sc_ifaddr */
2480 		}
2481 		lladdr_changed = 1;
2482 	}
2483 
2484 #if HAS_IF_CAP
2485 	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
2486 #endif /* HAS_IF_CAP */
2487 
2488 	error = bridge_set_tso(sc);
2489 	if (error != 0) {
2490 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2491 	}
2492 
2493 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2494 
2495 	KASSERT(bif->bif_addrcnt == 0,
2496 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2497 
2498 	/*
2499 	 * Update link status of the bridge based on its remaining members
2500 	 */
2501 	event_code = bridge_updatelinkstatus(sc);
2502 	bif_flags = bif->bif_flags;
2503 	BRIDGE_UNLOCK(sc);
2504 
2505 	/* only perform these steps if the interface is still attached */
2506 	if (ifnet_is_attached(ifs, 1)) {
2507 #if SKYWALK
2508 		if ((bif_flags & BIFF_NETAGENT_REMOVED) != 0) {
2509 			ifnet_add_netagent(ifs);
2510 		}
2511 		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2512 			ifnet_detach_flowswitch_nexus(ifs);
2513 		}
2514 #endif /* SKYWALK */
2515 		/* disable promiscuous mode */
2516 		if ((bif_flags & BIFF_PROMISC) != 0) {
2517 			(void) ifnet_set_promiscuous(ifs, 0);
2518 		}
2519 #if HAS_IF_CAP
2520 		/* re-enable any interface capabilities */
2521 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2522 #endif
2523 		/* detach bridge "protocol" */
2524 		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2525 			(void)bridge_detach_protocol(ifs);
2526 		}
2527 		/* detach interface filter */
2528 		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2529 			iflt_detach(bif->bif_iff_ref);
2530 		}
2531 		ifnet_decr_iorefcnt(ifs);
2532 	}
2533 
2534 	if (lladdr_changed &&
2535 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2536 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2537 	}
2538 
2539 	if (event_code != 0) {
2540 		bridge_link_event(bifp, event_code);
2541 	}
2542 
2543 #if BRIDGESTP
2544 	bstp_destroy(&bif->bif_stp);    /* prepare to free */
2545 #endif /* BRIDGESTP */
2546 
2547 	kfree_type(struct bridge_iflist, bif);
2548 	ifs->if_bridge = NULL;
2549 	ifnet_release(ifs);
2550 
2551 	BRIDGE_LOCK(sc);
2552 }
2553 
2554 /*
2555  * bridge_delete_span:
2556  *
2557  *	Delete the specified span interface.
2558  */
2559 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2560 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2561 {
2562 	BRIDGE_LOCK_ASSERT_HELD(sc);
2563 
2564 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2565 	    ("%s: not a span interface", __func__));
2566 
2567 	ifnet_release(bif->bif_ifp);
2568 
2569 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2570 	kfree_type(struct bridge_iflist, bif);
2571 }
2572 
2573 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)2574 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2575 {
2576 	struct ifbreq *req = arg;
2577 	struct bridge_iflist *bif = NULL;
2578 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2579 	int error = 0, lladdr_changed = 0;
2580 	uint8_t eaddr[ETHER_ADDR_LEN];
2581 	struct iff_filter iff;
2582 	u_int32_t event_code = 0;
2583 	boolean_t mac_nat = FALSE;
2584 	boolean_t input_broadcast;
2585 
2586 	ifs = ifunit(req->ifbr_ifsname);
2587 	if (ifs == NULL) {
2588 		return ENOENT;
2589 	}
2590 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2591 		return EINVAL;
2592 	}
2593 
2594 	if (IFNET_IS_INTCOPROC(ifs)) {
2595 		return EINVAL;
2596 	}
2597 
2598 	/* If it's in the span list, it can't be a member. */
2599 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2600 		if (ifs == bif->bif_ifp) {
2601 			return EBUSY;
2602 		}
2603 	}
2604 
2605 	if (ifs->if_bridge == sc) {
2606 		return EEXIST;
2607 	}
2608 
2609 	if (ifs->if_bridge != NULL) {
2610 		return EBUSY;
2611 	}
2612 
2613 	switch (ifs->if_type) {
2614 	case IFT_ETHER:
2615 		if (strcmp(ifs->if_name, "en") == 0 &&
2616 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2617 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2618 			/* XXX is there a better way to identify Wi-Fi STA? */
2619 			mac_nat = TRUE;
2620 		}
2621 		break;
2622 	case IFT_L2VLAN:
2623 	case IFT_IEEE8023ADLAG:
2624 		break;
2625 	case IFT_GIF:
2626 	/* currently not supported */
2627 	/* FALLTHRU */
2628 	default:
2629 		return EINVAL;
2630 	}
2631 
2632 	/* fail to add the interface if the MTU doesn't match */
2633 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2634 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2635 		    sc->sc_ifp->if_xname,
2636 		    ifs->if_xname);
2637 		return EINVAL;
2638 	}
2639 
2640 	/* there's already an interface that's doing MAC NAT */
2641 	if (mac_nat && sc->sc_mac_nat_bif != NULL) {
2642 		return EBUSY;
2643 	}
2644 
2645 	/* prevent the interface from detaching while we add the member */
2646 	if (!ifnet_is_attached(ifs, 1)) {
2647 		return ENXIO;
2648 	}
2649 
2650 	/* allocate a new member */
2651 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2652 	bif->bif_ifp = ifs;
2653 	ifnet_reference(ifs);
2654 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2655 #if HAS_IF_CAP
2656 	bif->bif_savedcaps = ifs->if_capenable;
2657 #endif /* HAS_IF_CAP */
2658 	bif->bif_sc = sc;
2659 	if (mac_nat) {
2660 		(void)bridge_mac_nat_enable(sc, bif);
2661 	}
2662 
2663 	if (IFNET_IS_VMNET(ifs)) {
2664 		allocate_vmnet_pf_tags();
2665 	}
2666 	/* Allow the first Ethernet member to define the MTU */
2667 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2668 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2669 	}
2670 
2671 	/*
2672 	 * Assign the interface's MAC address to the bridge if it's the first
2673 	 * member and the MAC address of the bridge has not been changed from
2674 	 * the default (randomly) generated one.
2675 	 */
2676 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2677 	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2678 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2679 		sc->sc_ifaddr = ifs;
2680 		ifnet_reference(ifs);   /* for sc_ifaddr */
2681 		lladdr_changed = 1;
2682 	}
2683 
2684 	ifs->if_bridge = sc;
2685 #if BRIDGESTP
2686 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2687 #endif /* BRIDGESTP */
2688 
2689 #if HAS_IF_CAP
2690 	/* Set interface capabilities to the intersection set of all members */
2691 	bridge_mutecaps(sc);
2692 #endif /* HAS_IF_CAP */
2693 
2694 
2695 	/*
2696 	 * Respect lock ordering with DLIL lock for the following operations
2697 	 */
2698 	BRIDGE_UNLOCK(sc);
2699 
2700 	/* enable promiscuous mode */
2701 	error = ifnet_set_promiscuous(ifs, 1);
2702 	switch (error) {
2703 	case 0:
2704 		bif->bif_flags |= BIFF_PROMISC;
2705 		break;
2706 	case ENETDOWN:
2707 	case EPWROFF:
2708 		BRIDGE_LOG(LOG_NOTICE, 0,
2709 		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2710 		    ifs->if_xname, error);
2711 		/* Ignore error when device is not up */
2712 		error = 0;
2713 		break;
2714 	default:
2715 		BRIDGE_LOG(LOG_NOTICE, 0,
2716 		    "ifnet_set_promiscuous(%s) failed %d",
2717 		    ifs->if_xname, error);
2718 		BRIDGE_LOCK(sc);
2719 		goto out;
2720 	}
2721 
2722 #if SKYWALK
2723 	/* ensure that the flowswitch is present for native interface */
2724 	if (SKYWALK_NATIVE(ifs)) {
2725 		if (ifnet_attach_flowswitch_nexus(ifs)) {
2726 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2727 		}
2728 	}
2729 	/* remove the netagent on the flowswitch (rdar://75050182) */
2730 	if (ifnet_remove_netagent(ifs)) {
2731 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2732 	}
2733 #endif /* SKYWALK */
2734 
2735 	/*
2736 	 * install an interface filter
2737 	 */
2738 	memset(&iff, 0, sizeof(struct iff_filter));
2739 	iff.iff_cookie = bif;
2740 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2741 	iff.iff_input = bridge_iff_input;
2742 	iff.iff_output = bridge_iff_output;
2743 	iff.iff_event = bridge_iff_event;
2744 	iff.iff_detached = bridge_iff_detached;
2745 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2746 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2747 	if (error != 0) {
2748 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2749 		BRIDGE_LOCK(sc);
2750 		goto out;
2751 	}
2752 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
2753 
2754 	/*
2755 	 * install a dummy "bridge" protocol
2756 	 */
2757 	if ((error = bridge_attach_protocol(ifs)) != 0) {
2758 		if (error != 0) {
2759 			BRIDGE_LOG(LOG_NOTICE, 0,
2760 			    "bridge_attach_protocol failed %d", error);
2761 			BRIDGE_LOCK(sc);
2762 			goto out;
2763 		}
2764 	}
2765 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
2766 
2767 	if (lladdr_changed &&
2768 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2769 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2770 	}
2771 
2772 	/*
2773 	 * No failures past this point. Add the member to the list.
2774 	 */
2775 	BRIDGE_LOCK(sc);
2776 	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2777 	BRIDGE_XLOCK(sc);
2778 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2779 	BRIDGE_XDROP(sc);
2780 
2781 	/* cache the member link status */
2782 	if (interface_media_active(ifs)) {
2783 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2784 	} else {
2785 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2786 	}
2787 
2788 	/* the new member may change the link status of the bridge interface */
2789 	event_code = bridge_updatelinkstatus(sc);
2790 
2791 	/* check whether we need input broadcast or not */
2792 	input_broadcast = interface_needs_input_broadcast(ifs);
2793 	bif_set_input_broadcast(bif, input_broadcast);
2794 	BRIDGE_UNLOCK(sc);
2795 
2796 	if (event_code != 0) {
2797 		bridge_link_event(bifp, event_code);
2798 	}
2799 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2800 	    "%s input broadcast %s", ifs->if_xname,
2801 	    input_broadcast ? "ENABLED" : "DISABLED");
2802 
2803 	BRIDGE_LOCK(sc);
2804 	bridge_set_tso(sc);
2805 
2806 out:
2807 	/* allow the interface to detach */
2808 	ifnet_decr_iorefcnt(ifs);
2809 
2810 	if (error != 0) {
2811 		if (bif != NULL) {
2812 			bridge_delete_member(sc, bif);
2813 		}
2814 	} else if (IFNET_IS_VMNET(ifs)) {
2815 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2816 	}
2817 
2818 	return error;
2819 }
2820 
2821 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2822 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2823 {
2824 	struct ifbreq *req = arg;
2825 	struct bridge_iflist *bif;
2826 
2827 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2828 	if (bif == NULL) {
2829 		return ENOENT;
2830 	}
2831 
2832 	bridge_delete_member(sc, bif);
2833 
2834 	return 0;
2835 }
2836 
/*
 * bridge_ioctl_purge:
 *
 *	No-op handler; ignores both arguments and always returns 0.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)

	return 0;
}
2843 
2844 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2845 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2846 {
2847 	struct ifbreq *req = arg;
2848 	struct bridge_iflist *bif;
2849 
2850 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2851 	if (bif == NULL) {
2852 		return ENOENT;
2853 	}
2854 
2855 	struct bstp_port *bp;
2856 
2857 	bp = &bif->bif_stp;
2858 	req->ifbr_state = bp->bp_state;
2859 	req->ifbr_priority = bp->bp_priority;
2860 	req->ifbr_path_cost = bp->bp_path_cost;
2861 	req->ifbr_proto = bp->bp_protover;
2862 	req->ifbr_role = bp->bp_role;
2863 	req->ifbr_stpflags = bp->bp_flags;
2864 	req->ifbr_ifsflags = bif->bif_ifflags;
2865 
2866 	/* Copy STP state options as flags */
2867 	if (bp->bp_operedge) {
2868 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2869 	}
2870 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2871 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2872 	}
2873 	if (bp->bp_ptp_link) {
2874 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2875 	}
2876 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2877 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2878 	}
2879 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2880 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
2881 	}
2882 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
2883 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
2884 	}
2885 
2886 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
2887 	req->ifbr_addrcnt = bif->bif_addrcnt;
2888 	req->ifbr_addrmax = bif->bif_addrmax;
2889 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
2890 
2891 	return 0;
2892 }
2893 
2894 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)2895 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
2896 {
2897 	struct ifbreq *req = arg;
2898 	struct bridge_iflist *bif;
2899 #if BRIDGESTP
2900 	struct bstp_port *bp;
2901 	int error;
2902 #endif /* BRIDGESTP */
2903 
2904 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2905 	if (bif == NULL) {
2906 		return ENOENT;
2907 	}
2908 
2909 	if (req->ifbr_ifsflags & IFBIF_SPAN) {
2910 		/* SPAN is readonly */
2911 		return EINVAL;
2912 	}
2913 #define _EXCLUSIVE_FLAGS        (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
2914 	if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
2915 		/* can't specify both MAC-NAT and checksum offload */
2916 		return EINVAL;
2917 	}
2918 	if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
2919 		errno_t error;
2920 
2921 		error = bridge_mac_nat_enable(sc, bif);
2922 		if (error != 0) {
2923 			return error;
2924 		}
2925 	} else if (sc->sc_mac_nat_bif == bif) {
2926 		bridge_mac_nat_disable(sc);
2927 	}
2928 
2929 
2930 #if BRIDGESTP
2931 	if (req->ifbr_ifsflags & IFBIF_STP) {
2932 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
2933 			error = bstp_enable(&bif->bif_stp);
2934 			if (error) {
2935 				return error;
2936 			}
2937 		}
2938 	} else {
2939 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2940 			bstp_disable(&bif->bif_stp);
2941 		}
2942 	}
2943 
2944 	/* Pass on STP flags */
2945 	bp = &bif->bif_stp;
2946 	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
2947 	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
2948 	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
2949 	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
2950 #else /* !BRIDGESTP */
2951 	if (req->ifbr_ifsflags & IFBIF_STP) {
2952 		return EOPNOTSUPP;
2953 	}
2954 #endif /* !BRIDGESTP */
2955 
2956 	/* Save the bits relating to the bridge */
2957 	bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
2958 
2959 
2960 	return 0;
2961 }
2962 
2963 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)2964 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
2965 {
2966 	struct ifbrparam *param = arg;
2967 
2968 	sc->sc_brtmax = param->ifbrp_csize;
2969 	bridge_rttrim(sc);
2970 	return 0;
2971 }
2972 
2973 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)2974 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
2975 {
2976 	struct ifbrparam *param = arg;
2977 
2978 	param->ifbrp_csize = sc->sc_brtmax;
2979 
2980 	return 0;
2981 }
2982 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32() and bridge_ioctl_gifs64().
 *	Expects `sc', `bifc' and `error' in the enclosing scope.
 *
 *	If bifc->ifbic_len is 0, stores the size needed for all member and
 *	span entries in it and returns 0 from the enclosing function.
 *	Otherwise builds one ifbreq per member, then per span port, as far
 *	as the caller's length allows, copies them out to bifc->ifbic_req
 *	and stores the number of bytes produced in bifc->ifbic_len.
 *
 *	The bridge lock is dropped around the allocation and the copyout,
 *	and is held again when the macro completes or returns.  Returns
 *	ENOMEM from the enclosing function if the buffer can't be allocated.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
	        count++;                                                \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
	        count++;                                                \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* Z_WAITOK without Z_NOFAIL may return NULL */                 \
	if (outbuf == NULL && buflen != 0) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	/* the lists may have changed while unlocked; never exceed */   \
	/* either the caller's length or the allocated size */          \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	        /* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq);               \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        /* start clean: only name, flags and port number are */ \
	        /* set for a span port, so nothing left over from a */  \
	        /* previous entry may be reported */                    \
	        bzero(&breq, sizeof (breq));                            \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);      \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
3045 
3046 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * arg)3047 bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
3048 {
3049 	struct ifbifconf64 *bifc = arg;
3050 	int error = 0;
3051 
3052 	BRIDGE_IOCTL_GIFS;
3053 
3054 	return error;
3055 }
3056 
3057 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * arg)3058 bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
3059 {
3060 	struct ifbifconf32 *bifc = arg;
3061 	int error = 0;
3062 
3063 	BRIDGE_IOCTL_GIFS;
3064 
3065 	return error;
3066 }
3067 
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32() and bridge_ioctl_rts64().
 *	Expects `sc', `bac', `bareq' and `error' in the enclosing scope and
 *	always returns from the enclosing function.
 *
 *	Returns 0 immediately if bac->ifbac_len is 0.  Otherwise builds one
 *	address entry per route table node, as far as the caller's length
 *	allows, copies them out to bac->ifbac_req and stores the number of
 *	bytes produced in bac->ifbac_len.  For dynamic entries ifba_expire
 *	is the remaining lifetime (0 once expired); for all other entries
 *	it is 0.
 *
 *	The bridge lock is dropped around the allocation and the copyout,
 *	and is held again on return.  Returns ENOMEM if the buffer can't be
 *	allocated.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);            \
	BRIDGE_LOCK(sc);                                                    \
	/* Z_WAITOK without Z_NOFAIL may return NULL */                     \
	if (outbuf == NULL && buflen != 0) {                                \
	        return (ENOMEM);                                            \
	}                                                                   \
                                                                            \
	count = 0;                                                          \
	buf = outbuf;                                                       \
	/* the table may have changed while unlocked; never exceed */       \
	/* either the caller's length or the allocated size */              \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	        /* reset per entry: bareq is reused, and an expired */      \
	        /* dynamic entry must not inherit the previous value */     \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire) {                        \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	                }                                                   \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);    \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3122 
3123 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * arg)3124 bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3125 {
3126 	struct ifbaconf64 *bac = arg;
3127 	struct ifbareq64 bareq;
3128 	int error = 0;
3129 
3130 	BRIDGE_IOCTL_RTS;
3131 	return error;
3132 }
3133 
3134 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * arg)3135 bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3136 {
3137 	struct ifbaconf32 *bac = arg;
3138 	struct ifbareq32 bareq;
3139 	int error = 0;
3140 
3141 	BRIDGE_IOCTL_RTS;
3142 	return error;
3143 }
3144 
3145 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3146 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3147 {
3148 	struct ifbareq32 *req = arg;
3149 	struct bridge_iflist *bif;
3150 	int error;
3151 
3152 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3153 	if (bif == NULL) {
3154 		return ENOENT;
3155 	}
3156 
3157 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3158 	    req->ifba_flags);
3159 
3160 	return error;
3161 }
3162 
3163 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3164 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3165 {
3166 	struct ifbareq64 *req = arg;
3167 	struct bridge_iflist *bif;
3168 	int error;
3169 
3170 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3171 	if (bif == NULL) {
3172 		return ENOENT;
3173 	}
3174 
3175 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3176 	    req->ifba_flags);
3177 
3178 	return error;
3179 }
3180 
3181 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3182 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3183 {
3184 	struct ifbrparam *param = arg;
3185 
3186 	sc->sc_brttimeout = param->ifbrp_ctime;
3187 	return 0;
3188 }
3189 
3190 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3191 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3192 {
3193 	struct ifbrparam *param = arg;
3194 
3195 	param->ifbrp_ctime = sc->sc_brttimeout;
3196 	return 0;
3197 }
3198 
3199 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3200 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3201 {
3202 	struct ifbareq32 *req = arg;
3203 
3204 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3205 }
3206 
3207 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3208 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3209 {
3210 	struct ifbareq64 *req = arg;
3211 
3212 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3213 }
3214 
3215 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3216 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3217 {
3218 	struct ifbreq *req = arg;
3219 
3220 	bridge_rtflush(sc, req->ifbr_ifsflags);
3221 	return 0;
3222 }
3223 
3224 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3225 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3226 {
3227 	struct ifbrparam *param = arg;
3228 	struct bstp_state *bs = &sc->sc_stp;
3229 
3230 	param->ifbrp_prio = bs->bs_bridge_priority;
3231 	return 0;
3232 }
3233 
3234 static int
bridge_ioctl_spri(struct bridge_softc * sc,void * arg)3235 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
3236 {
3237 #if BRIDGESTP
3238 	struct ifbrparam *param = arg;
3239 
3240 	return bstp_set_priority(&sc->sc_stp, param->ifbrp_prio);
3241 #else /* !BRIDGESTP */
3242 #pragma unused(sc, arg)
3243 	return EOPNOTSUPP;
3244 #endif /* !BRIDGESTP */
3245 }
3246 
3247 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3248 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3249 {
3250 	struct ifbrparam *param = arg;
3251 	struct bstp_state *bs = &sc->sc_stp;
3252 
3253 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3254 	return 0;
3255 }
3256 
3257 static int
bridge_ioctl_sht(struct bridge_softc * sc,void * arg)3258 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
3259 {
3260 #if BRIDGESTP
3261 	struct ifbrparam *param = arg;
3262 
3263 	return bstp_set_htime(&sc->sc_stp, param->ifbrp_hellotime);
3264 #else /* !BRIDGESTP */
3265 #pragma unused(sc, arg)
3266 	return EOPNOTSUPP;
3267 #endif /* !BRIDGESTP */
3268 }
3269 
3270 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3271 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3272 {
3273 	struct ifbrparam *param;
3274 	struct bstp_state *bs;
3275 
3276 	param = arg;
3277 	bs = &sc->sc_stp;
3278 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3279 	return 0;
3280 }
3281 
3282 static int
bridge_ioctl_sfd(struct bridge_softc * sc,void * arg)3283 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
3284 {
3285 #if BRIDGESTP
3286 	struct ifbrparam *param = arg;
3287 
3288 	return bstp_set_fdelay(&sc->sc_stp, param->ifbrp_fwddelay);
3289 #else /* !BRIDGESTP */
3290 #pragma unused(sc, arg)
3291 	return EOPNOTSUPP;
3292 #endif /* !BRIDGESTP */
3293 }
3294 
3295 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3296 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3297 {
3298 	struct ifbrparam *param;
3299 	struct bstp_state *bs;
3300 
3301 	param = arg;
3302 	bs = &sc->sc_stp;
3303 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3304 	return 0;
3305 }
3306 
3307 static int
bridge_ioctl_sma(struct bridge_softc * sc,void * arg)3308 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
3309 {
3310 #if BRIDGESTP
3311 	struct ifbrparam *param = arg;
3312 
3313 	return bstp_set_maxage(&sc->sc_stp, param->ifbrp_maxage);
3314 #else /* !BRIDGESTP */
3315 #pragma unused(sc, arg)
3316 	return EOPNOTSUPP;
3317 #endif /* !BRIDGESTP */
3318 }
3319 
3320 static int
bridge_ioctl_sifprio(struct bridge_softc * sc,void * arg)3321 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
3322 {
3323 #if BRIDGESTP
3324 	struct ifbreq *req = arg;
3325 	struct bridge_iflist *bif;
3326 
3327 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3328 	if (bif == NULL) {
3329 		return ENOENT;
3330 	}
3331 
3332 	return bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority);
3333 #else /* !BRIDGESTP */
3334 #pragma unused(sc, arg)
3335 	return EOPNOTSUPP;
3336 #endif /* !BRIDGESTP */
3337 }
3338 
3339 static int
bridge_ioctl_sifcost(struct bridge_softc * sc,void * arg)3340 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
3341 {
3342 #if BRIDGESTP
3343 	struct ifbreq *req = arg;
3344 	struct bridge_iflist *bif;
3345 
3346 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3347 	if (bif == NULL) {
3348 		return ENOENT;
3349 	}
3350 
3351 	return bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost);
3352 #else /* !BRIDGESTP */
3353 #pragma unused(sc, arg)
3354 	return EOPNOTSUPP;
3355 #endif /* !BRIDGESTP */
3356 }
3357 
3358 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3359 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3360 {
3361 	struct ifbrparam *param = arg;
3362 
3363 	param->ifbrp_filter = sc->sc_filter_flags;
3364 
3365 	return 0;
3366 }
3367 
3368 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3369 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3370 {
3371 	struct ifbrparam *param = arg;
3372 
3373 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3374 		return EINVAL;
3375 	}
3376 
3377 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3378 		return EINVAL;
3379 	}
3380 
3381 	sc->sc_filter_flags = param->ifbrp_filter;
3382 
3383 	return 0;
3384 }
3385 
3386 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3387 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3388 {
3389 	struct ifbreq *req = arg;
3390 	struct bridge_iflist *bif;
3391 
3392 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3393 	if (bif == NULL) {
3394 		return ENOENT;
3395 	}
3396 
3397 	bif->bif_addrmax = req->ifbr_addrmax;
3398 	return 0;
3399 }
3400 
3401 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3402 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3403 {
3404 	struct ifbreq *req = arg;
3405 	struct bridge_iflist *bif = NULL;
3406 	struct ifnet *ifs;
3407 
3408 	ifs = ifunit(req->ifbr_ifsname);
3409 	if (ifs == NULL) {
3410 		return ENOENT;
3411 	}
3412 
3413 	if (IFNET_IS_INTCOPROC(ifs)) {
3414 		return EINVAL;
3415 	}
3416 
3417 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3418 	if (ifs == bif->bif_ifp) {
3419 		return EBUSY;
3420 	}
3421 
3422 	if (ifs->if_bridge != NULL) {
3423 		return EBUSY;
3424 	}
3425 
3426 	switch (ifs->if_type) {
3427 	case IFT_ETHER:
3428 	case IFT_L2VLAN:
3429 	case IFT_IEEE8023ADLAG:
3430 		break;
3431 	case IFT_GIF:
3432 	/* currently not supported */
3433 	/* FALLTHRU */
3434 	default:
3435 		return EINVAL;
3436 	}
3437 
3438 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3439 
3440 	bif->bif_ifp = ifs;
3441 	bif->bif_ifflags = IFBIF_SPAN;
3442 
3443 	ifnet_reference(bif->bif_ifp);
3444 
3445 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3446 
3447 	return 0;
3448 }
3449 
3450 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3451 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3452 {
3453 	struct ifbreq *req = arg;
3454 	struct bridge_iflist *bif;
3455 	struct ifnet *ifs;
3456 
3457 	ifs = ifunit(req->ifbr_ifsname);
3458 	if (ifs == NULL) {
3459 		return ENOENT;
3460 	}
3461 
3462 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3463 	if (ifs == bif->bif_ifp) {
3464 		break;
3465 	}
3466 
3467 	if (bif == NULL) {
3468 		return ENOENT;
3469 	}
3470 
3471 	bridge_delete_span(sc, bif);
3472 
3473 	return 0;
3474 }
3475 
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of the bridge_ioctl_gbparam32/64 handlers.  Expects
 *	`sc' and `req' in the enclosing scope and copies the bridge-wide
 *	spanning tree parameters from sc->sc_stp into *req.  The max age,
 *	hello time and forward delay are reported shifted right by 8 bits;
 *	the root port is reported as an interface index, or 0 when there
 *	is no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *bs = &sc->sc_stp;                            \
	struct bstp_port *rp = bs->bs_root_port;                        \
                                                                        \
	/* timers */                                                    \
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8;                 \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8;                \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8;                \
                                                                        \
	/* root port: 0 unless one is set */                            \
	req->ifbop_root_port = 0;                                       \
	if (rp != NULL) {                                               \
	        req->ifbop_root_port = rp->bp_ifp->if_index;            \
	}                                                               \
                                                                        \
	/* bridge-wide settings */                                      \
	req->ifbop_holdcount = bs->bs_txholdcount;                      \
	req->ifbop_priority = bs->bs_bridge_priority;                   \
	req->ifbop_protocol = bs->bs_protover;                          \
                                                                        \
	/* priority vectors */                                          \
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost;             \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id;           \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id;         \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id;    \
                                                                        \
	/* time of the last topology change */                          \
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec;    \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec;  \
} while (0)
3500 
3501 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * arg)3502 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3503 {
3504 	struct ifbropreq32 *req = arg;
3505 
3506 	BRIDGE_IOCTL_GBPARAM;
3507 	return 0;
3508 }
3509 
3510 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * arg)3511 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3512 {
3513 	struct ifbropreq64 *req = arg;
3514 
3515 	BRIDGE_IOCTL_GBPARAM;
3516 	return 0;
3517 }
3518 
3519 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3520 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3521 {
3522 	struct ifbrparam *param = arg;
3523 
3524 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3525 	return 0;
3526 }
3527 
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Shared body of the 32-bit and 64-bit "get STP state of all members"
 *	ioctl handlers.  Expects 'sc' (the bridge softc), 'bifstp' (the
 *	request) and 'error' (an int) to be in scope at the expansion site,
 *	and returns from the enclosing function.
 *
 *	When the caller passes a zero ifbpstp_len, only the required buffer
 *	size is reported back.  Otherwise one struct ifbpstpreq per member
 *	with IFBIF_STP set is staged in a kernel buffer and copied out, up
 *	to the smaller of the caller's length and the computed size.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout, and is held again on every return path.  Returns ENOMEM if
 *	the staging buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if ((bif->bif_ifflags & IFBIF_STP) != 0)                \
	                count++;                                        \
	}                                                               \
                                                                        \
	buflen = sizeof (bpreq) * count;                                \
	if (bifstp->ifbpstp_len == 0) {                                 \
	        bifstp->ifbpstp_len = buflen;                           \
	        return (0);                                             \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* a non-empty request with no buffer cannot be staged */      \
	if (outbuf == NULL && buflen != 0) {                            \
	        return (ENOMEM);                                        \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	/* the member list may have changed while unlocked: never */   \
	/* write more than was allocated or than the caller asked */   \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (bpreq))                               \
	                break;                                          \
                                                                        \
	        if ((bif->bif_ifflags & IFBIF_STP) == 0)                \
	                continue;                                       \
                                                                        \
	        bp = &bif->bif_stp;                                     \
	        bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
	        bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
	        bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
	        bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
	        bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
	        bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
                                                                        \
	        memcpy(buf, &bpreq, sizeof (bpreq));                    \
	        count++;                                                \
	        buf += sizeof (bpreq);                                  \
	        len -= sizeof (bpreq);                                  \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3583 
3584 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * arg)3585 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3586 {
3587 	struct ifbpstpconf32 *bifstp = arg;
3588 	int error = 0;
3589 
3590 	BRIDGE_IOCTL_GIFSSTP;
3591 	return error;
3592 }
3593 
3594 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * arg)3595 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3596 {
3597 	struct ifbpstpconf64 *bifstp = arg;
3598 	int error = 0;
3599 
3600 	BRIDGE_IOCTL_GIFSSTP;
3601 	return error;
3602 }
3603 
3604 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * arg)3605 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3606 {
3607 #if BRIDGESTP
3608 	struct ifbrparam *param = arg;
3609 
3610 	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3611 #else /* !BRIDGESTP */
3612 #pragma unused(sc, arg)
3613 	return EOPNOTSUPP;
3614 #endif /* !BRIDGESTP */
3615 }
3616 
3617 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * arg)3618 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3619 {
3620 #if BRIDGESTP
3621 	struct ifbrparam *param = arg;
3622 
3623 	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3624 #else /* !BRIDGESTP */
3625 #pragma unused(sc, arg)
3626 	return EOPNOTSUPP;
3627 #endif /* !BRIDGESTP */
3628 }
3629 
3630 
3631 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3632 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3633 {
3634 	struct ifbrhostfilter *req = arg;
3635 	struct bridge_iflist *bif;
3636 
3637 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3638 	if (bif == NULL) {
3639 		return ENOENT;
3640 	}
3641 
3642 	bzero(req, sizeof(struct ifbrhostfilter));
3643 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3644 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3645 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3646 		    ETHER_ADDR_LEN);
3647 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3648 	}
3649 	return 0;
3650 }
3651 
3652 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3653 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3654 {
3655 	struct ifbrhostfilter *req = arg;
3656 	struct bridge_iflist *bif;
3657 
3658 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3659 	if (bif == NULL) {
3660 		return ENOENT;
3661 	}
3662 
3663 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3664 		bif->bif_flags |= BIFF_HOST_FILTER;
3665 
3666 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3667 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3668 			    ETHER_ADDR_LEN);
3669 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3670 			    ETHER_ADDR_LEN) != 0) {
3671 				bif->bif_flags |= BIFF_HF_HWSRC;
3672 			} else {
3673 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3674 			}
3675 		}
3676 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3677 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3678 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3679 				bif->bif_flags |= BIFF_HF_IPSRC;
3680 			} else {
3681 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3682 			}
3683 		}
3684 	} else {
3685 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3686 		    BIFF_HF_IPSRC);
3687 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3688 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3689 	}
3690 
3691 	return 0;
3692 }
3693 
3694 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3695 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3696     unsigned int * count_p, char *buf, unsigned int *len_p)
3697 {
3698 	unsigned int            count = *count_p;
3699 	struct ifbrmne          ifbmne;
3700 	unsigned int            len = *len_p;
3701 	struct mac_nat_entry    *mne;
3702 	unsigned long           now;
3703 
3704 	bzero(&ifbmne, sizeof(ifbmne));
3705 	LIST_FOREACH(mne, list, mne_list) {
3706 		if (len < sizeof(ifbmne)) {
3707 			break;
3708 		}
3709 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3710 		    "%s", mne->mne_bif->bif_ifp->if_xname);
3711 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3712 		    sizeof(ifbmne.ifbmne_mac));
3713 		now = (unsigned long) net_uptime();
3714 		if (now < mne->mne_expire) {
3715 			ifbmne.ifbmne_expire = mne->mne_expire - now;
3716 		} else {
3717 			ifbmne.ifbmne_expire = 0;
3718 		}
3719 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3720 			ifbmne.ifbmne_af = AF_INET6;
3721 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3722 		} else {
3723 			ifbmne.ifbmne_af = AF_INET;
3724 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
3725 		}
3726 		memcpy(buf, &ifbmne, sizeof(ifbmne));
3727 		count++;
3728 		buf += sizeof(ifbmne);
3729 		len -= sizeof(ifbmne);
3730 	}
3731 	*count_p = count;
3732 	*len_p = len;
3733 	return buf;
3734 }
3735 
3736 /*
3737  * bridge_ioctl_gmnelist()
3738  *   Perform the get mac_nat_entry list ioctl.
3739  *
3740  * Note:
3741  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3742  *   field size/layout except for the last field ifbml_buf, the user-supplied
3743  *   buffer pointer. That is passed in separately via the 'user_addr'
3744  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3745  */
3746 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3747 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3748     user_addr_t user_addr)
3749 {
3750 	unsigned int            count;
3751 	char                    *buf;
3752 	int                     error = 0;
3753 	char                    *outbuf = NULL;
3754 	struct mac_nat_entry    *mne;
3755 	unsigned int            buflen;
3756 	unsigned int            len;
3757 
3758 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
3759 	count = 0;
3760 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3761 		count++;
3762 	}
3763 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3764 		count++;
3765 	}
3766 	buflen = sizeof(struct ifbrmne) * count;
3767 	if (buflen == 0 || mnl->ifbml_len == 0) {
3768 		mnl->ifbml_len = buflen;
3769 		return error;
3770 	}
3771 	BRIDGE_UNLOCK(sc);
3772 	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3773 	BRIDGE_LOCK(sc);
3774 	count = 0;
3775 	buf = outbuf;
3776 	len = min(mnl->ifbml_len, buflen);
3777 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3778 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3779 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
3780 	BRIDGE_UNLOCK(sc);
3781 	error = copyout(outbuf, user_addr, mnl->ifbml_len);
3782 	kfree_data(outbuf, buflen);
3783 	BRIDGE_LOCK(sc);
3784 	return error;
3785 }
3786 
3787 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3788 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3789 {
3790 	struct ifbrmnelist64 *mnl = arg;
3791 
3792 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3793 }
3794 
3795 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3796 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3797 {
3798 	struct ifbrmnelist32 *mnl = arg;
3799 
3800 	return bridge_ioctl_gmnelist(sc, arg,
3801 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
3802 }
3803 
3804 /*
3805  * bridge_ioctl_gifstats()
3806  *   Return per-member stats.
3807  *
3808  * Note:
3809  *   The ifbrmreq32 and ifbrmreq64 structures have the same
3810  *   field size/layout except for the last field brmr_buf, the user-supplied
3811  *   buffer pointer. That is passed in separately via the 'user_addr'
3812  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3813  */
3814 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3815 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3816     user_addr_t user_addr)
3817 {
3818 	struct bridge_iflist    *bif;
3819 	int                     error = 0;
3820 	unsigned int            buflen;
3821 
3822 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3823 	if (bif == NULL) {
3824 		error = ENOENT;
3825 		goto done;
3826 	}
3827 
3828 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3829 	if (buflen == 0 || mreq->brmr_len == 0) {
3830 		mreq->brmr_len = buflen;
3831 		goto done;
3832 	}
3833 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3834 		error = ENOBUFS;
3835 		goto done;
3836 	}
3837 	mreq->brmr_len = buflen;
3838 	error = copyout(&bif->bif_stats, user_addr, buflen);
3839 done:
3840 	return error;
3841 }
3842 
3843 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3844 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3845 {
3846 	struct ifbrmreq32 *mreq = arg;
3847 
3848 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3849 }
3850 
3851 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3852 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3853 {
3854 	struct ifbrmreq64 *mreq = arg;
3855 
3856 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3857 }
3858 
3859 /*
3860  * bridge_proto_attach_changed
3861  *
3862  *	Called when protocol attachment on the interface changes.
3863  */
3864 static void
bridge_proto_attach_changed(struct ifnet * ifp)3865 bridge_proto_attach_changed(struct ifnet *ifp)
3866 {
3867 	boolean_t changed = FALSE;
3868 	struct bridge_iflist *bif;
3869 	boolean_t input_broadcast;
3870 	struct bridge_softc *sc = ifp->if_bridge;
3871 
3872 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3873 	if (sc == NULL) {
3874 		return;
3875 	}
3876 	input_broadcast = interface_needs_input_broadcast(ifp);
3877 	BRIDGE_LOCK(sc);
3878 	bif = bridge_lookup_member_if(sc, ifp);
3879 	if (bif != NULL) {
3880 		changed = bif_set_input_broadcast(bif, input_broadcast);
3881 	}
3882 	BRIDGE_UNLOCK(sc);
3883 	if (changed) {
3884 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3885 		    "%s input broadcast %s", ifp->if_xname,
3886 		    input_broadcast ? "ENABLED" : "DISABLED");
3887 	}
3888 	return;
3889 }
3890 
3891 /*
3892  * interface_media_active:
3893  *
3894  *	Tells if an interface media is active.
3895  */
3896 static int
interface_media_active(struct ifnet * ifp)3897 interface_media_active(struct ifnet *ifp)
3898 {
3899 	struct ifmediareq   ifmr;
3900 	int status = 0;
3901 
3902 	bzero(&ifmr, sizeof(ifmr));
3903 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
3904 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
3905 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
3906 		}
3907 	}
3908 
3909 	return status;
3910 }
3911 
3912 /*
3913  * bridge_updatelinkstatus:
3914  *
3915  *      Update the media active status of the bridge based on the
3916  *	media active status of its member.
3917  *	If changed, return the corresponding onf/off link event.
3918  */
3919 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)3920 bridge_updatelinkstatus(struct bridge_softc *sc)
3921 {
3922 	struct bridge_iflist *bif;
3923 	int active_member = 0;
3924 	u_int32_t event_code = 0;
3925 
3926 	BRIDGE_LOCK_ASSERT_HELD(sc);
3927 
3928 	/*
3929 	 * Find out if we have an active interface
3930 	 */
3931 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
3932 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
3933 			active_member = 1;
3934 			break;
3935 		}
3936 	}
3937 
3938 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3939 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
3940 		event_code = KEV_DL_LINK_ON;
3941 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3942 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
3943 		event_code = KEV_DL_LINK_OFF;
3944 	}
3945 
3946 	return event_code;
3947 }
3948 
3949 /*
3950  * bridge_iflinkevent:
3951  */
3952 static void
bridge_iflinkevent(struct ifnet * ifp)3953 bridge_iflinkevent(struct ifnet *ifp)
3954 {
3955 	struct bridge_softc *sc = ifp->if_bridge;
3956 	struct bridge_iflist *bif;
3957 	u_int32_t event_code = 0;
3958 	int media_active;
3959 
3960 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3961 
3962 	/* Check if the interface is a bridge member */
3963 	if (sc == NULL) {
3964 		return;
3965 	}
3966 
3967 	media_active = interface_media_active(ifp);
3968 	BRIDGE_LOCK(sc);
3969 	bif = bridge_lookup_member_if(sc, ifp);
3970 	if (bif != NULL) {
3971 		if (media_active) {
3972 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3973 		} else {
3974 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3975 		}
3976 		if (sc->sc_mac_nat_bif != NULL) {
3977 			bridge_mac_nat_flush_entries(sc, bif);
3978 		}
3979 
3980 		event_code = bridge_updatelinkstatus(sc);
3981 	}
3982 	BRIDGE_UNLOCK(sc);
3983 
3984 	if (event_code != 0) {
3985 		bridge_link_event(sc->sc_ifp, event_code);
3986 	}
3987 }
3988 
3989 /*
3990  * bridge_delayed_callback:
3991  *
3992  *	Makes a delayed call
3993  */
3994 static void
bridge_delayed_callback(void * param,__unused void * param2)3995 bridge_delayed_callback(void *param, __unused void *param2)
3996 {
3997 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
3998 	struct bridge_softc *sc = call->bdc_sc;
3999 
4000 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4001 	if (bridge_delayed_callback_delay > 0) {
4002 		struct timespec ts;
4003 
4004 		ts.tv_sec = bridge_delayed_callback_delay;
4005 		ts.tv_nsec = 0;
4006 
4007 		BRIDGE_LOG(LOG_NOTICE, 0,
4008 		    "sleeping for %d seconds",
4009 		    bridge_delayed_callback_delay);
4010 
4011 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
4012 		    __func__, &ts);
4013 
4014 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
4015 	}
4016 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4017 
4018 	BRIDGE_LOCK(sc);
4019 
4020 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4021 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4022 	    "%s call 0x%llx flags 0x%x",
4023 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4024 	    call->bdc_flags);
4025 }
4026 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4027 
4028 	if (call->bdc_flags & BDCF_CANCELLING) {
4029 		wakeup(call);
4030 	} else {
4031 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
4032 			(*call->bdc_func)(sc);
4033 		}
4034 	}
4035 	call->bdc_flags &= ~BDCF_OUTSTANDING;
4036 	BRIDGE_UNLOCK(sc);
4037 }
4038 
4039 /*
4040  * bridge_schedule_delayed_call:
4041  *
4042  *	Schedule a function to be called on a separate thread
4043  *      The actual call may be scheduled to run at a given time or ASAP.
4044  */
4045 static void
4046 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
4047 {
4048 	uint64_t deadline = 0;
4049 	struct bridge_softc *sc = call->bdc_sc;
4050 
4051 	BRIDGE_LOCK_ASSERT_HELD(sc);
4052 
4053 	if ((sc->sc_flags & SCF_DETACHING) ||
4054 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4055 		return;
4056 	}
4057 
4058 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4059 		nanoseconds_to_absolutetime(
4060 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4061 			call->bdc_ts.tv_nsec, &deadline);
4062 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
4063 	}
4064 
4065 	call->bdc_flags = BDCF_OUTSTANDING;
4066 
4067 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4068 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4069 	    "%s call 0x%llx flags 0x%x",
4070 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4071 	    call->bdc_flags);
4072 }
4073 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4074 
4075 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4076 		thread_call_func_delayed(
4077 			(thread_call_func_t)bridge_delayed_callback,
4078 			call, deadline);
4079 	} else {
4080 		if (call->bdc_thread_call == NULL) {
4081 			call->bdc_thread_call = thread_call_allocate(
4082 				(thread_call_func_t)bridge_delayed_callback,
4083 				call);
4084 		}
4085 		thread_call_enter(call->bdc_thread_call);
4086 	}
4087 }
4088 
4089 /*
4090  * bridge_cancel_delayed_call:
4091  *
4092  *	Cancel a queued or running delayed call.
4093  *	If call is running, does not return until the call is done to
4094  *	prevent race condition with the brigde interface getting destroyed
4095  */
4096 static void
4097 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4098 {
4099 	boolean_t result;
4100 	struct bridge_softc *sc = call->bdc_sc;
4101 
4102 	/*
4103 	 * The call was never scheduled
4104 	 */
4105 	if (sc == NULL) {
4106 		return;
4107 	}
4108 
4109 	BRIDGE_LOCK_ASSERT_HELD(sc);
4110 
4111 	call->bdc_flags |= BDCF_CANCELLING;
4112 
4113 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4114 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4115 		    "%s call 0x%llx flags 0x%x",
4116 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4117 		    call->bdc_flags);
4118 		result = thread_call_func_cancel(
4119 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4120 
4121 		if (result) {
4122 			/*
4123 			 * We managed to dequeue the delayed call
4124 			 */
4125 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4126 		} else {
4127 			/*
4128 			 * Wait for delayed call do be done running
4129 			 */
4130 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4131 		}
4132 	}
4133 	call->bdc_flags &= ~BDCF_CANCELLING;
4134 }
4135 
4136 /*
4137  * bridge_cleanup_delayed_call:
4138  *
4139  *	Dispose resource allocated for a delayed call
4140  *	Assume the delayed call is not queued or running .
4141  */
4142 static void
4143 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4144 {
4145 	boolean_t result;
4146 	struct bridge_softc *sc = call->bdc_sc;
4147 
4148 	/*
4149 	 * The call was never scheduled
4150 	 */
4151 	if (sc == NULL) {
4152 		return;
4153 	}
4154 
4155 	BRIDGE_LOCK_ASSERT_HELD(sc);
4156 
4157 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4158 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4159 
4160 	if (call->bdc_thread_call != NULL) {
4161 		result = thread_call_free(call->bdc_thread_call);
4162 		if (result == FALSE) {
4163 			panic("%s thread_call_free() failed for call %p",
4164 			    __func__, call);
4165 		}
4166 		call->bdc_thread_call = NULL;
4167 	}
4168 }
4169 
4170 /*
4171  * bridge_init:
4172  *
4173  *	Initialize a bridge interface.
4174  */
4175 static int
4176 bridge_init(struct ifnet *ifp)
4177 {
4178 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4179 	errno_t error;
4180 
4181 	BRIDGE_LOCK_ASSERT_HELD(sc);
4182 
4183 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4184 		return 0;
4185 	}
4186 
4187 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4188 
4189 	/*
4190 	 * Calling bridge_aging_timer() is OK as there are no entries to
4191 	 * age so we're just going to arm the timer
4192 	 */
4193 	bridge_aging_timer(sc);
4194 #if BRIDGESTP
4195 	if (error == 0) {
4196 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4197 	}
4198 #endif /* BRIDGESTP */
4199 	return error;
4200 }
4201 
4202 /*
4203  * bridge_ifstop:
4204  *
4205  *	Stop the bridge interface.
4206  */
4207 static void
4208 bridge_ifstop(struct ifnet *ifp, int disable)
4209 {
4210 #pragma unused(disable)
4211 	struct bridge_softc *sc = ifp->if_softc;
4212 
4213 	BRIDGE_LOCK_ASSERT_HELD(sc);
4214 
4215 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4216 		return;
4217 	}
4218 
4219 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4220 
4221 #if BRIDGESTP
4222 	bstp_stop(&sc->sc_stp);
4223 #endif /* BRIDGESTP */
4224 
4225 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4226 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4227 }
4228 
4229 /*
4230  * bridge_compute_cksum:
4231  *
4232  *	If the packet has checksum flags, compare the hardware checksum
4233  *	capabilities of the source and destination interfaces. If they
4234  *	are the same, there's nothing to do. If they are different,
4235  *	finalize the checksum so that it can be sent on the destination
4236  *	interface.
4237  */
4238 static void
4239 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4240 {
4241 	uint32_t csum_flags;
4242 	uint16_t dst_hw_csum;
4243 	uint32_t did_sw = 0;
4244 	struct ether_header *eh;
4245 	uint16_t src_hw_csum;
4246 
4247 	if (src_if == dst_if) {
4248 		return;
4249 	}
4250 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4251 	if (csum_flags == 0) {
4252 		/* no checksum offload */
4253 		return;
4254 	}
4255 
4256 	/*
4257 	 * if destination/source differ in checksum offload
4258 	 * capabilities, finalize/compute the checksum
4259 	 */
4260 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4261 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4262 	if (dst_hw_csum == src_hw_csum) {
4263 		return;
4264 	}
4265 	eh = mtod(m, struct ether_header *);
4266 	switch (ntohs(eh->ether_type)) {
4267 	case ETHERTYPE_IP:
4268 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4269 		break;
4270 	case ETHERTYPE_IPV6:
4271 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4272 		break;
4273 	}
4274 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4275 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4276 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4277 	    m->m_pkthdr.csum_flags);
4278 }
4279 
4280 static errno_t
4281 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4282 {
4283 	struct flowadv  adv = { .code = FADV_SUCCESS };
4284 	errno_t         error;
4285 
4286 	error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4287 	if (error == 0) {
4288 		if (adv.code == FADV_FLOW_CONTROLLED) {
4289 			error = EQFULL;
4290 		} else if (adv.code == FADV_SUSPENDED) {
4291 			error = EQSUSPENDED;
4292 		}
4293 	}
4294 	return error;
4295 }
4296 
4297 static int
4298 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4299     bool *is_fragmented)
4300 {
4301 	int newoff;
4302 
4303 	*is_fragmented = false;
4304 	while (1) {
4305 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4306 		if (newoff < 0) {
4307 			return off;
4308 		} else if (newoff < off) {
4309 			return -1;    /* invalid */
4310 		} else if (newoff == off) {
4311 			return newoff;
4312 		}
4313 		off = newoff;
4314 		proto = *nxtp;
4315 		if (proto == IPPROTO_FRAGMENT) {
4316 			*is_fragmented = true;
4317 		}
4318 	}
4319 }
4320 
4321 static int
4322 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4323     ip_packet_info_t info_p, struct bripstats * stats_p)
4324 {
4325 	int             error = 0;
4326 	u_int           hlen;
4327 	u_int           ip_hlen;
4328 	u_int           ip_pay_len;
4329 	struct mbuf *   m0 = *mp;
4330 	int             off;
4331 	int             opt_len = 0;
4332 	int             proto = 0;
4333 
4334 	bzero(info_p, sizeof(*info_p));
4335 	if (is_ipv4) {
4336 		struct ip *     ip;
4337 		u_int           ip_total_len;
4338 
4339 		/* IPv4 */
4340 		hlen = mac_hlen + sizeof(struct ip);
4341 		if (m0->m_pkthdr.len < hlen) {
4342 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4343 			    "Short IP packet %d < %d",
4344 			    m0->m_pkthdr.len, hlen);
4345 			error = _EBADIP;
4346 			stats_p->bips_bad_ip++;
4347 			goto done;
4348 		}
4349 		if (m0->m_len < hlen) {
4350 			*mp = m0 = m_pullup(m0, hlen);
4351 			if (m0 == NULL) {
4352 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4353 				    "m_pullup failed hlen %d",
4354 				    hlen);
4355 				error = ENOBUFS;
4356 				stats_p->bips_bad_ip++;
4357 				goto done;
4358 			}
4359 		}
4360 		ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4361 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4362 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4363 			    "bad IP version");
4364 			error = _EBADIP;
4365 			stats_p->bips_bad_ip++;
4366 			goto done;
4367 		}
4368 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4369 		if (ip_hlen < sizeof(struct ip)) {
4370 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4371 			    "bad IP header length %d < %d",
4372 			    ip_hlen,
4373 			    (int)sizeof(struct ip));
4374 			error = _EBADIP;
4375 			stats_p->bips_bad_ip++;
4376 			goto done;
4377 		}
4378 		hlen = mac_hlen + ip_hlen;
4379 		if (m0->m_len < hlen) {
4380 			*mp = m0 = m_pullup(m0, hlen);
4381 			if (m0 == NULL) {
4382 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4383 				    "m_pullup failed hlen %d",
4384 				    hlen);
4385 				error = ENOBUFS;
4386 				stats_p->bips_bad_ip++;
4387 				goto done;
4388 			}
4389 		}
4390 
4391 		ip_total_len = ntohs(ip->ip_len);
4392 		if (ip_total_len < ip_hlen) {
4393 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4394 			    "IP total len %d < header len %d",
4395 			    ip_total_len, ip_hlen);
4396 			error = _EBADIP;
4397 			stats_p->bips_bad_ip++;
4398 			goto done;
4399 		}
4400 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4401 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4402 			    "invalid IP payload length %d > %d",
4403 			    ip_total_len,
4404 			    (m0->m_pkthdr.len - mac_hlen));
4405 			error = _EBADIP;
4406 			stats_p->bips_bad_ip++;
4407 			goto done;
4408 		}
4409 		ip_pay_len = ip_total_len - ip_hlen;
4410 		info_p->ip_proto = ip->ip_p;
4411 		info_p->ip_hdr.ip = ip;
4412 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4413 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4414 			info_p->ip_is_fragmented = true;
4415 		}
4416 		stats_p->bips_ip++;
4417 	} else {
4418 		struct ip6_hdr *ip6;
4419 
4420 		/* IPv6 */
4421 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4422 		if (m0->m_pkthdr.len < hlen) {
4423 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4424 			    "short IPv6 packet %d < %d",
4425 			    m0->m_pkthdr.len, hlen);
4426 			error = _EBADIPV6;
4427 			stats_p->bips_bad_ip6++;
4428 			goto done;
4429 		}
4430 		if (m0->m_len < hlen) {
4431 			*mp = m0 = m_pullup(m0, hlen);
4432 			if (m0 == NULL) {
4433 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4434 				    "m_pullup failed hlen %d",
4435 				    hlen);
4436 				error = ENOBUFS;
4437 				stats_p->bips_bad_ip6++;
4438 				goto done;
4439 			}
4440 		}
4441 		ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4442 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4443 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4444 			    "bad IPv6 version");
4445 			error = _EBADIPV6;
4446 			stats_p->bips_bad_ip6++;
4447 			goto done;
4448 		}
4449 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4450 		    &info_p->ip_is_fragmented);
4451 		if (off < 0 || m0->m_pkthdr.len < off) {
4452 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4453 			    "ip6_lasthdr() returned %d",
4454 			    off);
4455 			error = _EBADIPV6;
4456 			stats_p->bips_bad_ip6++;
4457 			goto done;
4458 		}
4459 		ip_hlen = sizeof(*ip6);
4460 		opt_len = off - mac_hlen - ip_hlen;
4461 		if (opt_len < 0) {
4462 			error = _EBADIPV6;
4463 			stats_p->bips_bad_ip6++;
4464 			goto done;
4465 		}
4466 		info_p->ip_proto = proto;
4467 		info_p->ip_hdr.ip6 = ip6;
4468 		ip_pay_len = ntohs(ip6->ip6_plen);
4469 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4470 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4471 			    "invalid IPv6 payload length %d > %d",
4472 			    ip_pay_len,
4473 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4474 			error = _EBADIPV6;
4475 			stats_p->bips_bad_ip6++;
4476 			goto done;
4477 		}
4478 		stats_p->bips_ip6++;
4479 	}
4480 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4481 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4482 	    is_ipv4 ? '4' : '6',
4483 	    proto, ip_hlen, ip_pay_len, opt_len,
4484 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4485 	info_p->ip_hlen = ip_hlen;
4486 	info_p->ip_pay_len = ip_pay_len;
4487 	info_p->ip_opt_len = opt_len;
4488 	info_p->ip_is_ipv4 = is_ipv4;
4489 done:
4490 	return error;
4491 }
4492 
4493 static int
4494 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4495     ip_packet_info_t info_p, struct bripstats * stats_p)
4496 {
4497 	int             error;
4498 	u_int           hlen;
4499 
4500 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4501 	if (error != 0) {
4502 		goto done;
4503 	}
4504 	if (info_p->ip_proto != IPPROTO_TCP) {
4505 		/* not a TCP frame, not an error, just a bad guess */
4506 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4507 		    "non-TCP (%d) IPv%c frame %d bytes",
4508 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4509 		    (*mp)->m_pkthdr.len);
4510 		goto done;
4511 	}
4512 	if (info_p->ip_is_fragmented) {
4513 		/* both TSO and IP fragmentation don't make sense */
4514 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4515 		    "fragmented TSO packet?");
4516 		stats_p->bips_bad_tcp++;
4517 		error = _EBADTCP;
4518 		goto done;
4519 	}
4520 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4521 	    info_p->ip_opt_len;
4522 	if ((*mp)->m_len < hlen) {
4523 		*mp = m_pullup(*mp, hlen);
4524 		if (*mp == NULL) {
4525 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4526 			    "m_pullup %d failed",
4527 			    hlen);
4528 			stats_p->bips_bad_tcp++;
4529 			error = _EBADTCP;
4530 			goto done;
4531 		}
4532 	}
4533 	info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4534 	    info_p->ip_hlen + info_p->ip_opt_len;
4535 done:
4536 	return error;
4537 }
4538 
4539 static inline void
4540 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4541 {
4542 	if (proto == IPPROTO_TCP) {
4543 		stats_p->brcs_tcp_checksum++;
4544 	} else {
4545 		stats_p->brcs_udp_checksum++;
4546 	}
4547 	return;
4548 }
4549 
4550 static bool
4551 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4552 {
4553 	uint16_t        ether_type;
4554 	bool            is_ip = TRUE;
4555 
4556 	ether_type = ntohs(eh->ether_type);
4557 	switch (ether_type) {
4558 	case ETHERTYPE_IP:
4559 		*is_ipv4 = TRUE;
4560 		break;
4561 	case ETHERTYPE_IPV6:
4562 		*is_ipv4 = FALSE;
4563 		break;
4564 	default:
4565 		is_ip = FALSE;
4566 		break;
4567 	}
4568 	return is_ip;
4569 }
4570 
4571 static errno_t
4572 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4573 {
4574 	struct brcsumstats *csum_stats_p;
4575 	struct ether_header     *eh;
4576 	errno_t         error = 0;
4577 	ip_packet_info  info;
4578 	bool            is_ipv4;
4579 	struct mbuf *   m;
4580 	u_int           mac_hlen = sizeof(struct ether_header);
4581 	uint16_t        sum;
4582 	bool            valid;
4583 
4584 	eh = mtod(*mp, struct ether_header *);
4585 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4586 		goto done;
4587 	}
4588 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4589 	    &stats_p->brms_out_ip);
4590 	m = *mp;
4591 	if (error != 0) {
4592 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4593 		    "bridge_get_ip_proto failed %d",
4594 		    error);
4595 		goto done;
4596 	}
4597 	if (is_ipv4) {
4598 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4599 			/* hardware offloaded IP header checksum */
4600 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4601 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4602 			    "IP checksum HW %svalid",
4603 			    valid ? "" : "in");
4604 			if (!valid) {
4605 				stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4606 				error = _EBADIPCHECKSUM;
4607 				goto done;
4608 			}
4609 			stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4610 		} else {
4611 			/* verify */
4612 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4613 			valid = (sum == 0);
4614 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4615 			    "IP checksum SW %svalid",
4616 			    valid ? "" : "in");
4617 			if (!valid) {
4618 				stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4619 				error = _EBADIPCHECKSUM;
4620 				goto done;
4621 			}
4622 			stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4623 		}
4624 	}
4625 	if (info.ip_is_fragmented) {
4626 		/* can't verify checksum on fragmented packets */
4627 		goto done;
4628 	}
4629 	switch (info.ip_proto) {
4630 	case IPPROTO_TCP:
4631 		stats_p->brms_out_ip.bips_tcp++;
4632 		break;
4633 	case IPPROTO_UDP:
4634 		stats_p->brms_out_ip.bips_udp++;
4635 		break;
4636 	default:
4637 		goto done;
4638 	}
4639 	/* check for hardware offloaded UDP/TCP checksum */
4640 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4641 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4642 		/* checksum verified by hardware */
4643 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4644 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4645 		    "IPv%c %s checksum HW 0x%x %svalid",
4646 		    is_ipv4 ? '4' : '6',
4647 		    (info.ip_proto == IPPROTO_TCP)
4648 		    ? "TCP" : "UDP",
4649 		    m->m_pkthdr.csum_data,
4650 		    valid ? "" : "in" );
4651 		if (!valid) {
4652 			/* bad checksum */
4653 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4654 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4655 			    : _EBADTCPCHECKSUM;
4656 		} else {
4657 			/* good checksum */
4658 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4659 		}
4660 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4661 		goto done;
4662 	}
4663 	m->m_data += mac_hlen;
4664 	m->m_len -= mac_hlen;
4665 	m->m_pkthdr.len -= mac_hlen;
4666 	if (is_ipv4) {
4667 		sum = inet_cksum(m, info.ip_proto,
4668 		    info.ip_hlen,
4669 		    info.ip_pay_len);
4670 	} else {
4671 		sum = inet6_cksum(m, info.ip_proto,
4672 		    info.ip_hlen + info.ip_opt_len,
4673 		    info.ip_pay_len - info.ip_opt_len);
4674 	}
4675 	valid = (sum == 0);
4676 	if (valid) {
4677 		csum_stats_p = &stats_p->brms_out_cksum_good;
4678 	} else {
4679 		csum_stats_p = &stats_p->brms_out_cksum_bad;
4680 		error = (info.ip_proto == IPPROTO_TCP)
4681 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4682 	}
4683 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4684 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4685 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4686 	    is_ipv4 ? '4' : '6',
4687 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4688 	    valid ? "" : "in",
4689 	    sum, info.ip_hlen, info.ip_pay_len);
4690 	m->m_data -= mac_hlen;
4691 	m->m_len += mac_hlen;
4692 	m->m_pkthdr.len += mac_hlen;
4693 done:
4694 	return error;
4695 }
4696 
4697 static errno_t
4698 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4699     struct ifbrmstats * stats_p)
4700 {
4701 	uint16_t *      csum_p;
4702 	errno_t         error = 0;
4703 	u_int           hlen;
4704 	struct mbuf *   m0 = *mp;
4705 	u_int           mac_hlen = sizeof(struct ether_header);
4706 	u_int           pkt_hdr_len;
4707 	struct tcphdr * tcp;
4708 	u_int           tcp_hlen;
4709 	struct udphdr * udp;
4710 
4711 	if (info_p->ip_is_ipv4) {
4712 		/* compute IP header checksum */
4713 		info_p->ip_hdr.ip->ip_sum = 0;
4714 		info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4715 		    info_p->ip_hlen);
4716 		stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4717 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4718 		    "IPv4 checksum 0x%x",
4719 		    ntohs(info_p->ip_hdr.ip->ip_sum));
4720 	}
4721 	if (info_p->ip_is_fragmented) {
4722 		/* can't compute checksum on fragmented packets */
4723 		goto done;
4724 	}
4725 	pkt_hdr_len = m0->m_pkthdr.len;
4726 	switch (info_p->ip_proto) {
4727 	case IPPROTO_TCP:
4728 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4729 		    + sizeof(struct tcphdr);
4730 		if (m0->m_len < hlen) {
4731 			*mp = m0 = m_pullup(m0, hlen);
4732 			if (m0 == NULL) {
4733 				stats_p->brms_in_ip.bips_bad_tcp++;
4734 				error = _EBADTCP;
4735 				goto done;
4736 			}
4737 		}
4738 		tcp = (struct tcphdr *)(void *)
4739 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4740 		    + info_p->ip_opt_len);
4741 		tcp_hlen = tcp->th_off << 2;
4742 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4743 		if (hlen > pkt_hdr_len) {
4744 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4745 			    "bad tcp header length %u",
4746 			    tcp_hlen);
4747 			stats_p->brms_in_ip.bips_bad_tcp++;
4748 			error = _EBADTCP;
4749 			goto done;
4750 		}
4751 		csum_p = &tcp->th_sum;
4752 		stats_p->brms_in_ip.bips_tcp++;
4753 		break;
4754 	case IPPROTO_UDP:
4755 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4756 		if (m0->m_len < hlen) {
4757 			*mp = m0 = m_pullup(m0, hlen);
4758 			if (m0 == NULL) {
4759 				stats_p->brms_in_ip.bips_bad_udp++;
4760 				error = ENOBUFS;
4761 				goto done;
4762 			}
4763 		}
4764 		udp = (struct udphdr *)(void *)
4765 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4766 		    + info_p->ip_opt_len);
4767 		csum_p = &udp->uh_sum;
4768 		stats_p->brms_in_ip.bips_udp++;
4769 		break;
4770 	default:
4771 		/* not TCP or UDP */
4772 		goto done;
4773 	}
4774 	*csum_p = 0;
4775 	m0->m_data += mac_hlen;
4776 	m0->m_len -= mac_hlen;
4777 	m0->m_pkthdr.len -= mac_hlen;
4778 	if (info_p->ip_is_ipv4) {
4779 		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4780 		    info_p->ip_pay_len);
4781 	} else {
4782 		*csum_p = inet6_cksum(m0, info_p->ip_proto,
4783 		    info_p->ip_hlen + info_p->ip_opt_len,
4784 		    info_p->ip_pay_len - info_p->ip_opt_len);
4785 	}
4786 	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4787 		/* RFC 1122 4.1.3.4 */
4788 		*csum_p = 0xffff;
4789 	}
4790 	m0->m_data -= mac_hlen;
4791 	m0->m_len += mac_hlen;
4792 	m0->m_pkthdr.len += mac_hlen;
4793 	proto_csum_stats_increment(info_p->ip_proto,
4794 	    &stats_p->brms_in_computed_cksum);
4795 
4796 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4797 	    "IPv%c %s set checksum 0x%x",
4798 	    info_p->ip_is_ipv4 ? '4' : '6',
4799 	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4800 	    ntohs(*csum_p));
4801 done:
4802 	return error;
4803 }
4804 
4805 static errno_t
4806 bridge_send(struct ifnet *src_ifp,
4807     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4808 {
4809 	switch (cksum_op) {
4810 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4811 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4812 		break;
4813 	case CHECKSUM_OPERATION_FINALIZE:
4814 		/* the checksum might not be correct, finalize now */
4815 		bridge_finalize_cksum(dst_ifp, m);
4816 		break;
4817 	case CHECKSUM_OPERATION_COMPUTE:
4818 		bridge_compute_cksum(src_ifp, dst_ifp, m);
4819 		break;
4820 	default:
4821 		break;
4822 	}
4823 #if HAS_IF_CAP
4824 	/*
4825 	 * If underlying interface can not do VLAN tag insertion itself
4826 	 * then attach a packet tag that holds it.
4827 	 */
4828 	if ((m->m_flags & M_VLANTAG) &&
4829 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4830 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4831 		if (m == NULL) {
4832 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4833 			    "%s: unable to prepend VLAN header",
4834 			    dst_ifp->if_xname);
4835 			(void) ifnet_stat_increment_out(dst_ifp,
4836 			    0, 0, 1);
4837 			return 0;
4838 		}
4839 		m->m_flags &= ~M_VLANTAG;
4840 	}
4841 #endif /* HAS_IF_CAP */
4842 	return bridge_transmit(dst_ifp, m);
4843 }
4844 
4845 static errno_t
4846 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4847 {
4848 	errno_t                 error;
4849 	u_int                   mac_hlen;
4850 
4851 	mac_hlen = sizeof(struct ether_header);
4852 
4853 #if HAS_IF_CAP
4854 	/*
4855 	 * If underlying interface can not do VLAN tag insertion itself
4856 	 * then attach a packet tag that holds it.
4857 	 */
4858 	if ((m->m_flags & M_VLANTAG) &&
4859 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4860 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4861 		if (m == NULL) {
4862 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4863 			    "%s: unable to prepend VLAN header",
4864 			    dst_ifp->if_xname);
4865 			(void) ifnet_stat_increment_out(dst_ifp,
4866 			    0, 0, 1);
4867 			error = ENOBUFS;
4868 			goto done;
4869 		}
4870 		m->m_flags &= ~M_VLANTAG;
4871 		mac_hlen += ETHER_VLAN_ENCAP_LEN;
4872 	}
4873 #endif /* HAS_IF_CAP */
4874 	error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4875 	return error;
4876 }
4877 
4878 /*
4879  * tso_hwassist:
4880  * - determine whether the destination interface supports TSO offload
4881  * - if the packet is already marked for offload and the hardware supports
4882  *   it, just allow the packet to continue on
4883  * - if not, parse the packet headers to verify that this is a large TCP
4884  *   packet requiring segmentation; if the hardware doesn't support it
4885  *   set need_sw_tso; otherwise, mark the packet for TSO offload
4886  */
4887 static int
4888 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
4889     bool * need_sw_tso, bool * is_large_tcp)
4890 {
4891 	int             error = 0;
4892 	u_int32_t       if_csum;
4893 	u_int32_t       if_tso;
4894 	u_int32_t       mbuf_tso;
4895 	bool            supports_cksum = false;
4896 
4897 	*need_sw_tso = false;
4898 	*is_large_tcp = false;
4899 	if (is_ipv4) {
4900 		/*
4901 		 * Enable both TCP and IP offload if the hardware supports it.
4902 		 * If the hardware doesn't support TCP offload, supports_cksum
4903 		 * will be false so we won't set either offload.
4904 		 */
4905 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
4906 		supports_cksum = (if_csum & CSUM_TCP) != 0;
4907 		if_tso = IFNET_TSO_IPV4;
4908 		mbuf_tso = CSUM_TSO_IPV4;
4909 	} else {
4910 		supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
4911 		if_csum = CSUM_TCPIPV6;
4912 		if_tso = IFNET_TSO_IPV6;
4913 		mbuf_tso = CSUM_TSO_IPV6;
4914 	}
4915 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4916 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
4917 	    ifp->if_xname, supports_cksum ? "" : " not",
4918 	    ifp->if_hwassist, if_csum);
4919 	if ((ifp->if_hwassist & if_tso) != 0 &&
4920 	    ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
4921 		/* hardware TSO, mbuf already marked */
4922 	} else {
4923 		/* verify that this is a large TCP frame */
4924 		uint32_t                csum_flags;
4925 		ip_packet_info          info;
4926 		int                     mss;
4927 		struct bripstats        stats;
4928 		struct tcphdr *         tcp;
4929 
4930 		error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
4931 		    &info, &stats);
4932 		if (error != 0) {
4933 			/* bad packet */
4934 			goto done;
4935 		}
4936 		if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
4937 		    ifp->if_mtu) {
4938 			/* not actually a large packet */
4939 			goto done;
4940 		}
4941 		if (info.ip_proto_hdr == NULL) {
4942 			/* not a TCP packet */
4943 			goto done;
4944 		}
4945 		if ((ifp->if_hwassist & if_tso) == 0) {
4946 			/* hardware does not support TSO, enable sw tso */
4947 			*need_sw_tso = if_bridge_segmentation != 0;
4948 			goto done;
4949 		}
4950 		/* use hardware TSO */
4951 		(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
4952 		tcp = (struct tcphdr *)info.ip_proto_hdr;
4953 		mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
4954 		    - (tcp->th_off << 2) - if_bridge_tso_reduce_mss_tx;
4955 		assert(mss > 0);
4956 		csum_flags = mbuf_tso;
4957 		if (supports_cksum) {
4958 			csum_flags |= if_csum;
4959 		}
4960 		(*mp)->m_pkthdr.tso_segsz = mss;
4961 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
4962 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
4963 		*is_large_tcp = true;
4964 	}
4965 done:
4966 	return error;
4967 }
4968 
4969 /*
4970  * bridge_enqueue:
4971  *
4972  *	Enqueue a packet on a bridge member interface.
4973  *
4974  */
4975 static errno_t
4976 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
4977     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4978 {
4979 	errno_t         error = 0;
4980 	int             len;
4981 
4982 	VERIFY(dst_ifp != NULL);
4983 
4984 	/*
4985 	 * We may be sending a fragment so traverse the mbuf
4986 	 *
4987 	 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
4988 	 */
4989 	for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
4990 		bool            need_sw_tso = false;
4991 		bool            is_ipv4 = false;
4992 		bool            is_large_pkt;
4993 		errno_t         _error = 0;
4994 
4995 		len = m->m_pkthdr.len;
4996 		m->m_flags |= M_PROTO1; /* set to avoid loops */
4997 		next_m = m->m_nextpkt;
4998 		m->m_nextpkt = NULL;
4999 		/*
5000 		 * Need to segment the packet if it is a large frame
5001 		 * and the destination interface does not support TSO.
5002 		 *
5003 		 * Note that with trailers, it's possible for a packet to
5004 		 * be large but not actually require segmentation.
5005 		 */
5006 		is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
5007 		if (is_large_pkt) {
5008 			struct ether_header     *eh;
5009 			bool                    is_large_tcp = false;
5010 
5011 			eh = mtod(m, struct ether_header *);
5012 			if (ether_header_type_is_ip(eh, &is_ipv4)) {
5013 				_error = tso_hwassist(&m, is_ipv4,
5014 				    dst_ifp, sizeof(struct ether_header),
5015 				    &need_sw_tso, &is_large_tcp);
5016 				if (is_large_tcp) {
5017 					cksum_op = CHECKSUM_OPERATION_NONE;
5018 				}
5019 			} else {
5020 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5021 				    "large non IP packet");
5022 			}
5023 		}
5024 		if (_error != 0) {
5025 			if (m != NULL) {
5026 				m_freem(m);
5027 			}
5028 		} else if (need_sw_tso) {
5029 			_error = bridge_send_tso(dst_ifp, m, is_ipv4);
5030 		} else {
5031 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5032 			    "%s bridge_send(%s) len %d op %d",
5033 			    bridge_ifp->if_xname,
5034 			    dst_ifp->if_xname,
5035 			    len, cksum_op);
5036 			_error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
5037 		}
5038 
5039 		/* Preserve first error value */
5040 		if (error == 0 && _error != 0) {
5041 			error = _error;
5042 		}
5043 		if (_error == 0) {
5044 			(void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
5045 		} else {
5046 			(void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
5047 		}
5048 	}
5049 
5050 	return error;
5051 }
5052 
#if HAS_BRIDGE_DUMMYNET
/*
 * bridge_dummynet:
 *
 *	Take a queued packet back from dummynet and forward it to the output
 *	interface ifp. The Ethernet header is already attached to the mbuf.
 *
 *	The packet is freed when ifp no longer belongs to a bridge. When
 *	pfil hooks are active the packet first goes through bridge_pfil();
 *	nothing more is done here if that call fails or leaves no mbuf.
 */
static void
bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
{
	struct bridge_softc *sc = ifp->if_bridge;

	if (sc == NULL) {
		/*
		 * The packet did not come from a member interface. This
		 * should only happen when a member interface is removed
		 * while packets are still queued for it.
		 */
		m_freem(m);
		return;
	}

	if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0 ||
		    m == NULL) {
			return;
		}
	}
	(void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
}

#endif /* HAS_BRIDGE_DUMMYNET */
5091 
5092 /*
5093  * bridge_member_output:
5094  *
5095  *	Send output from a bridge member interface.  This
5096  *	performs the bridging function for locally originated
5097  *	packets.
5098  *
5099  *	The mbuf has the Ethernet header already attached.
5100  */
5101 static errno_t
5102 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5103 {
5104 	ifnet_t bridge_ifp;
5105 	struct ether_header *eh;
5106 	struct ifnet *dst_if;
5107 	uint16_t vlan;
5108 	struct bridge_iflist *mac_nat_bif;
5109 	ifnet_t mac_nat_ifp;
5110 	mbuf_t m = *data;
5111 
5112 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5113 	    "ifp %s", ifp->if_xname);
5114 	if (m->m_len < ETHER_HDR_LEN) {
5115 		m = m_pullup(m, ETHER_HDR_LEN);
5116 		if (m == NULL) {
5117 			*data = NULL;
5118 			return EJUSTRETURN;
5119 		}
5120 	}
5121 
5122 	eh = mtod(m, struct ether_header *);
5123 	vlan = VLANTAGOF(m);
5124 
5125 	BRIDGE_LOCK(sc);
5126 	mac_nat_bif = sc->sc_mac_nat_bif;
5127 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5128 	if (mac_nat_ifp == ifp) {
5129 		/* record the IP address used by the MAC NAT interface */
5130 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5131 		m = *data;
5132 		if (m == NULL) {
5133 			/* packet was deallocated */
5134 			BRIDGE_UNLOCK(sc);
5135 			return EJUSTRETURN;
5136 		}
5137 	}
5138 	bridge_ifp = sc->sc_ifp;
5139 
5140 	/*
5141 	 * APPLE MODIFICATION
5142 	 * If the packet is an 802.1X ethertype, then only send on the
5143 	 * original output interface.
5144 	 */
5145 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5146 		dst_if = ifp;
5147 		goto sendunicast;
5148 	}
5149 
5150 	/*
5151 	 * If bridge is down, but the original output interface is up,
5152 	 * go ahead and send out that interface.  Otherwise, the packet
5153 	 * is dropped below.
5154 	 */
5155 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5156 		dst_if = ifp;
5157 		goto sendunicast;
5158 	}
5159 
5160 	/*
5161 	 * If the packet is a multicast, or we don't know a better way to
5162 	 * get there, send to all interfaces.
5163 	 */
5164 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5165 		dst_if = NULL;
5166 	} else {
5167 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5168 	}
5169 	if (dst_if == NULL) {
5170 		struct bridge_iflist *bif;
5171 		struct mbuf *mc;
5172 		errno_t error;
5173 
5174 
5175 		bridge_span(sc, m);
5176 
5177 		BRIDGE_LOCK2REF(sc, error);
5178 		if (error != 0) {
5179 			m_freem(m);
5180 			return EJUSTRETURN;
5181 		}
5182 
5183 		/*
5184 		 * Duplicate and send the packet across all member interfaces
5185 		 * except the originating interface.
5186 		 */
5187 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5188 			dst_if = bif->bif_ifp;
5189 			if (dst_if == ifp) {
5190 				/* skip the originating interface */
5191 				continue;
5192 			}
5193 			/* skip interface with inactive link status */
5194 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5195 				continue;
5196 			}
5197 #if 0
5198 			if (dst_if->if_type == IFT_GIF) {
5199 				continue;
5200 			}
5201 #endif
5202 			/* skip interface that isn't running */
5203 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5204 				continue;
5205 			}
5206 			/*
5207 			 * If the interface is participating in spanning
5208 			 * tree, make sure the port is in a state that
5209 			 * allows forwarding.
5210 			 */
5211 			if ((bif->bif_ifflags & IFBIF_STP) &&
5212 			    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5213 				continue;
5214 			}
5215 			/*
5216 			 * If the destination is the MAC NAT interface,
5217 			 * skip sending the packet. The packet can't be sent
5218 			 * if the source MAC is incorrect.
5219 			 */
5220 			if (dst_if == mac_nat_ifp) {
5221 				continue;
5222 			}
5223 
5224 			/* make a deep copy to send on this member interface */
5225 			mc = m_dup(m, M_DONTWAIT);
5226 			if (mc == NULL) {
5227 				(void)ifnet_stat_increment_out(bridge_ifp,
5228 				    0, 0, 1);
5229 				continue;
5230 			}
5231 			(void)bridge_enqueue(bridge_ifp, ifp, dst_if,
5232 			    mc, CHECKSUM_OPERATION_COMPUTE);
5233 		}
5234 		BRIDGE_UNREF(sc);
5235 
5236 		if ((ifp->if_flags & IFF_RUNNING) == 0) {
5237 			m_freem(m);
5238 			return EJUSTRETURN;
5239 		}
5240 		/* allow packet to continue on the originating interface */
5241 		return 0;
5242 	}
5243 
5244 sendunicast:
5245 	/*
5246 	 * XXX Spanning tree consideration here?
5247 	 */
5248 
5249 	bridge_span(sc, m);
5250 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5251 		m_freem(m);
5252 		BRIDGE_UNLOCK(sc);
5253 		return EJUSTRETURN;
5254 	}
5255 
5256 	BRIDGE_UNLOCK(sc);
5257 	if (dst_if == ifp) {
5258 		/* allow packet to continue on the originating interface */
5259 		return 0;
5260 	}
5261 	if (dst_if != mac_nat_ifp) {
5262 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5263 		    CHECKSUM_OPERATION_COMPUTE);
5264 	} else {
5265 		/*
5266 		 * This is not the original output interface
5267 		 * and the destination is the MAC NAT interface.
5268 		 * Drop the packet because the packet can't be sent
5269 		 * if the source MAC is incorrect.
5270 		 */
5271 		m_freem(m);
5272 	}
5273 	return EJUSTRETURN;
5274 }
5275 
5276 /*
5277  * Output callback.
5278  *
5279  * This routine is called externally from above only when if_bridge_txstart
5280  * is disabled; otherwise it is called internally by bridge_start().
5281  */
5282 static int
5283 bridge_output(struct ifnet *ifp, struct mbuf *m)
5284 {
5285 	struct bridge_softc *sc = ifnet_softc(ifp);
5286 	struct ether_header *eh;
5287 	struct ifnet *dst_if = NULL;
5288 	int error = 0;
5289 
5290 	eh = mtod(m, struct ether_header *);
5291 
5292 	BRIDGE_LOCK(sc);
5293 
5294 	if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5295 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5296 	}
5297 
5298 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5299 
5300 #if NBPFILTER > 0
5301 	if (sc->sc_bpf_output) {
5302 		bridge_bpf_output(ifp, m);
5303 	}
5304 #endif
5305 
5306 	if (dst_if == NULL) {
5307 		/* callee will unlock */
5308 		bridge_broadcast(sc, NULL, m, 0);
5309 	} else {
5310 		ifnet_t bridge_ifp;
5311 
5312 		bridge_ifp = sc->sc_ifp;
5313 		BRIDGE_UNLOCK(sc);
5314 
5315 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5316 		    CHECKSUM_OPERATION_FINALIZE);
5317 	}
5318 
5319 	return error;
5320 }
5321 
5322 static void
5323 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5324 {
5325 	struct ether_header *eh;
5326 	bool is_ipv4;
5327 	uint32_t sw_csum, hwcap;
5328 	uint32_t did_sw;
5329 	uint32_t csum_flags;
5330 
5331 	eh = mtod(m, struct ether_header *);
5332 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5333 		return;
5334 	}
5335 
5336 	/* do in software what the hardware cannot */
5337 	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5338 	csum_flags = m->m_pkthdr.csum_flags;
5339 	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5340 	sw_csum &= IF_HWASSIST_CSUM_MASK;
5341 
5342 	if (is_ipv4) {
5343 		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5344 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5345 			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5346 				uint16_t start =
5347 				    sizeof(*eh) + sizeof(struct ip);
5348 				uint16_t ulpoff =
5349 				    m->m_pkthdr.csum_data & 0xffff;
5350 				m->m_pkthdr.csum_flags |=
5351 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5352 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5353 				m->m_pkthdr.csum_tx_start = start;
5354 			} else {
5355 				sw_csum |= (CSUM_DELAY_DATA &
5356 				    m->m_pkthdr.csum_flags);
5357 			}
5358 		}
5359 		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5360 	} else {
5361 		if ((hwcap & CSUM_PARTIAL) &&
5362 		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5363 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5364 			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5365 				uint16_t start =
5366 				    sizeof(*eh) + sizeof(struct ip6_hdr);
5367 				uint16_t ulpoff =
5368 				    m->m_pkthdr.csum_data & 0xffff;
5369 				m->m_pkthdr.csum_flags |=
5370 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5371 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5372 				m->m_pkthdr.csum_tx_start = start;
5373 			} else {
5374 				sw_csum |= (CSUM_DELAY_IPV6_DATA &
5375 				    m->m_pkthdr.csum_flags);
5376 			}
5377 		}
5378 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5379 	}
5380 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5381 	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5382 	    ifp->if_xname, csum_flags, hwcap, sw_csum,
5383 	    did_sw, m->m_pkthdr.csum_flags);
5384 }
5385 
/*
 * bridge_start:
 *
 *	Start output on a bridge: drain the interface's send queue, handing
 *	each dequeued packet to bridge_output().
 *
 * This routine is invoked by the start worker thread; because we never call
 * it directly, there is no need to deploy any serialization mechanism other
 * than what's already used by the worker thread, i.e. this is already single
 * threaded.
 *
 * This routine is called only when if_bridge_txstart is enabled.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	/* stop at the first dequeue failure */
	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5411 
5412 /*
5413  * bridge_forward:
5414  *
5415  *	The forwarding function of the bridge.
5416  *
5417  *	NOTE: Releases the lock on return.
5418  */
5419 static void
5420 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5421     struct mbuf *m)
5422 {
5423 	struct bridge_iflist *dbif;
5424 	ifnet_t bridge_ifp;
5425 	struct ifnet *src_if, *dst_if;
5426 	struct ether_header *eh;
5427 	uint16_t vlan;
5428 	uint8_t *dst;
5429 	int error;
5430 	struct mac_nat_record mnr;
5431 	bool translate_mac = FALSE;
5432 	uint32_t sc_filter_flags = 0;
5433 
5434 	BRIDGE_LOCK_ASSERT_HELD(sc);
5435 
5436 	bridge_ifp = sc->sc_ifp;
5437 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5438 	    "%s m 0x%llx", bridge_ifp->if_xname,
5439 	    (uint64_t)VM_KERNEL_ADDRPERM(m));
5440 
5441 	src_if = m->m_pkthdr.rcvif;
5442 	if (src_if != sbif->bif_ifp) {
5443 		const char *    src_if_name;
5444 
5445 		src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5446 		BRIDGE_LOG(LOG_NOTICE, 0,
5447 		    "src_if %s != bif_ifp %s",
5448 		    src_if_name, sbif->bif_ifp->if_xname);
5449 		goto drop;
5450 	}
5451 
5452 	(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5453 	vlan = VLANTAGOF(m);
5454 
5455 
5456 	if ((sbif->bif_ifflags & IFBIF_STP) &&
5457 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5458 		goto drop;
5459 	}
5460 
5461 	eh = mtod(m, struct ether_header *);
5462 	dst = eh->ether_dhost;
5463 
5464 	/* If the interface is learning, record the address. */
5465 	if (sbif->bif_ifflags & IFBIF_LEARNING) {
5466 		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5467 		    sbif, 0, IFBAF_DYNAMIC);
5468 		/*
5469 		 * If the interface has addresses limits then deny any source
5470 		 * that is not in the cache.
5471 		 */
5472 		if (error && sbif->bif_addrmax) {
5473 			goto drop;
5474 		}
5475 	}
5476 
5477 	if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5478 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5479 		goto drop;
5480 	}
5481 
5482 	/*
5483 	 * At this point, the port either doesn't participate
5484 	 * in spanning tree or it is in the forwarding state.
5485 	 */
5486 
5487 	/*
5488 	 * If the packet is unicast, destined for someone on
5489 	 * "this" side of the bridge, drop it.
5490 	 */
5491 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5492 		/* unicast */
5493 		dst_if = bridge_rtlookup(sc, dst, vlan);
5494 		if (src_if == dst_if) {
5495 			goto drop;
5496 		}
5497 	} else {
5498 		/* broadcast/multicast */
5499 
5500 		/*
5501 		 * Check if its a reserved multicast address, any address
5502 		 * listed in 802.1D section 7.12.6 may not be forwarded by the
5503 		 * bridge.
5504 		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5505 		 */
5506 		if (dst[0] == 0x01 && dst[1] == 0x80 &&
5507 		    dst[2] == 0xc2 && dst[3] == 0x00 &&
5508 		    dst[4] == 0x00 && dst[5] <= 0x0f) {
5509 			goto drop;
5510 		}
5511 
5512 
5513 		/* ...forward it to all interfaces. */
5514 		atomic_add_64(&bridge_ifp->if_imcasts, 1);
5515 		dst_if = NULL;
5516 	}
5517 
5518 	/*
5519 	 * If we have a destination interface which is a member of our bridge,
5520 	 * OR this is a unicast packet, push it through the bpf(4) machinery.
5521 	 * For broadcast or multicast packets, don't bother because it will
5522 	 * be reinjected into ether_input. We do this before we pass the packets
5523 	 * through the pfil(9) framework, as it is possible that pfil(9) will
5524 	 * drop the packet, or possibly modify it, making it difficult to debug
5525 	 * firewall issues on the bridge.
5526 	 */
5527 #if NBPFILTER > 0
5528 	if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5529 	    dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5530 		m->m_pkthdr.rcvif = bridge_ifp;
5531 		BRIDGE_BPF_MTAP_INPUT(sc, m);
5532 	}
5533 #endif /* NBPFILTER */
5534 
5535 	if (dst_if == NULL) {
5536 		/* bridge_broadcast will unlock */
5537 		bridge_broadcast(sc, sbif, m, 1);
5538 		return;
5539 	}
5540 
5541 	/*
5542 	 * Unicast.
5543 	 */
5544 	/*
5545 	 * At this point, we're dealing with a unicast frame
5546 	 * going to a different interface.
5547 	 */
5548 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5549 		goto drop;
5550 	}
5551 
5552 	dbif = bridge_lookup_member_if(sc, dst_if);
5553 	if (dbif == NULL) {
5554 		/* Not a member of the bridge (anymore?) */
5555 		goto drop;
5556 	}
5557 
5558 	/* Private segments can not talk to each other */
5559 	if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5560 		goto drop;
5561 	}
5562 
5563 	if ((dbif->bif_ifflags & IFBIF_STP) &&
5564 	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5565 		goto drop;
5566 	}
5567 
5568 #if HAS_DHCPRA_MASK
5569 	/* APPLE MODIFICATION <rdar:6985737> */
5570 	if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5571 		m = ip_xdhcpra_output(dst_if, m);
5572 		if (!m) {
5573 			++bridge_ifp.if_xdhcpra;
5574 			BRIDGE_UNLOCK(sc);
5575 			return;
5576 		}
5577 	}
5578 #endif /* HAS_DHCPRA_MASK */
5579 
5580 	if (dbif == sc->sc_mac_nat_bif) {
5581 		/* determine how to translate the packet */
5582 		translate_mac
5583 		        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5584 		if (m == NULL) {
5585 			/* packet was deallocated */
5586 			BRIDGE_UNLOCK(sc);
5587 			return;
5588 		}
5589 	} else if (bif_has_checksum_offload(dbif) &&
5590 	    !bif_has_checksum_offload(sbif)) {
5591 		/*
5592 		 * If the destination interface has checksum offload enabled,
5593 		 * verify the checksum now, unless the source interface also has
5594 		 * checksum offload enabled. The checksum in that case has
5595 		 * already just been computed and verifying it is unnecessary.
5596 		 */
5597 		error = bridge_verify_checksum(&m, &dbif->bif_stats);
5598 		if (error != 0) {
5599 			BRIDGE_UNLOCK(sc);
5600 			if (m != NULL) {
5601 				m_freem(m);
5602 			}
5603 			return;
5604 		}
5605 	}
5606 
5607 	sc_filter_flags = sc->sc_filter_flags;
5608 
5609 	BRIDGE_UNLOCK(sc);
5610 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5611 		if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5612 			return;
5613 		}
5614 		if (m == NULL) {
5615 			return;
5616 		}
5617 	}
5618 
5619 	/* if we need to, translate the MAC address */
5620 	if (translate_mac) {
5621 		bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5622 	}
5623 	/*
5624 	 * We're forwarding an inbound packet in which the checksum must
5625 	 * already have been computed and if required, verified.
5626 	 */
5627 	if (m != NULL) {
5628 		(void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5629 		    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5630 	}
5631 	return;
5632 
5633 drop:
5634 	BRIDGE_UNLOCK(sc);
5635 	m_freem(m);
5636 }
5637 
5638 static void
5639 inject_input_packet(ifnet_t ifp, mbuf_t m)
5640 {
5641 	mbuf_pkthdr_setrcvif(m, ifp);
5642 	mbuf_pkthdr_setheader(m, mbuf_data(m));
5643 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5644 	    mbuf_len(m) - ETHER_HDR_LEN);
5645 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5646 	m->m_flags |= M_PROTO1; /* set to avoid loops */
5647 	dlil_input_packet_list(ifp, m);
5648 	return;
5649 }
5650 
5651 static bool
5652 in_addr_is_ours(struct in_addr ip)
5653 {
5654 	struct in_ifaddr *ia;
5655 	bool             ours = false;
5656 
5657 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5658 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5659 		if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5660 			ours = true;
5661 			break;
5662 		}
5663 	}
5664 	lck_rw_done(&in_ifaddr_rwlock);
5665 	return ours;
5666 }
5667 
5668 static bool
5669 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5670 {
5671 	struct in6_ifaddr       *ia6;
5672 	bool                    ours = false;
5673 
5674 	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5675 		struct in6_addr         dst_ip;
5676 
5677 		/* need to embed scope ID for comparison */
5678 		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5679 		dst_ip.s6_addr16[1] = htons(ifscope);
5680 		ip6_p = &dst_ip;
5681 	}
5682 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5683 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5684 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5685 		    ia6->ia_addr.sin6_scope_id, ifscope)) {
5686 			ours = true;
5687 			break;
5688 		}
5689 	}
5690 	lck_rw_done(&in6_ifaddr_rwlock);
5691 	return ours;
5692 }
5693 
5694 static void
5695 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5696     bpf_packet_func bpf_input_func)
5697 {
5698 	size_t                  byte_count;
5699 	struct ether_header     *eh;
5700 	errno_t                 error;
5701 	bool                    is_ipv4;
5702 	int                     len;
5703 	u_int                   mac_hlen;
5704 	int                     pkt_count;
5705 
5706 	/* segment large packets before sending them up */
5707 	if (if_bridge_segmentation == 0) {
5708 		goto done;
5709 	}
5710 	len = m->m_pkthdr.len;
5711 	if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5712 		goto done;
5713 	}
5714 	eh = mtod(m, struct ether_header *);
5715 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5716 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5717 		    "large non IPv4/IPv6 packet");
5718 		goto done;
5719 	}
5720 
5721 	/*
5722 	 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5723 	 *
5724 	 * If gso_tcp() returns success (0), the packet(s) are
5725 	 * ready to be passed up. If the destination is a local IP address,
5726 	 * the packet will be passed up as a large, single packet.
5727 	 *
5728 	 * If gso_tcp() returns an error, the packet has already
5729 	 * been freed.
5730 	 */
5731 	mac_hlen = sizeof(*eh);
5732 	error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5733 	if (error != 0) {
5734 		return;
5735 	}
5736 
5737 done:
5738 	pkt_count = 0;
5739 	byte_count = 0;
5740 	for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5741 		/* Mark the packet as arriving on the bridge interface */
5742 		mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5743 		mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5744 		if (bpf_input_func != NULL) {
5745 			(*bpf_input_func)(bridge_ifp, scan);
5746 		}
5747 		mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5748 		    mbuf_len(scan) - ETHER_HDR_LEN);
5749 		mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5750 		byte_count += mbuf_pkthdr_len(scan);
5751 		pkt_count++;
5752 	}
5753 	(void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5754 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5755 	    "%s %d packet(s) %ld bytes",
5756 	    bridge_ifp->if_xname, pkt_count, byte_count);
5757 	dlil_input_packet_list(bridge_ifp, m);
5758 	return;
5759 }
5760 
5761 static bool
5762 is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5763 {
5764 	bool    ours;
5765 
5766 	if (info_p->ip_is_ipv4) {
5767 		struct in_addr  dst_ip;
5768 
5769 		bcopy(&info_p->ip_hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
5770 		ours = in_addr_is_ours(dst_ip);
5771 	} else {
5772 		ours = in6_addr_is_ours(&info_p->ip_hdr.ip6->ip6_dst, ifscope);
5773 	}
5774 	return ours;
5775 }
5776 
5777 static inline errno_t
5778 bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
5779     const u_char * ether_dhost, mbuf_t *mp,
5780     bool is_broadcast, bool is_ip, bool is_ipv4,
5781     ip_packet_info * info_p, struct bripstats * stats_p,
5782     bool *info_initialized)
5783 {
5784 	errno_t         error = 0;
5785 	bool            is_local = false;
5786 	struct pf_mtag *pf_mtag;
5787 	u_int16_t       tag = vmnet_tag;
5788 
5789 	*info_initialized = false;
5790 	if (is_broadcast) {
5791 		if (_ether_cmp(ether_dhost, etherbroadcastaddr) == 0) {
5792 			tag = vmnet_broadcast_tag;
5793 		} else {
5794 			tag = vmnet_multicast_tag;
5795 		}
5796 	} else if (is_ip) {
5797 		unsigned int    mac_hlen = sizeof(struct ether_header);
5798 
5799 		bzero(stats_p, sizeof(*stats_p));
5800 		*info_initialized = true;
5801 		error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
5802 		    stats_p);
5803 		if (error != 0) {
5804 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
5805 			    "%s(%s) bridge_get_ip_proto failed %d",
5806 			    bridge_ifp->if_xname,
5807 			    ifp->if_xname, error);
5808 			if (*mp == NULL) {
5809 				return EJUSTRETURN;
5810 			}
5811 		} else {
5812 			is_local = is_our_ip(info_p, bridge_ifp->if_index);
5813 			if (is_local) {
5814 				tag = vmnet_local_tag;
5815 			}
5816 		}
5817 	}
5818 	pf_mtag = pf_get_mtag(*mp);
5819 	if (pf_mtag != NULL) {
5820 		pf_mtag->pftag_tag = tag;
5821 	}
5822 #if DEBUG || DEVELOPMENT
5823 	{
5824 		bool forced;
5825 
5826 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
5827 		if (forced) {
5828 			m_freem(*mp);
5829 			*mp = NULL;
5830 			error = EJUSTRETURN;
5831 			goto done;
5832 		}
5833 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
5834 		if (forced) {
5835 			error = _EBADIP;
5836 			goto done;
5837 		}
5838 	}
5839 done:
5840 #endif /* DEBUG || DEVELOPMENT */
5841 	return error;
5842 }
5843 
5844 static void
5845 bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
5846 {
5847 	dst_p->bips_ip += src_p->bips_ip;
5848 	dst_p->bips_ip6 += src_p->bips_ip6;
5849 	dst_p->bips_udp += src_p->bips_udp;
5850 	dst_p->bips_tcp += src_p->bips_tcp;
5851 
5852 	dst_p->bips_bad_ip += src_p->bips_bad_ip;
5853 	dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
5854 	dst_p->bips_bad_udp += src_p->bips_bad_udp;
5855 	dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
5856 }
5857 
5858 static void
5859 bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
5860 {
5861 	struct bridge_iflist *bif;
5862 	struct bridge_softc *sc = ifp->if_bridge;
5863 
5864 	BRIDGE_LOCK(sc);
5865 	bif = bridge_lookup_member_if(sc, ifp);
5866 	if (bif == NULL) {
5867 		goto done;
5868 	}
5869 	if (!bif_has_checksum_offload(bif)) {
5870 		goto done;
5871 	}
5872 	bripstats_apply(&bif->bif_stats.brms_in_ip, stats_p);
5873 
5874 done:
5875 	BRIDGE_UNLOCK(sc);
5876 	return;
5877 }
5878 
5879 /*
5880  * bridge_input:
5881  *
5882  *	Filter input from a member interface.  Queue the packet for
5883  *	bridging if it is not for us.
5884  */
5885 errno_t
5886 bridge_input(struct ifnet *ifp, mbuf_t *data)
5887 {
5888 	struct bridge_softc *sc = ifp->if_bridge;
5889 	struct bridge_iflist *bif, *bif2;
5890 	struct ether_header eh_in;
5891 	bool is_ip = false;
5892 	bool is_ipv4 = false;
5893 	ifnet_t bridge_ifp;
5894 	struct mbuf *mc, *mc2;
5895 	unsigned int mac_hlen = sizeof(struct ether_header);
5896 	uint16_t vlan;
5897 	errno_t error;
5898 	ip_packet_info info;
5899 	struct bripstats stats;
5900 	bool info_initialized = false;
5901 	errno_t ip_packet_error = 0;
5902 	bool is_broadcast;
5903 	bool is_ip_broadcast = false;
5904 	bool is_ifp_mac = false;
5905 	mbuf_t m = *data;
5906 	uint32_t sc_filter_flags = 0;
5907 
5908 	bridge_ifp = sc->sc_ifp;
5909 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5910 	    "%s from %s m 0x%llx data 0x%llx",
5911 	    bridge_ifp->if_xname, ifp->if_xname,
5912 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
5913 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
5914 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
5915 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5916 		    "%s not running passing along",
5917 		    bridge_ifp->if_xname);
5918 		return 0;
5919 	}
5920 
5921 	vlan = VLANTAGOF(m);
5922 
5923 #ifdef IFF_MONITOR
5924 	/*
5925 	 * Implement support for bridge monitoring. If this flag has been
5926 	 * set on this interface, discard the packet once we push it through
5927 	 * the bpf(4) machinery, but before we do, increment the byte and
5928 	 * packet counters associated with this interface.
5929 	 */
5930 	if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
5931 		m->m_pkthdr.rcvif = bridge_ifp;
5932 		BRIDGE_BPF_MTAP_INPUT(sc, m);
5933 		(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5934 		*data = NULL;
5935 		m_freem(m);
5936 		return EJUSTRETURN;
5937 	}
5938 #endif /* IFF_MONITOR */
5939 
5940 	is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;
5941 
5942 	/*
5943 	 * Need to clear the promiscuous flag otherwise it will be
5944 	 * dropped by DLIL after processing filters
5945 	 */
5946 	if ((mbuf_flags(m) & MBUF_PROMISC)) {
5947 		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
5948 	}
5949 
5950 	/* copy the ethernet header */
5951 	eh_in = *(mtod(m, struct ether_header *));
5952 
5953 	is_ip = ether_header_type_is_ip(&eh_in, &is_ipv4);
5954 
5955 	if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
5956 		/* tag packets coming from VMNET interfaces */
5957 		ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
5958 		    eh_in.ether_dhost, data, is_broadcast, is_ip, is_ipv4,
5959 		    &info, &stats, &info_initialized);
5960 		m = *data;
5961 		if (m == NULL) {
5962 			bridge_bripstats_apply(ifp, &stats);
5963 			return EJUSTRETURN;
5964 		}
5965 	}
5966 
5967 	sc_filter_flags = sc->sc_filter_flags;
5968 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5969 		error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
5970 		m = *data;
5971 		if (error != 0 || m == NULL) {
5972 			return EJUSTRETURN;
5973 		}
5974 	}
5975 
5976 	BRIDGE_LOCK(sc);
5977 	bif = bridge_lookup_member_if(sc, ifp);
5978 	if (bif == NULL) {
5979 		BRIDGE_UNLOCK(sc);
5980 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5981 		    "%s bridge_lookup_member_if failed",
5982 		    bridge_ifp->if_xname);
5983 		return 0;
5984 	}
5985 	if (is_ip && bif_has_checksum_offload(bif)) {
5986 		if (info_initialized) {
5987 			bripstats_apply(&bif->bif_stats.brms_in_ip, &stats);
5988 		} else {
5989 			error = bridge_get_ip_proto(data, mac_hlen, is_ipv4,
5990 			    &info, &bif->bif_stats.brms_in_ip);
5991 			if (error != 0) {
5992 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
5993 				    "%s(%s) bridge_get_ip_proto failed %d",
5994 				    bridge_ifp->if_xname,
5995 				    bif->bif_ifp->if_xname, error);
5996 				ip_packet_error = error;
5997 			}
5998 		}
5999 		if (ip_packet_error == 0) {
6000 			/* need to compute IP/UDP/TCP/checksums */
6001 			error = bridge_offload_checksum(data, &info,
6002 			    &bif->bif_stats);
6003 			if (error != 0) {
6004 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
6005 				    "%s(%s) bridge_offload_checksum failed %d",
6006 				    bridge_ifp->if_xname,
6007 				    bif->bif_ifp->if_xname, error);
6008 				ip_packet_error = error;
6009 			}
6010 		}
6011 		if (ip_packet_error != 0) {
6012 			BRIDGE_UNLOCK(sc);
6013 			if (*data != NULL) {
6014 				m_freem(*data);
6015 				*data = NULL;
6016 			}
6017 			return EJUSTRETURN;
6018 		}
6019 		m = *data;
6020 	}
6021 
6022 	if (bif->bif_flags & BIFF_HOST_FILTER) {
6023 		error = bridge_host_filter(bif, data);
6024 		if (error != 0) {
6025 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
6026 			    "%s bridge_host_filter failed",
6027 			    bif->bif_ifp->if_xname);
6028 			BRIDGE_UNLOCK(sc);
6029 			return EJUSTRETURN;
6030 		}
6031 		m = *data;
6032 	}
6033 
6034 	if (!is_broadcast &&
6035 	    _ether_cmp(eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
6036 		/* the packet is unicast to the interface's MAC address */
6037 		if (is_ip && sc->sc_mac_nat_bif == bif) {
6038 			/* doing MAC-NAT, check if destination is IP broadcast */
6039 			is_ip_broadcast = is_broadcast_ip_packet(data);
6040 			if (*data == NULL) {
6041 				BRIDGE_UNLOCK(sc);
6042 				return EJUSTRETURN;
6043 			}
6044 			m = *data;
6045 		}
6046 		if (!is_ip_broadcast) {
6047 			is_ifp_mac = TRUE;
6048 		}
6049 	}
6050 
6051 	bridge_span(sc, m);
6052 
6053 	if (is_broadcast || is_ip_broadcast) {
6054 		if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
6055 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6056 			    " multicast: "
6057 			    "%02x:%02x:%02x:%02x:%02x:%02x",
6058 			    eh_in.ether_dhost[0], eh_in.ether_dhost[1],
6059 			    eh_in.ether_dhost[2], eh_in.ether_dhost[3],
6060 			    eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
6061 		}
6062 		/* Tap off 802.1D packets; they do not get forwarded. */
6063 		if (is_broadcast &&
6064 		    _ether_cmp(eh_in.ether_dhost, bstp_etheraddr) == 0) {
6065 #if BRIDGESTP
6066 			m = bstp_input(&bif->bif_stp, ifp, m);
6067 #else /* !BRIDGESTP */
6068 			m_freem(m);
6069 			m = NULL;
6070 #endif /* !BRIDGESTP */
6071 			if (m == NULL) {
6072 				BRIDGE_UNLOCK(sc);
6073 				return EJUSTRETURN;
6074 			}
6075 		}
6076 
6077 		if ((bif->bif_ifflags & IFBIF_STP) &&
6078 		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6079 			BRIDGE_UNLOCK(sc);
6080 			return 0;
6081 		}
6082 
6083 		/*
6084 		 * Make a deep copy of the packet and enqueue the copy
6085 		 * for bridge processing.
6086 		 */
6087 		mc = m_dup(m, M_DONTWAIT);
6088 		if (mc == NULL) {
6089 			BRIDGE_UNLOCK(sc);
6090 			return 0;
6091 		}
6092 
6093 		/*
6094 		 * Perform the bridge forwarding function with the copy.
6095 		 *
6096 		 * Note that bridge_forward calls BRIDGE_UNLOCK
6097 		 */
6098 		if (is_ip_broadcast) {
6099 			struct ether_header *eh;
6100 
6101 			/* make the copy look like it is actually broadcast */
6102 			mc->m_flags |= M_BCAST;
6103 			eh = mtod(mc, struct ether_header *);
6104 			bcopy(etherbroadcastaddr, eh->ether_dhost,
6105 			    ETHER_ADDR_LEN);
6106 		}
6107 		bridge_forward(sc, bif, mc);
6108 
6109 		/*
6110 		 * Reinject the mbuf as arriving on the bridge so we have a
6111 		 * chance at claiming multicast packets. We can not loop back
6112 		 * here from ether_input as a bridge is never a member of a
6113 		 * bridge.
6114 		 */
6115 		VERIFY(bridge_ifp->if_bridge == NULL);
6116 		mc2 = m_dup(m, M_DONTWAIT);
6117 		if (mc2 != NULL) {
6118 			/* Keep the layer3 header aligned */
6119 			int i = min(mc2->m_pkthdr.len, max_protohdr);
6120 			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
6121 		}
6122 		if (mc2 != NULL) {
6123 			/* mark packet as arriving on the bridge */
6124 			mc2->m_pkthdr.rcvif = bridge_ifp;
6125 			mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
6126 			BRIDGE_BPF_MTAP_INPUT(sc, mc2);
6127 			(void) mbuf_setdata(mc2,
6128 			    (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
6129 			    mbuf_len(mc2) - ETHER_HDR_LEN);
6130 			(void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
6131 			(void) ifnet_stat_increment_in(bridge_ifp, 1,
6132 			    mbuf_pkthdr_len(mc2), 0);
6133 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
6134 			    "%s mcast for us", bridge_ifp->if_xname);
6135 			dlil_input_packet_list(bridge_ifp, mc2);
6136 		}
6137 
6138 		/* Return the original packet for local processing. */
6139 		return 0;
6140 	}
6141 
6142 	if ((bif->bif_ifflags & IFBIF_STP) &&
6143 	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6144 		BRIDGE_UNLOCK(sc);
6145 		return 0;
6146 	}
6147 
6148 #ifdef DEV_CARP
6149 #define CARP_CHECK_WE_ARE_DST(iface) \
6150 	((iface)->if_carp &&\
6151 	        carp_forus((iface)->if_carp, eh_in.ether_dhost))
6152 #define CARP_CHECK_WE_ARE_SRC(iface) \
6153 	((iface)->if_carp &&\
6154 	        carp_forus((iface)->if_carp, eh_in.ether_shost))
6155 #else
6156 #define CARP_CHECK_WE_ARE_DST(iface) 0
6157 #define CARP_CHECK_WE_ARE_SRC(iface) 0
6158 #endif
6159 
6160 #define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)
6161 
6162 #define PFIL_PHYS(sc, ifp, m)
6163 
6164 #define GRAB_OUR_PACKETS(iface)                                         \
6165 	if ((iface)->if_type == IFT_GIF)                                \
6166 	        continue;                                               \
6167 	/* It is destined for us. */                                    \
6168 	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 ||   \
6169 	    CARP_CHECK_WE_ARE_DST((iface))) {                           \
6170 	        if ((iface)->if_type == IFT_BRIDGE) {                   \
6171 	                BRIDGE_BPF_MTAP_INPUT(sc, m);                   \
6172 	/* Filter on the physical interface. */         \
6173 	                PFIL_PHYS(sc, iface, m);                        \
6174 	        } else {                                                \
6175 	                bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0);      \
6176 	        }                                                       \
6177 	        if (bif->bif_ifflags & IFBIF_LEARNING) {                \
6178 	                error = bridge_rtupdate(sc, eh_in.ether_shost,  \
6179 	                    vlan, bif, 0, IFBAF_DYNAMIC);               \
6180 	                if (error && bif->bif_addrmax) {                \
6181 	                        BRIDGE_UNLOCK(sc);                      \
6182 	                        m_freem(m);                             \
6183 	                        return (EJUSTRETURN);                   \
6184 	                }                                               \
6185 	        }                                                       \
6186 	        BRIDGE_UNLOCK(sc);                                      \
6187 	        inject_input_packet(iface, m);                          \
6188 	        return (EJUSTRETURN);                                   \
6189 	}                                                               \
6190                                                                         \
6191 	/* We just received a packet that we sent out. */               \
6192 	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 ||   \
6193 	    CARP_CHECK_WE_ARE_SRC((iface))) {                           \
6194 	        BRIDGE_UNLOCK(sc);                                      \
6195 	        m_freem(m);                                             \
6196 	        return (EJUSTRETURN);                                   \
6197 	}
6198 
6199 	/*
6200 	 * Unicast.
6201 	 */
6202 
6203 	/* handle MAC-NAT if enabled */
6204 	if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
6205 		ifnet_t dst_if;
6206 		boolean_t is_input = FALSE;
6207 
6208 		dst_if = bridge_mac_nat_input(sc, data, &is_input);
6209 		m = *data;
6210 		if (dst_if == ifp) {
6211 			/* our input packet */
6212 		} else if (dst_if != NULL || m == NULL) {
6213 			BRIDGE_UNLOCK(sc);
6214 			if (dst_if != NULL) {
6215 				ASSERT(m != NULL);
6216 				if (is_input) {
6217 					inject_input_packet(dst_if, m);
6218 				} else {
6219 					(void)bridge_enqueue(bridge_ifp, NULL,
6220 					    dst_if, m,
6221 					    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
6222 				}
6223 			}
6224 			return EJUSTRETURN;
6225 		}
6226 	}
6227 
6228 	/*
6229 	 * If the packet is for the bridge, pass it up for local processing.
6230 	 */
6231 	if (_ether_cmp(eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
6232 	    CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
6233 		bpf_packet_func     bpf_input_func = sc->sc_bpf_input;
6234 
6235 		/*
6236 		 * If the interface is learning, and the source
6237 		 * address is valid and not multicast, record
6238 		 * the address.
6239 		 */
6240 		if (bif->bif_ifflags & IFBIF_LEARNING) {
6241 			(void) bridge_rtupdate(sc, eh_in.ether_shost,
6242 			    vlan, bif, 0, IFBAF_DYNAMIC);
6243 		}
6244 		BRIDGE_UNLOCK(sc);
6245 
6246 		bridge_interface_input(bridge_ifp, m, bpf_input_func);
6247 		return EJUSTRETURN;
6248 	}
6249 
6250 	/*
6251 	 * if the destination of the packet is for the MAC address of
6252 	 * the member interface itself, then we don't need to forward
6253 	 * it -- just pass it back.  Note that it'll likely just be
6254 	 * dropped by the stack, but if something else is bound to
6255 	 * the interface directly (for example, the wireless stats
6256 	 * protocol -- although that actually uses BPF right now),
6257 	 * then it will consume the packet
6258 	 *
6259 	 * ALSO, note that we do this check AFTER checking for the
6260 	 * bridge's own MAC address, because the bridge may be
6261 	 * using the SAME MAC address as one of its interfaces
6262 	 */
6263 	if (is_ifp_mac) {
6264 
6265 #ifdef VERY_VERY_VERY_DIAGNOSTIC
6266 		BRIDGE_LOG(LOG_NOTICE, 0,
6267 		    "not forwarding packet bound for member interface");
6268 #endif
6269 
6270 		BRIDGE_UNLOCK(sc);
6271 		return 0;
6272 	}
6273 
6274 	/* Now check the remaining bridge members. */
6275 	TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
6276 		if (bif2->bif_ifp != ifp) {
6277 			GRAB_OUR_PACKETS(bif2->bif_ifp);
6278 		}
6279 	}
6280 
6281 #undef CARP_CHECK_WE_ARE_DST
6282 #undef CARP_CHECK_WE_ARE_SRC
6283 #undef GRAB_OUR_PACKETS
6284 
6285 	/*
6286 	 * Perform the bridge forwarding function.
6287 	 *
6288 	 * Note that bridge_forward calls BRIDGE_UNLOCK
6289 	 */
6290 	bridge_forward(sc, bif, m);
6291 
6292 	return EJUSTRETURN;
6293 }
6294 
6295 /*
6296  * bridge_broadcast:
6297  *
6298  *	Send a frame to all interfaces that are members of
6299  *	the bridge, except for the one on which the packet
6300  *	arrived.
6301  *
6302  *	NOTE: Releases the lock on return.
6303  */
6304 static void
6305 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6306     struct mbuf *m, int runfilt)
6307 {
6308 	ifnet_t bridge_ifp;
6309 	struct bridge_iflist *dbif;
6310 	struct ifnet * src_if;
6311 	struct mbuf *mc;
6312 	struct mbuf *mc_in;
6313 	struct ifnet *dst_if;
6314 	int error = 0, used = 0;
6315 	boolean_t bridge_if_out;
6316 	ChecksumOperation cksum_op;
6317 	struct mac_nat_record mnr;
6318 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6319 	boolean_t translate_mac = FALSE;
6320 	uint32_t sc_filter_flags = 0;
6321 
6322 	bridge_ifp = sc->sc_ifp;
6323 	if (sbif != NULL) {
6324 		bridge_if_out = FALSE;
6325 		src_if = sbif->bif_ifp;
6326 		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6327 		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6328 			/* get the translation record while holding the lock */
6329 			translate_mac
6330 			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6331 			if (m == NULL) {
6332 				/* packet was deallocated */
6333 				BRIDGE_UNLOCK(sc);
6334 				return;
6335 			}
6336 		}
6337 	} else {
6338 		/*
6339 		 * sbif is NULL when the bridge interface calls
6340 		 * bridge_broadcast().
6341 		 */
6342 		bridge_if_out = TRUE;
6343 		cksum_op = CHECKSUM_OPERATION_FINALIZE;
6344 		sbif = NULL;
6345 		src_if = NULL;
6346 	}
6347 
6348 	BRIDGE_LOCK2REF(sc, error);
6349 	if (error) {
6350 		m_freem(m);
6351 		return;
6352 	}
6353 
6354 	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6355 		dst_if = dbif->bif_ifp;
6356 		if (dst_if == src_if) {
6357 			/* skip the interface that the packet came in on */
6358 			continue;
6359 		}
6360 
6361 		/* Private segments can not talk to each other */
6362 		if (sbif != NULL &&
6363 		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6364 			continue;
6365 		}
6366 
6367 		if ((dbif->bif_ifflags & IFBIF_STP) &&
6368 		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6369 			continue;
6370 		}
6371 
6372 		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6373 		    (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
6374 			continue;
6375 		}
6376 
6377 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6378 			continue;
6379 		}
6380 
6381 		if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
6382 			continue;
6383 		}
6384 
6385 		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6386 			mc = m;
6387 			used = 1;
6388 		} else {
6389 			mc = m_dup(m, M_DONTWAIT);
6390 			if (mc == NULL) {
6391 				(void) ifnet_stat_increment_out(bridge_ifp,
6392 				    0, 0, 1);
6393 				continue;
6394 			}
6395 		}
6396 
6397 		/*
6398 		 * If broadcast input is enabled, do so only if this
6399 		 * is an input packet.
6400 		 */
6401 		if (!bridge_if_out &&
6402 		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6403 			mc_in = m_dup(mc, M_DONTWAIT);
6404 			/* this could fail, but we continue anyways */
6405 		} else {
6406 			mc_in = NULL;
6407 		}
6408 
6409 		/* out */
6410 		if (translate_mac && mac_nat_bif == dbif) {
6411 			/* translate the packet without holding the lock */
6412 			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6413 		}
6414 
6415 		sc_filter_flags = sc->sc_filter_flags;
6416 		if (runfilt &&
6417 		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6418 			if (used == 0) {
6419 				/* Keep the layer3 header aligned */
6420 				int i = min(mc->m_pkthdr.len, max_protohdr);
6421 				mc = m_copyup(mc, i, ETHER_ALIGN);
6422 				if (mc == NULL) {
6423 					(void) ifnet_stat_increment_out(
6424 						sc->sc_ifp, 0, 0, 1);
6425 					if (mc_in != NULL) {
6426 						m_freem(mc_in);
6427 						mc_in = NULL;
6428 					}
6429 					continue;
6430 				}
6431 			}
6432 			if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
6433 				if (mc_in != NULL) {
6434 					m_freem(mc_in);
6435 					mc_in = NULL;
6436 				}
6437 				continue;
6438 			}
6439 			if (mc == NULL) {
6440 				if (mc_in != NULL) {
6441 					m_freem(mc_in);
6442 					mc_in = NULL;
6443 				}
6444 				continue;
6445 			}
6446 		}
6447 
6448 		if (mc != NULL) {
6449 			/* verify checksum if necessary */
6450 			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6451 			    !bif_has_checksum_offload(sbif)) {
6452 				error = bridge_verify_checksum(&mc,
6453 				    &dbif->bif_stats);
6454 				if (error != 0) {
6455 					if (mc != NULL) {
6456 						m_freem(mc);
6457 					}
6458 					mc = NULL;
6459 				}
6460 			}
6461 			if (mc != NULL) {
6462 				(void) bridge_enqueue(bridge_ifp,
6463 				    NULL, dst_if, mc, cksum_op);
6464 			}
6465 		}
6466 
6467 		/* in */
6468 		if (mc_in == NULL) {
6469 			continue;
6470 		}
6471 		bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
6472 		mbuf_pkthdr_setrcvif(mc_in, dst_if);
6473 		mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
6474 		mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
6475 		    mbuf_len(mc_in) - ETHER_HDR_LEN);
6476 		mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
6477 		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6478 		dlil_input_packet_list(dst_if, mc_in);
6479 	}
6480 	if (used == 0) {
6481 		m_freem(m);
6482 	}
6483 
6484 
6485 	BRIDGE_UNREF(sc);
6486 }
6487 
6488 /*
6489  * bridge_span:
6490  *
6491  *	Duplicate a packet out one or more interfaces that are in span mode,
6492  *	the original mbuf is unmodified.
6493  */
6494 static void
6495 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6496 {
6497 	struct bridge_iflist *bif;
6498 	struct ifnet *dst_if;
6499 	struct mbuf *mc;
6500 
6501 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6502 		return;
6503 	}
6504 
6505 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6506 		dst_if = bif->bif_ifp;
6507 
6508 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6509 			continue;
6510 		}
6511 
6512 		mc = m_copypacket(m, M_DONTWAIT);
6513 		if (mc == NULL) {
6514 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6515 			continue;
6516 		}
6517 
6518 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6519 		    CHECKSUM_OPERATION_NONE);
6520 	}
6521 }
6522 
6523 
6524 /*
6525  * bridge_rtupdate:
6526  *
6527  *	Add a bridge routing entry.
6528  */
6529 static int
6530 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6531     struct bridge_iflist *bif, int setflags, uint8_t flags)
6532 {
6533 	struct bridge_rtnode *brt;
6534 	int error;
6535 
6536 	BRIDGE_LOCK_ASSERT_HELD(sc);
6537 
6538 	/* Check the source address is valid and not multicast. */
6539 	if (ETHER_IS_MULTICAST(dst) ||
6540 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6541 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6542 		return EINVAL;
6543 	}
6544 
6545 
6546 	/* 802.1p frames map to vlan 1 */
6547 	if (vlan == 0) {
6548 		vlan = 1;
6549 	}
6550 
6551 	/*
6552 	 * A route for this destination might already exist.  If so,
6553 	 * update it, otherwise create a new one.
6554 	 */
6555 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6556 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6557 			sc->sc_brtexceeded++;
6558 			return ENOSPC;
6559 		}
6560 		/* Check per interface address limits (if enabled) */
6561 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6562 			bif->bif_addrexceeded++;
6563 			return ENOSPC;
6564 		}
6565 
6566 		/*
6567 		 * Allocate a new bridge forwarding node, and
6568 		 * initialize the expiration time and Ethernet
6569 		 * address.
6570 		 */
6571 		brt = zalloc_noblock(bridge_rtnode_pool);
6572 		if (brt == NULL) {
6573 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6574 			    "zalloc_nolock failed");
6575 			return ENOMEM;
6576 		}
6577 		bzero(brt, sizeof(struct bridge_rtnode));
6578 
6579 		if (bif->bif_ifflags & IFBIF_STICKY) {
6580 			brt->brt_flags = IFBAF_STICKY;
6581 		} else {
6582 			brt->brt_flags = IFBAF_DYNAMIC;
6583 		}
6584 
6585 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6586 		brt->brt_vlan = vlan;
6587 
6588 
6589 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6590 			zfree(bridge_rtnode_pool, brt);
6591 			return error;
6592 		}
6593 		brt->brt_dst = bif;
6594 		bif->bif_addrcnt++;
6595 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6596 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6597 		    "on %s count %u hashsize %u",
6598 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6599 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6600 		    sc->sc_rthash_size);
6601 	}
6602 
6603 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6604 	    brt->brt_dst != bif) {
6605 		brt->brt_dst->bif_addrcnt--;
6606 		brt->brt_dst = bif;
6607 		brt->brt_dst->bif_addrcnt++;
6608 	}
6609 
6610 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6611 		unsigned long now;
6612 
6613 		now = (unsigned long) net_uptime();
6614 		brt->brt_expire = now + sc->sc_brttimeout;
6615 	}
6616 	if (setflags) {
6617 		brt->brt_flags = flags;
6618 	}
6619 
6620 
6621 	return 0;
6622 }
6623 
6624 /*
6625  * bridge_rtlookup:
6626  *
6627  *	Lookup the destination interface for an address.
6628  */
6629 static struct ifnet *
6630 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6631 {
6632 	struct bridge_rtnode *brt;
6633 
6634 	BRIDGE_LOCK_ASSERT_HELD(sc);
6635 
6636 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6637 		return NULL;
6638 	}
6639 
6640 	return brt->brt_ifp;
6641 }
6642 
6643 /*
6644  * bridge_rttrim:
6645  *
6646  *	Trim the routine table so that we have a number
6647  *	of routing entries less than or equal to the
6648  *	maximum number.
6649  */
6650 static void
6651 bridge_rttrim(struct bridge_softc *sc)
6652 {
6653 	struct bridge_rtnode *brt, *nbrt;
6654 
6655 	BRIDGE_LOCK_ASSERT_HELD(sc);
6656 
6657 	/* Make sure we actually need to do this. */
6658 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6659 		return;
6660 	}
6661 
6662 	/* Force an aging cycle; this might trim enough addresses. */
6663 	bridge_rtage(sc);
6664 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6665 		return;
6666 	}
6667 
6668 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6669 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6670 			bridge_rtnode_destroy(sc, brt);
6671 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6672 				return;
6673 			}
6674 		}
6675 	}
6676 }
6677 
6678 /*
6679  * bridge_aging_timer:
6680  *
6681  *	Aging periodic timer for the bridge routing table.
6682  */
6683 static void
6684 bridge_aging_timer(struct bridge_softc *sc)
6685 {
6686 	BRIDGE_LOCK_ASSERT_HELD(sc);
6687 
6688 	bridge_rtage(sc);
6689 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6690 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6691 		sc->sc_aging_timer.bdc_sc = sc;
6692 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6693 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6694 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6695 	}
6696 }
6697 
6698 /*
6699  * bridge_rtage:
6700  *
6701  *	Perform an aging cycle.
6702  */
6703 static void
6704 bridge_rtage(struct bridge_softc *sc)
6705 {
6706 	struct bridge_rtnode *brt, *nbrt;
6707 	unsigned long now;
6708 
6709 	BRIDGE_LOCK_ASSERT_HELD(sc);
6710 
6711 	now = (unsigned long) net_uptime();
6712 
6713 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6714 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6715 			if (now >= brt->brt_expire) {
6716 				bridge_rtnode_destroy(sc, brt);
6717 			}
6718 		}
6719 	}
6720 	if (sc->sc_mac_nat_bif != NULL) {
6721 		bridge_mac_nat_age_entries(sc, now);
6722 	}
6723 }
6724 
6725 /*
6726  * bridge_rtflush:
6727  *
6728  *	Remove all dynamic addresses from the bridge.
6729  */
6730 static void
6731 bridge_rtflush(struct bridge_softc *sc, int full)
6732 {
6733 	struct bridge_rtnode *brt, *nbrt;
6734 
6735 	BRIDGE_LOCK_ASSERT_HELD(sc);
6736 
6737 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6738 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6739 			bridge_rtnode_destroy(sc, brt);
6740 		}
6741 	}
6742 }
6743 
6744 /*
6745  * bridge_rtdaddr:
6746  *
6747  *	Remove an address from the table.
6748  */
6749 static int
6750 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6751 {
6752 	struct bridge_rtnode *brt;
6753 	int found = 0;
6754 
6755 	BRIDGE_LOCK_ASSERT_HELD(sc);
6756 
6757 	/*
6758 	 * If vlan is zero then we want to delete for all vlans so the lookup
6759 	 * may return more than one.
6760 	 */
6761 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6762 		bridge_rtnode_destroy(sc, brt);
6763 		found = 1;
6764 	}
6765 
6766 	return found ? 0 : ENOENT;
6767 }
6768 
6769 /*
6770  * bridge_rtdelete:
6771  *
6772  *	Delete routes to a specific member interface.
6773  */
6774 static void
6775 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6776 {
6777 	struct bridge_rtnode *brt, *nbrt;
6778 
6779 	BRIDGE_LOCK_ASSERT_HELD(sc);
6780 
6781 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6782 		if (brt->brt_ifp == ifp && (full ||
6783 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6784 			bridge_rtnode_destroy(sc, brt);
6785 		}
6786 	}
6787 }
6788 
6789 /*
6790  * bridge_rtable_init:
6791  *
6792  *	Initialize the route table for this bridge.
6793  */
6794 static int
6795 bridge_rtable_init(struct bridge_softc *sc)
6796 {
6797 	u_int32_t i;
6798 
6799 	sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6800 	    BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6801 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6802 
6803 	for (i = 0; i < sc->sc_rthash_size; i++) {
6804 		LIST_INIT(&sc->sc_rthash[i]);
6805 	}
6806 
6807 	sc->sc_rthash_key = RandomULong();
6808 
6809 	LIST_INIT(&sc->sc_rtlist);
6810 
6811 	return 0;
6812 }
6813 
/*
 * bridge_rthash_delayed_resize:
 *
 *	Resize the routing table hash on a delayed thread call.
 *
 *	Scheduled by bridge_rthash_resize().  Doubles the number of hash
 *	buckets and rehashes every route under a freshly drawn hash key.
 *	The function is entered and left with the bridge lock held, but
 *	the lock is dropped around the allocation of the new bucket array;
 *	SCF_RESIZING is set for that window, which makes
 *	bridge_rthash_resize() return without scheduling another resize.
 *
 *	A failure to resize (allocation failure, or the bridge started
 *	detaching while the lock was dropped) leaves the current table
 *	untouched.
 */
static void
bridge_rthash_delayed_resize(struct bridge_softc *sc)
{
	u_int32_t new_rthash_size = 0;
	u_int32_t old_rthash_size = 0;
	struct _bridge_rtnode_list *new_rthash = NULL;
	struct _bridge_rtnode_list *old_rthash = NULL;
	u_int32_t i;
	struct bridge_rtnode *brt;
	int error = 0;

	BRIDGE_LOCK_ASSERT_HELD(sc);

	/*
	 * Four entries per hash bucket is our ideal load factor
	 *
	 * NOTE(review): this early exit reaches `out' with error == 0, so
	 * the "new size" message is logged (with the unchanged size) even
	 * though nothing was resized.
	 */
	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
		goto out;
	}

	/*
	 * Doubling the number of hash buckets may be too simplistic
	 * especially when facing a spike of new entries
	 */
	new_rthash_size = sc->sc_rthash_size * 2;

	/*
	 * Flag the resize as in progress before dropping the lock.
	 * Presumably the lock is dropped because a Z_WAITOK allocation
	 * may block -- TODO confirm.
	 */
	sc->sc_flags |= SCF_RESIZING;
	BRIDGE_UNLOCK(sc);

	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
	    Z_WAITOK | Z_ZERO);

	BRIDGE_LOCK(sc);
	sc->sc_flags &= ~SCF_RESIZING;

	if (new_rthash == NULL) {
		error = ENOMEM;
		goto out;
	}
	/* The bridge may have started detaching while the lock was dropped */
	if ((sc->sc_flags & SCF_DETACHING)) {
		error = ENODEV;
		goto out;
	}
	/*
	 * Fail safe from here on
	 */
	old_rthash = sc->sc_rthash;
	old_rthash_size = sc->sc_rthash_size;
	sc->sc_rthash = new_rthash;
	sc->sc_rthash_size = new_rthash_size;

	/*
	 * Get a new key to force entries to be shuffled around to reduce
	 * the likelihood they will land in the same buckets
	 */
	sc->sc_rthash_key = RandomULong();

	for (i = 0; i < sc->sc_rthash_size; i++) {
		LIST_INIT(&sc->sc_rthash[i]);
	}

	/* Unlink each route from its old bucket and hash it into the new table */
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
		LIST_REMOVE(brt, brt_hash);
		(void) bridge_rtnode_hash(sc, brt);
	}
out:
	if (error == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
		    "%s new size %u",
		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
		/* Success: the old bucket array is no longer referenced */
		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
	} else {
		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
		    "%s failed %d", sc->sc_ifp->if_xname, error);
		/* Failure: the new bucket array (if any) was never installed */
		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
	}
}
6896 
6897 /*
6898  * Resize the number of hash buckets based on the load factor
6899  * Currently only grow
6900  * Failing to resize the hash table is not fatal
6901  */
6902 static void
6903 bridge_rthash_resize(struct bridge_softc *sc)
6904 {
6905 	BRIDGE_LOCK_ASSERT_HELD(sc);
6906 
6907 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6908 		return;
6909 	}
6910 
6911 	/*
6912 	 * Four entries per hash bucket is our ideal load factor
6913 	 */
6914 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6915 		return;
6916 	}
6917 	/*
6918 	 * Hard limit on the size of the routing hash table
6919 	 */
6920 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6921 		return;
6922 	}
6923 
6924 	sc->sc_resize_call.bdc_sc = sc;
6925 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6926 	bridge_schedule_delayed_call(&sc->sc_resize_call);
6927 }
6928 
6929 /*
6930  * bridge_rtable_fini:
6931  *
6932  *	Deconstruct the route table for this bridge.
6933  */
6934 static void
6935 bridge_rtable_fini(struct bridge_softc *sc)
6936 {
6937 	KASSERT(sc->sc_brtcnt == 0,
6938 	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
6939 	kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
6940 	    sc->sc_rthash);
6941 	sc->sc_rthash = NULL;
6942 	sc->sc_rthash_size = 0;
6943 }
6944 
6945 /*
6946  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6947  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6948  */
6949 #define mix(a, b, c)                                                    \
6950 do {                                                                    \
6951 	a -= b; a -= c; a ^= (c >> 13);                                 \
6952 	b -= c; b -= a; b ^= (a << 8);                                  \
6953 	c -= a; c -= b; c ^= (b >> 13);                                 \
6954 	a -= b; a -= c; a ^= (c >> 12);                                 \
6955 	b -= c; b -= a; b ^= (a << 16);                                 \
6956 	c -= a; c -= b; c ^= (b >> 5);                                  \
6957 	a -= b; a -= c; a ^= (c >> 3);                                  \
6958 	b -= c; b -= a; b ^= (a << 10);                                 \
6959 	c -= a; c -= b; c ^= (b >> 15);                                 \
6960 } while ( /*CONSTCOND*/ 0)
6961 
6962 static __inline uint32_t
6963 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
6964 {
6965 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6966 
6967 	b += addr[5] << 8;
6968 	b += addr[4];
6969 	a += addr[3] << 24;
6970 	a += addr[2] << 16;
6971 	a += addr[1] << 8;
6972 	a += addr[0];
6973 
6974 	mix(a, b, c);
6975 
6976 	return c & BRIDGE_RTHASH_MASK(sc);
6977 }
6978 
6979 #undef mix
6980 
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns zero when
 *	they are equal, otherwise the difference (a - b) of the first
 *	pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int diff = (int)a[idx] - (int)b[idx];

		if (diff != 0) {
			return diff;
		}
	}

	return 0;
}
6992 
6993 /*
6994  * bridge_rtnode_lookup:
6995  *
6996  *	Look up a bridge route node for the specified destination. Compare the
6997  *	vlan id or if zero then just return the first match.
6998  */
6999 static struct bridge_rtnode *
7000 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
7001     uint16_t vlan)
7002 {
7003 	struct bridge_rtnode *brt;
7004 	uint32_t hash;
7005 	int dir;
7006 
7007 	BRIDGE_LOCK_ASSERT_HELD(sc);
7008 
7009 	hash = bridge_rthash(sc, addr);
7010 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
7011 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
7012 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
7013 			return brt;
7014 		}
7015 		if (dir > 0) {
7016 			return NULL;
7017 		}
7018 	}
7019 
7020 	return NULL;
7021 }
7022 
7023 /*
7024  * bridge_rtnode_hash:
7025  *
7026  *	Insert the specified bridge node into the route hash table.
7027  *	This is used when adding a new node or to rehash when resizing
7028  *	the hash table
7029  */
7030 static int
7031 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
7032 {
7033 	struct bridge_rtnode *lbrt;
7034 	uint32_t hash;
7035 	int dir;
7036 
7037 	BRIDGE_LOCK_ASSERT_HELD(sc);
7038 
7039 	hash = bridge_rthash(sc, brt->brt_addr);
7040 
7041 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
7042 	if (lbrt == NULL) {
7043 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
7044 		goto out;
7045 	}
7046 
7047 	do {
7048 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
7049 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
7050 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7051 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
7052 			    sc->sc_ifp->if_xname,
7053 			    brt->brt_addr[0], brt->brt_addr[1],
7054 			    brt->brt_addr[2], brt->brt_addr[3],
7055 			    brt->brt_addr[4], brt->brt_addr[5]);
7056 			return EEXIST;
7057 		}
7058 		if (dir > 0) {
7059 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7060 			goto out;
7061 		}
7062 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7063 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7064 			goto out;
7065 		}
7066 		lbrt = LIST_NEXT(lbrt, brt_hash);
7067 	} while (lbrt != NULL);
7068 
7069 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7070 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7071 	    sc->sc_ifp->if_xname,
7072 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7073 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7074 out:
7075 	return 0;
7076 }
7077 
7078 /*
7079  * bridge_rtnode_insert:
7080  *
7081  *	Insert the specified bridge node into the route table.  We
7082  *	assume the entry is not already in the table.
7083  */
7084 static int
7085 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7086 {
7087 	int error;
7088 
7089 	error = bridge_rtnode_hash(sc, brt);
7090 	if (error != 0) {
7091 		return error;
7092 	}
7093 
7094 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7095 	sc->sc_brtcnt++;
7096 
7097 	bridge_rthash_resize(sc);
7098 
7099 	return 0;
7100 }
7101 
7102 /*
7103  * bridge_rtnode_destroy:
7104  *
7105  *	Destroy a bridge rtnode.
7106  */
7107 static void
7108 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7109 {
7110 	BRIDGE_LOCK_ASSERT_HELD(sc);
7111 
7112 	LIST_REMOVE(brt, brt_hash);
7113 
7114 	LIST_REMOVE(brt, brt_list);
7115 	sc->sc_brtcnt--;
7116 	brt->brt_dst->bif_addrcnt--;
7117 	zfree(bridge_rtnode_pool, brt);
7118 }
7119 
7120 #if BRIDGESTP
7121 /*
7122  * bridge_rtable_expire:
7123  *
7124  *	Set the expiry time for all routes on an interface.
7125  */
7126 static void
7127 bridge_rtable_expire(struct ifnet *ifp, int age)
7128 {
7129 	struct bridge_softc *sc = ifp->if_bridge;
7130 	struct bridge_rtnode *brt;
7131 
7132 	BRIDGE_LOCK(sc);
7133 
7134 	/*
7135 	 * If the age is zero then flush, otherwise set all the expiry times to
7136 	 * age for the interface
7137 	 */
7138 	if (age == 0) {
7139 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7140 	} else {
7141 		unsigned long now;
7142 
7143 		now = (unsigned long) net_uptime();
7144 
7145 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7146 			/* Cap the expiry time to 'age' */
7147 			if (brt->brt_ifp == ifp &&
7148 			    brt->brt_expire > now + age &&
7149 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7150 				brt->brt_expire = now + age;
7151 			}
7152 		}
7153 	}
7154 	BRIDGE_UNLOCK(sc);
7155 }
7156 
7157 /*
7158  * bridge_state_change:
7159  *
7160  *	Callback from the bridgestp code when a port changes states.
7161  */
7162 static void
7163 bridge_state_change(struct ifnet *ifp, int state)
7164 {
7165 	struct bridge_softc *sc = ifp->if_bridge;
7166 	static const char *stpstates[] = {
7167 		"disabled",
7168 		"listening",
7169 		"learning",
7170 		"forwarding",
7171 		"blocking",
7172 		"discarding"
7173 	};
7174 
7175 	if (log_stp) {
7176 		log(LOG_NOTICE, "%s: state changed to %s on %s",
7177 		    sc->sc_ifp->if_xname,
7178 		    stpstates[state], ifp->if_xname);
7179 	}
7180 }
7181 #endif /* BRIDGESTP */
7182 
7183 /*
7184  * bridge_set_bpf_tap:
7185  *
7186  *	Sets ups the BPF callbacks.
7187  */
7188 static errno_t
7189 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7190 {
7191 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7192 
7193 	/* TBD locking */
7194 	if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7195 		return ENODEV;
7196 	}
7197 	switch (mode) {
7198 	case BPF_TAP_DISABLE:
7199 		sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7200 		break;
7201 
7202 	case BPF_TAP_INPUT:
7203 		sc->sc_bpf_input = bpf_callback;
7204 		break;
7205 
7206 	case BPF_TAP_OUTPUT:
7207 		sc->sc_bpf_output = bpf_callback;
7208 		break;
7209 
7210 	case BPF_TAP_INPUT_OUTPUT:
7211 		sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7212 		break;
7213 
7214 	default:
7215 		break;
7216 	}
7217 
7218 	return 0;
7219 }
7220 
/*
 * bridge_detach:
 *
 *	Callback when interface has been detached.
 *
 *	Releases what is still attached to the bridge softc, in order:
 *	the STP state (when built with BRIDGESTP), the routing hash
 *	table, the softc's entry on the global bridge list, the ifnet
 *	reference, the bridge mutex and finally the softc itself.  `sc'
 *	must not be used after this function returns.
 */
static void
bridge_detach(ifnet_t ifp)
{
	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);

#if BRIDGESTP
	bstp_detach(&sc->sc_stp);
#endif /* BRIDGESTP */

	/* Tear down the routing table. */
	bridge_rtable_fini(sc);

	/* Unlink the softc from the bridge list, under the list mutex */
	lck_mtx_lock(&bridge_list_mtx);
	LIST_REMOVE(sc, sc_list);
	lck_mtx_unlock(&bridge_list_mtx);

	ifnet_release(ifp);

	/* The mutex lives inside the softc: destroy it before freeing the softc */
	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
	if_clone_softc_deallocate(&bridge_cloner, sc);
}
7247 
7248 /*
7249  * bridge_bpf_input:
7250  *
7251  *	Invoke the input BPF callback if enabled
7252  */
7253 static errno_t
7254 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7255 {
7256 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7257 	bpf_packet_func     input_func = sc->sc_bpf_input;
7258 
7259 	if (input_func != NULL) {
7260 		if (mbuf_pkthdr_rcvif(m) != ifp) {
7261 			BRIDGE_LOG(LOG_NOTICE, 0,
7262 			    "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7263 			    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7264 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7265 		}
7266 		(*input_func)(ifp, m);
7267 	}
7268 	return 0;
7269 }
7270 
7271 /*
7272  * bridge_bpf_output:
7273  *
7274  *	Invoke the output BPF callback if enabled
7275  */
7276 static errno_t
7277 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7278 {
7279 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7280 	bpf_packet_func     output_func = sc->sc_bpf_output;
7281 
7282 	if (output_func != NULL) {
7283 		(*output_func)(ifp, m);
7284 	}
7285 	return 0;
7286 }
7287 
7288 /*
7289  * bridge_link_event:
7290  *
7291  *	Report a data link event on an interface
7292  */
7293 static void
7294 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7295 {
7296 	struct event {
7297 		u_int32_t ifnet_family;
7298 		u_int32_t unit;
7299 		char if_name[IFNAMSIZ];
7300 	};
7301 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7302 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7303 	struct event *data = (struct event *)(header + 1);
7304 
7305 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7306 	    "%s event_code %u - %s", ifp->if_xname,
7307 	    event_code, dlil_kev_dl_code_str(event_code));
7308 	header->total_size   = sizeof(message);
7309 	header->vendor_code  = KEV_VENDOR_APPLE;
7310 	header->kev_class    = KEV_NETWORK_CLASS;
7311 	header->kev_subclass = KEV_DL_SUBCLASS;
7312 	header->event_code   = event_code;
7313 	data->ifnet_family   = ifnet_family(ifp);
7314 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7315 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7316 	ifnet_event(ifp, header);
7317 }
7318 
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter drop: increment the `reason' counter of
 *	bridge_hostfilter_stats, log the caller's function name and line
 *	followed by the stringified reason, and set `error' to EINVAL.
 *
 *	The macro assigns to a variable named `error' that must exist in
 *	the scope where it is used.  It expands to a brace-enclosed block
 *	and does not transfer control itself.
 */
#define BRIDGE_HF_DROP(reason, func, line) {                            \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d" #reason, func, line);                \
	        error = EINVAL;                                         \
	}
7325 
7326 /*
7327  * Make sure this is a DHCP or Bootp request that match the host filter
7328  */
7329 static int
7330 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7331 {
7332 	int error = EINVAL;
7333 	struct dhcp dhcp;
7334 
7335 	/*
7336 	 * Note: We use the dhcp structure because bootp structure definition
7337 	 * is larger and some vendors do not pad the request
7338 	 */
7339 	error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7340 	if (error != 0) {
7341 		BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7342 		goto done;
7343 	}
7344 	if (dhcp.dp_op != BOOTREQUEST) {
7345 		BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7346 		goto done;
7347 	}
7348 	/*
7349 	 * The hardware address must be an exact match
7350 	 */
7351 	if (dhcp.dp_htype != ARPHRD_ETHER) {
7352 		BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7353 		goto done;
7354 	}
7355 	if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7356 		BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7357 		goto done;
7358 	}
7359 	if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7360 	    ETHER_ADDR_LEN) != 0) {
7361 		BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7362 		goto done;
7363 	}
7364 	/*
7365 	 * Client address must match the host address or be not specified
7366 	 */
7367 	if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7368 	    dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7369 		BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7370 		goto done;
7371 	}
7372 	error = 0;
7373 done:
7374 	return error;
7375 }
7376 
7377 static int
7378 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7379 {
7380 	int error = EINVAL;
7381 	struct ether_header *eh;
7382 	static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7383 	mbuf_t m = *data;
7384 
7385 	eh = mtod(m, struct ether_header *);
7386 
7387 	/*
7388 	 * Restrict the source hardware address
7389 	 */
7390 	if ((bif->bif_flags & BIFF_HF_HWSRC) == 0 ||
7391 	    bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7392 	    ETHER_ADDR_LEN) != 0) {
7393 		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7394 		goto done;
7395 	}
7396 
7397 	/*
7398 	 * Restrict Ethernet protocols to ARP and IP
7399 	 */
7400 	if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7401 		struct ether_arp *ea;
7402 		size_t minlen = sizeof(struct ether_header) +
7403 		    sizeof(struct ether_arp);
7404 
7405 		/*
7406 		 * Make the Ethernet and ARP headers contiguous
7407 		 */
7408 		if (mbuf_pkthdr_len(m) < minlen) {
7409 			BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7410 			goto done;
7411 		}
7412 		if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7413 			BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7414 			    __func__, __LINE__);
7415 			goto done;
7416 		}
7417 		m = *data;
7418 
7419 		/*
7420 		 * Verify this is an ethernet/ip arp
7421 		 */
7422 		eh = mtod(m, struct ether_header *);
7423 		ea = (struct ether_arp *)(eh + 1);
7424 		if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7425 			BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7426 			    __func__, __LINE__);
7427 			goto done;
7428 		}
7429 		if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7430 			BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7431 			    __func__, __LINE__);
7432 			goto done;
7433 		}
7434 		/*
7435 		 * Verify the address lengths are correct
7436 		 */
7437 		if (ea->arp_hln != ETHER_ADDR_LEN) {
7438 			BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7439 			goto done;
7440 		}
7441 		if (ea->arp_pln != sizeof(struct in_addr)) {
7442 			BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7443 			    __func__, __LINE__);
7444 			goto done;
7445 		}
7446 
7447 		/*
7448 		 * Allow only ARP request or ARP reply
7449 		 */
7450 		if (ea->arp_op != htons(ARPOP_REQUEST) &&
7451 		    ea->arp_op != htons(ARPOP_REPLY)) {
7452 			BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7453 			goto done;
7454 		}
7455 		/*
7456 		 * Verify source hardware address matches
7457 		 */
7458 		if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7459 		    ETHER_ADDR_LEN) != 0) {
7460 			BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7461 			goto done;
7462 		}
7463 		/*
7464 		 * Verify source protocol address:
7465 		 * May be null for an ARP probe
7466 		 */
7467 		if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7468 		    sizeof(struct in_addr)) != 0 &&
7469 		    bcmp(ea->arp_spa, &inaddr_any,
7470 		    sizeof(struct in_addr)) != 0) {
7471 			BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7472 			goto done;
7473 		}
7474 		bridge_hostfilter_stats.brhf_arp_ok += 1;
7475 		error = 0;
7476 	} else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7477 		size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7478 		struct ip iphdr;
7479 		size_t offset;
7480 
7481 		/*
7482 		 * Make the Ethernet and IP headers contiguous
7483 		 */
7484 		if (mbuf_pkthdr_len(m) < minlen) {
7485 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7486 			goto done;
7487 		}
7488 		offset = sizeof(struct ether_header);
7489 		error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7490 		if (error != 0) {
7491 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7492 			goto done;
7493 		}
7494 		/*
7495 		 * Verify the source IP address
7496 		 */
7497 		if (iphdr.ip_p == IPPROTO_UDP) {
7498 			struct udphdr udp;
7499 
7500 			minlen += sizeof(struct udphdr);
7501 			if (mbuf_pkthdr_len(m) < minlen) {
7502 				BRIDGE_HF_DROP(brhf_ip_too_small,
7503 				    __func__, __LINE__);
7504 				goto done;
7505 			}
7506 
7507 			/*
7508 			 * Allow all zero addresses for DHCP requests
7509 			 */
7510 			if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7511 			    iphdr.ip_src.s_addr != INADDR_ANY) {
7512 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7513 				    __func__, __LINE__);
7514 				goto done;
7515 			}
7516 			offset = sizeof(struct ether_header) +
7517 			    (IP_VHL_HL(iphdr.ip_vhl) << 2);
7518 			error = mbuf_copydata(m, offset,
7519 			    sizeof(struct udphdr), &udp);
7520 			if (error != 0) {
7521 				BRIDGE_HF_DROP(brhf_ip_too_small,
7522 				    __func__, __LINE__);
7523 				goto done;
7524 			}
7525 			/*
7526 			 * Either it's a Bootp/DHCP packet that we like or
7527 			 * it's a UDP packet from the host IP as source address
7528 			 */
7529 			if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7530 			    udp.uh_dport == htons(IPPORT_BOOTPS)) {
7531 				minlen += sizeof(struct dhcp);
7532 				if (mbuf_pkthdr_len(m) < minlen) {
7533 					BRIDGE_HF_DROP(brhf_ip_too_small,
7534 					    __func__, __LINE__);
7535 					goto done;
7536 				}
7537 				offset += sizeof(struct udphdr);
7538 				error = bridge_dhcp_filter(bif, m, offset);
7539 				if (error != 0) {
7540 					goto done;
7541 				}
7542 			} else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7543 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7544 				    __func__, __LINE__);
7545 				goto done;
7546 			}
7547 		} else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr ||
7548 		    bif->bif_hf_ipsrc.s_addr == INADDR_ANY) {
7549 			BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7550 			goto done;
7551 		}
7552 		/*
7553 		 * Allow only boring IP protocols
7554 		 */
7555 		if (iphdr.ip_p != IPPROTO_TCP &&
7556 		    iphdr.ip_p != IPPROTO_UDP &&
7557 		    iphdr.ip_p != IPPROTO_ICMP &&
7558 		    iphdr.ip_p != IPPROTO_ESP &&
7559 		    iphdr.ip_p != IPPROTO_AH &&
7560 		    iphdr.ip_p != IPPROTO_GRE) {
7561 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7562 			goto done;
7563 		}
7564 		bridge_hostfilter_stats.brhf_ip_ok += 1;
7565 		error = 0;
7566 	} else {
7567 		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7568 		goto done;
7569 	}
7570 done:
7571 	if (error != 0) {
7572 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7573 			if (m) {
7574 				brlog_mbuf_data(m, 0,
7575 				    sizeof(struct ether_header) +
7576 				    sizeof(struct ip));
7577 			}
7578 		}
7579 
7580 		if (m != NULL) {
7581 			m_freem(m);
7582 		}
7583 	}
7584 	return error;
7585 }
7586 
7587 /*
7588  * MAC NAT
7589  */
7590 
7591 static errno_t
7592 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7593 {
7594 	errno_t         error = 0;
7595 
7596 	BRIDGE_LOCK_ASSERT_HELD(sc);
7597 
7598 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7599 		error = EINVAL;
7600 		goto done;
7601 	}
7602 	if (sc->sc_mac_nat_bif != NULL) {
7603 		if (sc->sc_mac_nat_bif != bif) {
7604 			error = EBUSY;
7605 		}
7606 		goto done;
7607 	}
7608 	sc->sc_mac_nat_bif = bif;
7609 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7610 	bridge_mac_nat_populate_entries(sc);
7611 
7612 done:
7613 	return error;
7614 }
7615 
7616 static void
7617 bridge_mac_nat_disable(struct bridge_softc *sc)
7618 {
7619 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7620 
7621 	assert(mac_nat_bif != NULL);
7622 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7623 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7624 	sc->sc_mac_nat_bif = NULL;
7625 	return;
7626 }
7627 
7628 static void
7629 mac_nat_entry_print2(struct mac_nat_entry *mne,
7630     char *ifname, const char *msg1, const char *msg2)
7631 {
7632 	int             af;
7633 	char            etopbuf[24];
7634 	char            ntopbuf[MAX_IPv6_STR_LEN];
7635 	const char      *space;
7636 
7637 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7638 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7639 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7640 	if (msg2 == NULL) {
7641 		msg2 = "";
7642 		space = "";
7643 	} else {
7644 		space = " ";
7645 	}
7646 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7647 	    "%s %s%s%s %p (%s, %s, %s)",
7648 	    ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7649 	    ntopbuf, etopbuf);
7650 }
7651 
7652 static void
7653 mac_nat_entry_print(struct mac_nat_entry *mne,
7654     char *ifname, const char *msg)
7655 {
7656 	mac_nat_entry_print2(mne, ifname, msg, NULL);
7657 }
7658 
7659 static struct mac_nat_entry *
7660 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7661 {
7662 	struct mac_nat_entry    *mne;
7663 	struct mac_nat_entry    *ret_mne = NULL;
7664 
7665 	if (af == AF_INET) {
7666 		in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7667 
7668 		LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7669 			if (mne->mne_ip.s_addr == s_addr) {
7670 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7671 					mac_nat_entry_print(mne, sc->sc_if_xname,
7672 					    "found");
7673 				}
7674 				ret_mne = mne;
7675 				break;
7676 			}
7677 		}
7678 	} else {
7679 		const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7680 
7681 		LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7682 			if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7683 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7684 					mac_nat_entry_print(mne, sc->sc_if_xname,
7685 					    "found");
7686 				}
7687 				ret_mne = mne;
7688 				break;
7689 			}
7690 		}
7691 	}
7692 	return ret_mne;
7693 }
7694 
7695 static void
7696 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7697     struct mac_nat_entry *mne, const char *reason)
7698 {
7699 	LIST_REMOVE(mne, mne_list);
7700 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7701 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7702 	}
7703 	zfree(bridge_mne_pool, mne);
7704 	sc->sc_mne_count--;
7705 }
7706 
7707 static struct mac_nat_entry *
7708 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7709     struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7710 {
7711 	struct mac_nat_entry_list *list;
7712 	struct mac_nat_entry *mne;
7713 
7714 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7715 		sc->sc_mne_allocation_failures++;
7716 		return NULL;
7717 	}
7718 	mne = zalloc_noblock(bridge_mne_pool);
7719 	if (mne == NULL) {
7720 		sc->sc_mne_allocation_failures++;
7721 		return NULL;
7722 	}
7723 	sc->sc_mne_count++;
7724 	bzero(mne, sizeof(*mne));
7725 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7726 	mne->mne_bif = bif;
7727 	if (af == AF_INET) {
7728 		bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7729 		list = &sc->sc_mne_list;
7730 	} else {
7731 		bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7732 		mne->mne_flags |= MNE_FLAGS_IPV6;
7733 		list = &sc->sc_mne_list_v6;
7734 	}
7735 	LIST_INSERT_HEAD(list, mne, mne_list);
7736 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7737 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7738 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7739 	}
7740 	return mne;
7741 }
7742 
7743 static struct mac_nat_entry *
7744 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7745     struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7746 {
7747 	struct mac_nat_entry *mne;
7748 
7749 	mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7750 	if (mne != NULL) {
7751 		struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7752 
7753 		if (mne->mne_bif == mac_nat_bif) {
7754 			/* the MAC NAT interface takes precedence */
7755 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7756 				if (mne->mne_bif != bif) {
7757 					mac_nat_entry_print2(mne,
7758 					    sc->sc_if_xname, "reject",
7759 					    bif->bif_ifp->if_xname);
7760 				}
7761 			}
7762 		} else if (mne->mne_bif != bif) {
7763 			const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7764 
7765 			mne->mne_bif = bif;
7766 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7767 				mac_nat_entry_print2(mne,
7768 				    sc->sc_if_xname, "replaced",
7769 				    old_if);
7770 			}
7771 			bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7772 		}
7773 		mne->mne_expire = (unsigned long)net_uptime() +
7774 		    sc->sc_brttimeout;
7775 	} else {
7776 		mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7777 	}
7778 	return mne;
7779 }
7780 
7781 static void
7782 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7783     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7784 {
7785 	struct mac_nat_entry *mne;
7786 	struct mac_nat_entry *tmne;
7787 
7788 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7789 		if (bif != NULL && mne->mne_bif != bif) {
7790 			continue;
7791 		}
7792 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7793 	}
7794 }
7795 
7796 /*
7797  * bridge_mac_nat_flush_entries:
7798  *
7799  * Flush MAC NAT entries for the specified member. Flush all entries if
7800  * the member is the one that requires MAC NAT, otherwise just flush the
7801  * ones for the specified member.
7802  */
7803 static void
7804 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7805 {
7806 	struct bridge_iflist *flush_bif;
7807 
7808 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7809 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7810 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7811 }
7812 
7813 static void
7814 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7815 {
7816 	errno_t                 error;
7817 	ifnet_t                 ifp;
7818 	ifaddr_t                *list;
7819 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
7820 
7821 	assert(mac_nat_bif != NULL);
7822 	ifp = mac_nat_bif->bif_ifp;
7823 	error = ifnet_get_address_list(ifp, &list);
7824 	if (error != 0) {
7825 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7826 		    "ifnet_get_address_list(%s) failed %d",
7827 		    ifp->if_xname, error);
7828 		return;
7829 	}
7830 	for (ifaddr_t *scan = list; *scan != NULL; scan++) {
7831 		sa_family_t     af;
7832 		void            *ip;
7833 
7834 		union {
7835 			struct sockaddr         sa;
7836 			struct sockaddr_in      sin;
7837 			struct sockaddr_in6     sin6;
7838 		} u;
7839 		af = ifaddr_address_family(*scan);
7840 		switch (af) {
7841 		case AF_INET:
7842 		case AF_INET6:
7843 			error = ifaddr_address(*scan, &u.sa, sizeof(u));
7844 			if (error != 0) {
7845 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7846 				    "ifaddr_address failed %d",
7847 				    error);
7848 				break;
7849 			}
7850 			if (af == AF_INET) {
7851 				ip = (void *)&u.sin.sin_addr;
7852 			} else {
7853 				if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
7854 					/* remove scope ID */
7855 					u.sin6.sin6_addr.s6_addr16[1] = 0;
7856 				}
7857 				ip = (void *)&u.sin6.sin6_addr;
7858 			}
7859 			bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
7860 			    (uint8_t *)IF_LLADDR(ifp));
7861 			break;
7862 		default:
7863 			break;
7864 		}
7865 	}
7866 	ifnet_free_address_list(list);
7867 	return;
7868 }
7869 
7870 static void
7871 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7872     struct mac_nat_entry_list *list, unsigned long now)
7873 {
7874 	struct mac_nat_entry *mne;
7875 	struct mac_nat_entry *tmne;
7876 
7877 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7878 		if (now >= mne->mne_expire) {
7879 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7880 		}
7881 	}
7882 }
7883 
7884 static void
7885 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7886 {
7887 	if (sc->sc_mac_nat_bif == NULL) {
7888 		return;
7889 	}
7890 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7891 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7892 }
7893 
7894 static const char *
7895 get_in_out_string(boolean_t is_output)
7896 {
7897 	return is_output ? "OUT" : "IN";
7898 }
7899 
7900 /*
7901  * is_valid_arp_packet:
7902  *	Verify that this is a valid ARP packet.
7903  *
7904  *	Returns TRUE if the packet is valid, FALSE otherwise.
7905  */
7906 static boolean_t
7907 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
7908     struct ether_header **eh_p, struct ether_arp **ea_p)
7909 {
7910 	struct ether_arp *ea;
7911 	struct ether_header *eh;
7912 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7913 	boolean_t is_valid = FALSE;
7914 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7915 
7916 	if (mbuf_pkthdr_len(*data) < minlen) {
7917 		BRIDGE_LOG(LOG_DEBUG, flags,
7918 		    "ARP %s short frame %lu < %lu",
7919 		    get_in_out_string(is_output),
7920 		    mbuf_pkthdr_len(*data), minlen);
7921 		goto done;
7922 	}
7923 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7924 		BRIDGE_LOG(LOG_DEBUG, flags,
7925 		    "ARP %s size %lu mbuf_pullup fail",
7926 		    get_in_out_string(is_output),
7927 		    minlen);
7928 		*data = NULL;
7929 		goto done;
7930 	}
7931 
7932 	/* validate ARP packet */
7933 	eh = mtod(*data, struct ether_header *);
7934 	ea = (struct ether_arp *)(eh + 1);
7935 	if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
7936 		BRIDGE_LOG(LOG_DEBUG, flags,
7937 		    "ARP %s htype not ethernet",
7938 		    get_in_out_string(is_output));
7939 		goto done;
7940 	}
7941 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7942 		BRIDGE_LOG(LOG_DEBUG, flags,
7943 		    "ARP %s hlen not ethernet",
7944 		    get_in_out_string(is_output));
7945 		goto done;
7946 	}
7947 	if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
7948 		BRIDGE_LOG(LOG_DEBUG, flags,
7949 		    "ARP %s ptype not IP",
7950 		    get_in_out_string(is_output));
7951 		goto done;
7952 	}
7953 	if (ea->arp_pln != sizeof(struct in_addr)) {
7954 		BRIDGE_LOG(LOG_DEBUG, flags,
7955 		    "ARP %s plen not IP",
7956 		    get_in_out_string(is_output));
7957 		goto done;
7958 	}
7959 	is_valid = TRUE;
7960 	*ea_p = ea;
7961 	*eh_p = eh;
7962 done:
7963 	return is_valid;
7964 }
7965 
7966 static struct mac_nat_entry *
7967 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7968 {
7969 	struct ether_arp        *ea;
7970 	struct ether_header     *eh;
7971 	struct mac_nat_entry    *mne = NULL;
7972 	u_short                 op;
7973 	struct in_addr          tpa;
7974 
7975 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7976 		goto done;
7977 	}
7978 	op = ntohs(ea->arp_op);
7979 	switch (op) {
7980 	case ARPOP_REQUEST:
7981 	case ARPOP_REPLY:
7982 		/* only care about REQUEST and REPLY */
7983 		break;
7984 	default:
7985 		goto done;
7986 	}
7987 
7988 	/* check the target IP address for a NAT entry */
7989 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7990 	if (tpa.s_addr != 0) {
7991 		mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
7992 	}
7993 	if (mne != NULL) {
7994 		if (op == ARPOP_REPLY) {
7995 			/* translate the MAC address */
7996 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7997 				char    mac_src[24];
7998 				char    mac_dst[24];
7999 
8000 				ether_ntop(mac_src, sizeof(mac_src),
8001 				    ea->arp_tha);
8002 				ether_ntop(mac_dst, sizeof(mac_dst),
8003 				    mne->mne_mac);
8004 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8005 				    "%s %s ARP %s -> %s",
8006 				    sc->sc_if_xname,
8007 				    mne->mne_bif->bif_ifp->if_xname,
8008 				    mac_src, mac_dst);
8009 			}
8010 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
8011 		}
8012 	} else {
8013 		/* handle conflicting ARP (sender matches mne) */
8014 		struct in_addr spa;
8015 
8016 		bcopy(ea->arp_spa, &spa, sizeof(spa));
8017 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
8018 			/* check the source IP for a NAT entry */
8019 			mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
8020 		}
8021 	}
8022 
8023 done:
8024 	return mne;
8025 }
8026 
8027 static boolean_t
8028 bridge_mac_nat_arp_output(struct bridge_softc *sc,
8029     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8030 {
8031 	struct ether_arp        *ea;
8032 	struct ether_header     *eh;
8033 	struct in_addr          ip;
8034 	struct mac_nat_entry    *mne = NULL;
8035 	u_short                 op;
8036 	boolean_t               translate = FALSE;
8037 
8038 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
8039 		goto done;
8040 	}
8041 	op = ntohs(ea->arp_op);
8042 	switch (op) {
8043 	case ARPOP_REQUEST:
8044 	case ARPOP_REPLY:
8045 		/* only care about REQUEST and REPLY */
8046 		break;
8047 	default:
8048 		goto done;
8049 	}
8050 
8051 	bcopy(ea->arp_spa, &ip, sizeof(ip));
8052 	if (ip.s_addr == 0) {
8053 		goto done;
8054 	}
8055 	/* XXX validate IP address: no multicast/broadcast */
8056 	mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
8057 	if (mnr != NULL && mne != NULL) {
8058 		/* record the offset to do the replacement */
8059 		translate = TRUE;
8060 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8061 	}
8062 
8063 done:
8064 	return translate;
8065 }
8066 
8067 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
8068 	                         + sizeof(struct ip))
8069 static struct ether_header *
8070 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8071 {
8072 	struct ether_header     *eh = NULL;
8073 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8074 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
8075 
8076 	if (mbuf_pkthdr_len(*data) < minlen) {
8077 		BRIDGE_LOG(LOG_DEBUG, flags,
8078 		    "IP %s short frame %lu < %lu",
8079 		    get_in_out_string(is_output),
8080 		    mbuf_pkthdr_len(*data), minlen);
8081 		goto done;
8082 	}
8083 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8084 		BRIDGE_LOG(LOG_DEBUG, flags,
8085 		    "IP %s size %lu mbuf_pullup fail",
8086 		    get_in_out_string(is_output),
8087 		    minlen);
8088 		*data = NULL;
8089 		goto done;
8090 	}
8091 	eh = mtod(*data, struct ether_header *);
8092 done:
8093 	return eh;
8094 }
8095 
8096 static bool
8097 is_broadcast_ip_packet(mbuf_t *data)
8098 {
8099 	struct ether_header     *eh;
8100 	uint16_t                ether_type;
8101 	bool                    is_broadcast = FALSE;
8102 
8103 	eh = mtod(*data, struct ether_header *);
8104 	ether_type = ntohs(eh->ether_type);
8105 	switch (ether_type) {
8106 	case ETHERTYPE_IP:
8107 		eh = get_ether_ip_header(data, FALSE);
8108 		if (eh != NULL) {
8109 			struct in_addr  dst;
8110 			struct ip       *iphdr;
8111 
8112 			iphdr = (struct ip *)(void *)(eh + 1);
8113 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8114 			is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8115 		}
8116 		break;
8117 	default:
8118 		break;
8119 	}
8120 	return is_broadcast;
8121 }
8122 
8123 static struct mac_nat_entry *
8124 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8125 {
8126 	struct in_addr          dst;
8127 	struct ether_header     *eh;
8128 	struct ip               *iphdr;
8129 	struct mac_nat_entry    *mne = NULL;
8130 
8131 	eh = get_ether_ip_header(data, FALSE);
8132 	if (eh == NULL) {
8133 		goto done;
8134 	}
8135 	iphdr = (struct ip *)(void *)(eh + 1);
8136 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8137 	/* XXX validate IP address */
8138 	if (dst.s_addr == 0) {
8139 		goto done;
8140 	}
8141 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
8142 done:
8143 	return mne;
8144 }
8145 
8146 static void
8147 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8148     struct bridge_iflist *bif, mbuf_t m,
8149     uint8_t ip_header_len, struct mac_nat_record *mnr)
8150 {
8151 	uint16_t        dp_flags;
8152 	errno_t         error;
8153 	size_t          offset;
8154 	struct udphdr   udphdr;
8155 
8156 	/* copy the UDP header */
8157 	offset = sizeof(struct ether_header) + ip_header_len;
8158 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8159 	if (error != 0) {
8160 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8161 		    "mbuf_copydata udphdr failed %d",
8162 		    error);
8163 		return;
8164 	}
8165 	if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8166 	    ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8167 		/* not a BOOTP/DHCP packet */
8168 		return;
8169 	}
8170 	/* check whether the broadcast bit is already set */
8171 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8172 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8173 	if (error != 0) {
8174 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8175 		    "mbuf_copydata dp_flags failed %d",
8176 		    error);
8177 		return;
8178 	}
8179 	if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8180 		/* it's already set, nothing to do */
8181 		return;
8182 	}
8183 	/* broadcast bit needs to be set */
8184 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8185 	mnr->mnr_ip_header_len = ip_header_len;
8186 	if (udphdr.uh_sum != 0) {
8187 		uint16_t        delta;
8188 
8189 		/* adjust checksum to take modified dp_flags into account */
8190 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8191 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8192 	}
8193 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8194 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8195 	    sc->sc_if_xname,
8196 	    bif->bif_ifp->if_xname,
8197 	    ntohs(mnr->mnr_ip_dhcp_flags),
8198 	    ntohs(mnr->mnr_ip_udp_csum));
8199 	return;
8200 }
8201 
8202 static boolean_t
8203 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8204     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8205 {
8206 #pragma unused(mnr)
8207 	struct ether_header     *eh;
8208 	struct in_addr          ip;
8209 	struct ip               *iphdr;
8210 	uint8_t                 ip_header_len;
8211 	struct mac_nat_entry    *mne = NULL;
8212 	boolean_t               translate = FALSE;
8213 
8214 	eh = get_ether_ip_header(data, TRUE);
8215 	if (eh == NULL) {
8216 		goto done;
8217 	}
8218 	iphdr = (struct ip *)(void *)(eh + 1);
8219 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8220 	if (ip_header_len < sizeof(ip)) {
8221 		/* bogus IP header */
8222 		goto done;
8223 	}
8224 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8225 	/* XXX validate the source address */
8226 	if (ip.s_addr != 0) {
8227 		mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
8228 		    eh->ether_shost);
8229 	}
8230 	if (mnr != NULL) {
8231 		if (iphdr->ip_p == IPPROTO_UDP) {
8232 			/* handle DHCP must broadcast */
8233 			bridge_mac_nat_udp_output(sc, bif, *data,
8234 			    ip_header_len, mnr);
8235 		}
8236 		translate = TRUE;
8237 	}
8238 done:
8239 	return translate;
8240 }
8241 
8242 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8243 	                         + sizeof(struct ip6_hdr))
8244 static struct ether_header *
8245 get_ether_ipv6_header(mbuf_t *data, boolean_t is_output)
8246 {
8247 	struct ether_header     *eh = NULL;
8248 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8249 	size_t          minlen = ETHER_IPV6_HEADER_LEN;
8250 
8251 	if (mbuf_pkthdr_len(*data) < minlen) {
8252 		BRIDGE_LOG(LOG_DEBUG, flags,
8253 		    "IP %s short frame %lu < %lu",
8254 		    get_in_out_string(is_output),
8255 		    mbuf_pkthdr_len(*data), minlen);
8256 		goto done;
8257 	}
8258 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8259 		BRIDGE_LOG(LOG_DEBUG, flags,
8260 		    "IP %s size %lu mbuf_pullup fail",
8261 		    get_in_out_string(is_output),
8262 		    minlen);
8263 		*data = NULL;
8264 		goto done;
8265 	}
8266 	eh = mtod(*data, struct ether_header *);
8267 done:
8268 	return eh;
8269 }
8270 
8271 #include <netinet/icmp6.h>
8272 #include <netinet6/nd6.h>
8273 
8274 #define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8275 
8276 static void
8277 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc, struct bridge_iflist *bif,
8278     mbuf_t *data, struct ether_header *eh,
8279     struct ip6_hdr *ip6h, struct in6_addr *saddrp, struct mac_nat_record *mnr)
8280 {
8281 	struct icmp6_hdr *icmp6;
8282 	unsigned int    icmp6len;
8283 	int             lladdrlen = 0;
8284 	char            *lladdr = NULL;
8285 	mbuf_t          m = *data;
8286 	unsigned int    off = sizeof(*ip6h);
8287 
8288 	icmp6len = m->m_pkthdr.len - sizeof(*eh) - off;
8289 	if (icmp6len < sizeof(*icmp6)) {
8290 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8291 		    "short packet %d < %lu",
8292 		    icmp6len, sizeof(*icmp6));
8293 		return;
8294 	}
8295 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8296 	switch (icmp6->icmp6_type) {
8297 	case ND_NEIGHBOR_SOLICIT: {
8298 		struct nd_neighbor_solicit *nd_ns;
8299 		union nd_opts ndopts;
8300 		boolean_t is_dad_probe;
8301 		struct in6_addr taddr;
8302 
8303 		if (icmp6len < sizeof(*nd_ns)) {
8304 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8305 			    "short nd_ns %d < %lu",
8306 			    icmp6len, sizeof(*nd_ns));
8307 			return;
8308 		}
8309 
8310 		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8311 		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8312 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8313 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8314 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8315 			    "invalid target ignored");
8316 			return;
8317 		}
8318 		/* parse options */
8319 		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8320 		if (nd6_options(&ndopts) < 0) {
8321 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8322 			    "invalid ND6 NS option");
8323 			return;
8324 		}
8325 		if (ndopts.nd_opts_src_lladdr != NULL) {
8326 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8327 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8328 		}
8329 		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8330 		if (lladdr != NULL) {
8331 			if (is_dad_probe) {
8332 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8333 				    "bad ND6 DAD packet");
8334 				return;
8335 			}
8336 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8337 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8338 				    "source lladdrlen %d != %lu",
8339 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8340 				return;
8341 			}
8342 			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
8343 			    (uintptr_t)eh);
8344 			mnr->mnr_ip6_icmp6_len = icmp6len;
8345 			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8346 			mnr->mnr_ip6_header_len = off;
8347 		}
8348 		if (is_dad_probe) {
8349 			/* node is trying use taddr, create an mne using taddr */
8350 			*saddrp = taddr;
8351 		}
8352 		break;
8353 	}
8354 	case ND_NEIGHBOR_ADVERT: {
8355 		struct nd_neighbor_advert *nd_na;
8356 		union nd_opts ndopts;
8357 		struct in6_addr taddr;
8358 
8359 
8360 		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8361 
8362 		if (icmp6len < sizeof(*nd_na)) {
8363 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8364 			    "short nd_na %d < %lu",
8365 			    icmp6len, sizeof(*nd_na));
8366 			return;
8367 		}
8368 
8369 		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8370 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8371 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8372 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8373 			    "invalid target ignored");
8374 			return;
8375 		}
8376 		/* parse options */
8377 		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8378 		if (nd6_options(&ndopts) < 0) {
8379 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8380 			    "invalid ND6 NA option");
8381 			return;
8382 		}
8383 		if (ndopts.nd_opts_tgt_lladdr == NULL) {
8384 			/* target linklayer, nothing to do */
8385 			return;
8386 		}
8387 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
8388 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
8389 		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8390 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8391 			    "target lladdrlen %d != %lu",
8392 			    lladdrlen, ETHER_ND_LLADDR_LEN);
8393 			return;
8394 		}
8395 		mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr - (uintptr_t)eh);
8396 		mnr->mnr_ip6_icmp6_len = icmp6len;
8397 		mnr->mnr_ip6_header_len = off;
8398 		mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8399 		break;
8400 	}
8401 	case ND_ROUTER_SOLICIT: {
8402 		struct nd_router_solicit *nd_rs;
8403 		union nd_opts ndopts;
8404 
8405 		if (icmp6len < sizeof(*nd_rs)) {
8406 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8407 			    "short nd_rs %d < %lu",
8408 			    icmp6len, sizeof(*nd_rs));
8409 			return;
8410 		}
8411 		nd_rs = (struct nd_router_solicit *)(void *)icmp6;
8412 
8413 		/* parse options */
8414 		nd6_option_init(nd_rs + 1, icmp6len - sizeof(*nd_rs), &ndopts);
8415 		if (nd6_options(&ndopts) < 0) {
8416 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8417 			    "invalid ND6 RS option");
8418 			return;
8419 		}
8420 		if (ndopts.nd_opts_src_lladdr != NULL) {
8421 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8422 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8423 		}
8424 		if (lladdr != NULL) {
8425 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8426 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8427 				    "source lladdrlen %d != %lu",
8428 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8429 				return;
8430 			}
8431 			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
8432 			    (uintptr_t)eh);
8433 			mnr->mnr_ip6_icmp6_len = icmp6len;
8434 			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8435 			mnr->mnr_ip6_header_len = off;
8436 		}
8437 		break;
8438 	}
8439 	default:
8440 		break;
8441 	}
8442 	if (mnr->mnr_ip6_lladdr_offset != 0 &&
8443 	    BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8444 		const char *str;
8445 
8446 		switch (mnr->mnr_ip6_icmp6_type) {
8447 		case ND_ROUTER_SOLICIT:
8448 			str = "ROUTER SOLICIT";
8449 			break;
8450 		case ND_NEIGHBOR_ADVERT:
8451 			str = "NEIGHBOR ADVERT";
8452 			break;
8453 		case ND_NEIGHBOR_SOLICIT:
8454 			str = "NEIGHBOR SOLICIT";
8455 			break;
8456 		default:
8457 			str = "";
8458 			break;
8459 		}
8460 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8461 		    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8462 		    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8463 		    mnr->mnr_ip6_header_len,
8464 		    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8465 	}
8466 }
8467 
8468 static struct mac_nat_entry *
8469 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8470 {
8471 	struct in6_addr         dst;
8472 	struct ether_header     *eh;
8473 	struct ip6_hdr          *ip6h;
8474 	struct mac_nat_entry    *mne = NULL;
8475 
8476 	eh = get_ether_ipv6_header(data, FALSE);
8477 	if (eh == NULL) {
8478 		goto done;
8479 	}
8480 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8481 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8482 	/* XXX validate IPv6 address */
8483 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8484 		goto done;
8485 	}
8486 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8487 
8488 done:
8489 	return mne;
8490 }
8491 
8492 static boolean_t
8493 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8494     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8495 {
8496 	struct ether_header     *eh;
8497 	struct ip6_hdr          *ip6h;
8498 	struct in6_addr         saddr;
8499 	boolean_t               translate;
8500 
8501 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8502 	eh = get_ether_ipv6_header(data, TRUE);
8503 	if (eh == NULL) {
8504 		translate = FALSE;
8505 		goto done;
8506 	}
8507 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8508 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8509 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8510 		bridge_mac_nat_icmpv6_output(sc, bif, data,
8511 		    eh, ip6h, &saddr, mnr);
8512 	}
8513 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8514 		goto done;
8515 	}
8516 	(void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8517 	    eh->ether_shost);
8518 
8519 done:
8520 	return translate;
8521 }
8522 
8523 /*
8524  * bridge_mac_nat_input:
8525  * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8526  * This interface is the "external" interface with respect to NAT.
8527  * The interface is only capable of receiving a single MAC address
8528  * (e.g. a Wi-Fi STA interface).
8529  *
8530  * When a packet arrives on the external interface, look up the destination
8531  * IP address in the mac_nat_entry table. If there is a match, *is_input
8532  * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8533  * is set to FALSE and translate the MAC address if necessary.
8534  *
8535  * Returns:
8536  * The internal interface to direct the packet to, or NULL if the packet
8537  * should not be redirected.
8538  *
8539  * *data may be updated to point at a different mbuf chain, or set to NULL
8540  * if the chain was deallocated during processing.
8541  */
8542 static ifnet_t
8543 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8544     boolean_t *is_input)
8545 {
8546 	ifnet_t                 dst_if = NULL;
8547 	struct ether_header     *eh;
8548 	uint16_t                ether_type;
8549 	boolean_t               is_unicast;
8550 	mbuf_t                  m = *data;
8551 	struct mac_nat_entry    *mne = NULL;
8552 
8553 	BRIDGE_LOCK_ASSERT_HELD(sc);
8554 	*is_input = FALSE;
8555 	assert(sc->sc_mac_nat_bif != NULL);
8556 	is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8557 	eh = mtod(m, struct ether_header *);
8558 	ether_type = ntohs(eh->ether_type);
8559 	switch (ether_type) {
8560 	case ETHERTYPE_ARP:
8561 		mne = bridge_mac_nat_arp_input(sc, data);
8562 		break;
8563 	case ETHERTYPE_IP:
8564 		if (is_unicast) {
8565 			mne = bridge_mac_nat_ip_input(sc, data);
8566 		}
8567 		break;
8568 	case ETHERTYPE_IPV6:
8569 		if (is_unicast) {
8570 			mne = bridge_mac_nat_ipv6_input(sc, data);
8571 		}
8572 		break;
8573 	default:
8574 		break;
8575 	}
8576 	if (mne != NULL) {
8577 		if (is_unicast) {
8578 			if (m != *data) {
8579 				/* it may have changed */
8580 				eh = mtod(*data, struct ether_header *);
8581 			}
8582 			bcopy(mne->mne_mac, eh->ether_dhost,
8583 			    sizeof(eh->ether_dhost));
8584 		}
8585 		dst_if = mne->mne_bif->bif_ifp;
8586 		*is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8587 	}
8588 	return dst_if;
8589 }
8590 
8591 /*
8592  * bridge_mac_nat_output:
8593  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8594  * from the interface 'bif'.
8595  *
8596  * Create a mac_nat_entry containing the source IP address and MAC address
8597  * from the packet. Populate a mac_nat_record with information detailing
8598  * how to translate the packet. Translation takes place later when
8599  * the bridge lock is no longer held.
8600  *
8601  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8602  * interface is generating an output packet. No translation is required in this
8603  * case, we just record the IP address used to prevent another bif from
8604  * claiming our IP address.
8605  *
8606  * Returns:
8607  * TRUE if the packet should be translated (*mnr updated as well),
8608  * FALSE otherwise.
8609  *
8610  * *data may be updated to point at a different mbuf chain or NULL if
8611  * the chain was deallocated during processing.
8612  */
8613 
8614 static boolean_t
8615 bridge_mac_nat_output(struct bridge_softc *sc,
8616     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8617 {
8618 	struct ether_header     *eh;
8619 	uint16_t                ether_type;
8620 	boolean_t               translate = FALSE;
8621 
8622 	BRIDGE_LOCK_ASSERT_HELD(sc);
8623 	assert(sc->sc_mac_nat_bif != NULL);
8624 
8625 	eh = mtod(*data, struct ether_header *);
8626 	ether_type = ntohs(eh->ether_type);
8627 	if (mnr != NULL) {
8628 		bzero(mnr, sizeof(*mnr));
8629 		mnr->mnr_ether_type = ether_type;
8630 	}
8631 	switch (ether_type) {
8632 	case ETHERTYPE_ARP:
8633 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8634 		break;
8635 	case ETHERTYPE_IP:
8636 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8637 		break;
8638 	case ETHERTYPE_IPV6:
8639 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8640 		break;
8641 	default:
8642 		break;
8643 	}
8644 	return translate;
8645 }
8646 
8647 static void
8648 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8649     const caddr_t eaddr)
8650 {
8651 	errno_t                 error;
8652 
8653 	if (mnr->mnr_arp_offset == 0) {
8654 		return;
8655 	}
8656 	/* replace the source hardware address */
8657 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8658 	    ETHER_ADDR_LEN, eaddr,
8659 	    MBUF_DONTWAIT);
8660 	if (error != 0) {
8661 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8662 		    "mbuf_copyback failed");
8663 		m_freem(*data);
8664 		*data = NULL;
8665 	}
8666 	return;
8667 }
8668 
8669 static void
8670 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8671 {
8672 	errno_t         error;
8673 	size_t          offset;
8674 
8675 	if (mnr->mnr_ip_header_len == 0) {
8676 		return;
8677 	}
8678 	/* update the UDP checksum */
8679 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8680 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8681 	    sizeof(mnr->mnr_ip_udp_csum),
8682 	    &mnr->mnr_ip_udp_csum,
8683 	    MBUF_DONTWAIT);
8684 	if (error != 0) {
8685 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8686 		    "mbuf_copyback uh_sum failed");
8687 		m_freem(*data);
8688 		*data = NULL;
8689 	}
8690 	/* update the DHCP must broadcast flag */
8691 	offset += sizeof(struct udphdr);
8692 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8693 	    sizeof(mnr->mnr_ip_dhcp_flags),
8694 	    &mnr->mnr_ip_dhcp_flags,
8695 	    MBUF_DONTWAIT);
8696 	if (error != 0) {
8697 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8698 		    "mbuf_copyback dp_flags failed");
8699 		m_freem(*data);
8700 		*data = NULL;
8701 	}
8702 }
8703 
8704 static void
8705 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8706     const caddr_t eaddr)
8707 {
8708 	uint16_t        cksum;
8709 	errno_t         error;
8710 	mbuf_t          m = *data;
8711 
8712 	if (mnr->mnr_ip6_header_len == 0) {
8713 		return;
8714 	}
8715 	switch (mnr->mnr_ip6_icmp6_type) {
8716 	case ND_ROUTER_SOLICIT:
8717 	case ND_NEIGHBOR_SOLICIT:
8718 	case ND_NEIGHBOR_ADVERT:
8719 		if (mnr->mnr_ip6_lladdr_offset == 0) {
8720 			/* nothing to do */
8721 			return;
8722 		}
8723 		break;
8724 	default:
8725 		return;
8726 	}
8727 
8728 	/*
8729 	 * replace the lladdr
8730 	 */
8731 	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8732 	    ETHER_ADDR_LEN, eaddr,
8733 	    MBUF_DONTWAIT);
8734 	if (error != 0) {
8735 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8736 		    "mbuf_copyback lladdr failed");
8737 		m_freem(m);
8738 		*data = NULL;
8739 		return;
8740 	}
8741 
8742 	/*
8743 	 * recompute the icmp6 checksum
8744 	 */
8745 
8746 	/* skip past the ethernet header */
8747 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8748 	    mbuf_len(m) - ETHER_HDR_LEN);
8749 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8750 
8751 #define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
8752 	/* set the checksum to zero */
8753 	cksum = 0;
8754 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8755 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8756 	if (error != 0) {
8757 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8758 		    "mbuf_copyback cksum=0 failed");
8759 		m_freem(m);
8760 		*data = NULL;
8761 		return;
8762 	}
8763 	/* compute and set the new checksum */
8764 	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8765 	    mnr->mnr_ip6_icmp6_len);
8766 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8767 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8768 	if (error != 0) {
8769 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8770 		    "mbuf_copyback cksum failed");
8771 		m_freem(m);
8772 		*data = NULL;
8773 		return;
8774 	}
8775 	/* restore the ethernet header */
8776 	mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
8777 	    mbuf_len(m) + ETHER_HDR_LEN);
8778 	mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
8779 	return;
8780 }
8781 
8782 static void
8783 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8784     const caddr_t eaddr)
8785 {
8786 	struct ether_header     *eh;
8787 
8788 	/* replace the source ethernet address with the single MAC */
8789 	eh = mtod(*data, struct ether_header *);
8790 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8791 	switch (mnr->mnr_ether_type) {
8792 	case ETHERTYPE_ARP:
8793 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
8794 		break;
8795 
8796 	case ETHERTYPE_IP:
8797 		bridge_mac_nat_ip_translate(data, mnr);
8798 		break;
8799 
8800 	case ETHERTYPE_IPV6:
8801 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8802 		break;
8803 
8804 	default:
8805 		break;
8806 	}
8807 	return;
8808 }
8809 
8810 /*
8811  * bridge packet filtering
8812  */
8813 
8814 /*
8815  * Perform basic checks on header size since
8816  * pfil assumes ip_input has already processed
8817  * it for it.  Cut-and-pasted from ip_input.c.
8818  * Given how simple the IPv6 version is,
8819  * does the IPv4 version really need to be
8820  * this complicated?
8821  *
8822  * XXX Should we update ipstat here, or not?
8823  * XXX Right now we update ipstat but not
8824  * XXX csum_counter.
8825  */
8826 static int
8827 bridge_ip_checkbasic(struct mbuf **mp)
8828 {
8829 	struct mbuf *m = *mp;
8830 	struct ip *ip;
8831 	int len, hlen;
8832 	u_short sum;
8833 
8834 	if (*mp == NULL) {
8835 		return -1;
8836 	}
8837 
8838 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8839 		/* max_linkhdr is already rounded up to nearest 4-byte */
8840 		if ((m = m_copyup(m, sizeof(struct ip),
8841 		    max_linkhdr)) == NULL) {
8842 			/* XXXJRT new stat, please */
8843 			ipstat.ips_toosmall++;
8844 			goto bad;
8845 		}
8846 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8847 		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8848 			ipstat.ips_toosmall++;
8849 			goto bad;
8850 		}
8851 	}
8852 	ip = mtod(m, struct ip *);
8853 	if (ip == NULL) {
8854 		goto bad;
8855 	}
8856 
8857 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8858 		ipstat.ips_badvers++;
8859 		goto bad;
8860 	}
8861 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8862 	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
8863 		ipstat.ips_badhlen++;
8864 		goto bad;
8865 	}
8866 	if (hlen > m->m_len) {
8867 		if ((m = m_pullup(m, hlen)) == 0) {
8868 			ipstat.ips_badhlen++;
8869 			goto bad;
8870 		}
8871 		ip = mtod(m, struct ip *);
8872 		if (ip == NULL) {
8873 			goto bad;
8874 		}
8875 	}
8876 
8877 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8878 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8879 	} else {
8880 		if (hlen == sizeof(struct ip)) {
8881 			sum = in_cksum_hdr(ip);
8882 		} else {
8883 			sum = in_cksum(m, hlen);
8884 		}
8885 	}
8886 	if (sum) {
8887 		ipstat.ips_badsum++;
8888 		goto bad;
8889 	}
8890 
8891 	/* Retrieve the packet length. */
8892 	len = ntohs(ip->ip_len);
8893 
8894 	/*
8895 	 * Check for additional length bogosity
8896 	 */
8897 	if (len < hlen) {
8898 		ipstat.ips_badlen++;
8899 		goto bad;
8900 	}
8901 
8902 	/*
8903 	 * Check that the amount of data in the buffers
8904 	 * is as at least much as the IP header would have us expect.
8905 	 * Drop packet if shorter than we expect.
8906 	 */
8907 	if (m->m_pkthdr.len < len) {
8908 		ipstat.ips_tooshort++;
8909 		goto bad;
8910 	}
8911 
8912 	/* Checks out, proceed */
8913 	*mp = m;
8914 	return 0;
8915 
8916 bad:
8917 	*mp = m;
8918 	return -1;
8919 }
8920 
8921 /*
8922  * Same as above, but for IPv6.
8923  * Cut-and-pasted from ip6_input.c.
8924  * XXX Should we update ip6stat, or not?
8925  */
8926 static int
8927 bridge_ip6_checkbasic(struct mbuf **mp)
8928 {
8929 	struct mbuf *m = *mp;
8930 	struct ip6_hdr *ip6;
8931 
8932 	/*
8933 	 * If the IPv6 header is not aligned, slurp it up into a new
8934 	 * mbuf with space for link headers, in the event we forward
8935 	 * it.  Otherwise, if it is aligned, make sure the entire base
8936 	 * IPv6 header is in the first mbuf of the chain.
8937 	 */
8938 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8939 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8940 		/* max_linkhdr is already rounded up to nearest 4-byte */
8941 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8942 		    max_linkhdr)) == NULL) {
8943 			/* XXXJRT new stat, please */
8944 			ip6stat.ip6s_toosmall++;
8945 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8946 			goto bad;
8947 		}
8948 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8949 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8950 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8951 			ip6stat.ip6s_toosmall++;
8952 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8953 			goto bad;
8954 		}
8955 	}
8956 
8957 	ip6 = mtod(m, struct ip6_hdr *);
8958 
8959 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8960 		ip6stat.ip6s_badvers++;
8961 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8962 		goto bad;
8963 	}
8964 
8965 	/* Checks out, proceed */
8966 	*mp = m;
8967 	return 0;
8968 
8969 bad:
8970 	*mp = m;
8971 	return -1;
8972 }
8973 
8974 /*
8975  * the PF routines expect to be called from ip_input, so we
8976  * need to do and undo here some of the same processing.
8977  *
8978  * XXX : this is heavily inspired on bridge_pfil()
8979  */
8980 static int
8981 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
8982     int input)
8983 {
8984 	/*
8985 	 * XXX : mpetit : heavily inspired by bridge_pfil()
8986 	 */
8987 
8988 	int snap, error, i, hlen;
8989 	struct ether_header *eh1, eh2;
8990 	struct ip *ip;
8991 	struct llc llc1;
8992 	u_int16_t ether_type;
8993 
8994 	snap = 0;
8995 	error = -1;     /* Default error if not error == 0 */
8996 
8997 	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
8998 		return 0; /* filtering is disabled */
8999 	}
9000 	i = min((*mp)->m_pkthdr.len, max_protohdr);
9001 	if ((*mp)->m_len < i) {
9002 		*mp = m_pullup(*mp, i);
9003 		if (*mp == NULL) {
9004 			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
9005 			return -1;
9006 		}
9007 	}
9008 
9009 	eh1 = mtod(*mp, struct ether_header *);
9010 	ether_type = ntohs(eh1->ether_type);
9011 
9012 	/*
9013 	 * Check for SNAP/LLC.
9014 	 */
9015 	if (ether_type < ETHERMTU) {
9016 		struct llc *llc2 = (struct llc *)(eh1 + 1);
9017 
9018 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
9019 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
9020 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
9021 		    llc2->llc_control == LLC_UI) {
9022 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
9023 			snap = 1;
9024 		}
9025 	}
9026 
9027 	/*
9028 	 * If we're trying to filter bridge traffic, don't look at anything
9029 	 * other than IP and ARP traffic.  If the filter doesn't understand
9030 	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
9031 	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
9032 	 * but of course we don't have an AppleTalk filter to begin with.
9033 	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
9034 	 * ARP traffic.)
9035 	 */
9036 	switch (ether_type) {
9037 	case ETHERTYPE_ARP:
9038 	case ETHERTYPE_REVARP:
9039 		return 0;         /* Automatically pass */
9040 
9041 	case ETHERTYPE_IP:
9042 	case ETHERTYPE_IPV6:
9043 		break;
9044 	default:
9045 		/*
9046 		 * Check to see if the user wants to pass non-ip
9047 		 * packets, these will not be checked by pf and
9048 		 * passed unconditionally so the default is to drop.
9049 		 */
9050 		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
9051 			goto bad;
9052 		}
9053 		break;
9054 	}
9055 
9056 	/* Strip off the Ethernet header and keep a copy. */
9057 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
9058 	m_adj(*mp, ETHER_HDR_LEN);
9059 
9060 	/* Strip off snap header, if present */
9061 	if (snap) {
9062 		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9063 		m_adj(*mp, sizeof(struct llc));
9064 	}
9065 
9066 	/*
9067 	 * Check the IP header for alignment and errors
9068 	 */
9069 	switch (ether_type) {
9070 	case ETHERTYPE_IP:
9071 		error = bridge_ip_checkbasic(mp);
9072 		break;
9073 	case ETHERTYPE_IPV6:
9074 		error = bridge_ip6_checkbasic(mp);
9075 		break;
9076 	default:
9077 		error = 0;
9078 		break;
9079 	}
9080 	if (error) {
9081 		goto bad;
9082 	}
9083 
9084 	error = 0;
9085 
9086 	/*
9087 	 * Run the packet through pf rules
9088 	 */
9089 	switch (ether_type) {
9090 	case ETHERTYPE_IP:
9091 		/*
9092 		 * before calling the firewall, swap fields the same as
9093 		 * IP does. here we assume the header is contiguous
9094 		 */
9095 		ip = mtod(*mp, struct ip *);
9096 
9097 		ip->ip_len = ntohs(ip->ip_len);
9098 		ip->ip_off = ntohs(ip->ip_off);
9099 
9100 		if (ifp != NULL) {
9101 			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9102 		}
9103 
9104 		if (*mp == NULL || error != 0) { /* filter may consume */
9105 			break;
9106 		}
9107 
9108 		/* Recalculate the ip checksum and restore byte ordering */
9109 		ip = mtod(*mp, struct ip *);
9110 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9111 		if (hlen < (int)sizeof(struct ip)) {
9112 			goto bad;
9113 		}
9114 		if (hlen > (*mp)->m_len) {
9115 			if ((*mp = m_pullup(*mp, hlen)) == 0) {
9116 				goto bad;
9117 			}
9118 			ip = mtod(*mp, struct ip *);
9119 			if (ip == NULL) {
9120 				goto bad;
9121 			}
9122 		}
9123 		ip->ip_len = htons(ip->ip_len);
9124 		ip->ip_off = htons(ip->ip_off);
9125 		ip->ip_sum = 0;
9126 		if (hlen == sizeof(struct ip)) {
9127 			ip->ip_sum = in_cksum_hdr(ip);
9128 		} else {
9129 			ip->ip_sum = in_cksum(*mp, hlen);
9130 		}
9131 		break;
9132 
9133 	case ETHERTYPE_IPV6:
9134 		if (ifp != NULL) {
9135 			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9136 		}
9137 
9138 		if (*mp == NULL || error != 0) { /* filter may consume */
9139 			break;
9140 		}
9141 		break;
9142 	default:
9143 		error = 0;
9144 		break;
9145 	}
9146 
9147 	if (*mp == NULL) {
9148 		return error;
9149 	}
9150 	if (error != 0) {
9151 		goto bad;
9152 	}
9153 
9154 	error = -1;
9155 
9156 	/*
9157 	 * Finally, put everything back the way it was and return
9158 	 */
9159 	if (snap) {
9160 		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9161 		if (*mp == NULL) {
9162 			return error;
9163 		}
9164 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9165 	}
9166 
9167 	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9168 	if (*mp == NULL) {
9169 		return error;
9170 	}
9171 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9172 
9173 	return 0;
9174 
9175 bad:
9176 	m_freem(*mp);
9177 	*mp = NULL;
9178 	return error;
9179 }
9180 
9181 /*
9182  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9183  * All rights reserved.
9184  *
9185  * Redistribution and use in source and binary forms, with or without
9186  * modification, are permitted provided that the following conditions
9187  * are met:
9188  *   1. Redistributions of source code must retain the above copyright
9189  *      notice, this list of conditions and the following disclaimer.
9190  *   2. Redistributions in binary form must reproduce the above copyright
9191  *      notice, this list of conditions and the following disclaimer in the
9192  *      documentation and/or other materials provided with the distribution.
9193  *
9194  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9195  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9196  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9197  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9198  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9199  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9200  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9201  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9202  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9203  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9204  * SUCH DAMAGE.
9205  */
9206 
9207 /*
9208  * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9209  *
9210  * Create a queue of packets/segments which fit the given mss + hdr_len.
9211  * m0 points to mbuf chain to be segmented.
9212  * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
9213  * into segments of length MSS bytes and then copy the first hdr_len bytes
9214  * from m0 at the top of each segment.
9215  * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9216  * in each segment after the first hdr_len bytes
9217  *
9218  * Return the new queue with the segments on success, NULL on failure.
9219  * (the mbuf queue is freed in this case).
9220  * nsegs contains the number of segments generated.
9221  */
9222 
9223 static struct mbuf *
9224 m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
9225     char * hdr2_buf, int hdr2_len)
9226 {
9227 	int off = 0, n, firstlen;
9228 	struct mbuf **mnext, *mseg;
9229 	int total_len = m0->m_pkthdr.len;
9230 
9231 	/*
9232 	 * Segmentation useless
9233 	 */
9234 	if (total_len <= hdr_len + mss) {
9235 		return m0;
9236 	}
9237 
9238 	if (hdr2_buf == NULL || hdr2_len <= 0) {
9239 		hdr2_buf = NULL;
9240 		hdr2_len = 0;
9241 	}
9242 
9243 	off = hdr_len + mss;
9244 	firstlen = mss; /* first segment stored in the original mbuf */
9245 
9246 	mnext = &(m0->m_nextpkt); /* pointer to next packet */
9247 
9248 	for (n = 1; off < total_len; off += mss, n++) {
9249 		struct mbuf *m;
9250 		/*
9251 		 * Copy the header from the original packet
9252 		 * and create a new mbuf chain
9253 		 */
9254 		if (MHLEN < hdr_len) {
9255 			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9256 		} else {
9257 			m = m_gethdr(M_NOWAIT, MT_DATA);
9258 		}
9259 
9260 		if (m == NULL) {
9261 #ifdef GSO_DEBUG
9262 			D("MGETHDR error\n");
9263 #endif
9264 			goto err;
9265 		}
9266 
9267 		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9268 
9269 		m->m_len = hdr_len;
9270 		/*
9271 		 * if the optional header is present, copy it
9272 		 */
9273 		if (hdr2_buf != NULL) {
9274 			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9275 		}
9276 
9277 		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9278 		if (off + mss >= total_len) {           /* last segment */
9279 			mss = total_len - off;
9280 		}
9281 		/*
9282 		 * Copy the payload from original packet
9283 		 */
9284 		mseg = m_copym(m0, off, mss, M_NOWAIT);
9285 		if (mseg == NULL) {
9286 			m_freem(m);
9287 #ifdef GSO_DEBUG
9288 			D("m_copym error\n");
9289 #endif
9290 			goto err;
9291 		}
9292 		m_cat(m, mseg);
9293 
9294 		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9295 		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9296 		/*
9297 		 * Copy the checksum flags and data (in_cksum() need this)
9298 		 */
9299 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9300 		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9301 		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9302 
9303 		*mnext = m;
9304 		mnext = &(m->m_nextpkt);
9305 	}
9306 
9307 	/*
9308 	 * Update first segment.
9309 	 * If the optional header is present, is necessary
9310 	 * to insert it into the first segment.
9311 	 */
9312 	if (hdr2_buf == NULL) {
9313 		m_adj(m0, hdr_len + firstlen - total_len);
9314 		m0->m_pkthdr.len = hdr_len + firstlen;
9315 	} else {
9316 		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9317 		if (mseg == NULL) {
9318 #ifdef GSO_DEBUG
9319 			D("m_copym error\n");
9320 #endif
9321 			goto err;
9322 		}
9323 		m_adj(m0, hdr_len - total_len);
9324 		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9325 		m_cat(m0, mseg);
9326 		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9327 	}
9328 
9329 	if (nsegs != NULL) {
9330 		*nsegs = n;
9331 	}
9332 	return m0;
9333 err:
9334 	while (m0 != NULL) {
9335 		mseg = m0->m_nextpkt;
9336 		m0->m_nextpkt = NULL;
9337 		m_freem(m0);
9338 		m0 = mseg;
9339 	}
9340 	return NULL;
9341 }
9342 
9343 /*
9344  * Wrappers of IPv4 checksum functions
9345  */
9346 static inline void
9347 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9348 {
9349 	m->m_data += mac_hlen;
9350 	m->m_len -= mac_hlen;
9351 	m->m_pkthdr.len -= mac_hlen;
9352 #if __FreeBSD_version < 1000000
9353 	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9354 #endif
9355 
9356 	in_delayed_cksum(m);
9357 
9358 #if __FreeBSD_version < 1000000
9359 	ip->ip_len = htons(ip->ip_len);
9360 #endif
9361 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9362 	m->m_len += mac_hlen;
9363 	m->m_pkthdr.len += mac_hlen;
9364 	m->m_data -= mac_hlen;
9365 }
9366 
9367 static inline void
9368 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9369 {
9370 	m->m_data += mac_hlen;
9371 
9372 	ip->ip_sum = in_cksum(m, ip_hlen);
9373 
9374 	m->m_pkthdr.csum_flags &= ~CSUM_IP;
9375 	m->m_data -= mac_hlen;
9376 }
9377 
9378 /*
9379  * Structure that contains the state during the TCP segmentation
9380  */
9381 struct gso_ip_tcp_state {
9382 	void    (*update)
9383 	(struct gso_ip_tcp_state*, struct mbuf*);
9384 	void    (*internal)
9385 	(struct gso_ip_tcp_state*, struct mbuf*);
9386 	union iphdr hdr;
9387 	struct tcphdr *tcp;
9388 	int mac_hlen;
9389 	int ip_hlen;
9390 	int tcp_hlen;
9391 	int hlen;
9392 	int pay_len;
9393 	int sw_csum;
9394 	uint32_t tcp_seq;
9395 	uint16_t ip_id;
9396 	boolean_t is_tx;
9397 };
9398 
9399 /*
9400  * Update the pointers to TCP and IPv4 headers
9401  */
9402 static inline void
9403 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9404 {
9405 	state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9406 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9407 	state->pay_len = m->m_pkthdr.len - state->hlen;
9408 }
9409 
9410 /*
9411  * Set properly the TCP and IPv4 headers
9412  */
9413 static inline void
9414 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9415 {
9416 	/*
9417 	 * Update IP header
9418 	 */
9419 	state->hdr.ip->ip_id = htons((state->ip_id)++);
9420 	state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9421 	/*
9422 	 * TCP Checksum
9423 	 */
9424 	state->tcp->th_sum = 0;
9425 	state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9426 	    state->hdr.ip->ip_dst.s_addr,
9427 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9428 	/*
9429 	 * Checksum HW not supported (TCP)
9430 	 */
9431 	if (state->sw_csum & CSUM_DELAY_DATA) {
9432 		gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9433 	}
9434 
9435 	state->tcp_seq += state->pay_len;
9436 	/*
9437 	 * IP Checksum
9438 	 */
9439 	state->hdr.ip->ip_sum = 0;
9440 	/*
9441 	 * Checksum HW not supported (IP)
9442 	 */
9443 	if (state->sw_csum & CSUM_IP) {
9444 		gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9445 	}
9446 }
9447 
9448 
9449 /*
9450  * Updates the pointers to TCP and IPv6 headers
9451  */
9452 static inline void
9453 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9454 {
9455 	state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9456 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9457 	state->pay_len = m->m_pkthdr.len - state->hlen;
9458 }
9459 
9460 /*
9461  * Sets properly the TCP and IPv6 headers
9462  */
9463 static inline void
9464 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9465 {
9466 	state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9467 	    state->mac_hlen - state->ip_hlen);
9468 	/*
9469 	 * TCP Checksum
9470 	 */
9471 	state->tcp->th_sum = 0;
9472 	state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9473 	    &state->hdr.ip6->ip6_dst,
9474 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9475 	/*
9476 	 * Checksum HW not supported (TCP)
9477 	 */
9478 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9479 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9480 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9481 	}
9482 	state->tcp_seq += state->pay_len;
9483 }
9484 
9485 /*
9486  * Init the state during the TCP segmentation
9487  */
9488 static void
9489 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9490     bool is_ipv4, int mac_hlen, int ip_hlen,
9491     void * ip_hdr, struct tcphdr * tcp_hdr)
9492 {
9493 #pragma unused(ifp)
9494 
9495 	state->hdr.ptr = ip_hdr;
9496 	state->tcp = tcp_hdr;
9497 	if (is_ipv4) {
9498 		state->ip_id = ntohs(state->hdr.ip->ip_id);
9499 		state->update = gso_ipv4_tcp_update;
9500 		state->internal = gso_ipv4_tcp_internal;
9501 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9502 	} else {
9503 		state->update = gso_ipv6_tcp_update;
9504 		state->internal = gso_ipv6_tcp_internal;
9505 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9506 	}
9507 	state->mac_hlen = mac_hlen;
9508 	state->ip_hlen = ip_hlen;
9509 	state->tcp_hlen = state->tcp->th_off << 2;
9510 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9511 	state->tcp_seq = ntohl(state->tcp->th_seq);
9512 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9513 	return;
9514 }
9515 
9516 /*
9517  * GSO on TCP/IP (v4 or v6)
9518  *
9519  * If is_tx is TRUE, segmented packets are transmitted after they are
9520  * segmented.
9521  *
9522  * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9523  */
9524 static int
9525 gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9526     boolean_t is_tx)
9527 {
9528 	struct mbuf *m, *m_tx;
9529 	int error = 0;
9530 	int mss = 0;
9531 	int nsegs = 0;
9532 	struct mbuf *m0 = *mp;
9533 #ifdef GSO_STATS
9534 	int total_len = m0->m_pkthdr.len;
9535 #endif /* GSO_STATS */
9536 
9537 #if 1
9538 	u_int reduce_mss;
9539 
9540 	reduce_mss = is_tx ? if_bridge_tso_reduce_mss_tx
9541 	    : if_bridge_tso_reduce_mss_forwarding;
9542 	mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen - reduce_mss;
9543 	assert(mss > 0);
9544 #else
9545 	if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9546 		mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9547 	} else {
9548 		mss = m0->m_pkthdr.tso_segsz;
9549 	}
9550 #endif
9551 
9552 	*mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
9553 	if (m0 == NULL) {
9554 		return ENOBUFS; /* XXX ok? */
9555 	}
9556 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9557 	    "%s %s mss %d nsegs %d",
9558 	    ifp->if_xname,
9559 	    is_tx ? "TX" : "RX",
9560 	    mss, nsegs);
9561 	/*
9562 	 * XXX-ste: can this happen?
9563 	 */
9564 	if (m0->m_nextpkt == NULL) {
9565 #ifdef GSO_DEBUG
9566 		D("only 1 segment");
9567 #endif
9568 		if (is_tx) {
9569 			error = bridge_transmit(ifp, m0);
9570 		}
9571 		return error;
9572 	}
9573 #ifdef GSO_STATS
9574 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9575 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9576 	GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9577 #endif /* GSO_STATS */
9578 
9579 	/* first pkt */
9580 	m = m0;
9581 
9582 	state->update(state, m);
9583 
9584 	do {
9585 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9586 
9587 		state->internal(state, m);
9588 		m_tx = m;
9589 		m = m->m_nextpkt;
9590 		if (is_tx) {
9591 			m_tx->m_nextpkt = NULL;
9592 			if ((error = bridge_transmit(ifp, m_tx)) != 0) {
9593 				/*
9594 				 * XXX: If a segment can not be sent, discard the following
9595 				 * segments and propagate the error to the upper levels.
9596 				 * In this way the TCP retransmits all the initial packet.
9597 				 */
9598 #ifdef GSO_DEBUG
9599 				D("if_transmit error\n");
9600 #endif
9601 				goto err;
9602 			}
9603 		}
9604 		state->update(state, m);
9605 
9606 		state->tcp->th_flags &= ~TH_CWR;
9607 		state->tcp->th_seq = htonl(state->tcp_seq);
9608 	} while (m->m_nextpkt);
9609 
9610 	/* last pkt */
9611 	state->internal(state, m);
9612 
9613 	if (is_tx) {
9614 		error = bridge_transmit(ifp, m);
9615 #ifdef GSO_DEBUG
9616 		if (error) {
9617 			D("last if_transmit error\n");
9618 			D("error - type = %d \n", error);
9619 		}
9620 #endif
9621 	}
9622 #ifdef GSO_STATS
9623 	if (!error) {
9624 		GSOSTAT_INC(tcp.gsos_segmented);
9625 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9626 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9627 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9628 	}
9629 #endif /* GSO_STATS */
9630 	return error;
9631 
9632 err:
9633 #ifdef GSO_DEBUG
9634 	D("error - type = %d \n", error);
9635 #endif
9636 	while (m != NULL) {
9637 		m_tx = m->m_nextpkt;
9638 		m->m_nextpkt = NULL;
9639 		m_freem(m);
9640 		m = m_tx;
9641 	}
9642 	return error;
9643 }
9644 
9645 /*
9646  * GSO for TCP/IPv[46]
9647  */
9648 static int
9649 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9650     boolean_t is_tx)
9651 {
9652 	int error;
9653 	ip_packet_info  info;
9654 	uint32_t csum_flags;
9655 	struct gso_ip_tcp_state state;
9656 	struct bripstats stats; /* XXX ignored */
9657 	struct tcphdr *tcp;
9658 
9659 	if (!is_tx && ipforwarding == 0) {
9660 		/* no need to segment if the packet will not be forwarded */
9661 		return 0;
9662 	}
9663 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9664 	if (error != 0) {
9665 		if (*mp != NULL) {
9666 			m_freem(*mp);
9667 			*mp = NULL;
9668 		}
9669 		return error;
9670 	}
9671 	if (info.ip_proto_hdr == NULL) {
9672 		/* not a TCP packet */
9673 		return 0;
9674 	}
9675 	tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9676 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9677 	    info.ip_hlen, info.ip_hdr.ptr, tcp);
9678 	if (is_ipv4) {
9679 		csum_flags = CSUM_DELAY_DATA; /* XXX */
9680 		if (!is_tx) {
9681 			/* if RX to our local IP address, don't segment */
9682 			struct in_addr  dst_ip;
9683 
9684 			bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9685 			if (in_addr_is_ours(dst_ip)) {
9686 				return 0;
9687 			}
9688 		}
9689 	} else {
9690 		csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9691 		if (!is_tx) {
9692 			/* if RX to our local IP address, don't segment */
9693 			if (in6_addr_is_ours(&state.hdr.ip6->ip6_dst,
9694 			    ifp->if_index)) {
9695 				/* local IP address, no need to segment */
9696 				return 0;
9697 			}
9698 		}
9699 	}
9700 	(*mp)->m_pkthdr.csum_flags = csum_flags;
9701 	(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9702 	return gso_ip_tcp(ifp, mp, &state, is_tx);
9703 }
9704