xref: /xnu-8020.121.3/bsd/net/if_bridge.c (revision fdd8201d7b966f0c3ea610489d29bd841d358941)
1 /*
2  * Copyright (c) 2004-2021 Apple Inc. All rights reserved.
3  *
4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. The rights granted to you under the License
10  * may not be used to create, or enable the creation or redistribution of,
11  * unlawful or unlicensed copies of an Apple operating system, or to
12  * circumvent, violate, or enable the circumvention or violation of, any
13  * terms of an Apple operating system software license agreement.
14  *
15  * Please obtain a copy of the License at
16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
17  *
18  * The Original Code and all software distributed under the License are
19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23  * Please see the License for the specific language governing rights and
24  * limitations under the License.
25  *
26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27  */
28 
29 /*	$NetBSD: if_bridge.c,v 1.31 2005/06/01 19:45:34 jdc Exp $	*/
30 /*
31  * Copyright 2001 Wasabi Systems, Inc.
32  * All rights reserved.
33  *
34  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed for the NetBSD Project by
47  *	Wasabi Systems, Inc.
48  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
49  *    or promote products derived from this software without specific prior
50  *    written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
54  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
55  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
56  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
57  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
58  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
59  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
60  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
61  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
62  * POSSIBILITY OF SUCH DAMAGE.
63  */
64 
65 /*
66  * Copyright (c) 1999, 2000 Jason L. Wright ([email protected])
67  * All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  *
78  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
79  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
80  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
81  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
82  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
83  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
84  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
86  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
87  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
88  * POSSIBILITY OF SUCH DAMAGE.
89  *
90  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
91  */
92 
93 /*
94  * Network interface bridge support.
95  *
96  * TODO:
97  *
98  *	- Currently only supports Ethernet-like interfaces (Ethernet,
99  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
100  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
101  *	  consider heterogenous bridges).
102  *
103  *	- GIF isn't handled due to the lack of IPPROTO_ETHERIP support.
104  */
105 
106 #include <sys/cdefs.h>
107 
108 #include <sys/param.h>
109 #include <sys/mbuf.h>
110 #include <sys/malloc.h>
111 #include <sys/protosw.h>
112 #include <sys/systm.h>
113 #include <sys/time.h>
114 #include <sys/socket.h> /* for net/if.h */
115 #include <sys/sockio.h>
116 #include <sys/kernel.h>
117 #include <sys/random.h>
118 #include <sys/syslog.h>
119 #include <sys/sysctl.h>
120 #include <sys/proc.h>
121 #include <sys/lock.h>
122 #include <sys/mcache.h>
123 
124 #include <sys/kauth.h>
125 
126 #include <kern/thread_call.h>
127 
128 #include <libkern/libkern.h>
129 
130 #include <kern/zalloc.h>
131 
132 #if NBPFILTER > 0
133 #include <net/bpf.h>
134 #endif
135 #include <net/if.h>
136 #include <net/if_dl.h>
137 #include <net/if_types.h>
138 #include <net/if_var.h>
139 #include <net/if_media.h>
140 #include <net/net_api_stats.h>
141 #include <net/pfvar.h>
142 
143 #include <netinet/in.h> /* for struct arpcom */
144 #include <netinet/tcp.h> /* for struct tcphdr */
145 #include <netinet/in_systm.h>
146 #include <netinet/in_var.h>
147 #define _IP_VHL
148 #include <netinet/ip.h>
149 #include <netinet/ip_var.h>
150 #include <netinet/ip6.h>
151 #include <netinet6/ip6_var.h>
152 #ifdef DEV_CARP
153 #include <netinet/ip_carp.h>
154 #endif
155 #include <netinet/if_ether.h> /* for struct arpcom */
156 #include <net/bridgestp.h>
157 #include <net/if_bridgevar.h>
158 #include <net/if_llc.h>
159 #if NVLAN > 0
160 #include <net/if_vlan_var.h>
161 #endif /* NVLAN > 0 */
162 
163 #include <net/if_ether.h>
164 #include <net/dlil.h>
165 #include <net/kpi_interfacefilter.h>
166 
167 #include <net/route.h>
168 #include <dev/random/randomdev.h>
169 
170 #include <netinet/bootp.h>
171 #include <netinet/dhcp.h>
172 
173 #if SKYWALK
174 #include <skywalk/nexus/netif/nx_netif.h>
175 #endif /* SKYWALK */
176 
177 #include <os/log.h>
178 
179 /*
180  * if_bridge_debug, BR_DBGF_*
181  * - 'if_bridge_debug' is a bitmask of BR_DBGF_* flags that can be set
182  *   to enable additional logs for the corresponding bridge function
183  * - "sysctl net.link.bridge.debug" controls the value of
184  *   'if_bridge_debug'
185  */
186 static uint32_t if_bridge_debug = 0;
187 #define BR_DBGF_LIFECYCLE       0x0001
188 #define BR_DBGF_INPUT           0x0002
189 #define BR_DBGF_OUTPUT          0x0004
190 #define BR_DBGF_RT_TABLE        0x0008
191 #define BR_DBGF_DELAYED_CALL    0x0010
192 #define BR_DBGF_IOCTL           0x0020
193 #define BR_DBGF_MBUF            0x0040
194 #define BR_DBGF_MCAST           0x0080
195 #define BR_DBGF_HOSTFILTER      0x0100
196 #define BR_DBGF_CHECKSUM        0x0200
197 #define BR_DBGF_MAC_NAT         0x0400
198 
199 /*
200  * if_bridge_log_level
201  * - 'if_bridge_log_level' ensures that by default important logs are
202  *   logged regardless of if_bridge_debug by comparing the log level
203  *   in BRIDGE_LOG to if_bridge_log_level
204  * - "sysctl net.link.bridge.log_level" controls the value of
205  *   'if_bridge_log_level'
206  * - the default value of 'if_bridge_log_level' is LOG_NOTICE; important
207  *   logs must use LOG_NOTICE to ensure they appear by default
208  */
209 static int if_bridge_log_level = LOG_NOTICE;
210 
/*
 * BRIDGE_DBGF_ENABLED
 * - evaluates to non-zero if any BR_DBGF_* bit in '__flag' is set in
 *   'if_bridge_debug', zero otherwise
 * - '__flag' is parenthesized so that a compound argument such as
 *   (BR_DBGF_INPUT | BR_DBGF_OUTPUT) is masked as a whole instead of
 *   being split by operator precedence
 */
#define BRIDGE_DBGF_ENABLED(__flag)     ((if_bridge_debug & (__flag)) != 0)
212 
213 /*
214  * BRIDGE_LOG, BRIDGE_LOG_SIMPLE
215  * - macros to generate the specified log conditionally based on
216  *   the specified log level and debug flags
217  * - BRIDGE_LOG_SIMPLE does not include the function name in the log
218  */
/*
 * BRIDGE_LOG(level, dbgf, format, ...)
 * - emits the log, prefixed with the calling function name, when
 *   'level' is at or below 'if_bridge_log_level' or when one of the
 *   'dbgf' bits is enabled in 'if_bridge_debug'
 * - both selector arguments are parenthesized so that expression
 *   arguments (e.g. a conditional expression) are evaluated as a unit
 */
#define BRIDGE_LOG(__level, __dbgf, __string, ...)              \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, "%s: " __string, \
	                       __func__, ## __VA_ARGS__);       \
	        }                                                       \
	} while (0)
/*
 * BRIDGE_LOG_SIMPLE(level, dbgf, format, ...)
 * - same gating as BRIDGE_LOG ('level' at or below
 *   'if_bridge_log_level', or a 'dbgf' bit enabled in 'if_bridge_debug')
 *   but the function name is not prepended to the message
 * - both selector arguments are parenthesized so that expression
 *   arguments are evaluated as a unit
 */
#define BRIDGE_LOG_SIMPLE(__level, __dbgf, __string, ...)               \
	do {                                                            \
	        if ((__level) <= if_bridge_log_level ||                 \
	            BRIDGE_DBGF_ENABLED((__dbgf))) {                    \
	                os_log(OS_LOG_DEFAULT, __string, ## __VA_ARGS__); \
	        }                                                       \
	} while (0)
234 
235 #define _BRIDGE_LOCK(_sc)               lck_mtx_lock(&(_sc)->sc_mtx)
236 #define _BRIDGE_UNLOCK(_sc)             lck_mtx_unlock(&(_sc)->sc_mtx)
237 #define BRIDGE_LOCK_ASSERT_HELD(_sc)            \
238 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED)
239 #define BRIDGE_LOCK_ASSERT_NOTHELD(_sc)         \
240 	LCK_MTX_ASSERT(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED)
241 
242 #define BRIDGE_LOCK_DEBUG      1
243 #if BRIDGE_LOCK_DEBUG
244 
245 #define BR_LCKDBG_MAX                   4
246 
247 #define BRIDGE_LOCK(_sc)                bridge_lock(_sc)
248 #define BRIDGE_UNLOCK(_sc)              bridge_unlock(_sc)
249 #define BRIDGE_LOCK2REF(_sc, _err)      _err = bridge_lock2ref(_sc)
250 #define BRIDGE_UNREF(_sc)               bridge_unref(_sc)
251 #define BRIDGE_XLOCK(_sc)               bridge_xlock(_sc)
252 #define BRIDGE_XDROP(_sc)               bridge_xdrop(_sc)
253 
254 #else /* !BRIDGE_LOCK_DEBUG */
255 
256 #define BRIDGE_LOCK(_sc)                _BRIDGE_LOCK(_sc)
257 #define BRIDGE_UNLOCK(_sc)              _BRIDGE_UNLOCK(_sc)
258 #define BRIDGE_LOCK2REF(_sc, _err)      do {                            \
259 	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
260 	if ((_sc)->sc_iflist_xcnt > 0)                                  \
261 	        (_err) = EBUSY;                                         \
262 	else {                                                          \
263 	        (_sc)->sc_iflist_ref++;                                 \
264 	        (_err) = 0;                                             \
265 	}                                                               \
266 	_BRIDGE_UNLOCK(_sc);                                            \
267 } while (0)
268 #define BRIDGE_UNREF(_sc)               do {                            \
269 	_BRIDGE_LOCK(_sc);                                              \
270 	(_sc)->sc_iflist_ref--;                                         \
271 	if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0))	{ \
272 	        _BRIDGE_UNLOCK(_sc);                                    \
273 	        wakeup(&(_sc)->sc_cv);                                  \
274 	} else                                                          \
275 	        _BRIDGE_UNLOCK(_sc);                                    \
276 } while (0)
277 #define BRIDGE_XLOCK(_sc)               do {                            \
278 	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
279 	(_sc)->sc_iflist_xcnt++;                                        \
280 	while ((_sc)->sc_iflist_ref > 0)                                \
281 	        msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO,            \
282 	            "BRIDGE_XLOCK", NULL);                              \
283 } while (0)
284 #define BRIDGE_XDROP(_sc)               do {                            \
285 	BRIDGE_LOCK_ASSERT_HELD(_sc);                                   \
286 	(_sc)->sc_iflist_xcnt--;                                        \
287 } while (0)
288 
289 #endif /* BRIDGE_LOCK_DEBUG */
290 
/*
 * BRIDGE_BPF_MTAP_INPUT(sc, m)
 * - when 'sc->sc_bpf_input' is set, hands 'm' to bridge_bpf_input()
 *   together with the calling function name and line number
 * - expands to exactly one statement in both configurations, so it is
 *   safe as the body of an unbraced if/else
 * - 'sc' is evaluated more than once; do not pass an expression with
 *   side effects
 */
#if NBPFILTER > 0
#define BRIDGE_BPF_MTAP_INPUT(sc, m)                                    \
	do {                                                            \
	        if ((sc)->sc_bpf_input != NULL) {                       \
	                bridge_bpf_input((sc)->sc_ifp, (m),             \
	                    __func__, __LINE__);                        \
	        }                                                       \
	} while (0)
#else /* NBPFILTER */
#define BRIDGE_BPF_MTAP_INPUT(sc, m)    do { } while (0)
#endif /* NBPFILTER */
298 
299 /*
300  * Initial size of the route hash table.  Must be a power of two.
301  */
302 #ifndef BRIDGE_RTHASH_SIZE
303 #define BRIDGE_RTHASH_SIZE              16
304 #endif
305 
306 /*
307  * Maximum size of the routing hash table
308  */
309 #define BRIDGE_RTHASH_SIZE_MAX          2048
310 
311 #define BRIDGE_RTHASH_MASK(sc)          ((sc)->sc_rthash_size - 1)
312 
313 /*
314  * Maximum number of addresses to cache.
315  */
316 #ifndef BRIDGE_RTABLE_MAX
317 #define BRIDGE_RTABLE_MAX               100
318 #endif
319 
320 
321 /*
322  * Timeout (in seconds) for entries learned dynamically.
323  */
324 #ifndef BRIDGE_RTABLE_TIMEOUT
325 #define BRIDGE_RTABLE_TIMEOUT           (20 * 60)       /* same as ARP */
326 #endif
327 
328 /*
329  * Number of seconds between walks of the route list.
330  */
331 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
332 #define BRIDGE_RTABLE_PRUNE_PERIOD      (5 * 60)
333 #endif
334 
335 /*
336  * Number of MAC NAT entries
337  * - sized based on 16 clients (including MAC NAT interface)
338  *   each with 4 addresses
339  */
340 #ifndef BRIDGE_MAC_NAT_ENTRY_MAX
341 #define BRIDGE_MAC_NAT_ENTRY_MAX        64
342 #endif /* BRIDGE_MAC_NAT_ENTRY_MAX */
343 
344 /*
345  * List of capabilities to possibly mask on the member interface.
346  */
347 #define BRIDGE_IFCAPS_MASK              (IFCAP_TSO | IFCAP_TXCSUM)
348 /*
349  * List of capabilities to disable on the member interface.
350  */
351 #define BRIDGE_IFCAPS_STRIP             IFCAP_LRO
352 
353 /*
354  * Bridge interface list entry.
355  */
356 struct bridge_iflist {
357 	TAILQ_ENTRY(bridge_iflist) bif_next;    /* list linkage */
358 	struct ifnet            *bif_ifp;       /* member if */
359 	struct bstp_port        bif_stp;        /* STP state */
360 	uint32_t                bif_ifflags;    /* member if flags (IFBIF_*), see bif_ifflags_are_set() */
361 	int                     bif_savedcaps;  /* saved capabilities */
362 	uint32_t                bif_addrmax;    /* max # of addresses */
363 	uint32_t                bif_addrcnt;    /* cur. # of addresses */
364 	uint32_t                bif_addrexceeded; /* # of address violations */
365 
366 	interface_filter_t      bif_iff_ref;    /* NOTE(review): presumably valid while BIFF_FILTER_ATTACHED is set -- confirm */
367 	struct bridge_softc     *bif_sc;        /* NOTE(review): presumably the bridge this member belongs to -- confirm */
368 	uint32_t                bif_flags;      /* NOTE(review): presumably the BIFF_* bits defined below -- confirm */
369 
370 	/* host filter source IP and source MAC (see BIFF_HF_IPSRC, BIFF_HF_HWSRC) */
371 	struct in_addr          bif_hf_ipsrc;
372 	uint8_t                 bif_hf_hwsrc[ETHER_ADDR_LEN];
373 
374 	struct ifbrmstats       bif_stats;
375 };
376 
377 static inline bool
bif_ifflags_are_set(struct bridge_iflist * bif,uint32_t flags)378 bif_ifflags_are_set(struct bridge_iflist * bif, uint32_t flags)
379 {
380 	return (bif->bif_ifflags & flags) == flags;
381 }
382 
383 static inline bool
bif_has_checksum_offload(struct bridge_iflist * bif)384 bif_has_checksum_offload(struct bridge_iflist * bif)
385 {
386 	return bif_ifflags_are_set(bif, IFBIF_CHECKSUM_OFFLOAD);
387 }
388 
389 /* fake errors to make the code clearer */
390 #define _EBADIP                 EJUSTRETURN
391 #define _EBADIPCHECKSUM         EJUSTRETURN
392 #define _EBADIPV6               EJUSTRETURN
393 #define _EBADUDP                EJUSTRETURN
394 #define _EBADTCP                EJUSTRETURN
395 #define _EBADUDPCHECKSUM        EJUSTRETURN
396 #define _EBADTCPCHECKSUM        EJUSTRETURN
397 
398 #define BIFF_PROMISC            0x01    /* promiscuous mode set */
399 #define BIFF_PROTO_ATTACHED     0x02    /* protocol attached */
400 #define BIFF_FILTER_ATTACHED    0x04    /* interface filter attached */
401 #define BIFF_MEDIA_ACTIVE       0x08    /* interface media active */
402 #define BIFF_HOST_FILTER        0x10    /* host filter enabled */
403 #define BIFF_HF_HWSRC           0x20    /* host filter source MAC is set */
404 #define BIFF_HF_IPSRC           0x40    /* host filter source IP is set */
405 #define BIFF_INPUT_BROADCAST    0x80    /* send broadcast packets in */
406 #define BIFF_IN_MEMBER_LIST     0x100   /* added to the member list */
407 #if SKYWALK
408 #define BIFF_FLOWSWITCH_ATTACHED 0x1000   /* we attached the flowswitch */
409 #define BIFF_NETAGENT_REMOVED    0x2000   /* we removed the netagent */
410 #endif /* SKYWALK */
411 
412 /*
413  * mac_nat_entry
414  * - translates between an IP address and MAC address on a specific
415  *   bridge interface member
416  */
417 struct mac_nat_entry {
418 	LIST_ENTRY(mac_nat_entry) mne_list;     /* list linkage */
419 	struct bridge_iflist    *mne_bif;       /* originating interface */
420 	unsigned long           mne_expire;     /* expiration time */
421 	union {
422 		struct in_addr  mneu_ip;        /* originating IPv4 address */
423 		struct in6_addr mneu_ip6;       /* originating IPv6 address */
424 	} mne_u;
425 	uint8_t                 mne_mac[ETHER_ADDR_LEN];
426 	uint8_t                 mne_flags;
427 	uint8_t                 mne_reserved;
428 };
429 #define mne_ip  mne_u.mneu_ip
430 #define mne_ip6 mne_u.mneu_ip6
431 
432 #define MNE_FLAGS_IPV6          0x01    /* IPv6 address */
433 
434 LIST_HEAD(mac_nat_entry_list, mac_nat_entry);
435 
436 /*
437  * mac_nat_record
438  * - used by bridge_mac_nat_output() to convey the translation that needs
439  *   to take place in bridge_mac_nat_translate
440  * - holds enough information so that the translation can be done later without
441  *   holding the bridge lock
442  */
443 struct mac_nat_record {
444 	uint16_t                mnr_ether_type;
445 	union {
446 		uint16_t        mnru_arp_offset;
447 		struct {
448 			uint16_t mnruip_dhcp_flags;
449 			uint16_t mnruip_udp_csum;
450 			uint8_t  mnruip_header_len;
451 		} mnru_ip;
452 		struct {
453 			uint16_t mnruip6_icmp6_len;
454 			uint16_t mnruip6_lladdr_offset;
455 			uint8_t mnruip6_icmp6_type;
456 			uint8_t mnruip6_header_len;
457 		} mnru_ip6;
458 	} mnr_u;
459 };
460 
461 #define mnr_arp_offset  mnr_u.mnru_arp_offset
462 
463 #define mnr_ip_header_len       mnr_u.mnru_ip.mnruip_header_len
464 #define mnr_ip_dhcp_flags       mnr_u.mnru_ip.mnruip_dhcp_flags
465 #define mnr_ip_udp_csum         mnr_u.mnru_ip.mnruip_udp_csum
466 
467 #define mnr_ip6_icmp6_len       mnr_u.mnru_ip6.mnruip6_icmp6_len
468 #define mnr_ip6_icmp6_type      mnr_u.mnru_ip6.mnruip6_icmp6_type
469 #define mnr_ip6_header_len      mnr_u.mnru_ip6.mnruip6_header_len
470 #define mnr_ip6_lladdr_offset   mnr_u.mnru_ip6.mnruip6_lladdr_offset
471 
472 /*
473  * Bridge route node.
474  */
475 struct bridge_rtnode {
476 	LIST_ENTRY(bridge_rtnode) brt_hash;     /* hash table linkage */
477 	LIST_ENTRY(bridge_rtnode) brt_list;     /* list linkage */
478 	struct bridge_iflist    *brt_dst;       /* destination if */
479 	unsigned long           brt_expire;     /* expiration time */
480 	uint8_t                 brt_flags;      /* address flags */
481 	uint8_t                 brt_addr[ETHER_ADDR_LEN];
482 	uint16_t                brt_vlan;       /* vlan id */
483 
484 };
485 #define brt_ifp                 brt_dst->bif_ifp
486 
487 /*
488  * Bridge delayed function call context
489  */
490 typedef void (*bridge_delayed_func_t)(struct bridge_softc *);
491 
492 struct bridge_delayed_call {
493 	struct bridge_softc     *bdc_sc;
494 	bridge_delayed_func_t   bdc_func; /* Function to call */
495 	struct timespec         bdc_ts; /* Time to call */
496 	u_int32_t               bdc_flags;
497 	thread_call_t           bdc_thread_call;
498 };
499 
500 #define BDCF_OUTSTANDING        0x01    /* Delayed call has been scheduled */
501 #define BDCF_CANCELLING         0x02    /* May be waiting for call completion */
502 
503 /*
504  * Software state for each bridge.
505  */
506 LIST_HEAD(_bridge_rtnode_list, bridge_rtnode);
507 
508 struct bridge_softc {
509 	struct ifnet            *sc_ifp;        /* make this an interface */
510 	u_int32_t               sc_flags;       /* NOTE(review): presumably the SCF_* bits defined below -- confirm */
511 	LIST_ENTRY(bridge_softc) sc_list;
512 	decl_lck_mtx_data(, sc_mtx);            /* mutex taken by _BRIDGE_LOCK()/_BRIDGE_UNLOCK() */
513 	struct _bridge_rtnode_list *sc_rthash;  /* our forwarding table */
514 	struct _bridge_rtnode_list sc_rtlist;   /* list version of above */
515 	uint32_t                sc_rthash_key;  /* key for hash */
516 	uint32_t                sc_rthash_size; /* size of the hash table */
517 	struct bridge_delayed_call sc_aging_timer;
518 	struct bridge_delayed_call sc_resize_call;
519 	TAILQ_HEAD(, bridge_iflist) sc_spanlist;        /* span ports list */
520 	struct bstp_state       sc_stp;         /* STP state */
521 	bpf_packet_func         sc_bpf_input;
522 	bpf_packet_func         sc_bpf_output;
523 	void                    *sc_cv;         /* its address is the msleep()/wakeup() channel for BRIDGE_XLOCK */
524 	uint32_t                sc_brtmax;      /* max # of addresses */
525 	uint32_t                sc_brtcnt;      /* cur. # of addresses */
526 	uint32_t                sc_brttimeout;  /* rt timeout in seconds */
527 	uint32_t                sc_iflist_ref;  /* refcount for sc_iflist (BRIDGE_LOCK2REF/BRIDGE_UNREF) */
528 	uint32_t                sc_iflist_xcnt; /* # of exclusive holders/waiters on sc_iflist (BRIDGE_XLOCK/BRIDGE_XDROP) */
529 	TAILQ_HEAD(, bridge_iflist) sc_iflist;  /* member interface list */
530 	uint32_t                sc_brtexceeded; /* # of cache drops */
531 	uint32_t                sc_filter_flags; /* ipf and flags */
532 	struct ifnet            *sc_ifaddr;     /* member mac copied from */
533 	u_char                  sc_defaddr[6];  /* Default MAC address */
534 	char                    sc_if_xname[IFNAMSIZ];
535 
536 	struct bridge_iflist    *sc_mac_nat_bif; /* single MAC NAT interface */
537 	struct mac_nat_entry_list sc_mne_list;  /* MAC NAT IPv4 */
538 	struct mac_nat_entry_list sc_mne_list_v6;/* MAC NAT IPv6 */
539 	uint32_t                sc_mne_max;      /* max # of entries */
540 	uint32_t                sc_mne_count;    /* cur. # of entries */
541 	uint32_t                sc_mne_allocation_failures;
542 #if BRIDGE_LOCK_DEBUG
543 	/*
544 	 * Locking and unlocking calling history
545 	 * (each array holds BR_LCKDBG_MAX entries)
546 	 */
546 	void                    *lock_lr[BR_LCKDBG_MAX];
547 	int                     next_lock_lr;
548 	void                    *unlock_lr[BR_LCKDBG_MAX];
549 	int                     next_unlock_lr;
550 #endif /* BRIDGE_LOCK_DEBUG */
551 };
552 
553 #define SCF_DETACHING            0x01
554 #define SCF_RESIZING             0x02
555 #define SCF_MEDIA_ACTIVE         0x04
556 
557 typedef enum {
558 	CHECKSUM_OPERATION_NONE = 0,
559 	CHECKSUM_OPERATION_CLEAR_OFFLOAD = 1,
560 	CHECKSUM_OPERATION_FINALIZE = 2,
561 	CHECKSUM_OPERATION_COMPUTE = 3,
562 } ChecksumOperation;
563 
564 union iphdr {
565 	struct ip *ip;
566 	struct ip6_hdr *ip6;
567 	void * ptr;
568 };
569 
570 typedef struct {
571 	u_int           ip_hlen;        /* IP header length */
572 	u_int           ip_pay_len;     /* length of payload (exclusive of ip_hlen) */
573 	u_int           ip_opt_len;     /* IPv6 options headers length */
574 	uint8_t         ip_proto;       /* IPPROTO_TCP, IPPROTO_UDP, etc. */
575 	bool            ip_is_ipv4;
576 	bool            ip_is_fragmented;
577 	union iphdr     ip_hdr;         /* pointer to IP header */
578 	void *          ip_proto_hdr;   /* ptr to protocol header (TCP) */
579 } ip_packet_info, *ip_packet_info_t;
580 
581 struct bridge_hostfilter_stats bridge_hostfilter_stats;
582 
583 static LCK_GRP_DECLARE(bridge_lock_grp, "if_bridge");
584 #if BRIDGE_LOCK_DEBUG
585 static LCK_ATTR_DECLARE(bridge_lock_attr, 0, 0);
586 #else
587 static LCK_ATTR_DECLARE(bridge_lock_attr, LCK_ATTR_DEBUG, 0);
588 #endif
589 static LCK_MTX_DECLARE_ATTR(bridge_list_mtx, &bridge_lock_grp, &bridge_lock_attr);
590 
591 static int      bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
592 
593 static ZONE_DEFINE(bridge_rtnode_pool, "bridge_rtnode",
594     sizeof(struct bridge_rtnode), ZC_NONE);
595 static ZONE_DEFINE(bridge_mne_pool, "bridge_mac_nat_entry",
596     sizeof(struct mac_nat_entry), ZC_NONE);
597 
598 static int      bridge_clone_create(struct if_clone *, uint32_t, void *);
599 static int      bridge_clone_destroy(struct ifnet *);
600 
601 static errno_t  bridge_ioctl(struct ifnet *, u_long, void *);
602 #if HAS_IF_CAP
603 static void     bridge_mutecaps(struct bridge_softc *);
604 static void     bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *,
605     int);
606 #endif
607 static errno_t bridge_set_tso(struct bridge_softc *);
608 static void     bridge_proto_attach_changed(struct ifnet *);
609 static int      bridge_init(struct ifnet *);
610 #if HAS_BRIDGE_DUMMYNET
611 static void     bridge_dummynet(struct mbuf *, struct ifnet *);
612 #endif
613 static void     bridge_ifstop(struct ifnet *, int);
614 static int      bridge_output(struct ifnet *, struct mbuf *);
615 static void     bridge_finalize_cksum(struct ifnet *, struct mbuf *);
616 static void     bridge_start(struct ifnet *);
617 static errno_t  bridge_input(struct ifnet *, mbuf_t *);
618 static errno_t  bridge_iff_input(void *, ifnet_t, protocol_family_t,
619     mbuf_t *, char **);
620 static errno_t  bridge_iff_output(void *, ifnet_t, protocol_family_t,
621     mbuf_t *);
622 static errno_t  bridge_member_output(struct bridge_softc *sc, ifnet_t ifp,
623     mbuf_t *m);
624 
625 static int      bridge_enqueue(ifnet_t, struct ifnet *,
626     struct ifnet *, struct mbuf *, ChecksumOperation);
627 static void     bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int);
628 
629 static void     bridge_forward(struct bridge_softc *, struct bridge_iflist *,
630     struct mbuf *);
631 
632 static void     bridge_aging_timer(struct bridge_softc *sc);
633 
634 static void     bridge_broadcast(struct bridge_softc *, struct bridge_iflist *,
635     struct mbuf *, int);
636 static void     bridge_span(struct bridge_softc *, struct mbuf *);
637 
638 static int      bridge_rtupdate(struct bridge_softc *, const uint8_t *,
639     uint16_t, struct bridge_iflist *, int, uint8_t);
640 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *,
641     uint16_t);
642 static void     bridge_rttrim(struct bridge_softc *);
643 static void     bridge_rtage(struct bridge_softc *);
644 static void     bridge_rtflush(struct bridge_softc *, int);
645 static int      bridge_rtdaddr(struct bridge_softc *, const uint8_t *,
646     uint16_t);
647 
648 static int      bridge_rtable_init(struct bridge_softc *);
649 static void     bridge_rtable_fini(struct bridge_softc *);
650 
651 static void     bridge_rthash_resize(struct bridge_softc *);
652 
653 static int      bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *);
654 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
655     const uint8_t *, uint16_t);
656 static int      bridge_rtnode_hash(struct bridge_softc *,
657     struct bridge_rtnode *);
658 static int      bridge_rtnode_insert(struct bridge_softc *,
659     struct bridge_rtnode *);
660 static void     bridge_rtnode_destroy(struct bridge_softc *,
661     struct bridge_rtnode *);
662 #if BRIDGESTP
663 static void     bridge_rtable_expire(struct ifnet *, int);
664 static void     bridge_state_change(struct ifnet *, int);
665 #endif /* BRIDGESTP */
666 
667 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
668     const char *name);
669 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
670     struct ifnet *ifp);
671 static void     bridge_delete_member(struct bridge_softc *,
672     struct bridge_iflist *);
673 static void     bridge_delete_span(struct bridge_softc *,
674     struct bridge_iflist *);
675 
676 static int      bridge_ioctl_add(struct bridge_softc *, void *);
677 static int      bridge_ioctl_del(struct bridge_softc *, void *);
678 static int      bridge_ioctl_gifflags(struct bridge_softc *, void *);
679 static int      bridge_ioctl_sifflags(struct bridge_softc *, void *);
680 static int      bridge_ioctl_scache(struct bridge_softc *, void *);
681 static int      bridge_ioctl_gcache(struct bridge_softc *, void *);
682 static int      bridge_ioctl_gifs32(struct bridge_softc *, void *);
683 static int      bridge_ioctl_gifs64(struct bridge_softc *, void *);
684 static int      bridge_ioctl_rts32(struct bridge_softc *, void *);
685 static int      bridge_ioctl_rts64(struct bridge_softc *, void *);
686 static int      bridge_ioctl_saddr32(struct bridge_softc *, void *);
687 static int      bridge_ioctl_saddr64(struct bridge_softc *, void *);
688 static int      bridge_ioctl_sto(struct bridge_softc *, void *);
689 static int      bridge_ioctl_gto(struct bridge_softc *, void *);
690 static int      bridge_ioctl_daddr32(struct bridge_softc *, void *);
691 static int      bridge_ioctl_daddr64(struct bridge_softc *, void *);
692 static int      bridge_ioctl_flush(struct bridge_softc *, void *);
693 static int      bridge_ioctl_gpri(struct bridge_softc *, void *);
694 static int      bridge_ioctl_spri(struct bridge_softc *, void *);
695 static int      bridge_ioctl_ght(struct bridge_softc *, void *);
696 static int      bridge_ioctl_sht(struct bridge_softc *, void *);
697 static int      bridge_ioctl_gfd(struct bridge_softc *, void *);
698 static int      bridge_ioctl_sfd(struct bridge_softc *, void *);
699 static int      bridge_ioctl_gma(struct bridge_softc *, void *);
700 static int      bridge_ioctl_sma(struct bridge_softc *, void *);
701 static int      bridge_ioctl_sifprio(struct bridge_softc *, void *);
702 static int      bridge_ioctl_sifcost(struct bridge_softc *, void *);
703 static int      bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
704 static int      bridge_ioctl_addspan(struct bridge_softc *, void *);
705 static int      bridge_ioctl_delspan(struct bridge_softc *, void *);
706 static int      bridge_ioctl_gbparam32(struct bridge_softc *, void *);
707 static int      bridge_ioctl_gbparam64(struct bridge_softc *, void *);
708 static int      bridge_ioctl_grte(struct bridge_softc *, void *);
709 static int      bridge_ioctl_gifsstp32(struct bridge_softc *, void *);
710 static int      bridge_ioctl_gifsstp64(struct bridge_softc *, void *);
711 static int      bridge_ioctl_sproto(struct bridge_softc *, void *);
712 static int      bridge_ioctl_stxhc(struct bridge_softc *, void *);
713 static int      bridge_ioctl_purge(struct bridge_softc *sc, void *);
714 static int      bridge_ioctl_gfilt(struct bridge_softc *, void *);
715 static int      bridge_ioctl_sfilt(struct bridge_softc *, void *);
716 static int      bridge_ioctl_ghostfilter(struct bridge_softc *, void *);
717 static int      bridge_ioctl_shostfilter(struct bridge_softc *, void *);
718 static int      bridge_ioctl_gmnelist32(struct bridge_softc *, void *);
719 static int      bridge_ioctl_gmnelist64(struct bridge_softc *, void *);
720 static int      bridge_ioctl_gifstats32(struct bridge_softc *, void *);
721 static int      bridge_ioctl_gifstats64(struct bridge_softc *, void *);
722 
723 static int bridge_pf(struct mbuf **, struct ifnet *, uint32_t sc_filter_flags, int input);
724 static int bridge_ip_checkbasic(struct mbuf **);
725 static int bridge_ip6_checkbasic(struct mbuf **);
726 
727 static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func);
728 static errno_t bridge_bpf_input(ifnet_t, struct mbuf *, const char *, int);
729 static errno_t bridge_bpf_output(ifnet_t, struct mbuf *);
730 
731 static void bridge_detach(ifnet_t);
732 static void bridge_link_event(struct ifnet *, u_int32_t);
733 static void bridge_iflinkevent(struct ifnet *);
734 static u_int32_t bridge_updatelinkstatus(struct bridge_softc *);
735 static int interface_media_active(struct ifnet *);
736 static void bridge_schedule_delayed_call(struct bridge_delayed_call *);
737 static void bridge_cancel_delayed_call(struct bridge_delayed_call *);
738 static void bridge_cleanup_delayed_call(struct bridge_delayed_call *);
739 static int bridge_host_filter(struct bridge_iflist *, mbuf_t *);
740 
741 static errno_t bridge_mac_nat_enable(struct bridge_softc *,
742     struct bridge_iflist *);
743 static void bridge_mac_nat_disable(struct bridge_softc *sc);
744 static void bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long);
745 static void bridge_mac_nat_populate_entries(struct bridge_softc *sc);
746 static void bridge_mac_nat_flush_entries(struct bridge_softc *sc,
747     struct bridge_iflist *);
748 static ifnet_t bridge_mac_nat_input(struct bridge_softc *, mbuf_t *,
749     boolean_t *);
750 static boolean_t bridge_mac_nat_output(struct bridge_softc *,
751     struct bridge_iflist *, mbuf_t *, struct mac_nat_record *);
752 static void bridge_mac_nat_translate(mbuf_t *, struct mac_nat_record *,
753     const caddr_t);
754 static bool is_broadcast_ip_packet(mbuf_t *);
755 static bool in_addr_is_ours(const struct in_addr);
756 static bool in6_addr_is_ours(const struct in6_addr *, uint32_t);
757 
/*
 * Copy a whole packet: expands to m_copym() starting at offset 0 for
 * M_COPYALL bytes, using the caller-supplied wait flag `how'.
 */
#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how)

/* Forward declaration; the definition lives outside this part of the file. */
static int
gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
    boolean_t is_tx);

/* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */
/*
 * NOTE(review): this macro ignores its argument and always evaluates to 0,
 * not 1 as the comment above suggests -- confirm which one is intended.
 */
#define VLANTAGOF(_m)   0

/*
 * 01:80:c2:00:00:00 -- presumably the destination address used for
 * spanning-tree frames (name suggests BSTP); verify against bridgestp.c.
 */
u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] =
{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };

/* All-zero Ethernet address. */
static u_int8_t ethernulladdr[ETHER_ADDR_LEN] =
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
772 
#if BRIDGESTP
/* Callbacks handed to bstp_attach() when a bridge is created. */
static struct bstp_cb_ops bridge_ops = {
	.bcb_state = bridge_state_change,
	.bcb_rtage = bridge_rtable_expire
};
#endif /* BRIDGESTP */

/* Root of the net.link.bridge sysctl subtree. */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "Bridge");

static int bridge_inherit_mac = 0;   /* share MAC with first bridge member */
SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_inherit_mac, 0,
    "Inherit MAC address from the first bridge member");

/* net.link.bridge.rtable_prune_period: exports bridge_rtable_prune_period. */
SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_prune_period, 0,
    "Interval between pruning of routing table");

/* Upper bound for the routing hash table size; defaults to BRIDGE_RTHASH_SIZE_MAX. */
static unsigned int bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX;
SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_rtable_hash_size_max, 0,
    "Maximum size of the routing hash table");

#if BRIDGE_DELAYED_CALLBACK_DEBUG
/* Debug-only knob: delay applied before a delayed call is invoked. */
static int bridge_delayed_callback_delay = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &bridge_delayed_callback_delay, 0,
    "Delay before calling delayed function");
#endif

/* Read-only export of the bridge_hostfilter_stats structure. */
SYSCTL_STRUCT(_net_link_bridge, OID_AUTO,
    hostfilterstats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &bridge_hostfilter_stats, bridge_hostfilter_stats, "");

#if BRIDGESTP
static int log_stp   = 0;   /* log STP state changes */
/*
 * NOTE(review): unlike every other OID in this subtree, this one is
 * registered without CTLFLAG_LOCKED -- confirm whether that is deliberate.
 */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW,
    &log_stp, 0, "Log STP state changes");
#endif /* BRIDGESTP */
818 
/*
 * One entry of a bridge control dispatch table: the handler to run, the
 * size of the argument structure it works on, and BC_F_* flags describing
 * how that argument is transferred and whether a super-user check applies.
 */
struct bridge_control {
	int             (*bc_func)(struct bridge_softc *, void *);     /* request handler */
	unsigned int    bc_argsize;     /* sizeof() the handler's argument structure */
	unsigned int    bc_flags;       /* BC_F_* bits */
};
824 
/* PF tag names; the three specialised names share the VMNET_TAG prefix. */
#define VMNET_TAG               "com.apple.vmnet"
#define VMNET_LOCAL_TAG         VMNET_TAG ".local"
#define VMNET_BROADCAST_TAG     VMNET_TAG ".broadcast"
#define VMNET_MULTICAST_TAG     VMNET_TAG ".multicast"

/*
 * Cached tag ids for the names above.  Zero means "not allocated yet";
 * allocate_vmnet_pf_tags() fills in any that are still zero.
 */
static u_int16_t vmnet_tag;
static u_int16_t vmnet_local_tag;
static u_int16_t vmnet_broadcast_tag;
static u_int16_t vmnet_multicast_tag;
834 
835 static u_int16_t
allocate_pf_tag(char * name)836 allocate_pf_tag(char * name)
837 {
838 	u_int16_t       tag;
839 
840 	tag = pf_tagname2tag_ext(name);
841 	BRIDGE_LOG(LOG_NOTICE, 0, "%s %d", name, tag);
842 	return tag;
843 }
844 
845 static void
allocate_vmnet_pf_tags(void)846 allocate_vmnet_pf_tags(void)
847 {
848 	/* allocate tags to use with PF */
849 	if (vmnet_tag == 0) {
850 		vmnet_tag = allocate_pf_tag(VMNET_TAG);
851 	}
852 	if (vmnet_local_tag == 0) {
853 		vmnet_local_tag = allocate_pf_tag(VMNET_LOCAL_TAG);
854 	}
855 	if (vmnet_broadcast_tag == 0) {
856 		vmnet_broadcast_tag = allocate_pf_tag(VMNET_BROADCAST_TAG);
857 	}
858 	if (vmnet_multicast_tag == 0) {
859 		vmnet_multicast_tag = allocate_pf_tag(VMNET_MULTICAST_TAG);
860 	}
861 }
862 
/* Bits for struct bridge_control.bc_flags; they may be OR-ed together. */
#define BC_F_COPYIN             0x01    /* copy arguments in */
#define BC_F_COPYOUT            0x02    /* copy arguments out */
#define BC_F_SUSER              0x04    /* do super-user check */
866 
867 static const struct bridge_control bridge_control_table32[] = {
868 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),             /* 0 */
869 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
870 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
871 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
872 
873 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
874 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
875 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
876 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
877 
878 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
879 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
880 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
881 	  .bc_flags = BC_F_COPYOUT },
882 
883 	{ .bc_func = bridge_ioctl_gifs32, .bc_argsize = sizeof(struct ifbifconf32),
884 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
885 	{ .bc_func = bridge_ioctl_rts32, .bc_argsize = sizeof(struct ifbaconf32),
886 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
887 
888 	{ .bc_func = bridge_ioctl_saddr32, .bc_argsize = sizeof(struct ifbareq32),
889 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
890 
891 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
892 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
893 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
894 	  .bc_flags = BC_F_COPYOUT },
895 
896 	{ .bc_func = bridge_ioctl_daddr32, .bc_argsize = sizeof(struct ifbareq32),
897 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
898 
899 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
900 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
901 
902 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
903 	  .bc_flags = BC_F_COPYOUT },
904 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
905 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
906 
907 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
908 	  .bc_flags = BC_F_COPYOUT },
909 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
910 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
911 
912 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
913 	  .bc_flags = BC_F_COPYOUT },
914 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
915 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
916 
917 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
918 	  .bc_flags = BC_F_COPYOUT },
919 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
920 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
921 
922 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
923 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
924 
925 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
926 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
927 
928 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
929 	  .bc_flags = BC_F_COPYOUT },
930 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
931 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
932 
933 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
934 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
935 
936 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
937 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
938 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
939 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
940 
941 	{ .bc_func = bridge_ioctl_gbparam32, .bc_argsize = sizeof(struct ifbropreq32),
942 	  .bc_flags = BC_F_COPYOUT },
943 
944 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
945 	  .bc_flags = BC_F_COPYOUT },
946 
947 	{ .bc_func = bridge_ioctl_gifsstp32, .bc_argsize = sizeof(struct ifbpstpconf32),     /* 30 */
948 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
949 
950 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
951 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
952 
953 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
954 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
955 
956 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
957 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
958 
959 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
960 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
961 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
962 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
963 
964 	{ .bc_func = bridge_ioctl_gmnelist32,
965 	  .bc_argsize = sizeof(struct ifbrmnelist32),
966 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
967 	{ .bc_func = bridge_ioctl_gifstats32,
968 	  .bc_argsize = sizeof(struct ifbrmreq32),
969 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
970 };
971 
972 static const struct bridge_control bridge_control_table64[] = {
973 	{ .bc_func = bridge_ioctl_add, .bc_argsize = sizeof(struct ifbreq),           /* 0 */
974 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
975 	{ .bc_func = bridge_ioctl_del, .bc_argsize = sizeof(struct ifbreq),
976 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
977 
978 	{ .bc_func = bridge_ioctl_gifflags, .bc_argsize = sizeof(struct ifbreq),
979 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
980 	{ .bc_func = bridge_ioctl_sifflags, .bc_argsize = sizeof(struct ifbreq),
981 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
982 
983 	{ .bc_func = bridge_ioctl_scache, .bc_argsize = sizeof(struct ifbrparam),
984 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
985 	{ .bc_func = bridge_ioctl_gcache, .bc_argsize = sizeof(struct ifbrparam),
986 	  .bc_flags = BC_F_COPYOUT },
987 
988 	{ .bc_func = bridge_ioctl_gifs64, .bc_argsize = sizeof(struct ifbifconf64),
989 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
990 	{ .bc_func = bridge_ioctl_rts64, .bc_argsize = sizeof(struct ifbaconf64),
991 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
992 
993 	{ .bc_func = bridge_ioctl_saddr64, .bc_argsize = sizeof(struct ifbareq64),
994 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
995 
996 	{ .bc_func = bridge_ioctl_sto, .bc_argsize = sizeof(struct ifbrparam),
997 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
998 	{ .bc_func = bridge_ioctl_gto, .bc_argsize = sizeof(struct ifbrparam),           /* 10 */
999 	  .bc_flags = BC_F_COPYOUT },
1000 
1001 	{ .bc_func = bridge_ioctl_daddr64, .bc_argsize = sizeof(struct ifbareq64),
1002 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1003 
1004 	{ .bc_func = bridge_ioctl_flush, .bc_argsize = sizeof(struct ifbreq),
1005 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1006 
1007 	{ .bc_func = bridge_ioctl_gpri, .bc_argsize = sizeof(struct ifbrparam),
1008 	  .bc_flags = BC_F_COPYOUT },
1009 	{ .bc_func = bridge_ioctl_spri, .bc_argsize = sizeof(struct ifbrparam),
1010 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1011 
1012 	{ .bc_func = bridge_ioctl_ght, .bc_argsize = sizeof(struct ifbrparam),
1013 	  .bc_flags = BC_F_COPYOUT },
1014 	{ .bc_func = bridge_ioctl_sht, .bc_argsize = sizeof(struct ifbrparam),
1015 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1016 
1017 	{ .bc_func = bridge_ioctl_gfd, .bc_argsize = sizeof(struct ifbrparam),
1018 	  .bc_flags = BC_F_COPYOUT },
1019 	{ .bc_func = bridge_ioctl_sfd, .bc_argsize = sizeof(struct ifbrparam),
1020 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1021 
1022 	{ .bc_func = bridge_ioctl_gma, .bc_argsize = sizeof(struct ifbrparam),
1023 	  .bc_flags = BC_F_COPYOUT },
1024 	{ .bc_func = bridge_ioctl_sma, .bc_argsize = sizeof(struct ifbrparam),           /* 20 */
1025 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1026 
1027 	{ .bc_func = bridge_ioctl_sifprio, .bc_argsize = sizeof(struct ifbreq),
1028 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1029 
1030 	{ .bc_func = bridge_ioctl_sifcost, .bc_argsize = sizeof(struct ifbreq),
1031 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1032 
1033 	{ .bc_func = bridge_ioctl_gfilt, .bc_argsize = sizeof(struct ifbrparam),
1034 	  .bc_flags = BC_F_COPYOUT },
1035 	{ .bc_func = bridge_ioctl_sfilt, .bc_argsize = sizeof(struct ifbrparam),
1036 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1037 
1038 	{ .bc_func = bridge_ioctl_purge, .bc_argsize = sizeof(struct ifbreq),
1039 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1040 
1041 	{ .bc_func = bridge_ioctl_addspan, .bc_argsize = sizeof(struct ifbreq),
1042 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1043 	{ .bc_func = bridge_ioctl_delspan, .bc_argsize = sizeof(struct ifbreq),
1044 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1045 
1046 	{ .bc_func = bridge_ioctl_gbparam64, .bc_argsize = sizeof(struct ifbropreq64),
1047 	  .bc_flags = BC_F_COPYOUT },
1048 
1049 	{ .bc_func = bridge_ioctl_grte, .bc_argsize = sizeof(struct ifbrparam),
1050 	  .bc_flags = BC_F_COPYOUT },
1051 
1052 	{ .bc_func = bridge_ioctl_gifsstp64, .bc_argsize = sizeof(struct ifbpstpconf64),     /* 30 */
1053 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1054 
1055 	{ .bc_func = bridge_ioctl_sproto, .bc_argsize = sizeof(struct ifbrparam),
1056 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1057 
1058 	{ .bc_func = bridge_ioctl_stxhc, .bc_argsize = sizeof(struct ifbrparam),
1059 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1060 
1061 	{ .bc_func = bridge_ioctl_sifmaxaddr, .bc_argsize = sizeof(struct ifbreq),
1062 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1063 
1064 	{ .bc_func = bridge_ioctl_ghostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1065 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1066 	{ .bc_func = bridge_ioctl_shostfilter, .bc_argsize = sizeof(struct ifbrhostfilter),
1067 	  .bc_flags = BC_F_COPYIN | BC_F_SUSER },
1068 
1069 	{ .bc_func = bridge_ioctl_gmnelist64,
1070 	  .bc_argsize = sizeof(struct ifbrmnelist64),
1071 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1072 	{ .bc_func = bridge_ioctl_gifstats64,
1073 	  .bc_argsize = sizeof(struct ifbrmreq64),
1074 	  .bc_flags = BC_F_COPYIN | BC_F_COPYOUT },
1075 };
1076 
1077 static const unsigned int bridge_control_table_size =
1078     sizeof(bridge_control_table32) / sizeof(bridge_control_table32[0]);
1079 
/*
 * List of bridge instances.  bridge_clone_create() inserts into it and
 * walks it (under bridge_list_mtx) to detect MAC address collisions.
 */
static LIST_HEAD(, bridge_softc) bridge_list =
    LIST_HEAD_INITIALIZER(bridge_list);

#define BRIDGENAME      "bridge"        /* interface name prefix for the cloner */
#define BRIDGES_MAX     IF_MAXUNIT      /* highest unit passed to the cloner */
#define BRIDGE_ZONE_MAX_ELEM    MIN(IFNETS_MAX, BRIDGES_MAX)

/*
 * Interface cloner for bridges: wires the create/destroy entry points and
 * sizes the softc allocation to struct bridge_softc.
 */
static struct if_clone bridge_cloner =
    IF_CLONE_INITIALIZER(BRIDGENAME, bridge_clone_create, bridge_clone_destroy,
    0, BRIDGES_MAX, BRIDGE_ZONE_MAX_ELEM, sizeof(struct bridge_softc));
1090 
/*
 * net.link.bridge.txstart: sampled by bridge_clone_create().  When non-zero
 * the new interface is given bridge_start as its start callback; otherwise
 * it is created with IFNET_INIT_LEGACY and bridge_output as its output
 * callback.
 */
static int if_bridge_txstart = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_txstart, 0, "Bridge interface uses TXSTART model");

/* net.link.bridge.debug: exports if_bridge_debug (declared elsewhere). */
SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_debug, 0, "Bridge debug flags");

/* net.link.bridge.log_level: exports if_bridge_log_level (declared elsewhere). */
SYSCTL_INT(_net_link_bridge, OID_AUTO, log_level,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_log_level, 0, "Bridge log level");

/* net.link.bridge.segmentation: enabled (1) by default. */
static int if_bridge_segmentation = 1;
SYSCTL_INT(_net_link_bridge, OID_AUTO, segmentation,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_segmentation, 0, "Bridge interface enable segmentation");
1106 
1107 static int if_bridge_vmnet_pf_tagging = 1;
1108 SYSCTL_INT(_net_link_bridge, OID_AUTO, vmnet_pf_tagging,
1109     CTLFLAG_RW | CTLFLAG_LOCKED,
1110     &if_bridge_segmentation, 0, "Bridge interface enable vmnet PF tagging");
1111 
#if DEBUG || DEVELOPMENT
/*
 * Error injection for DEBUG/DEVELOPMENT kernels.  Bits set through
 * net.link.bridge.force_errors are tested with bridge_error_is_forced().
 */
#define BRIDGE_FORCE_ONE        0x00000001
#define BRIDGE_FORCE_TWO        0x00000002

static u_int32_t if_bridge_force_errors = 0;
SYSCTL_INT(_net_link_bridge, OID_AUTO, force_errors,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &if_bridge_force_errors, 0, "Bridge interface force errors");

/* Returns true when any bit of `flags' is set in if_bridge_force_errors. */
static inline bool
bridge_error_is_forced(u_int32_t flags)
{
	const u_int32_t requested = if_bridge_force_errors & flags;

	return requested != 0;
}

/*
 * Store the result of bridge_error_is_forced(__flags) in __is_forced and
 * log a notice when the error is being forced.
 */
#define BRIDGE_ERROR_GET_FORCED(__is_forced, __flags)                   \
	do {                                                            \
	        __is_forced = bridge_error_is_forced(__flags);          \
	        if (__is_forced) {                                      \
	                BRIDGE_LOG(LOG_NOTICE, 0, "0x%x forced", __flags); \
	        }                                                       \
	} while (0)
#endif /* DEBUG || DEVELOPMENT */
1133 
1134 
1135 static void brlog_ether_header(struct ether_header *);
1136 static void brlog_mbuf_data(mbuf_t, size_t, size_t);
1137 static void brlog_mbuf_pkthdr(mbuf_t, const char *, const char *);
1138 static void brlog_mbuf(mbuf_t, const char *, const char *);
1139 static void brlog_link(struct bridge_softc * sc);
1140 
1141 #if BRIDGE_LOCK_DEBUG
1142 static void bridge_lock(struct bridge_softc *);
1143 static void bridge_unlock(struct bridge_softc *);
1144 static int bridge_lock2ref(struct bridge_softc *);
1145 static void bridge_unref(struct bridge_softc *);
1146 static void bridge_xlock(struct bridge_softc *);
1147 static void bridge_xdrop(struct bridge_softc *);
1148 
1149 static void
bridge_lock(struct bridge_softc * sc)1150 bridge_lock(struct bridge_softc *sc)
1151 {
1152 	void *lr_saved = __builtin_return_address(0);
1153 
1154 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1155 
1156 	_BRIDGE_LOCK(sc);
1157 
1158 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1159 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1160 }
1161 
1162 static void
bridge_unlock(struct bridge_softc * sc)1163 bridge_unlock(struct bridge_softc *sc)
1164 {
1165 	void *lr_saved = __builtin_return_address(0);
1166 
1167 	BRIDGE_LOCK_ASSERT_HELD(sc);
1168 
1169 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1170 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1171 
1172 	_BRIDGE_UNLOCK(sc);
1173 }
1174 
1175 static int
bridge_lock2ref(struct bridge_softc * sc)1176 bridge_lock2ref(struct bridge_softc *sc)
1177 {
1178 	int error = 0;
1179 	void *lr_saved = __builtin_return_address(0);
1180 
1181 	BRIDGE_LOCK_ASSERT_HELD(sc);
1182 
1183 	if (sc->sc_iflist_xcnt > 0) {
1184 		error = EBUSY;
1185 	} else {
1186 		sc->sc_iflist_ref++;
1187 	}
1188 
1189 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1190 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1191 
1192 	_BRIDGE_UNLOCK(sc);
1193 
1194 	return error;
1195 }
1196 
1197 static void
bridge_unref(struct bridge_softc * sc)1198 bridge_unref(struct bridge_softc *sc)
1199 {
1200 	void *lr_saved = __builtin_return_address(0);
1201 
1202 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1203 
1204 	_BRIDGE_LOCK(sc);
1205 	sc->lock_lr[sc->next_lock_lr] = lr_saved;
1206 	sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
1207 
1208 	sc->sc_iflist_ref--;
1209 
1210 	sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
1211 	sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;
1212 	if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) {
1213 		_BRIDGE_UNLOCK(sc);
1214 		wakeup(&sc->sc_cv);
1215 	} else {
1216 		_BRIDGE_UNLOCK(sc);
1217 	}
1218 }
1219 
/*
 * bridge_xlock:
 *
 *	Request exclusive access to the member list.  Must be called with
 *	the bridge lock held.  Bumps sc_iflist_xcnt (which makes
 *	bridge_lock2ref() return EBUSY) and then sleeps on sc_cv until
 *	sc_iflist_ref has dropped to zero.  bridge_xdrop() undoes the bump.
 */
static void
bridge_xlock(struct bridge_softc *sc)
{
	void *lr_saved = __builtin_return_address(0);

	BRIDGE_LOCK_ASSERT_HELD(sc);

	sc->sc_iflist_xcnt++;
	while (sc->sc_iflist_ref > 0) {
		/*
		 * msleep() is handed sc_mtx; the unlock/lock ring entries
		 * recorded around it presumably mirror the mutex being
		 * released while asleep and re-taken on wakeup.
		 */
		sc->unlock_lr[sc->next_unlock_lr] = lr_saved;
		sc->next_unlock_lr = (sc->next_unlock_lr + 1) % SO_LCKDBG_MAX;

		msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL);

		sc->lock_lr[sc->next_lock_lr] = lr_saved;
		sc->next_lock_lr = (sc->next_lock_lr + 1) % SO_LCKDBG_MAX;
	}
}
1238 
/*
 * bridge_xdrop:
 *
 *	Withdraw an exclusive request made by bridge_xlock() by decrementing
 *	sc_iflist_xcnt.  Must be called with the bridge lock held; the lock
 *	stays held on return.
 */
static void
bridge_xdrop(struct bridge_softc *sc)
{
	BRIDGE_LOCK_ASSERT_HELD(sc);

	sc->sc_iflist_xcnt--;
}
1246 
1247 #endif /* BRIDGE_LOCK_DEBUG */
1248 
1249 static void
brlog_mbuf_pkthdr(mbuf_t m,const char * prefix,const char * suffix)1250 brlog_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix)
1251 {
1252 	if (m) {
1253 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1254 		    "%spktlen: %u rcvif: 0x%llx header: 0x%llx nextpkt: 0x%llx%s",
1255 		    prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m),
1256 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
1257 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)),
1258 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)),
1259 		    suffix ? suffix : "");
1260 	} else {
1261 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1262 	}
1263 }
1264 
1265 static void
brlog_mbuf(mbuf_t m,const char * prefix,const char * suffix)1266 brlog_mbuf(mbuf_t m, const char *prefix, const char *suffix)
1267 {
1268 	if (m) {
1269 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1270 		    "%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx "
1271 		    "maxlen: %u datastart: 0x%llx next: 0x%llx%s",
1272 		    prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m),
1273 		    mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m),
1274 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
1275 		    (unsigned int)mbuf_maxlen(m),
1276 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
1277 		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)),
1278 		    !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix);
1279 		if ((mbuf_flags(m) & MBUF_PKTHDR)) {
1280 			brlog_mbuf_pkthdr(m, "", suffix);
1281 		}
1282 	} else {
1283 		BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0, "%s<NULL>%s", prefix, suffix);
1284 	}
1285 }
1286 
1287 static void
brlog_mbuf_data(mbuf_t m,size_t offset,size_t len)1288 brlog_mbuf_data(mbuf_t m, size_t offset, size_t len)
1289 {
1290 	mbuf_t                  n;
1291 	size_t                  i, j;
1292 	size_t                  pktlen, mlen, maxlen;
1293 	unsigned char   *ptr;
1294 
1295 	pktlen = mbuf_pkthdr_len(m);
1296 
1297 	if (offset > pktlen) {
1298 		return;
1299 	}
1300 
1301 	maxlen = (pktlen - offset > len) ? len : pktlen - offset;
1302 	n = m;
1303 	mlen = mbuf_len(n);
1304 	ptr = mbuf_data(n);
1305 	for (i = 0, j = 0; i < maxlen; i++, j++) {
1306 		if (j >= mlen) {
1307 			n = mbuf_next(n);
1308 			if (n == 0) {
1309 				break;
1310 			}
1311 			ptr = mbuf_data(n);
1312 			mlen = mbuf_len(n);
1313 			j = 0;
1314 		}
1315 		if (i >= offset) {
1316 			BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1317 			    "%02x%s", ptr[j], i % 2 ? " " : "");
1318 		}
1319 	}
1320 }
1321 
1322 static void
brlog_ether_header(struct ether_header * eh)1323 brlog_ether_header(struct ether_header *eh)
1324 {
1325 	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
1326 	    "%02x:%02x:%02x:%02x:%02x:%02x > "
1327 	    "%02x:%02x:%02x:%02x:%02x:%02x 0x%04x ",
1328 	    eh->ether_shost[0], eh->ether_shost[1], eh->ether_shost[2],
1329 	    eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5],
1330 	    eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2],
1331 	    eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5],
1332 	    ntohs(eh->ether_type));
1333 }
1334 
/*
 * ether_ntop:
 *
 *	Format the six bytes at `ap' as a lower-case, colon-separated
 *	Ethernet address ("xx:xx:xx:xx:xx:xx") into `buf', writing at most
 *	`len' bytes including the terminator (the text is truncated if the
 *	buffer is too small).  Returns `buf' so the call can be used inline
 *	as a format argument.
 */
static char *
ether_ntop(char *buf, size_t len, const u_char *ap)
{
	(void)snprintf(buf, len,
	    "%02x:%02x:%02x:%02x:%02x:%02x",
	    ap[0], ap[1], ap[2],
	    ap[3], ap[4], ap[5]);
	return buf;
}
1343 
/*
 * brlog_link:
 *
 *	Debug helper: build a temporary sockaddr_dl on the stack from the
 *	bridge's interface name (sc_if_xname) and default MAC address
 *	(sc_defaddr), then log its fields together with the address rendered
 *	as colon-separated hex.
 */
static void
brlog_link(struct bridge_softc * sc)
{
	int i;
	/*
	 * NOTE(review): the element count is a byte count but the element
	 * type is uint32_t, so the buffer is several times larger than the
	 * bytes actually used -- presumably uint32_t was chosen for
	 * alignment; confirm.
	 */
	uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) +
	IFNAMSIZ + ETHER_ADDR_LEN];
	struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer;
	const u_char * lladdr;
	char lladdr_str[48];

	memset(sdl, 0, sizeof(sdl_buffer));
	sdl->sdl_family = AF_LINK;
	sdl->sdl_nlen = strlen(sc->sc_if_xname);
	sdl->sdl_alen = ETHER_ADDR_LEN;
	/* only the fixed part is counted; name and address bytes are not added */
	sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data);
	memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen);
	/* sdl_nlen is already set above, before LLADDR() is used */
	memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN);
	lladdr_str[0] = '\0';
	for (i = 0, lladdr = CONST_LLADDR(sdl);
	    i < sdl->sdl_alen;
	    i++, lladdr++) {
		/* ":" + up to two hex digits + NUL */
		char    byte_str[4];

		/* "%x" is not zero-padded, so bytes below 0x10 print as one digit */
		snprintf(byte_str, sizeof(byte_str), "%s%x", i ? ":" : "",
		    *lladdr);
		strlcat(lladdr_str, byte_str, sizeof(lladdr_str));
	}
	BRIDGE_LOG_SIMPLE(LOG_NOTICE, 0,
	    "%s sdl len %d index %d family %d type 0x%x nlen %d alen %d"
	    " slen %d addr %s", sc->sc_if_xname,
	    sdl->sdl_len, sdl->sdl_index,
	    sdl->sdl_family, sdl->sdl_type, sdl->sdl_nlen,
	    sdl->sdl_alen, sdl->sdl_slen, lladdr_str);
}
1378 
1379 
1380 /*
1381  * bridgeattach:
1382  *
1383  *	Pseudo-device attach routine.
1384  */
1385 __private_extern__ int
bridgeattach(int n)1386 bridgeattach(int n)
1387 {
1388 #pragma unused(n)
1389 	int error;
1390 
1391 	LIST_INIT(&bridge_list);
1392 
1393 #if BRIDGESTP
1394 	bstp_sys_init();
1395 #endif /* BRIDGESTP */
1396 
1397 	error = if_clone_attach(&bridge_cloner);
1398 	if (error != 0) {
1399 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_clone_attach failed %d", error);
1400 	}
1401 	return error;
1402 }
1403 
1404 
1405 static errno_t
bridge_ifnet_set_attrs(struct ifnet * ifp)1406 bridge_ifnet_set_attrs(struct ifnet * ifp)
1407 {
1408 	errno_t         error;
1409 
1410 	error = ifnet_set_mtu(ifp, ETHERMTU);
1411 	if (error != 0) {
1412 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_mtu failed %d", error);
1413 		goto done;
1414 	}
1415 	error = ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
1416 	if (error != 0) {
1417 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_addrlen failed %d", error);
1418 		goto done;
1419 	}
1420 	error = ifnet_set_hdrlen(ifp, ETHER_HDR_LEN);
1421 	if (error != 0) {
1422 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_hdrlen failed %d", error);
1423 		goto done;
1424 	}
1425 	error = ifnet_set_flags(ifp,
1426 	    IFF_BROADCAST | IFF_SIMPLEX | IFF_NOTRAILERS | IFF_MULTICAST,
1427 	    0xffff);
1428 
1429 	if (error != 0) {
1430 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1431 		goto done;
1432 	}
1433 done:
1434 	return error;
1435 }
1436 
1437 /*
1438  * bridge_clone_create:
1439  *
1440  *	Create a new bridge instance.
1441  */
1442 static int
bridge_clone_create(struct if_clone * ifc,uint32_t unit,void * params)1443 bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params)
1444 {
1445 #pragma unused(params)
1446 	struct ifnet *ifp = NULL;
1447 	struct bridge_softc *sc = NULL;
1448 	struct bridge_softc *sc2 = NULL;
1449 	struct ifnet_init_eparams init_params;
1450 	errno_t error = 0;
1451 	uint8_t eth_hostid[ETHER_ADDR_LEN];
1452 	int fb, retry, has_hostid;
1453 
1454 	sc =  if_clone_softc_allocate(&bridge_cloner);
1455 	if (sc == NULL) {
1456 		error = ENOMEM;
1457 		goto done;
1458 	}
1459 
1460 	lck_mtx_init(&sc->sc_mtx, &bridge_lock_grp, &bridge_lock_attr);
1461 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
1462 	sc->sc_mne_max = BRIDGE_MAC_NAT_ENTRY_MAX;
1463 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
1464 	sc->sc_filter_flags = 0;
1465 
1466 	TAILQ_INIT(&sc->sc_iflist);
1467 
1468 	/* use the interface name as the unique id for ifp recycle */
1469 	snprintf(sc->sc_if_xname, sizeof(sc->sc_if_xname), "%s%d",
1470 	    ifc->ifc_name, unit);
1471 	bzero(&init_params, sizeof(init_params));
1472 	init_params.ver                 = IFNET_INIT_CURRENT_VERSION;
1473 	init_params.len                 = sizeof(init_params);
1474 	/* Initialize our routing table. */
1475 	error = bridge_rtable_init(sc);
1476 	if (error != 0) {
1477 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_rtable_init failed %d", error);
1478 		goto done;
1479 	}
1480 	TAILQ_INIT(&sc->sc_spanlist);
1481 	if (if_bridge_txstart) {
1482 		init_params.start = bridge_start;
1483 	} else {
1484 		init_params.flags = IFNET_INIT_LEGACY;
1485 		init_params.output = bridge_output;
1486 	}
1487 	init_params.set_bpf_tap = bridge_set_bpf_tap;
1488 	init_params.uniqueid            = sc->sc_if_xname;
1489 	init_params.uniqueid_len        = strlen(sc->sc_if_xname);
1490 	init_params.sndq_maxlen         = IFQ_MAXLEN;
1491 	init_params.name                = ifc->ifc_name;
1492 	init_params.unit                = unit;
1493 	init_params.family              = IFNET_FAMILY_ETHERNET;
1494 	init_params.type                = IFT_BRIDGE;
1495 	init_params.demux               = ether_demux;
1496 	init_params.add_proto           = ether_add_proto;
1497 	init_params.del_proto           = ether_del_proto;
1498 	init_params.check_multi         = ether_check_multi;
1499 	init_params.framer_extended     = ether_frameout_extended;
1500 	init_params.softc               = sc;
1501 	init_params.ioctl               = bridge_ioctl;
1502 	init_params.detach              = bridge_detach;
1503 	init_params.broadcast_addr      = etherbroadcastaddr;
1504 	init_params.broadcast_len       = ETHER_ADDR_LEN;
1505 
1506 	error = ifnet_allocate_extended(&init_params, &ifp);
1507 	if (error != 0) {
1508 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_allocate failed %d", error);
1509 		goto done;
1510 	}
1511 	LIST_INIT(&sc->sc_mne_list);
1512 	LIST_INIT(&sc->sc_mne_list_v6);
1513 	sc->sc_ifp = ifp;
1514 	error = bridge_ifnet_set_attrs(ifp);
1515 	if (error != 0) {
1516 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_ifnet_set_attrs failed %d",
1517 		    error);
1518 		goto done;
1519 	}
1520 	/*
1521 	 * Generate an ethernet address with a locally administered address.
1522 	 *
1523 	 * Since we are using random ethernet addresses for the bridge, it is
1524 	 * possible that we might have address collisions, so make sure that
1525 	 * this hardware address isn't already in use on another bridge.
1526 	 * The first try uses the "hostid" and falls back to read_frandom();
1527 	 * for "hostid", we use the MAC address of the first-encountered
1528 	 * Ethernet-type interface that is currently configured.
1529 	 */
1530 	fb = 0;
1531 	has_hostid = (uuid_get_ethernet(&eth_hostid[0]) == 0);
1532 	for (retry = 1; retry != 0;) {
1533 		if (fb || has_hostid == 0) {
1534 			read_frandom(&sc->sc_defaddr, ETHER_ADDR_LEN);
1535 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1536 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1537 		} else {
1538 			bcopy(&eth_hostid[0], &sc->sc_defaddr,
1539 			    ETHER_ADDR_LEN);
1540 			sc->sc_defaddr[0] &= ~1; /* clear multicast bit */
1541 			sc->sc_defaddr[0] |= 2;  /* set the LAA bit */
1542 			sc->sc_defaddr[3] =     /* stir it up a bit */
1543 			    ((sc->sc_defaddr[3] & 0x0f) << 4) |
1544 			    ((sc->sc_defaddr[3] & 0xf0) >> 4);
1545 			/*
1546 			 * Mix in the LSB as it's actually pretty significant,
1547 			 * see rdar://14076061
1548 			 */
1549 			sc->sc_defaddr[4] =
1550 			    (((sc->sc_defaddr[4] & 0x0f) << 4) |
1551 			    ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^
1552 			    sc->sc_defaddr[5];
1553 			sc->sc_defaddr[5] = ifp->if_unit & 0xff;
1554 		}
1555 
1556 		fb = 1;
1557 		retry = 0;
1558 		lck_mtx_lock(&bridge_list_mtx);
1559 		LIST_FOREACH(sc2, &bridge_list, sc_list) {
1560 			if (_ether_cmp(sc->sc_defaddr,
1561 			    IF_LLADDR(sc2->sc_ifp)) == 0) {
1562 				retry = 1;
1563 			}
1564 		}
1565 		lck_mtx_unlock(&bridge_list_mtx);
1566 	}
1567 
1568 	sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
1569 
1570 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_LIFECYCLE)) {
1571 		brlog_link(sc);
1572 	}
1573 	error = ifnet_attach(ifp, NULL);
1574 	if (error != 0) {
1575 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_attach failed %d", error);
1576 		goto done;
1577 	}
1578 
1579 	error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN,
1580 	    IFT_ETHER);
1581 	if (error != 0) {
1582 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr_and_type failed %d",
1583 		    error);
1584 		goto done;
1585 	}
1586 
1587 	ifnet_set_offload(ifp,
1588 	    IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP |
1589 	    IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES);
1590 	error = bridge_set_tso(sc);
1591 	if (error != 0) {
1592 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
1593 		goto done;
1594 	}
1595 #if BRIDGESTP
1596 	bstp_attach(&sc->sc_stp, &bridge_ops);
1597 #endif /* BRIDGESTP */
1598 
1599 	lck_mtx_lock(&bridge_list_mtx);
1600 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
1601 	lck_mtx_unlock(&bridge_list_mtx);
1602 
1603 	/* attach as ethernet */
1604 	error = bpf_attach(ifp, DLT_EN10MB, sizeof(struct ether_header),
1605 	    NULL, NULL);
1606 
1607 done:
1608 	if (error != 0) {
1609 		BRIDGE_LOG(LOG_NOTICE, 0, "failed error %d", error);
1610 		/* TBD: Clean up: sc, sc_rthash etc */
1611 	}
1612 
1613 	return error;
1614 }
1615 
1616 /*
1617  * bridge_clone_destroy:
1618  *
1619  *	Destroy a bridge instance.
1620  */
1621 static int
bridge_clone_destroy(struct ifnet * ifp)1622 bridge_clone_destroy(struct ifnet *ifp)
1623 {
1624 	struct bridge_softc *sc = ifp->if_softc;
1625 	struct bridge_iflist *bif;
1626 	errno_t error;
1627 
1628 	BRIDGE_LOCK(sc);
1629 	if ((sc->sc_flags & SCF_DETACHING)) {
1630 		BRIDGE_UNLOCK(sc);
1631 		return 0;
1632 	}
1633 	sc->sc_flags |= SCF_DETACHING;
1634 
1635 	bridge_ifstop(ifp, 1);
1636 
1637 	bridge_cancel_delayed_call(&sc->sc_resize_call);
1638 
1639 	bridge_cleanup_delayed_call(&sc->sc_resize_call);
1640 	bridge_cleanup_delayed_call(&sc->sc_aging_timer);
1641 
1642 	error = ifnet_set_flags(ifp, 0, IFF_UP);
1643 	if (error != 0) {
1644 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_flags failed %d", error);
1645 	}
1646 
1647 	while ((bif = TAILQ_FIRST(&sc->sc_iflist)) != NULL) {
1648 		bridge_delete_member(sc, bif);
1649 	}
1650 
1651 	while ((bif = TAILQ_FIRST(&sc->sc_spanlist)) != NULL) {
1652 		bridge_delete_span(sc, bif);
1653 	}
1654 	BRIDGE_UNLOCK(sc);
1655 
1656 	error = ifnet_detach(ifp);
1657 	if (error != 0) {
1658 		panic("%s (%d): ifnet_detach(%p) failed %d",
1659 		    __func__, __LINE__, ifp, error);
1660 	}
1661 	return 0;
1662 }
1663 
/*
 * DRVSPEC:
 *
 *	Shared body of the SIOC[SG]DRVSPEC{32,64} ioctl cases.  It expands in
 *	a scope that provides `cmd', `error', `sc', `ifd', `bc', `args' and
 *	`bridge_control_table'.  The request is validated, its argument is
 *	copied in when BC_F_COPYIN is set, the handler runs under the bridge
 *	lock, and the argument is copied back out when BC_F_COPYOUT is set.
 *	The outcome is left in `error'.
 */
#define DRVSPEC do { \
	/* the command indexes the control table */ \
	if (ifd->ifd_cmd >= bridge_control_table_size) { \
		error = EINVAL; \
		break; \
	} \
	bc = &bridge_control_table[ifd->ifd_cmd]; \
 \
	/* "get" requires a copy-out command, "set" forbids one */ \
	if ((cmd == SIOCGDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) == 0) || \
	    (cmd == SIOCSDRVSPEC && \
	    (bc->bc_flags & BC_F_COPYOUT) != 0)) { \
		error = EINVAL; \
		break; \
	} \
 \
	if ((bc->bc_flags & BC_F_SUSER) != 0) { \
		error = kauth_authorize_generic(kauth_cred_get(), \
		    KAUTH_GENERIC_ISSUSER); \
		if (error != 0) { \
			break; \
		} \
	} \
 \
	/* the length must match the command and fit the local union */ \
	if (ifd->ifd_len != bc->bc_argsize || \
	    ifd->ifd_len > sizeof (args)) { \
		error = EINVAL; \
		break; \
	} \
 \
	bzero(&args, sizeof (args)); \
	if ((bc->bc_flags & BC_F_COPYIN) != 0) { \
		error = copyin(ifd->ifd_data, &args, ifd->ifd_len); \
		if (error != 0) { \
			break; \
		} \
	} \
 \
	BRIDGE_LOCK(sc); \
	error = (*bc->bc_func)(sc, &args); \
	BRIDGE_UNLOCK(sc); \
 \
	if (error == 0 && (bc->bc_flags & BC_F_COPYOUT) != 0) { \
		error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \
	} \
} while (0)
1710 
1711 static boolean_t
interface_needs_input_broadcast(struct ifnet * ifp)1712 interface_needs_input_broadcast(struct ifnet * ifp)
1713 {
1714 	/*
1715 	 * Selectively enable input broadcast only when necessary.
1716 	 * The bridge interface itself attaches a fake protocol
1717 	 * so checking for at least two protocols means that the
1718 	 * interface is being used for something besides bridging
1719 	 * and needs to see broadcast packets from other members.
1720 	 */
1721 	return if_get_protolist(ifp, NULL, 0) >= 2;
1722 }
1723 
1724 static boolean_t
bif_set_input_broadcast(struct bridge_iflist * bif,boolean_t input_broadcast)1725 bif_set_input_broadcast(struct bridge_iflist * bif, boolean_t input_broadcast)
1726 {
1727 	boolean_t       old_input_broadcast;
1728 
1729 	old_input_broadcast = (bif->bif_flags & BIFF_INPUT_BROADCAST) != 0;
1730 	if (input_broadcast) {
1731 		bif->bif_flags |= BIFF_INPUT_BROADCAST;
1732 	} else {
1733 		bif->bif_flags &= ~BIFF_INPUT_BROADCAST;
1734 	}
1735 	return old_input_broadcast != input_broadcast;
1736 }
1737 
1738 /*
1739  * bridge_ioctl:
1740  *
1741  *	Handle a control request from the operator.
1742  */
1743 static errno_t
bridge_ioctl(struct ifnet * ifp,u_long cmd,void * data)1744 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1745 {
1746 	struct bridge_softc *sc = ifp->if_softc;
1747 	struct ifreq *ifr = (struct ifreq *)data;
1748 	struct bridge_iflist *bif;
1749 	int error = 0;
1750 
1751 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1752 
1753 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_IOCTL,
1754 	    "ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)",
1755 	    ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ',
1756 	    (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd),
1757 	    (char)IOCGROUP(cmd), cmd & 0xff);
1758 
1759 	switch (cmd) {
1760 	case SIOCSIFADDR:
1761 	case SIOCAIFADDR:
1762 		ifnet_set_flags(ifp, IFF_UP, IFF_UP);
1763 		break;
1764 
1765 	case SIOCGIFMEDIA32:
1766 	case SIOCGIFMEDIA64: {
1767 		struct ifmediareq *ifmr = (struct ifmediareq *)data;
1768 		user_addr_t user_addr;
1769 
1770 		user_addr = (cmd == SIOCGIFMEDIA64) ?
1771 		    ((struct ifmediareq64 *)ifmr)->ifmu_ulist :
1772 		    CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist);
1773 
1774 		ifmr->ifm_status = IFM_AVALID;
1775 		ifmr->ifm_mask = 0;
1776 		ifmr->ifm_count = 1;
1777 
1778 		BRIDGE_LOCK(sc);
1779 		if (!(sc->sc_flags & SCF_DETACHING) &&
1780 		    (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
1781 			ifmr->ifm_status |= IFM_ACTIVE;
1782 			ifmr->ifm_active = ifmr->ifm_current =
1783 			    IFM_ETHER | IFM_AUTO;
1784 		} else {
1785 			ifmr->ifm_active = ifmr->ifm_current = IFM_NONE;
1786 		}
1787 		BRIDGE_UNLOCK(sc);
1788 
1789 		if (user_addr != USER_ADDR_NULL) {
1790 			error = copyout(&ifmr->ifm_current, user_addr,
1791 			    sizeof(int));
1792 		}
1793 		break;
1794 	}
1795 
1796 	case SIOCADDMULTI:
1797 	case SIOCDELMULTI:
1798 		break;
1799 
1800 	case SIOCSDRVSPEC32:
1801 	case SIOCGDRVSPEC32: {
1802 		union {
1803 			struct ifbreq ifbreq;
1804 			struct ifbifconf32 ifbifconf;
1805 			struct ifbareq32 ifbareq;
1806 			struct ifbaconf32 ifbaconf;
1807 			struct ifbrparam ifbrparam;
1808 			struct ifbropreq32 ifbropreq;
1809 		} args;
1810 		struct ifdrv32 *ifd = (struct ifdrv32 *)data;
1811 		const struct bridge_control *bridge_control_table =
1812 		    bridge_control_table32, *bc;
1813 
1814 		DRVSPEC;
1815 
1816 		break;
1817 	}
1818 	case SIOCSDRVSPEC64:
1819 	case SIOCGDRVSPEC64: {
1820 		union {
1821 			struct ifbreq ifbreq;
1822 			struct ifbifconf64 ifbifconf;
1823 			struct ifbareq64 ifbareq;
1824 			struct ifbaconf64 ifbaconf;
1825 			struct ifbrparam ifbrparam;
1826 			struct ifbropreq64 ifbropreq;
1827 		} args;
1828 		struct ifdrv64 *ifd = (struct ifdrv64 *)data;
1829 		const struct bridge_control *bridge_control_table =
1830 		    bridge_control_table64, *bc;
1831 
1832 		DRVSPEC;
1833 
1834 		break;
1835 	}
1836 
1837 	case SIOCSIFFLAGS:
1838 		if (!(ifp->if_flags & IFF_UP) &&
1839 		    (ifp->if_flags & IFF_RUNNING)) {
1840 			/*
1841 			 * If interface is marked down and it is running,
1842 			 * then stop and disable it.
1843 			 */
1844 			BRIDGE_LOCK(sc);
1845 			bridge_ifstop(ifp, 1);
1846 			BRIDGE_UNLOCK(sc);
1847 		} else if ((ifp->if_flags & IFF_UP) &&
1848 		    !(ifp->if_flags & IFF_RUNNING)) {
1849 			/*
1850 			 * If interface is marked up and it is stopped, then
1851 			 * start it.
1852 			 */
1853 			BRIDGE_LOCK(sc);
1854 			error = bridge_init(ifp);
1855 			BRIDGE_UNLOCK(sc);
1856 		}
1857 		break;
1858 
1859 	case SIOCSIFLLADDR:
1860 		error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data,
1861 		    ifr->ifr_addr.sa_len);
1862 		if (error != 0) {
1863 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1864 			    "%s SIOCSIFLLADDR error %d", ifp->if_xname,
1865 			    error);
1866 		}
1867 		break;
1868 
1869 	case SIOCSIFMTU:
1870 		if (ifr->ifr_mtu < 576) {
1871 			error = EINVAL;
1872 			break;
1873 		}
1874 		BRIDGE_LOCK(sc);
1875 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
1876 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1877 			BRIDGE_UNLOCK(sc);
1878 			break;
1879 		}
1880 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1881 			if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) {
1882 				BRIDGE_LOG(LOG_NOTICE, 0,
1883 				    "%s invalid MTU: %u(%s) != %d",
1884 				    sc->sc_ifp->if_xname,
1885 				    bif->bif_ifp->if_mtu,
1886 				    bif->bif_ifp->if_xname, ifr->ifr_mtu);
1887 				error = EINVAL;
1888 				break;
1889 			}
1890 		}
1891 		if (!error) {
1892 			sc->sc_ifp->if_mtu = ifr->ifr_mtu;
1893 		}
1894 		BRIDGE_UNLOCK(sc);
1895 		break;
1896 
1897 	default:
1898 		error = ether_ioctl(ifp, cmd, data);
1899 		if (error != 0 && error != EOPNOTSUPP) {
1900 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_IOCTL,
1901 			    "ifp %s cmd 0x%08lx "
1902 			    "(%c%c [%lu] %c %lu) failed error: %d",
1903 			    ifp->if_xname, cmd,
1904 			    (cmd & IOC_IN) ? 'I' : ' ',
1905 			    (cmd & IOC_OUT) ? 'O' : ' ',
1906 			    IOCPARM_LEN(cmd), (char)IOCGROUP(cmd),
1907 			    cmd & 0xff, error);
1908 		}
1909 		break;
1910 	}
1911 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
1912 
1913 	return error;
1914 }
1915 
1916 #if HAS_IF_CAP
1917 /*
1918  * bridge_mutecaps:
1919  *
1920  *	Clear or restore unwanted capabilities on the member interface
1921  */
1922 static void
bridge_mutecaps(struct bridge_softc * sc)1923 bridge_mutecaps(struct bridge_softc *sc)
1924 {
1925 	struct bridge_iflist *bif;
1926 	int enabled, mask;
1927 
1928 	/* Initial bitmask of capabilities to test */
1929 	mask = BRIDGE_IFCAPS_MASK;
1930 
1931 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1932 		/* Every member must support it or its disabled */
1933 		mask &= bif->bif_savedcaps;
1934 	}
1935 
1936 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1937 		enabled = bif->bif_ifp->if_capenable;
1938 		enabled &= ~BRIDGE_IFCAPS_STRIP;
1939 		/* strip off mask bits and enable them again if allowed */
1940 		enabled &= ~BRIDGE_IFCAPS_MASK;
1941 		enabled |= mask;
1942 
1943 		bridge_set_ifcap(sc, bif, enabled);
1944 	}
1945 }
1946 
1947 static void
bridge_set_ifcap(struct bridge_softc * sc,struct bridge_iflist * bif,int set)1948 bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set)
1949 {
1950 	struct ifnet *ifp = bif->bif_ifp;
1951 	struct ifreq ifr;
1952 	int error;
1953 
1954 	bzero(&ifr, sizeof(ifr));
1955 	ifr.ifr_reqcap = set;
1956 
1957 	if (ifp->if_capenable != set) {
1958 		IFF_LOCKGIANT(ifp);
1959 		error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr);
1960 		IFF_UNLOCKGIANT(ifp);
1961 		if (error) {
1962 			BRIDGE_LOG(LOG_NOTICE, 0,
1963 			    "%s error setting interface capabilities on %s",
1964 			    sc->sc_ifp->if_xname, ifp->if_xname);
1965 		}
1966 	}
1967 }
1968 #endif /* HAS_IF_CAP */
1969 
1970 static errno_t
bridge_set_tso(struct bridge_softc * sc)1971 bridge_set_tso(struct bridge_softc *sc)
1972 {
1973 	struct bridge_iflist *bif;
1974 	u_int32_t tso_v4_mtu;
1975 	u_int32_t tso_v6_mtu;
1976 	ifnet_offload_t offload;
1977 	errno_t error = 0;
1978 
1979 	/* By default, support TSO */
1980 	offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6;
1981 	tso_v4_mtu = IP_MAXPACKET;
1982 	tso_v6_mtu = IP_MAXPACKET;
1983 
1984 	/* Use the lowest common denominator of the members */
1985 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
1986 		ifnet_t ifp = bif->bif_ifp;
1987 
1988 		if (ifp == NULL) {
1989 			continue;
1990 		}
1991 
1992 		if (offload & IFNET_TSO_IPV4) {
1993 			if (ifp->if_hwassist & IFNET_TSO_IPV4) {
1994 				if (tso_v4_mtu > ifp->if_tso_v4_mtu) {
1995 					tso_v4_mtu = ifp->if_tso_v4_mtu;
1996 				}
1997 			} else {
1998 				offload &= ~IFNET_TSO_IPV4;
1999 				tso_v4_mtu = 0;
2000 			}
2001 		}
2002 		if (offload & IFNET_TSO_IPV6) {
2003 			if (ifp->if_hwassist & IFNET_TSO_IPV6) {
2004 				if (tso_v6_mtu > ifp->if_tso_v6_mtu) {
2005 					tso_v6_mtu = ifp->if_tso_v6_mtu;
2006 				}
2007 			} else {
2008 				offload &= ~IFNET_TSO_IPV6;
2009 				tso_v6_mtu = 0;
2010 			}
2011 		}
2012 	}
2013 
2014 	if (offload != sc->sc_ifp->if_hwassist) {
2015 		error = ifnet_set_offload(sc->sc_ifp, offload);
2016 		if (error != 0) {
2017 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2018 			    "ifnet_set_offload(%s, 0x%x) failed %d",
2019 			    sc->sc_ifp->if_xname, offload, error);
2020 			goto done;
2021 		}
2022 		/*
2023 		 * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least
2024 		 * as large as the interface MTU
2025 		 */
2026 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) {
2027 			if (tso_v4_mtu < sc->sc_ifp->if_mtu) {
2028 				tso_v4_mtu = sc->sc_ifp->if_mtu;
2029 			}
2030 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET,
2031 			    tso_v4_mtu);
2032 			if (error != 0) {
2033 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2034 				    "ifnet_set_tso_mtu(%s, "
2035 				    "AF_INET, %u) failed %d",
2036 				    sc->sc_ifp->if_xname,
2037 				    tso_v4_mtu, error);
2038 				goto done;
2039 			}
2040 		}
2041 		if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) {
2042 			if (tso_v6_mtu < sc->sc_ifp->if_mtu) {
2043 				tso_v6_mtu = sc->sc_ifp->if_mtu;
2044 			}
2045 			error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6,
2046 			    tso_v6_mtu);
2047 			if (error != 0) {
2048 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_LIFECYCLE,
2049 				    "ifnet_set_tso_mtu(%s, "
2050 				    "AF_INET6, %u) failed %d",
2051 				    sc->sc_ifp->if_xname,
2052 				    tso_v6_mtu, error);
2053 				goto done;
2054 			}
2055 		}
2056 	}
2057 done:
2058 	return error;
2059 }
2060 
2061 /*
2062  * bridge_lookup_member:
2063  *
2064  *	Lookup a bridge member interface.
2065  */
2066 static struct bridge_iflist *
bridge_lookup_member(struct bridge_softc * sc,const char * name)2067 bridge_lookup_member(struct bridge_softc *sc, const char *name)
2068 {
2069 	struct bridge_iflist *bif;
2070 	struct ifnet *ifp;
2071 
2072 	BRIDGE_LOCK_ASSERT_HELD(sc);
2073 
2074 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2075 		ifp = bif->bif_ifp;
2076 		if (strcmp(ifp->if_xname, name) == 0) {
2077 			return bif;
2078 		}
2079 	}
2080 
2081 	return NULL;
2082 }
2083 
2084 /*
2085  * bridge_lookup_member_if:
2086  *
2087  *	Lookup a bridge member interface by ifnet*.
2088  */
2089 static struct bridge_iflist *
bridge_lookup_member_if(struct bridge_softc * sc,struct ifnet * member_ifp)2090 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
2091 {
2092 	struct bridge_iflist *bif;
2093 
2094 	BRIDGE_LOCK_ASSERT_HELD(sc);
2095 
2096 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
2097 		if (bif->bif_ifp == member_ifp) {
2098 			return bif;
2099 		}
2100 	}
2101 
2102 	return NULL;
2103 }
2104 
2105 static errno_t
bridge_iff_input(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data,char ** frame_ptr)2106 bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2107     mbuf_t *data, char **frame_ptr)
2108 {
2109 #pragma unused(protocol)
2110 	errno_t error = 0;
2111 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2112 	struct bridge_softc *sc = bif->bif_sc;
2113 	int included = 0;
2114 	size_t frmlen = 0;
2115 	mbuf_t m = *data;
2116 
2117 	if ((m->m_flags & M_PROTO1)) {
2118 		goto out;
2119 	}
2120 
2121 	if (*frame_ptr >= (char *)mbuf_datastart(m) &&
2122 	    *frame_ptr <= (char *)mbuf_data(m)) {
2123 		included = 1;
2124 		frmlen = (char *)mbuf_data(m) - *frame_ptr;
2125 	}
2126 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2127 	    "%s from %s m 0x%llx data 0x%llx frame 0x%llx %s "
2128 	    "frmlen %lu", sc->sc_ifp->if_xname,
2129 	    ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m),
2130 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)),
2131 	    (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr),
2132 	    included ? "inside" : "outside", frmlen);
2133 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF)) {
2134 		brlog_mbuf(m, "bridge_iff_input[", "");
2135 		brlog_ether_header((struct ether_header *)
2136 		    (void *)*frame_ptr);
2137 		brlog_mbuf_data(m, 0, 20);
2138 	}
2139 	if (included == 0) {
2140 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT, "frame_ptr outside mbuf");
2141 		goto out;
2142 	}
2143 
2144 	/* Move data pointer to start of frame to the link layer header */
2145 	(void) mbuf_setdata(m, (char *)mbuf_data(m) - frmlen,
2146 	    mbuf_len(m) + frmlen);
2147 	(void) mbuf_pkthdr_adjustlen(m, frmlen);
2148 
2149 	/* make sure we can access the ethernet header */
2150 	if (mbuf_pkthdr_len(m) < sizeof(struct ether_header)) {
2151 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2152 		    "short frame %lu < %lu",
2153 		    mbuf_pkthdr_len(m), sizeof(struct ether_header));
2154 		goto out;
2155 	}
2156 	if (mbuf_len(m) < sizeof(struct ether_header)) {
2157 		error = mbuf_pullup(data, sizeof(struct ether_header));
2158 		if (error != 0) {
2159 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
2160 			    "mbuf_pullup(%lu) failed %d",
2161 			    sizeof(struct ether_header),
2162 			    error);
2163 			error = EJUSTRETURN;
2164 			goto out;
2165 		}
2166 		if (m != *data) {
2167 			m = *data;
2168 			*frame_ptr = mbuf_data(m);
2169 		}
2170 	}
2171 
2172 	error = bridge_input(ifp, data);
2173 
2174 	/* Adjust packet back to original */
2175 	if (error == 0) {
2176 		/* bridge_input might have modified *data */
2177 		if (*data != m) {
2178 			m = *data;
2179 			*frame_ptr = mbuf_data(m);
2180 		}
2181 		(void) mbuf_setdata(m, (char *)mbuf_data(m) + frmlen,
2182 		    mbuf_len(m) - frmlen);
2183 		(void) mbuf_pkthdr_adjustlen(m, -frmlen);
2184 	}
2185 
2186 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MBUF) &&
2187 	    BRIDGE_DBGF_ENABLED(BR_DBGF_INPUT)) {
2188 		brlog_mbuf(m, "bridge_iff_input]", "");
2189 	}
2190 
2191 out:
2192 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2193 
2194 	return error;
2195 }
2196 
2197 static errno_t
bridge_iff_output(void * cookie,ifnet_t ifp,protocol_family_t protocol,mbuf_t * data)2198 bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2199     mbuf_t *data)
2200 {
2201 #pragma unused(protocol)
2202 	errno_t error = 0;
2203 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2204 	struct bridge_softc *sc = bif->bif_sc;
2205 	mbuf_t m = *data;
2206 
2207 	if ((m->m_flags & M_PROTO1)) {
2208 		goto out;
2209 	}
2210 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
2211 	    "%s from %s m 0x%llx data 0x%llx",
2212 	    sc->sc_ifp->if_xname, ifp->if_xname,
2213 	    (uint64_t)VM_KERNEL_ADDRPERM(m),
2214 	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
2215 
2216 	error = bridge_member_output(sc, ifp, data);
2217 	if (error != 0 && error != EJUSTRETURN) {
2218 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_OUTPUT,
2219 		    "bridge_member_output failed error %d",
2220 		    error);
2221 	}
2222 out:
2223 	BRIDGE_LOCK_ASSERT_NOTHELD(sc);
2224 
2225 	return error;
2226 }
2227 
2228 static void
bridge_iff_event(void * cookie,ifnet_t ifp,protocol_family_t protocol,const struct kev_msg * event_msg)2229 bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol,
2230     const struct kev_msg *event_msg)
2231 {
2232 #pragma unused(protocol)
2233 	struct bridge_iflist *bif = (struct bridge_iflist *)cookie;
2234 	struct bridge_softc *sc = bif->bif_sc;
2235 
2236 	if (event_msg->vendor_code == KEV_VENDOR_APPLE &&
2237 	    event_msg->kev_class == KEV_NETWORK_CLASS &&
2238 	    event_msg->kev_subclass == KEV_DL_SUBCLASS) {
2239 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2240 		    "%s event_code %u - %s",
2241 		    ifp->if_xname, event_msg->event_code,
2242 		    dlil_kev_dl_code_str(event_msg->event_code));
2243 
2244 		switch (event_msg->event_code) {
2245 		case KEV_DL_LINK_OFF:
2246 		case KEV_DL_LINK_ON: {
2247 			bridge_iflinkevent(ifp);
2248 #if BRIDGESTP
2249 			bstp_linkstate(ifp, event_msg->event_code);
2250 #endif /* BRIDGESTP */
2251 			break;
2252 		}
2253 		case KEV_DL_SIFFLAGS: {
2254 			if ((bif->bif_flags & BIFF_PROMISC) == 0 &&
2255 			    (ifp->if_flags & IFF_UP)) {
2256 				errno_t error;
2257 
2258 				error = ifnet_set_promiscuous(ifp, 1);
2259 				if (error != 0) {
2260 					BRIDGE_LOG(LOG_NOTICE, 0,
2261 					    "ifnet_set_promiscuous (%s)"
2262 					    " failed %d", ifp->if_xname,
2263 					    error);
2264 				} else {
2265 					bif->bif_flags |= BIFF_PROMISC;
2266 				}
2267 			}
2268 			break;
2269 		}
2270 		case KEV_DL_IFCAP_CHANGED: {
2271 			BRIDGE_LOCK(sc);
2272 			bridge_set_tso(sc);
2273 			BRIDGE_UNLOCK(sc);
2274 			break;
2275 		}
2276 		case KEV_DL_PROTO_DETACHED:
2277 		case KEV_DL_PROTO_ATTACHED: {
2278 			bridge_proto_attach_changed(ifp);
2279 			break;
2280 		}
2281 		default:
2282 			break;
2283 		}
2284 	}
2285 }
2286 
2287 /*
2288  * bridge_iff_detached:
2289  *
2290  *      Called when our interface filter has been detached from a
2291  *      member interface.
2292  */
2293 static void
bridge_iff_detached(void * cookie,ifnet_t ifp)2294 bridge_iff_detached(void *cookie, ifnet_t ifp)
2295 {
2296 #pragma unused(cookie)
2297 	struct bridge_iflist *bif;
2298 	struct bridge_softc *sc = ifp->if_bridge;
2299 
2300 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2301 
2302 	/* Check if the interface is a bridge member */
2303 	if (sc != NULL) {
2304 		BRIDGE_LOCK(sc);
2305 		bif = bridge_lookup_member_if(sc, ifp);
2306 		if (bif != NULL) {
2307 			bridge_delete_member(sc, bif);
2308 		}
2309 		BRIDGE_UNLOCK(sc);
2310 		return;
2311 	}
2312 	/* Check if the interface is a span port */
2313 	lck_mtx_lock(&bridge_list_mtx);
2314 	LIST_FOREACH(sc, &bridge_list, sc_list) {
2315 		BRIDGE_LOCK(sc);
2316 		TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
2317 		if (ifp == bif->bif_ifp) {
2318 			bridge_delete_span(sc, bif);
2319 			break;
2320 		}
2321 		BRIDGE_UNLOCK(sc);
2322 	}
2323 	lck_mtx_unlock(&bridge_list_mtx);
2324 }
2325 
2326 static errno_t
bridge_proto_input(ifnet_t ifp,protocol_family_t protocol,mbuf_t packet,char * header)2327 bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet,
2328     char *header)
2329 {
2330 #pragma unused(protocol, packet, header)
2331 	BRIDGE_LOG(LOG_NOTICE, 0, "%s unexpected packet",
2332 	    ifp->if_xname);
2333 	return 0;
2334 }
2335 
2336 static int
bridge_attach_protocol(struct ifnet * ifp)2337 bridge_attach_protocol(struct ifnet *ifp)
2338 {
2339 	int     error;
2340 	struct ifnet_attach_proto_param reg;
2341 
2342 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2343 	bzero(&reg, sizeof(reg));
2344 	reg.input = bridge_proto_input;
2345 
2346 	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
2347 	if (error) {
2348 		BRIDGE_LOG(LOG_NOTICE, 0,
2349 		    "ifnet_attach_protocol(%s) failed, %d",
2350 		    ifp->if_xname, error);
2351 	}
2352 
2353 	return error;
2354 }
2355 
2356 static int
bridge_detach_protocol(struct ifnet * ifp)2357 bridge_detach_protocol(struct ifnet *ifp)
2358 {
2359 	int     error;
2360 
2361 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
2362 	error = ifnet_detach_protocol(ifp, PF_BRIDGE);
2363 	if (error) {
2364 		BRIDGE_LOG(LOG_NOTICE, 0,
2365 		    "ifnet_detach_protocol(%s) failed, %d",
2366 		    ifp->if_xname, error);
2367 	}
2368 
2369 	return error;
2370 }
2371 
2372 /*
2373  * bridge_delete_member:
2374  *
2375  *	Delete the specified member interface.
2376  */
2377 static void
bridge_delete_member(struct bridge_softc * sc,struct bridge_iflist * bif)2378 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
2379 {
2380 	uint32_t    bif_flags;
2381 	struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp;
2382 	int lladdr_changed = 0, error;
2383 	uint8_t eaddr[ETHER_ADDR_LEN];
2384 	u_int32_t event_code = 0;
2385 
2386 	BRIDGE_LOCK_ASSERT_HELD(sc);
2387 	VERIFY(ifs != NULL);
2388 
2389 	/*
2390 	 * Remove the member from the list first so it cannot be found anymore
2391 	 * when we release the bridge lock below
2392 	 */
2393 	if ((bif->bif_flags & BIFF_IN_MEMBER_LIST) != 0) {
2394 		BRIDGE_XLOCK(sc);
2395 		TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next);
2396 		BRIDGE_XDROP(sc);
2397 	}
2398 	if (sc->sc_mac_nat_bif != NULL) {
2399 		if (bif == sc->sc_mac_nat_bif) {
2400 			bridge_mac_nat_disable(sc);
2401 		} else {
2402 			bridge_mac_nat_flush_entries(sc, bif);
2403 		}
2404 	}
2405 #if BRIDGESTP
2406 	if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2407 		bstp_disable(&bif->bif_stp);
2408 	}
2409 #endif /* BRIDGESTP */
2410 
2411 	/*
2412 	 * If removing the interface that gave the bridge its mac address, set
2413 	 * the mac address of the bridge to the address of the next member, or
2414 	 * to its default address if no members are left.
2415 	 */
2416 	if (bridge_inherit_mac && sc->sc_ifaddr == ifs) {
2417 		ifnet_release(sc->sc_ifaddr);
2418 		if (TAILQ_EMPTY(&sc->sc_iflist)) {
2419 			bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN);
2420 			sc->sc_ifaddr = NULL;
2421 		} else {
2422 			struct ifnet *fif =
2423 			    TAILQ_FIRST(&sc->sc_iflist)->bif_ifp;
2424 			bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN);
2425 			sc->sc_ifaddr = fif;
2426 			ifnet_reference(fif);   /* for sc_ifaddr */
2427 		}
2428 		lladdr_changed = 1;
2429 	}
2430 
2431 #if HAS_IF_CAP
2432 	bridge_mutecaps(sc);    /* recalculate now this interface is removed */
2433 #endif /* HAS_IF_CAP */
2434 
2435 	error = bridge_set_tso(sc);
2436 	if (error != 0) {
2437 		BRIDGE_LOG(LOG_NOTICE, 0, "bridge_set_tso failed %d", error);
2438 	}
2439 
2440 	bridge_rtdelete(sc, ifs, IFBF_FLUSHALL);
2441 
2442 	KASSERT(bif->bif_addrcnt == 0,
2443 	    ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt));
2444 
2445 	/*
2446 	 * Update link status of the bridge based on its remaining members
2447 	 */
2448 	event_code = bridge_updatelinkstatus(sc);
2449 	bif_flags = bif->bif_flags;
2450 	BRIDGE_UNLOCK(sc);
2451 
2452 	/* only perform these steps if the interface is still attached */
2453 	if (ifnet_is_attached(ifs, 1)) {
2454 #if SKYWALK
2455 		if ((bif_flags & BIFF_NETAGENT_REMOVED) != 0) {
2456 			ifnet_add_netagent(ifs);
2457 		}
2458 		if ((bif_flags & BIFF_FLOWSWITCH_ATTACHED) != 0) {
2459 			ifnet_detach_flowswitch_nexus(ifs);
2460 		}
2461 #endif /* SKYWALK */
2462 		/* disable promiscuous mode */
2463 		if ((bif_flags & BIFF_PROMISC) != 0) {
2464 			(void) ifnet_set_promiscuous(ifs, 0);
2465 		}
2466 #if HAS_IF_CAP
2467 		/* re-enable any interface capabilities */
2468 		bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
2469 #endif
2470 		/* detach bridge "protocol" */
2471 		if ((bif_flags & BIFF_PROTO_ATTACHED) != 0) {
2472 			(void)bridge_detach_protocol(ifs);
2473 		}
2474 		/* detach interface filter */
2475 		if ((bif_flags & BIFF_FILTER_ATTACHED) != 0) {
2476 			iflt_detach(bif->bif_iff_ref);
2477 		}
2478 		ifnet_decr_iorefcnt(ifs);
2479 	}
2480 
2481 	if (lladdr_changed &&
2482 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2483 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2484 	}
2485 
2486 	if (event_code != 0) {
2487 		bridge_link_event(bifp, event_code);
2488 	}
2489 
2490 #if BRIDGESTP
2491 	bstp_destroy(&bif->bif_stp);    /* prepare to free */
2492 #endif /* BRIDGESTP */
2493 
2494 	kfree_type(struct bridge_iflist, bif);
2495 	ifs->if_bridge = NULL;
2496 	ifnet_release(ifs);
2497 
2498 	BRIDGE_LOCK(sc);
2499 }
2500 
2501 /*
2502  * bridge_delete_span:
2503  *
2504  *	Delete the specified span interface.
2505  */
2506 static void
bridge_delete_span(struct bridge_softc * sc,struct bridge_iflist * bif)2507 bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif)
2508 {
2509 	BRIDGE_LOCK_ASSERT_HELD(sc);
2510 
2511 	KASSERT(bif->bif_ifp->if_bridge == NULL,
2512 	    ("%s: not a span interface", __func__));
2513 
2514 	ifnet_release(bif->bif_ifp);
2515 
2516 	TAILQ_REMOVE(&sc->sc_spanlist, bif, bif_next);
2517 	kfree_type(struct bridge_iflist, bif);
2518 }
2519 
2520 static int
bridge_ioctl_add(struct bridge_softc * sc,void * arg)2521 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
2522 {
2523 	struct ifbreq *req = arg;
2524 	struct bridge_iflist *bif = NULL;
2525 	struct ifnet *ifs, *bifp = sc->sc_ifp;
2526 	int error = 0, lladdr_changed = 0;
2527 	uint8_t eaddr[ETHER_ADDR_LEN];
2528 	struct iff_filter iff;
2529 	u_int32_t event_code = 0;
2530 	boolean_t mac_nat = FALSE;
2531 	boolean_t input_broadcast;
2532 
2533 	ifs = ifunit(req->ifbr_ifsname);
2534 	if (ifs == NULL) {
2535 		return ENOENT;
2536 	}
2537 	if (ifs->if_ioctl == NULL) {    /* must be supported */
2538 		return EINVAL;
2539 	}
2540 
2541 	if (IFNET_IS_INTCOPROC(ifs)) {
2542 		return EINVAL;
2543 	}
2544 
2545 	/* If it's in the span list, it can't be a member. */
2546 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
2547 		if (ifs == bif->bif_ifp) {
2548 			return EBUSY;
2549 		}
2550 	}
2551 
2552 	if (ifs->if_bridge == sc) {
2553 		return EEXIST;
2554 	}
2555 
2556 	if (ifs->if_bridge != NULL) {
2557 		return EBUSY;
2558 	}
2559 
2560 	switch (ifs->if_type) {
2561 	case IFT_ETHER:
2562 		if (strcmp(ifs->if_name, "en") == 0 &&
2563 		    ifs->if_subfamily == IFNET_SUBFAMILY_WIFI &&
2564 		    (ifs->if_eflags & IFEF_IPV4_ROUTER) == 0) {
2565 			/* XXX is there a better way to identify Wi-Fi STA? */
2566 			mac_nat = TRUE;
2567 		}
2568 		break;
2569 	case IFT_L2VLAN:
2570 	case IFT_IEEE8023ADLAG:
2571 		break;
2572 	case IFT_GIF:
2573 	/* currently not supported */
2574 	/* FALLTHRU */
2575 	default:
2576 		return EINVAL;
2577 	}
2578 
2579 	/* fail to add the interface if the MTU doesn't match */
2580 	if (!TAILQ_EMPTY(&sc->sc_iflist) && sc->sc_ifp->if_mtu != ifs->if_mtu) {
2581 		BRIDGE_LOG(LOG_NOTICE, 0, "%s invalid MTU for %s",
2582 		    sc->sc_ifp->if_xname,
2583 		    ifs->if_xname);
2584 		return EINVAL;
2585 	}
2586 
2587 	/* there's already an interface that's doing MAC NAT */
2588 	if (mac_nat && sc->sc_mac_nat_bif != NULL) {
2589 		return EBUSY;
2590 	}
2591 
2592 	/* prevent the interface from detaching while we add the member */
2593 	if (!ifnet_is_attached(ifs, 1)) {
2594 		return ENXIO;
2595 	}
2596 
2597 	/* allocate a new member */
2598 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
2599 	bif->bif_ifp = ifs;
2600 	ifnet_reference(ifs);
2601 	bif->bif_ifflags |= IFBIF_LEARNING | IFBIF_DISCOVER;
2602 #if HAS_IF_CAP
2603 	bif->bif_savedcaps = ifs->if_capenable;
2604 #endif /* HAS_IF_CAP */
2605 	bif->bif_sc = sc;
2606 	if (mac_nat) {
2607 		(void)bridge_mac_nat_enable(sc, bif);
2608 	}
2609 
2610 	if (IFNET_IS_VMNET(ifs)) {
2611 		allocate_vmnet_pf_tags();
2612 	}
2613 	/* Allow the first Ethernet member to define the MTU */
2614 	if (TAILQ_EMPTY(&sc->sc_iflist)) {
2615 		sc->sc_ifp->if_mtu = ifs->if_mtu;
2616 	}
2617 
2618 	/*
2619 	 * Assign the interface's MAC address to the bridge if it's the first
2620 	 * member and the MAC address of the bridge has not been changed from
2621 	 * the default (randomly) generated one.
2622 	 */
2623 	if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) &&
2624 	    _ether_cmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr) == 0) {
2625 		bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN);
2626 		sc->sc_ifaddr = ifs;
2627 		ifnet_reference(ifs);   /* for sc_ifaddr */
2628 		lladdr_changed = 1;
2629 	}
2630 
2631 	ifs->if_bridge = sc;
2632 #if BRIDGESTP
2633 	bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp);
2634 #endif /* BRIDGESTP */
2635 
2636 #if HAS_IF_CAP
2637 	/* Set interface capabilities to the intersection set of all members */
2638 	bridge_mutecaps(sc);
2639 #endif /* HAS_IF_CAP */
2640 
2641 
2642 	/*
2643 	 * Respect lock ordering with DLIL lock for the following operations
2644 	 */
2645 	BRIDGE_UNLOCK(sc);
2646 
2647 	/* enable promiscuous mode */
2648 	error = ifnet_set_promiscuous(ifs, 1);
2649 	switch (error) {
2650 	case 0:
2651 		bif->bif_flags |= BIFF_PROMISC;
2652 		break;
2653 	case ENETDOWN:
2654 	case EPWROFF:
2655 		BRIDGE_LOG(LOG_NOTICE, 0,
2656 		    "ifnet_set_promiscuous(%s) failed %d, ignoring",
2657 		    ifs->if_xname, error);
2658 		/* Ignore error when device is not up */
2659 		error = 0;
2660 		break;
2661 	default:
2662 		BRIDGE_LOG(LOG_NOTICE, 0,
2663 		    "ifnet_set_promiscuous(%s) failed %d",
2664 		    ifs->if_xname, error);
2665 		BRIDGE_LOCK(sc);
2666 		goto out;
2667 	}
2668 
2669 #if SKYWALK
2670 	/* ensure that the flowswitch is present for native interface */
2671 	if (SKYWALK_NATIVE(ifs)) {
2672 		if (ifnet_attach_flowswitch_nexus(ifs)) {
2673 			bif->bif_flags |= BIFF_FLOWSWITCH_ATTACHED;
2674 		}
2675 	}
2676 	/* remove the netagent on the flowswitch (rdar://75050182) */
2677 	if (ifnet_remove_netagent(ifs)) {
2678 		bif->bif_flags |= BIFF_NETAGENT_REMOVED;
2679 	}
2680 #endif /* SKYWALK */
2681 
2682 	/*
2683 	 * install an interface filter
2684 	 */
2685 	memset(&iff, 0, sizeof(struct iff_filter));
2686 	iff.iff_cookie = bif;
2687 	iff.iff_name = "com.apple.kernel.bsd.net.if_bridge";
2688 	iff.iff_input = bridge_iff_input;
2689 	iff.iff_output = bridge_iff_output;
2690 	iff.iff_event = bridge_iff_event;
2691 	iff.iff_detached = bridge_iff_detached;
2692 	error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref,
2693 	    DLIL_IFF_TSO | DLIL_IFF_INTERNAL);
2694 	if (error != 0) {
2695 		BRIDGE_LOG(LOG_NOTICE, 0, "iflt_attach failed %d", error);
2696 		BRIDGE_LOCK(sc);
2697 		goto out;
2698 	}
2699 	bif->bif_flags |= BIFF_FILTER_ATTACHED;
2700 
2701 	/*
2702 	 * install a dummy "bridge" protocol
2703 	 */
2704 	if ((error = bridge_attach_protocol(ifs)) != 0) {
2705 		if (error != 0) {
2706 			BRIDGE_LOG(LOG_NOTICE, 0,
2707 			    "bridge_attach_protocol failed %d", error);
2708 			BRIDGE_LOCK(sc);
2709 			goto out;
2710 		}
2711 	}
2712 	bif->bif_flags |= BIFF_PROTO_ATTACHED;
2713 
2714 	if (lladdr_changed &&
2715 	    (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) {
2716 		BRIDGE_LOG(LOG_NOTICE, 0, "ifnet_set_lladdr failed %d", error);
2717 	}
2718 
2719 	/*
2720 	 * No failures past this point. Add the member to the list.
2721 	 */
2722 	BRIDGE_LOCK(sc);
2723 	bif->bif_flags |= BIFF_IN_MEMBER_LIST;
2724 	BRIDGE_XLOCK(sc);
2725 	TAILQ_INSERT_TAIL(&sc->sc_iflist, bif, bif_next);
2726 	BRIDGE_XDROP(sc);
2727 
2728 	/* cache the member link status */
2729 	if (interface_media_active(ifs)) {
2730 		bif->bif_flags |= BIFF_MEDIA_ACTIVE;
2731 	} else {
2732 		bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
2733 	}
2734 
2735 	/* the new member may change the link status of the bridge interface */
2736 	event_code = bridge_updatelinkstatus(sc);
2737 
2738 	/* check whether we need input broadcast or not */
2739 	input_broadcast = interface_needs_input_broadcast(ifs);
2740 	bif_set_input_broadcast(bif, input_broadcast);
2741 	BRIDGE_UNLOCK(sc);
2742 
2743 	if (event_code != 0) {
2744 		bridge_link_event(bifp, event_code);
2745 	}
2746 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
2747 	    "%s input broadcast %s", ifs->if_xname,
2748 	    input_broadcast ? "ENABLED" : "DISABLED");
2749 
2750 	BRIDGE_LOCK(sc);
2751 	bridge_set_tso(sc);
2752 
2753 out:
2754 	/* allow the interface to detach */
2755 	ifnet_decr_iorefcnt(ifs);
2756 
2757 	if (error != 0) {
2758 		if (bif != NULL) {
2759 			bridge_delete_member(sc, bif);
2760 		}
2761 	} else if (IFNET_IS_VMNET(ifs)) {
2762 		INC_ATOMIC_INT64_LIM(net_api_stats.nas_vmnet_total);
2763 	}
2764 
2765 	return error;
2766 }
2767 
2768 static int
bridge_ioctl_del(struct bridge_softc * sc,void * arg)2769 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
2770 {
2771 	struct ifbreq *req = arg;
2772 	struct bridge_iflist *bif;
2773 
2774 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2775 	if (bif == NULL) {
2776 		return ENOENT;
2777 	}
2778 
2779 	bridge_delete_member(sc, bif);
2780 
2781 	return 0;
2782 }
2783 
/*
 * Purge request: nothing is done here; both arguments are ignored and
 * success is always reported.
 */
static int
bridge_ioctl_purge(struct bridge_softc *sc, void *arg)
{
#pragma unused(sc, arg)
	return 0;
}
2790 
2791 static int
bridge_ioctl_gifflags(struct bridge_softc * sc,void * arg)2792 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
2793 {
2794 	struct ifbreq *req = arg;
2795 	struct bridge_iflist *bif;
2796 
2797 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2798 	if (bif == NULL) {
2799 		return ENOENT;
2800 	}
2801 
2802 	struct bstp_port *bp;
2803 
2804 	bp = &bif->bif_stp;
2805 	req->ifbr_state = bp->bp_state;
2806 	req->ifbr_priority = bp->bp_priority;
2807 	req->ifbr_path_cost = bp->bp_path_cost;
2808 	req->ifbr_proto = bp->bp_protover;
2809 	req->ifbr_role = bp->bp_role;
2810 	req->ifbr_stpflags = bp->bp_flags;
2811 	req->ifbr_ifsflags = bif->bif_ifflags;
2812 
2813 	/* Copy STP state options as flags */
2814 	if (bp->bp_operedge) {
2815 		req->ifbr_ifsflags |= IFBIF_BSTP_EDGE;
2816 	}
2817 	if (bp->bp_flags & BSTP_PORT_AUTOEDGE) {
2818 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOEDGE;
2819 	}
2820 	if (bp->bp_ptp_link) {
2821 		req->ifbr_ifsflags |= IFBIF_BSTP_PTP;
2822 	}
2823 	if (bp->bp_flags & BSTP_PORT_AUTOPTP) {
2824 		req->ifbr_ifsflags |= IFBIF_BSTP_AUTOPTP;
2825 	}
2826 	if (bp->bp_flags & BSTP_PORT_ADMEDGE) {
2827 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMEDGE;
2828 	}
2829 	if (bp->bp_flags & BSTP_PORT_ADMCOST) {
2830 		req->ifbr_ifsflags |= IFBIF_BSTP_ADMCOST;
2831 	}
2832 
2833 	req->ifbr_portno = bif->bif_ifp->if_index & 0xfff;
2834 	req->ifbr_addrcnt = bif->bif_addrcnt;
2835 	req->ifbr_addrmax = bif->bif_addrmax;
2836 	req->ifbr_addrexceeded = bif->bif_addrexceeded;
2837 
2838 	return 0;
2839 }
2840 
2841 static int
bridge_ioctl_sifflags(struct bridge_softc * sc,void * arg)2842 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
2843 {
2844 	struct ifbreq *req = arg;
2845 	struct bridge_iflist *bif;
2846 #if BRIDGESTP
2847 	struct bstp_port *bp;
2848 	int error;
2849 #endif /* BRIDGESTP */
2850 
2851 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
2852 	if (bif == NULL) {
2853 		return ENOENT;
2854 	}
2855 
2856 	if (req->ifbr_ifsflags & IFBIF_SPAN) {
2857 		/* SPAN is readonly */
2858 		return EINVAL;
2859 	}
2860 #define _EXCLUSIVE_FLAGS        (IFBIF_CHECKSUM_OFFLOAD | IFBIF_MAC_NAT)
2861 	if ((req->ifbr_ifsflags & _EXCLUSIVE_FLAGS) == _EXCLUSIVE_FLAGS) {
2862 		/* can't specify both MAC-NAT and checksum offload */
2863 		return EINVAL;
2864 	}
2865 	if ((req->ifbr_ifsflags & IFBIF_MAC_NAT) != 0) {
2866 		errno_t error;
2867 
2868 		error = bridge_mac_nat_enable(sc, bif);
2869 		if (error != 0) {
2870 			return error;
2871 		}
2872 	} else if (sc->sc_mac_nat_bif == bif) {
2873 		bridge_mac_nat_disable(sc);
2874 	}
2875 
2876 
2877 #if BRIDGESTP
2878 	if (req->ifbr_ifsflags & IFBIF_STP) {
2879 		if ((bif->bif_ifflags & IFBIF_STP) == 0) {
2880 			error = bstp_enable(&bif->bif_stp);
2881 			if (error) {
2882 				return error;
2883 			}
2884 		}
2885 	} else {
2886 		if ((bif->bif_ifflags & IFBIF_STP) != 0) {
2887 			bstp_disable(&bif->bif_stp);
2888 		}
2889 	}
2890 
2891 	/* Pass on STP flags */
2892 	bp = &bif->bif_stp;
2893 	bstp_set_edge(bp, req->ifbr_ifsflags & IFBIF_BSTP_EDGE ? 1 : 0);
2894 	bstp_set_autoedge(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOEDGE ? 1 : 0);
2895 	bstp_set_ptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_PTP ? 1 : 0);
2896 	bstp_set_autoptp(bp, req->ifbr_ifsflags & IFBIF_BSTP_AUTOPTP ? 1 : 0);
2897 #else /* !BRIDGESTP */
2898 	if (req->ifbr_ifsflags & IFBIF_STP) {
2899 		return EOPNOTSUPP;
2900 	}
2901 #endif /* !BRIDGESTP */
2902 
2903 	/* Save the bits relating to the bridge */
2904 	bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK;
2905 
2906 
2907 	return 0;
2908 }
2909 
2910 static int
bridge_ioctl_scache(struct bridge_softc * sc,void * arg)2911 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
2912 {
2913 	struct ifbrparam *param = arg;
2914 
2915 	sc->sc_brtmax = param->ifbrp_csize;
2916 	bridge_rttrim(sc);
2917 	return 0;
2918 }
2919 
2920 static int
bridge_ioctl_gcache(struct bridge_softc * sc,void * arg)2921 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
2922 {
2923 	struct ifbrparam *param = arg;
2924 
2925 	param->ifbrp_csize = sc->sc_brtmax;
2926 
2927 	return 0;
2928 }
2929 
/*
 * BRIDGE_IOCTL_GIFS:
 *
 *	Shared body of bridge_ioctl_gifs32/64.  Expects "sc", "bifc" and
 *	"error" to be in scope in the expanding function.
 *
 *	Counts the member and span entries.  When the caller passed a zero
 *	ifbic_len, the required length is stored there and the expanding
 *	function returns 0.  Otherwise one ifbreq per entry is built in a
 *	temporary buffer (allocated and copied out with the bridge lock
 *	dropped) and ifbic_len is set to the number of bytes copied out.
 *	Sets "error" to ENOMEM if the temporary buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_GIFS do { \
	struct bridge_iflist *bif;                                      \
	struct ifbreq breq;                                             \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
                                                                        \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next)                    \
	        count++;                                                \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)                  \
	        count++;                                                \
                                                                        \
	buflen = sizeof (breq) * count;                                 \
	if (bifc->ifbic_len == 0) {                                     \
	        bifc->ifbic_len = buflen;                               \
	        return (0);                                             \
	}                                                               \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	/* never write through a failed allocation */                   \
	if (outbuf == NULL && buflen != 0) {                            \
	        error = ENOMEM;                                         \
	        break;                                                  \
	}                                                               \
                                                                        \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	/* the lists may have changed while unlocked; stay in bounds */ \
	len = min(bifc->ifbic_len, buflen);                             \
	bzero(&breq, sizeof (breq));                                    \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \
	            "%s", bif->bif_ifp->if_xname);                      \
	        /* Fill in the ifbreq structure */                      \
	        error = bridge_ioctl_gifflags(sc, &breq);               \
	        if (error)                                              \
	                break;                                          \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {                \
	        if (len < sizeof (breq))                                \
	                break;                                          \
                                                                        \
	        snprintf(breq.ifbr_ifsname,                             \
	                 sizeof (breq.ifbr_ifsname),                    \
	                 "%s", bif->bif_ifp->if_xname);                 \
	        breq.ifbr_ifsflags = bif->bif_ifflags;                  \
	        breq.ifbr_portno                                        \
	                = bif->bif_ifp->if_index & 0xfff;               \
	        memcpy(buf, &breq, sizeof (breq));                      \
	        count++;                                                \
	        buf += sizeof (breq);                                   \
	        len -= sizeof (breq);                                   \
	}                                                               \
                                                                        \
	BRIDGE_UNLOCK(sc);                                              \
	bifc->ifbic_len = sizeof (breq) * count;                        \
	error = copyout(outbuf, bifc->ifbic_req, bifc->ifbic_len);      \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
} while (0)
2992 
2993 static int
bridge_ioctl_gifs64(struct bridge_softc * sc,void * arg)2994 bridge_ioctl_gifs64(struct bridge_softc *sc, void *arg)
2995 {
2996 	struct ifbifconf64 *bifc = arg;
2997 	int error = 0;
2998 
2999 	BRIDGE_IOCTL_GIFS;
3000 
3001 	return error;
3002 }
3003 
3004 static int
bridge_ioctl_gifs32(struct bridge_softc * sc,void * arg)3005 bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg)
3006 {
3007 	struct ifbifconf32 *bifc = arg;
3008 	int error = 0;
3009 
3010 	BRIDGE_IOCTL_GIFS;
3011 
3012 	return error;
3013 }
3014 
/*
 * BRIDGE_IOCTL_RTS:
 *
 *	Shared body of bridge_ioctl_rts32/64.  Expects "sc", "bac", "bareq"
 *	and "error" to be in scope in the expanding function, and always
 *	returns from that function.
 *
 *	Builds one ifbareq per entry of sc_rtlist in a temporary buffer
 *	(allocated and copied out with the bridge lock dropped), limited by
 *	the caller-supplied ifbac_len, and stores the number of bytes
 *	produced back into ifbac_len.  Returns ENOMEM if the temporary
 *	buffer cannot be allocated.
 */
#define BRIDGE_IOCTL_RTS do {                                               \
	struct bridge_rtnode *brt;                                          \
	char *buf;                                                          \
	char *outbuf = NULL;                                                \
	unsigned int count, buflen, len;                                    \
	unsigned long now;                                                  \
                                                                            \
	if (bac->ifbac_len == 0)                                            \
	        return (0);                                                 \
                                                                            \
	bzero(&bareq, sizeof (bareq));                                      \
	count = 0;                                                          \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list)                         \
	        count++;                                                    \
	buflen = sizeof (bareq) * count;                                    \
                                                                            \
	BRIDGE_UNLOCK(sc);                                                  \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);            \
	BRIDGE_LOCK(sc);                                                    \
                                                                            \
	count = 0;                                                          \
	/* never write through a missing buffer */                          \
	if (outbuf == NULL) {                                               \
	        if (buflen != 0)                                            \
	                error = ENOMEM;                                     \
	        goto out;                                                   \
	}                                                                   \
	buf = outbuf;                                                       \
	/* the list may have changed while unlocked; stay in bounds */      \
	len = min(bac->ifbac_len, buflen);                                  \
	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {                       \
	        if (len < sizeof (bareq))                                   \
	                goto out;                                           \
	        snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname),   \
	                 "%s", brt->brt_ifp->if_xname);                     \
	        memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \
	        bareq.ifba_vlan = brt->brt_vlan;                            \
	        /* bareq is reused: reset so no stale expiry carries over */ \
	        bareq.ifba_expire = 0;                                      \
	        if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {   \
	                now = (unsigned long) net_uptime();                 \
	                if (now < brt->brt_expire)                          \
	                        bareq.ifba_expire =                         \
	                            brt->brt_expire - now;                  \
	        }                                                           \
	        bareq.ifba_flags = brt->brt_flags;                          \
                                                                            \
	        memcpy(buf, &bareq, sizeof (bareq));                        \
	        count++;                                                    \
	        buf += sizeof (bareq);                                      \
	        len -= sizeof (bareq);                                      \
	}                                                                   \
out:                                                                        \
	bac->ifbac_len = sizeof (bareq) * count;                            \
	if (outbuf != NULL) {                                               \
	        BRIDGE_UNLOCK(sc);                                          \
	        error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);    \
	        kfree_data(outbuf, buflen);                                 \
	        BRIDGE_LOCK(sc);                                            \
	}                                                                   \
	return (error);                                                     \
} while (0)
3069 
3070 static int
bridge_ioctl_rts64(struct bridge_softc * sc,void * arg)3071 bridge_ioctl_rts64(struct bridge_softc *sc, void *arg)
3072 {
3073 	struct ifbaconf64 *bac = arg;
3074 	struct ifbareq64 bareq;
3075 	int error = 0;
3076 
3077 	BRIDGE_IOCTL_RTS;
3078 	return error;
3079 }
3080 
3081 static int
bridge_ioctl_rts32(struct bridge_softc * sc,void * arg)3082 bridge_ioctl_rts32(struct bridge_softc *sc, void *arg)
3083 {
3084 	struct ifbaconf32 *bac = arg;
3085 	struct ifbareq32 bareq;
3086 	int error = 0;
3087 
3088 	BRIDGE_IOCTL_RTS;
3089 	return error;
3090 }
3091 
3092 static int
bridge_ioctl_saddr32(struct bridge_softc * sc,void * arg)3093 bridge_ioctl_saddr32(struct bridge_softc *sc, void *arg)
3094 {
3095 	struct ifbareq32 *req = arg;
3096 	struct bridge_iflist *bif;
3097 	int error;
3098 
3099 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3100 	if (bif == NULL) {
3101 		return ENOENT;
3102 	}
3103 
3104 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3105 	    req->ifba_flags);
3106 
3107 	return error;
3108 }
3109 
3110 static int
bridge_ioctl_saddr64(struct bridge_softc * sc,void * arg)3111 bridge_ioctl_saddr64(struct bridge_softc *sc, void *arg)
3112 {
3113 	struct ifbareq64 *req = arg;
3114 	struct bridge_iflist *bif;
3115 	int error;
3116 
3117 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
3118 	if (bif == NULL) {
3119 		return ENOENT;
3120 	}
3121 
3122 	error = bridge_rtupdate(sc, req->ifba_dst, req->ifba_vlan, bif, 1,
3123 	    req->ifba_flags);
3124 
3125 	return error;
3126 }
3127 
3128 static int
bridge_ioctl_sto(struct bridge_softc * sc,void * arg)3129 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
3130 {
3131 	struct ifbrparam *param = arg;
3132 
3133 	sc->sc_brttimeout = param->ifbrp_ctime;
3134 	return 0;
3135 }
3136 
3137 static int
bridge_ioctl_gto(struct bridge_softc * sc,void * arg)3138 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
3139 {
3140 	struct ifbrparam *param = arg;
3141 
3142 	param->ifbrp_ctime = sc->sc_brttimeout;
3143 	return 0;
3144 }
3145 
3146 static int
bridge_ioctl_daddr32(struct bridge_softc * sc,void * arg)3147 bridge_ioctl_daddr32(struct bridge_softc *sc, void *arg)
3148 {
3149 	struct ifbareq32 *req = arg;
3150 
3151 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3152 }
3153 
3154 static int
bridge_ioctl_daddr64(struct bridge_softc * sc,void * arg)3155 bridge_ioctl_daddr64(struct bridge_softc *sc, void *arg)
3156 {
3157 	struct ifbareq64 *req = arg;
3158 
3159 	return bridge_rtdaddr(sc, req->ifba_dst, req->ifba_vlan);
3160 }
3161 
3162 static int
bridge_ioctl_flush(struct bridge_softc * sc,void * arg)3163 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
3164 {
3165 	struct ifbreq *req = arg;
3166 
3167 	bridge_rtflush(sc, req->ifbr_ifsflags);
3168 	return 0;
3169 }
3170 
3171 static int
bridge_ioctl_gpri(struct bridge_softc * sc,void * arg)3172 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
3173 {
3174 	struct ifbrparam *param = arg;
3175 	struct bstp_state *bs = &sc->sc_stp;
3176 
3177 	param->ifbrp_prio = bs->bs_bridge_priority;
3178 	return 0;
3179 }
3180 
/*
 * Set the bridge priority via bstp_set_priority().
 * Without BRIDGESTP the request is rejected with EOPNOTSUPP.
 */
static int
bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	const struct ifbrparam *param = arg;
	struct bstp_state *bs = &sc->sc_stp;

	return bstp_set_priority(bs, param->ifbrp_prio);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3193 
3194 static int
bridge_ioctl_ght(struct bridge_softc * sc,void * arg)3195 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
3196 {
3197 	struct ifbrparam *param = arg;
3198 	struct bstp_state *bs = &sc->sc_stp;
3199 
3200 	param->ifbrp_hellotime = bs->bs_bridge_htime >> 8;
3201 	return 0;
3202 }
3203 
/*
 * Set the hello time via bstp_set_htime().
 * Without BRIDGESTP the request is rejected with EOPNOTSUPP.
 */
static int
bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	const struct ifbrparam *param = arg;
	struct bstp_state *bs = &sc->sc_stp;

	return bstp_set_htime(bs, param->ifbrp_hellotime);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3216 
3217 static int
bridge_ioctl_gfd(struct bridge_softc * sc,void * arg)3218 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
3219 {
3220 	struct ifbrparam *param;
3221 	struct bstp_state *bs;
3222 
3223 	param = arg;
3224 	bs = &sc->sc_stp;
3225 	param->ifbrp_fwddelay = bs->bs_bridge_fdelay >> 8;
3226 	return 0;
3227 }
3228 
/*
 * Set the forward delay via bstp_set_fdelay().
 * Without BRIDGESTP the request is rejected with EOPNOTSUPP.
 */
static int
bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	const struct ifbrparam *param = arg;
	struct bstp_state *bs = &sc->sc_stp;

	return bstp_set_fdelay(bs, param->ifbrp_fwddelay);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3241 
3242 static int
bridge_ioctl_gma(struct bridge_softc * sc,void * arg)3243 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
3244 {
3245 	struct ifbrparam *param;
3246 	struct bstp_state *bs;
3247 
3248 	param = arg;
3249 	bs = &sc->sc_stp;
3250 	param->ifbrp_maxage = bs->bs_bridge_max_age >> 8;
3251 	return 0;
3252 }
3253 
/*
 * Set the max age via bstp_set_maxage().
 * Without BRIDGESTP the request is rejected with EOPNOTSUPP.
 */
static int
bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	const struct ifbrparam *param = arg;
	struct bstp_state *bs = &sc->sc_stp;

	return bstp_set_maxage(bs, param->ifbrp_maxage);
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3266 
/*
 * Set the STP port priority of the member named in the ifbreq via
 * bstp_set_port_priority().  Returns ENOENT when the member is not found.
 * Without BRIDGESTP the request is rejected with EOPNOTSUPP.
 */
static int
bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbreq *req = arg;
	struct bridge_iflist *member;

	member = bridge_lookup_member(sc, req->ifbr_ifsname);
	if (member != NULL) {
		return bstp_set_port_priority(&member->bif_stp,
		           req->ifbr_priority);
	}

	return ENOENT;
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3285 
/*
 * Set the STP path cost of the member named in the ifbreq via
 * bstp_set_path_cost().  Returns ENOENT when the member is not found.
 * Without BRIDGESTP the request is rejected with EOPNOTSUPP.
 */
static int
bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
{
#if BRIDGESTP
	struct ifbreq *req = arg;
	struct bridge_iflist *member;

	member = bridge_lookup_member(sc, req->ifbr_ifsname);
	if (member != NULL) {
		return bstp_set_path_cost(&member->bif_stp,
		           req->ifbr_path_cost);
	}

	return ENOENT;
#else /* !BRIDGESTP */
#pragma unused(sc, arg)
	return EOPNOTSUPP;
#endif /* !BRIDGESTP */
}
3304 
3305 static int
bridge_ioctl_gfilt(struct bridge_softc * sc,void * arg)3306 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
3307 {
3308 	struct ifbrparam *param = arg;
3309 
3310 	param->ifbrp_filter = sc->sc_filter_flags;
3311 
3312 	return 0;
3313 }
3314 
3315 static int
bridge_ioctl_sfilt(struct bridge_softc * sc,void * arg)3316 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
3317 {
3318 	struct ifbrparam *param = arg;
3319 
3320 	if (param->ifbrp_filter & ~IFBF_FILT_MASK) {
3321 		return EINVAL;
3322 	}
3323 
3324 	if (param->ifbrp_filter & IFBF_FILT_USEIPF) {
3325 		return EINVAL;
3326 	}
3327 
3328 	sc->sc_filter_flags = param->ifbrp_filter;
3329 
3330 	return 0;
3331 }
3332 
3333 static int
bridge_ioctl_sifmaxaddr(struct bridge_softc * sc,void * arg)3334 bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
3335 {
3336 	struct ifbreq *req = arg;
3337 	struct bridge_iflist *bif;
3338 
3339 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
3340 	if (bif == NULL) {
3341 		return ENOENT;
3342 	}
3343 
3344 	bif->bif_addrmax = req->ifbr_addrmax;
3345 	return 0;
3346 }
3347 
3348 static int
bridge_ioctl_addspan(struct bridge_softc * sc,void * arg)3349 bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
3350 {
3351 	struct ifbreq *req = arg;
3352 	struct bridge_iflist *bif = NULL;
3353 	struct ifnet *ifs;
3354 
3355 	ifs = ifunit(req->ifbr_ifsname);
3356 	if (ifs == NULL) {
3357 		return ENOENT;
3358 	}
3359 
3360 	if (IFNET_IS_INTCOPROC(ifs)) {
3361 		return EINVAL;
3362 	}
3363 
3364 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3365 	if (ifs == bif->bif_ifp) {
3366 		return EBUSY;
3367 	}
3368 
3369 	if (ifs->if_bridge != NULL) {
3370 		return EBUSY;
3371 	}
3372 
3373 	switch (ifs->if_type) {
3374 	case IFT_ETHER:
3375 	case IFT_L2VLAN:
3376 	case IFT_IEEE8023ADLAG:
3377 		break;
3378 	case IFT_GIF:
3379 	/* currently not supported */
3380 	/* FALLTHRU */
3381 	default:
3382 		return EINVAL;
3383 	}
3384 
3385 	bif = kalloc_type(struct bridge_iflist, Z_WAITOK | Z_ZERO | Z_NOFAIL);
3386 
3387 	bif->bif_ifp = ifs;
3388 	bif->bif_ifflags = IFBIF_SPAN;
3389 
3390 	ifnet_reference(bif->bif_ifp);
3391 
3392 	TAILQ_INSERT_HEAD(&sc->sc_spanlist, bif, bif_next);
3393 
3394 	return 0;
3395 }
3396 
3397 static int
bridge_ioctl_delspan(struct bridge_softc * sc,void * arg)3398 bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
3399 {
3400 	struct ifbreq *req = arg;
3401 	struct bridge_iflist *bif;
3402 	struct ifnet *ifs;
3403 
3404 	ifs = ifunit(req->ifbr_ifsname);
3405 	if (ifs == NULL) {
3406 		return ENOENT;
3407 	}
3408 
3409 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next)
3410 	if (ifs == bif->bif_ifp) {
3411 		break;
3412 	}
3413 
3414 	if (bif == NULL) {
3415 		return ENOENT;
3416 	}
3417 
3418 	bridge_delete_span(sc, bif);
3419 
3420 	return 0;
3421 }
3422 
/*
 * BRIDGE_IOCTL_GBPARAM:
 *
 *	Shared body of bridge_ioctl_gbparam32/64.  Expects "sc" and "req"
 *	to be in scope and copies the bridge's STP state into the request.
 *	The three timer values are shifted right by 8 bits; the root port
 *	is reported as the root port's interface index, or 0 when there is
 *	no root port.
 */
#define BRIDGE_IOCTL_GBPARAM do {                                       \
	struct bstp_state *bs = &sc->sc_stp;                            \
	struct bstp_port *root_port = bs->bs_root_port;                 \
                                                                        \
	req->ifbop_maxage = bs->bs_bridge_max_age >> 8;                 \
	req->ifbop_hellotime = bs->bs_bridge_htime >> 8;                \
	req->ifbop_fwddelay = bs->bs_bridge_fdelay >> 8;                \
                                                                        \
	if (root_port != NULL)                                          \
	        req->ifbop_root_port = root_port->bp_ifp->if_index;     \
	else                                                            \
	        req->ifbop_root_port = 0;                               \
                                                                        \
	req->ifbop_holdcount = bs->bs_txholdcount;                      \
	req->ifbop_priority = bs->bs_bridge_priority;                   \
	req->ifbop_protocol = bs->bs_protover;                          \
	req->ifbop_root_path_cost = bs->bs_root_pv.pv_cost;             \
	req->ifbop_bridgeid = bs->bs_bridge_pv.pv_dbridge_id;           \
	req->ifbop_designated_root = bs->bs_root_pv.pv_root_id;         \
	req->ifbop_designated_bridge = bs->bs_root_pv.pv_dbridge_id;    \
	req->ifbop_last_tc_time.tv_sec = bs->bs_last_tc_time.tv_sec;    \
	req->ifbop_last_tc_time.tv_usec = bs->bs_last_tc_time.tv_usec;  \
} while (0)
3447 
3448 static int
bridge_ioctl_gbparam32(struct bridge_softc * sc,void * arg)3449 bridge_ioctl_gbparam32(struct bridge_softc *sc, void *arg)
3450 {
3451 	struct ifbropreq32 *req = arg;
3452 
3453 	BRIDGE_IOCTL_GBPARAM;
3454 	return 0;
3455 }
3456 
3457 static int
bridge_ioctl_gbparam64(struct bridge_softc * sc,void * arg)3458 bridge_ioctl_gbparam64(struct bridge_softc *sc, void *arg)
3459 {
3460 	struct ifbropreq64 *req = arg;
3461 
3462 	BRIDGE_IOCTL_GBPARAM;
3463 	return 0;
3464 }
3465 
3466 static int
bridge_ioctl_grte(struct bridge_softc * sc,void * arg)3467 bridge_ioctl_grte(struct bridge_softc *sc, void *arg)
3468 {
3469 	struct ifbrparam *param = arg;
3470 
3471 	param->ifbrp_cexceeded = sc->sc_brtexceeded;
3472 	return 0;
3473 }
3474 
/*
 * BRIDGE_IOCTL_GIFSSTP:
 *
 *	Common body of bridge_ioctl_gifsstp32() and bridge_ioctl_gifsstp64().
 *	Must be expanded where `sc' (struct bridge_softc *), `bifstp'
 *	(pointer to the request) and `error' (int) are in scope, and it
 *	returns from the enclosing function on every path.
 *
 *	Builds one `struct ifbpstpreq' per member that has IFBIF_STP set
 *	and copies the records out to bifstp->ifbpstp_req:
 *	 - if the caller passed ifbpstp_len == 0, only the required
 *	   buffer size is reported and 0 is returned;
 *	 - otherwise as many whole records as fit in
 *	   min(ifbpstp_len, required size) are copied out and
 *	   ifbpstp_len is set to the number of bytes produced.
 *
 *	The bridge lock is dropped around the allocation and around the
 *	copyout() and is re-taken before returning. Because the member list
 *	can change while the lock is dropped, the fill loop is bounded by
 *	the size computed before the allocation.
 *
 *	Returns ENOMEM if the temporary buffer cannot be allocated,
 *	otherwise the result of copyout().
 */
#define BRIDGE_IOCTL_GIFSSTP do {                                       \
	struct bridge_iflist *bif;                                      \
	struct bstp_port *bp;                                           \
	struct ifbpstpreq bpreq;                                        \
	char *buf, *outbuf;                                             \
	unsigned int count, buflen, len;                                \
	                                                                \
	count = 0;                                                      \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
		if ((bif->bif_ifflags & IFBIF_STP) != 0) {              \
			count++;                                        \
		}                                                       \
	}                                                               \
	                                                                \
	buflen = sizeof (bpreq) * count;                                \
	if (bifstp->ifbpstp_len == 0) {                                 \
		bifstp->ifbpstp_len = buflen;                           \
		return (0);                                             \
	}                                                               \
	                                                                \
	BRIDGE_UNLOCK(sc);                                              \
	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);        \
	BRIDGE_LOCK(sc);                                                \
	if (outbuf == NULL && buflen != 0) {                            \
		return (ENOMEM);                                        \
	}                                                               \
	                                                                \
	count = 0;                                                      \
	buf = outbuf;                                                   \
	len = min(bifstp->ifbpstp_len, buflen);                         \
	bzero(&bpreq, sizeof (bpreq));                                  \
	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {                  \
		if (len < sizeof (bpreq)) {                             \
			break;                                          \
		}                                                       \
	                                                                \
		if ((bif->bif_ifflags & IFBIF_STP) == 0) {              \
			continue;                                       \
		}                                                       \
	                                                                \
		bp = &bif->bif_stp;                                     \
		bpreq.ifbp_portno = bif->bif_ifp->if_index & 0xfff;     \
		bpreq.ifbp_fwd_trans = bp->bp_forward_transitions;      \
		bpreq.ifbp_design_cost = bp->bp_desg_pv.pv_cost;        \
		bpreq.ifbp_design_port = bp->bp_desg_pv.pv_port_id;     \
		bpreq.ifbp_design_bridge = bp->bp_desg_pv.pv_dbridge_id; \
		bpreq.ifbp_design_root = bp->bp_desg_pv.pv_root_id;     \
	                                                                \
		memcpy(buf, &bpreq, sizeof (bpreq));                    \
		count++;                                                \
		buf += sizeof (bpreq);                                  \
		len -= sizeof (bpreq);                                  \
	}                                                               \
	                                                                \
	BRIDGE_UNLOCK(sc);                                              \
	bifstp->ifbpstp_len = sizeof (bpreq) * count;                   \
	error = copyout(outbuf, bifstp->ifbpstp_req, bifstp->ifbpstp_len); \
	BRIDGE_LOCK(sc);                                                \
	kfree_data(outbuf, buflen);                                     \
	return (error);                                                 \
} while (0)
3530 
3531 static int
bridge_ioctl_gifsstp32(struct bridge_softc * sc,void * arg)3532 bridge_ioctl_gifsstp32(struct bridge_softc *sc, void *arg)
3533 {
3534 	struct ifbpstpconf32 *bifstp = arg;
3535 	int error = 0;
3536 
3537 	BRIDGE_IOCTL_GIFSSTP;
3538 	return error;
3539 }
3540 
3541 static int
bridge_ioctl_gifsstp64(struct bridge_softc * sc,void * arg)3542 bridge_ioctl_gifsstp64(struct bridge_softc *sc, void *arg)
3543 {
3544 	struct ifbpstpconf64 *bifstp = arg;
3545 	int error = 0;
3546 
3547 	BRIDGE_IOCTL_GIFSSTP;
3548 	return error;
3549 }
3550 
3551 static int
bridge_ioctl_sproto(struct bridge_softc * sc,void * arg)3552 bridge_ioctl_sproto(struct bridge_softc *sc, void *arg)
3553 {
3554 #if BRIDGESTP
3555 	struct ifbrparam *param = arg;
3556 
3557 	return bstp_set_protocol(&sc->sc_stp, param->ifbrp_proto);
3558 #else /* !BRIDGESTP */
3559 #pragma unused(sc, arg)
3560 	return EOPNOTSUPP;
3561 #endif /* !BRIDGESTP */
3562 }
3563 
3564 static int
bridge_ioctl_stxhc(struct bridge_softc * sc,void * arg)3565 bridge_ioctl_stxhc(struct bridge_softc *sc, void *arg)
3566 {
3567 #if BRIDGESTP
3568 	struct ifbrparam *param = arg;
3569 
3570 	return bstp_set_holdcount(&sc->sc_stp, param->ifbrp_txhc);
3571 #else /* !BRIDGESTP */
3572 #pragma unused(sc, arg)
3573 	return EOPNOTSUPP;
3574 #endif /* !BRIDGESTP */
3575 }
3576 
3577 
3578 static int
bridge_ioctl_ghostfilter(struct bridge_softc * sc,void * arg)3579 bridge_ioctl_ghostfilter(struct bridge_softc *sc, void *arg)
3580 {
3581 	struct ifbrhostfilter *req = arg;
3582 	struct bridge_iflist *bif;
3583 
3584 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3585 	if (bif == NULL) {
3586 		return ENOENT;
3587 	}
3588 
3589 	bzero(req, sizeof(struct ifbrhostfilter));
3590 	if (bif->bif_flags & BIFF_HOST_FILTER) {
3591 		req->ifbrhf_flags |= IFBRHF_ENABLED;
3592 		bcopy(bif->bif_hf_hwsrc, req->ifbrhf_hwsrca,
3593 		    ETHER_ADDR_LEN);
3594 		req->ifbrhf_ipsrc = bif->bif_hf_ipsrc.s_addr;
3595 	}
3596 	return 0;
3597 }
3598 
3599 static int
bridge_ioctl_shostfilter(struct bridge_softc * sc,void * arg)3600 bridge_ioctl_shostfilter(struct bridge_softc *sc, void *arg)
3601 {
3602 	struct ifbrhostfilter *req = arg;
3603 	struct bridge_iflist *bif;
3604 
3605 	bif = bridge_lookup_member(sc, req->ifbrhf_ifsname);
3606 	if (bif == NULL) {
3607 		return ENOENT;
3608 	}
3609 
3610 	if (req->ifbrhf_flags & IFBRHF_ENABLED) {
3611 		bif->bif_flags |= BIFF_HOST_FILTER;
3612 
3613 		if (req->ifbrhf_flags & IFBRHF_HWSRC) {
3614 			bcopy(req->ifbrhf_hwsrca, bif->bif_hf_hwsrc,
3615 			    ETHER_ADDR_LEN);
3616 			if (bcmp(req->ifbrhf_hwsrca, ethernulladdr,
3617 			    ETHER_ADDR_LEN) != 0) {
3618 				bif->bif_flags |= BIFF_HF_HWSRC;
3619 			} else {
3620 				bif->bif_flags &= ~BIFF_HF_HWSRC;
3621 			}
3622 		}
3623 		if (req->ifbrhf_flags & IFBRHF_IPSRC) {
3624 			bif->bif_hf_ipsrc.s_addr = req->ifbrhf_ipsrc;
3625 			if (bif->bif_hf_ipsrc.s_addr != INADDR_ANY) {
3626 				bif->bif_flags |= BIFF_HF_IPSRC;
3627 			} else {
3628 				bif->bif_flags &= ~BIFF_HF_IPSRC;
3629 			}
3630 		}
3631 	} else {
3632 		bif->bif_flags &= ~(BIFF_HOST_FILTER | BIFF_HF_HWSRC |
3633 		    BIFF_HF_IPSRC);
3634 		bzero(bif->bif_hf_hwsrc, ETHER_ADDR_LEN);
3635 		bif->bif_hf_ipsrc.s_addr = INADDR_ANY;
3636 	}
3637 
3638 	return 0;
3639 }
3640 
3641 static char *
bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,unsigned int * count_p,char * buf,unsigned int * len_p)3642 bridge_mac_nat_entry_out(struct mac_nat_entry_list * list,
3643     unsigned int * count_p, char *buf, unsigned int *len_p)
3644 {
3645 	unsigned int            count = *count_p;
3646 	struct ifbrmne          ifbmne;
3647 	unsigned int            len = *len_p;
3648 	struct mac_nat_entry    *mne;
3649 	unsigned long           now;
3650 
3651 	bzero(&ifbmne, sizeof(ifbmne));
3652 	LIST_FOREACH(mne, list, mne_list) {
3653 		if (len < sizeof(ifbmne)) {
3654 			break;
3655 		}
3656 		snprintf(ifbmne.ifbmne_ifname, sizeof(ifbmne.ifbmne_ifname),
3657 		    "%s", mne->mne_bif->bif_ifp->if_xname);
3658 		memcpy(ifbmne.ifbmne_mac, mne->mne_mac,
3659 		    sizeof(ifbmne.ifbmne_mac));
3660 		now = (unsigned long) net_uptime();
3661 		if (now < mne->mne_expire) {
3662 			ifbmne.ifbmne_expire = mne->mne_expire - now;
3663 		} else {
3664 			ifbmne.ifbmne_expire = 0;
3665 		}
3666 		if ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) {
3667 			ifbmne.ifbmne_af = AF_INET6;
3668 			ifbmne.ifbmne_ip6_addr = mne->mne_ip6;
3669 		} else {
3670 			ifbmne.ifbmne_af = AF_INET;
3671 			ifbmne.ifbmne_ip_addr = mne->mne_ip;
3672 		}
3673 		memcpy(buf, &ifbmne, sizeof(ifbmne));
3674 		count++;
3675 		buf += sizeof(ifbmne);
3676 		len -= sizeof(ifbmne);
3677 	}
3678 	*count_p = count;
3679 	*len_p = len;
3680 	return buf;
3681 }
3682 
3683 /*
3684  * bridge_ioctl_gmnelist()
3685  *   Perform the get mac_nat_entry list ioctl.
3686  *
3687  * Note:
3688  *   The struct ifbrmnelist32 and struct ifbrmnelist64 have the same
3689  *   field size/layout except for the last field ifbml_buf, the user-supplied
3690  *   buffer pointer. That is passed in separately via the 'user_addr'
3691  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3692  */
3693 static int
bridge_ioctl_gmnelist(struct bridge_softc * sc,struct ifbrmnelist32 * mnl,user_addr_t user_addr)3694 bridge_ioctl_gmnelist(struct bridge_softc *sc, struct ifbrmnelist32 *mnl,
3695     user_addr_t user_addr)
3696 {
3697 	unsigned int            count;
3698 	char                    *buf;
3699 	int                     error = 0;
3700 	char                    *outbuf = NULL;
3701 	struct mac_nat_entry    *mne;
3702 	unsigned int            buflen;
3703 	unsigned int            len;
3704 
3705 	mnl->ifbml_elsize = sizeof(struct ifbrmne);
3706 	count = 0;
3707 	LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
3708 		count++;
3709 	}
3710 	LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
3711 		count++;
3712 	}
3713 	buflen = sizeof(struct ifbrmne) * count;
3714 	if (buflen == 0 || mnl->ifbml_len == 0) {
3715 		mnl->ifbml_len = buflen;
3716 		return error;
3717 	}
3718 	BRIDGE_UNLOCK(sc);
3719 	outbuf = (char *)kalloc_data(buflen, Z_WAITOK | Z_ZERO);
3720 	BRIDGE_LOCK(sc);
3721 	count = 0;
3722 	buf = outbuf;
3723 	len = min(mnl->ifbml_len, buflen);
3724 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list, &count, buf, &len);
3725 	buf = bridge_mac_nat_entry_out(&sc->sc_mne_list_v6, &count, buf, &len);
3726 	mnl->ifbml_len = count * sizeof(struct ifbrmne);
3727 	BRIDGE_UNLOCK(sc);
3728 	error = copyout(outbuf, user_addr, mnl->ifbml_len);
3729 	kfree_data(outbuf, buflen);
3730 	BRIDGE_LOCK(sc);
3731 	return error;
3732 }
3733 
3734 static int
bridge_ioctl_gmnelist64(struct bridge_softc * sc,void * arg)3735 bridge_ioctl_gmnelist64(struct bridge_softc *sc, void *arg)
3736 {
3737 	struct ifbrmnelist64 *mnl = arg;
3738 
3739 	return bridge_ioctl_gmnelist(sc, arg, mnl->ifbml_buf);
3740 }
3741 
3742 static int
bridge_ioctl_gmnelist32(struct bridge_softc * sc,void * arg)3743 bridge_ioctl_gmnelist32(struct bridge_softc *sc, void *arg)
3744 {
3745 	struct ifbrmnelist32 *mnl = arg;
3746 
3747 	return bridge_ioctl_gmnelist(sc, arg,
3748 	           CAST_USER_ADDR_T(mnl->ifbml_buf));
3749 }
3750 
3751 /*
3752  * bridge_ioctl_gifstats()
3753  *   Return per-member stats.
3754  *
3755  * Note:
3756  *   The ifbrmreq32 and ifbrmreq64 structures have the same
3757  *   field size/layout except for the last field brmr_buf, the user-supplied
3758  *   buffer pointer. That is passed in separately via the 'user_addr'
3759  *   parameter from the respective 32-bit or 64-bit ioctl routine.
3760  */
3761 static int
bridge_ioctl_gifstats(struct bridge_softc * sc,struct ifbrmreq32 * mreq,user_addr_t user_addr)3762 bridge_ioctl_gifstats(struct bridge_softc *sc, struct ifbrmreq32 *mreq,
3763     user_addr_t user_addr)
3764 {
3765 	struct bridge_iflist    *bif;
3766 	int                     error = 0;
3767 	unsigned int            buflen;
3768 
3769 	bif = bridge_lookup_member(sc, mreq->brmr_ifname);
3770 	if (bif == NULL) {
3771 		error = ENOENT;
3772 		goto done;
3773 	}
3774 
3775 	buflen = mreq->brmr_elsize = sizeof(struct ifbrmstats);
3776 	if (buflen == 0 || mreq->brmr_len == 0) {
3777 		mreq->brmr_len = buflen;
3778 		goto done;
3779 	}
3780 	if (mreq->brmr_len != 0 && mreq->brmr_len < buflen) {
3781 		error = ENOBUFS;
3782 		goto done;
3783 	}
3784 	mreq->brmr_len = buflen;
3785 	error = copyout(&bif->bif_stats, user_addr, buflen);
3786 done:
3787 	return error;
3788 }
3789 
3790 static int
bridge_ioctl_gifstats32(struct bridge_softc * sc,void * arg)3791 bridge_ioctl_gifstats32(struct bridge_softc *sc, void *arg)
3792 {
3793 	struct ifbrmreq32 *mreq = arg;
3794 
3795 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3796 }
3797 
3798 static int
bridge_ioctl_gifstats64(struct bridge_softc * sc,void * arg)3799 bridge_ioctl_gifstats64(struct bridge_softc *sc, void *arg)
3800 {
3801 	struct ifbrmreq64 *mreq = arg;
3802 
3803 	return bridge_ioctl_gifstats(sc, arg, mreq->brmr_buf);
3804 }
3805 
3806 /*
3807  * bridge_proto_attach_changed
3808  *
3809  *	Called when protocol attachment on the interface changes.
3810  */
3811 static void
bridge_proto_attach_changed(struct ifnet * ifp)3812 bridge_proto_attach_changed(struct ifnet *ifp)
3813 {
3814 	boolean_t changed = FALSE;
3815 	struct bridge_iflist *bif;
3816 	boolean_t input_broadcast;
3817 	struct bridge_softc *sc = ifp->if_bridge;
3818 
3819 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3820 	if (sc == NULL) {
3821 		return;
3822 	}
3823 	input_broadcast = interface_needs_input_broadcast(ifp);
3824 	BRIDGE_LOCK(sc);
3825 	bif = bridge_lookup_member_if(sc, ifp);
3826 	if (bif != NULL) {
3827 		changed = bif_set_input_broadcast(bif, input_broadcast);
3828 	}
3829 	BRIDGE_UNLOCK(sc);
3830 	if (changed) {
3831 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
3832 		    "%s input broadcast %s", ifp->if_xname,
3833 		    input_broadcast ? "ENABLED" : "DISABLED");
3834 	}
3835 	return;
3836 }
3837 
3838 /*
3839  * interface_media_active:
3840  *
3841  *	Tells if an interface media is active.
3842  */
3843 static int
interface_media_active(struct ifnet * ifp)3844 interface_media_active(struct ifnet *ifp)
3845 {
3846 	struct ifmediareq   ifmr;
3847 	int status = 0;
3848 
3849 	bzero(&ifmr, sizeof(ifmr));
3850 	if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) {
3851 		if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) {
3852 			status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0;
3853 		}
3854 	}
3855 
3856 	return status;
3857 }
3858 
3859 /*
3860  * bridge_updatelinkstatus:
3861  *
3862  *      Update the media active status of the bridge based on the
3863  *	media active status of its member.
3864  *	If changed, return the corresponding onf/off link event.
3865  */
3866 static u_int32_t
bridge_updatelinkstatus(struct bridge_softc * sc)3867 bridge_updatelinkstatus(struct bridge_softc *sc)
3868 {
3869 	struct bridge_iflist *bif;
3870 	int active_member = 0;
3871 	u_int32_t event_code = 0;
3872 
3873 	BRIDGE_LOCK_ASSERT_HELD(sc);
3874 
3875 	/*
3876 	 * Find out if we have an active interface
3877 	 */
3878 	TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
3879 		if (bif->bif_flags & BIFF_MEDIA_ACTIVE) {
3880 			active_member = 1;
3881 			break;
3882 		}
3883 	}
3884 
3885 	if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3886 		sc->sc_flags |= SCF_MEDIA_ACTIVE;
3887 		event_code = KEV_DL_LINK_ON;
3888 	} else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) {
3889 		sc->sc_flags &= ~SCF_MEDIA_ACTIVE;
3890 		event_code = KEV_DL_LINK_OFF;
3891 	}
3892 
3893 	return event_code;
3894 }
3895 
3896 /*
3897  * bridge_iflinkevent:
3898  */
3899 static void
bridge_iflinkevent(struct ifnet * ifp)3900 bridge_iflinkevent(struct ifnet *ifp)
3901 {
3902 	struct bridge_softc *sc = ifp->if_bridge;
3903 	struct bridge_iflist *bif;
3904 	u_int32_t event_code = 0;
3905 	int media_active;
3906 
3907 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE, "%s", ifp->if_xname);
3908 
3909 	/* Check if the interface is a bridge member */
3910 	if (sc == NULL) {
3911 		return;
3912 	}
3913 
3914 	media_active = interface_media_active(ifp);
3915 	BRIDGE_LOCK(sc);
3916 	bif = bridge_lookup_member_if(sc, ifp);
3917 	if (bif != NULL) {
3918 		if (media_active) {
3919 			bif->bif_flags |= BIFF_MEDIA_ACTIVE;
3920 		} else {
3921 			bif->bif_flags &= ~BIFF_MEDIA_ACTIVE;
3922 		}
3923 		if (sc->sc_mac_nat_bif != NULL) {
3924 			bridge_mac_nat_flush_entries(sc, bif);
3925 		}
3926 
3927 		event_code = bridge_updatelinkstatus(sc);
3928 	}
3929 	BRIDGE_UNLOCK(sc);
3930 
3931 	if (event_code != 0) {
3932 		bridge_link_event(sc->sc_ifp, event_code);
3933 	}
3934 }
3935 
3936 /*
3937  * bridge_delayed_callback:
3938  *
3939  *	Makes a delayed call
3940  */
3941 static void
bridge_delayed_callback(void * param)3942 bridge_delayed_callback(void *param)
3943 {
3944 	struct bridge_delayed_call *call = (struct bridge_delayed_call *)param;
3945 	struct bridge_softc *sc = call->bdc_sc;
3946 
3947 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3948 	if (bridge_delayed_callback_delay > 0) {
3949 		struct timespec ts;
3950 
3951 		ts.tv_sec = bridge_delayed_callback_delay;
3952 		ts.tv_nsec = 0;
3953 
3954 		BRIDGE_LOG(LOG_NOTICE, 0,
3955 		    "sleeping for %d seconds",
3956 		    bridge_delayed_callback_delay);
3957 
3958 		msleep(&bridge_delayed_callback_delay, NULL, PZERO,
3959 		    __func__, &ts);
3960 
3961 		BRIDGE_LOG(LOG_NOTICE, 0, "awoken");
3962 	}
3963 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3964 
3965 	BRIDGE_LOCK(sc);
3966 
3967 #if BRIDGE_DELAYED_CALLBACK_DEBUG
3968 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
3969 	    "%s call 0x%llx flags 0x%x",
3970 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
3971 	    call->bdc_flags);
3972 }
3973 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
3974 
3975 	if (call->bdc_flags & BDCF_CANCELLING) {
3976 		wakeup(call);
3977 	} else {
3978 		if ((sc->sc_flags & SCF_DETACHING) == 0) {
3979 			(*call->bdc_func)(sc);
3980 		}
3981 	}
3982 	call->bdc_flags &= ~BDCF_OUTSTANDING;
3983 	BRIDGE_UNLOCK(sc);
3984 }
3985 
3986 /*
3987  * bridge_schedule_delayed_call:
3988  *
3989  *	Schedule a function to be called on a separate thread
3990  *      The actual call may be scheduled to run at a given time or ASAP.
3991  */
3992 static void
3993 bridge_schedule_delayed_call(struct bridge_delayed_call *call)
3994 {
3995 	uint64_t deadline = 0;
3996 	struct bridge_softc *sc = call->bdc_sc;
3997 
3998 	BRIDGE_LOCK_ASSERT_HELD(sc);
3999 
4000 	if ((sc->sc_flags & SCF_DETACHING) ||
4001 	    (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) {
4002 		return;
4003 	}
4004 
4005 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4006 		nanoseconds_to_absolutetime(
4007 			(uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC +
4008 			call->bdc_ts.tv_nsec, &deadline);
4009 		clock_absolutetime_interval_to_deadline(deadline, &deadline);
4010 	}
4011 
4012 	call->bdc_flags = BDCF_OUTSTANDING;
4013 
4014 #if BRIDGE_DELAYED_CALLBACK_DEBUG
4015 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4016 	    "%s call 0x%llx flags 0x%x",
4017 	    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4018 	    call->bdc_flags);
4019 }
4020 #endif /* BRIDGE_DELAYED_CALLBACK_DEBUG */
4021 
4022 	if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) {
4023 		thread_call_func_delayed(
4024 			(thread_call_func_t)bridge_delayed_callback,
4025 			call, deadline);
4026 	} else {
4027 		if (call->bdc_thread_call == NULL) {
4028 			call->bdc_thread_call = thread_call_allocate(
4029 				(thread_call_func_t)bridge_delayed_callback,
4030 				call);
4031 		}
4032 		thread_call_enter(call->bdc_thread_call);
4033 	}
4034 }
4035 
4036 /*
4037  * bridge_cancel_delayed_call:
4038  *
4039  *	Cancel a queued or running delayed call.
4040  *	If call is running, does not return until the call is done to
4041  *	prevent race condition with the brigde interface getting destroyed
4042  */
4043 static void
4044 bridge_cancel_delayed_call(struct bridge_delayed_call *call)
4045 {
4046 	boolean_t result;
4047 	struct bridge_softc *sc = call->bdc_sc;
4048 
4049 	/*
4050 	 * The call was never scheduled
4051 	 */
4052 	if (sc == NULL) {
4053 		return;
4054 	}
4055 
4056 	BRIDGE_LOCK_ASSERT_HELD(sc);
4057 
4058 	call->bdc_flags |= BDCF_CANCELLING;
4059 
4060 	while (call->bdc_flags & BDCF_OUTSTANDING) {
4061 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_DELAYED_CALL,
4062 		    "%s call 0x%llx flags 0x%x",
4063 		    sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call),
4064 		    call->bdc_flags);
4065 		result = thread_call_func_cancel(
4066 			(thread_call_func_t)bridge_delayed_callback, call, FALSE);
4067 
4068 		if (result) {
4069 			/*
4070 			 * We managed to dequeue the delayed call
4071 			 */
4072 			call->bdc_flags &= ~BDCF_OUTSTANDING;
4073 		} else {
4074 			/*
4075 			 * Wait for delayed call do be done running
4076 			 */
4077 			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
4078 		}
4079 	}
4080 	call->bdc_flags &= ~BDCF_CANCELLING;
4081 }
4082 
4083 /*
4084  * bridge_cleanup_delayed_call:
4085  *
4086  *	Dispose resource allocated for a delayed call
4087  *	Assume the delayed call is not queued or running .
4088  */
4089 static void
4090 bridge_cleanup_delayed_call(struct bridge_delayed_call *call)
4091 {
4092 	boolean_t result;
4093 	struct bridge_softc *sc = call->bdc_sc;
4094 
4095 	/*
4096 	 * The call was never scheduled
4097 	 */
4098 	if (sc == NULL) {
4099 		return;
4100 	}
4101 
4102 	BRIDGE_LOCK_ASSERT_HELD(sc);
4103 
4104 	VERIFY((call->bdc_flags & BDCF_OUTSTANDING) == 0);
4105 	VERIFY((call->bdc_flags & BDCF_CANCELLING) == 0);
4106 
4107 	if (call->bdc_thread_call != NULL) {
4108 		result = thread_call_free(call->bdc_thread_call);
4109 		if (result == FALSE) {
4110 			panic("%s thread_call_free() failed for call %p",
4111 			    __func__, call);
4112 		}
4113 		call->bdc_thread_call = NULL;
4114 	}
4115 }
4116 
4117 /*
4118  * bridge_init:
4119  *
4120  *	Initialize a bridge interface.
4121  */
4122 static int
4123 bridge_init(struct ifnet *ifp)
4124 {
4125 	struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc;
4126 	errno_t error;
4127 
4128 	BRIDGE_LOCK_ASSERT_HELD(sc);
4129 
4130 	if ((ifnet_flags(ifp) & IFF_RUNNING)) {
4131 		return 0;
4132 	}
4133 
4134 	error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING);
4135 
4136 	/*
4137 	 * Calling bridge_aging_timer() is OK as there are no entries to
4138 	 * age so we're just going to arm the timer
4139 	 */
4140 	bridge_aging_timer(sc);
4141 #if BRIDGESTP
4142 	if (error == 0) {
4143 		bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */
4144 	}
4145 #endif /* BRIDGESTP */
4146 	return error;
4147 }
4148 
4149 /*
4150  * bridge_ifstop:
4151  *
4152  *	Stop the bridge interface.
4153  */
4154 static void
4155 bridge_ifstop(struct ifnet *ifp, int disable)
4156 {
4157 #pragma unused(disable)
4158 	struct bridge_softc *sc = ifp->if_softc;
4159 
4160 	BRIDGE_LOCK_ASSERT_HELD(sc);
4161 
4162 	if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) {
4163 		return;
4164 	}
4165 
4166 	bridge_cancel_delayed_call(&sc->sc_aging_timer);
4167 
4168 #if BRIDGESTP
4169 	bstp_stop(&sc->sc_stp);
4170 #endif /* BRIDGESTP */
4171 
4172 	bridge_rtflush(sc, IFBF_FLUSHDYN);
4173 	(void) ifnet_set_flags(ifp, 0, IFF_RUNNING);
4174 }
4175 
4176 /*
4177  * bridge_compute_cksum:
4178  *
4179  *	If the packet has checksum flags, compare the hardware checksum
4180  *	capabilities of the source and destination interfaces. If they
4181  *	are the same, there's nothing to do. If they are different,
4182  *	finalize the checksum so that it can be sent on the destination
4183  *	interface.
4184  */
4185 static void
4186 bridge_compute_cksum(struct ifnet *src_if, struct ifnet *dst_if, struct mbuf *m)
4187 {
4188 	uint32_t csum_flags;
4189 	uint16_t dst_hw_csum;
4190 	uint32_t did_sw = 0;
4191 	struct ether_header *eh;
4192 	uint16_t src_hw_csum;
4193 
4194 	if (src_if == dst_if) {
4195 		return;
4196 	}
4197 	csum_flags = m->m_pkthdr.csum_flags & IF_HWASSIST_CSUM_MASK;
4198 	if (csum_flags == 0) {
4199 		/* no checksum offload */
4200 		return;
4201 	}
4202 
4203 	/*
4204 	 * if destination/source differ in checksum offload
4205 	 * capabilities, finalize/compute the checksum
4206 	 */
4207 	dst_hw_csum = IF_HWASSIST_CSUM_FLAGS(dst_if->if_hwassist);
4208 	src_hw_csum = IF_HWASSIST_CSUM_FLAGS(src_if->if_hwassist);
4209 	if (dst_hw_csum == src_hw_csum) {
4210 		return;
4211 	}
4212 	eh = mtod(m, struct ether_header *);
4213 	switch (ntohs(eh->ether_type)) {
4214 	case ETHERTYPE_IP:
4215 		did_sw = in_finalize_cksum(m, sizeof(*eh), csum_flags);
4216 		break;
4217 	case ETHERTYPE_IPV6:
4218 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, csum_flags);
4219 		break;
4220 	}
4221 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4222 	    "[%s -> %s] before 0x%x did 0x%x after 0x%x",
4223 	    src_if->if_xname, dst_if->if_xname, csum_flags, did_sw,
4224 	    m->m_pkthdr.csum_flags);
4225 }
4226 
4227 static errno_t
4228 bridge_transmit(struct ifnet * ifp, struct mbuf *m)
4229 {
4230 	struct flowadv  adv = { .code = FADV_SUCCESS };
4231 	errno_t         error;
4232 
4233 	error = dlil_output(ifp, 0, m, NULL, NULL, 1, &adv);
4234 	if (error == 0) {
4235 		if (adv.code == FADV_FLOW_CONTROLLED) {
4236 			error = EQFULL;
4237 		} else if (adv.code == FADV_SUSPENDED) {
4238 			error = EQSUSPENDED;
4239 		}
4240 	}
4241 	return error;
4242 }
4243 
4244 static int
4245 get_last_ip6_hdr(struct mbuf *m, int off, int proto, int * nxtp,
4246     bool *is_fragmented)
4247 {
4248 	int newoff;
4249 
4250 	*is_fragmented = false;
4251 	while (1) {
4252 		newoff = ip6_nexthdr(m, off, proto, nxtp);
4253 		if (newoff < 0) {
4254 			return off;
4255 		} else if (newoff < off) {
4256 			return -1;    /* invalid */
4257 		} else if (newoff == off) {
4258 			return newoff;
4259 		}
4260 		off = newoff;
4261 		proto = *nxtp;
4262 		if (proto == IPPROTO_FRAGMENT) {
4263 			*is_fragmented = true;
4264 		}
4265 	}
4266 }
4267 
4268 static int
4269 bridge_get_ip_proto(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4270     ip_packet_info_t info_p, struct bripstats * stats_p)
4271 {
4272 	int             error = 0;
4273 	u_int           hlen;
4274 	u_int           ip_hlen;
4275 	u_int           ip_pay_len;
4276 	struct mbuf *   m0 = *mp;
4277 	int             off;
4278 	int             opt_len = 0;
4279 	int             proto = 0;
4280 
4281 	bzero(info_p, sizeof(*info_p));
4282 	if (is_ipv4) {
4283 		struct ip *     ip;
4284 		u_int           ip_total_len;
4285 
4286 		/* IPv4 */
4287 		hlen = mac_hlen + sizeof(struct ip);
4288 		if (m0->m_pkthdr.len < hlen) {
4289 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4290 			    "Short IP packet %d < %d",
4291 			    m0->m_pkthdr.len, hlen);
4292 			error = _EBADIP;
4293 			stats_p->bips_bad_ip++;
4294 			goto done;
4295 		}
4296 		if (m0->m_len < hlen) {
4297 			*mp = m0 = m_pullup(m0, hlen);
4298 			if (m0 == NULL) {
4299 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4300 				    "m_pullup failed hlen %d",
4301 				    hlen);
4302 				error = ENOBUFS;
4303 				stats_p->bips_bad_ip++;
4304 				goto done;
4305 			}
4306 		}
4307 		ip = (struct ip *)(void *)(mtod(m0, uint8_t *) + mac_hlen);
4308 		if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
4309 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4310 			    "bad IP version");
4311 			error = _EBADIP;
4312 			stats_p->bips_bad_ip++;
4313 			goto done;
4314 		}
4315 		ip_hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4316 		if (ip_hlen < sizeof(struct ip)) {
4317 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4318 			    "bad IP header length %d < %d",
4319 			    ip_hlen,
4320 			    (int)sizeof(struct ip));
4321 			error = _EBADIP;
4322 			stats_p->bips_bad_ip++;
4323 			goto done;
4324 		}
4325 		hlen = mac_hlen + ip_hlen;
4326 		if (m0->m_len < hlen) {
4327 			*mp = m0 = m_pullup(m0, hlen);
4328 			if (m0 == NULL) {
4329 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4330 				    "m_pullup failed hlen %d",
4331 				    hlen);
4332 				error = ENOBUFS;
4333 				stats_p->bips_bad_ip++;
4334 				goto done;
4335 			}
4336 		}
4337 
4338 		ip_total_len = ntohs(ip->ip_len);
4339 		if (ip_total_len < ip_hlen) {
4340 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4341 			    "IP total len %d < header len %d",
4342 			    ip_total_len, ip_hlen);
4343 			error = _EBADIP;
4344 			stats_p->bips_bad_ip++;
4345 			goto done;
4346 		}
4347 		if (ip_total_len > (m0->m_pkthdr.len - mac_hlen)) {
4348 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4349 			    "invalid IP payload length %d > %d",
4350 			    ip_total_len,
4351 			    (m0->m_pkthdr.len - mac_hlen));
4352 			error = _EBADIP;
4353 			stats_p->bips_bad_ip++;
4354 			goto done;
4355 		}
4356 		ip_pay_len = ip_total_len - ip_hlen;
4357 		info_p->ip_proto = ip->ip_p;
4358 		info_p->ip_hdr.ip = ip;
4359 #define FRAG_BITS       (IP_OFFMASK | IP_MF)
4360 		if ((ntohs(ip->ip_off) & FRAG_BITS) != 0) {
4361 			info_p->ip_is_fragmented = true;
4362 		}
4363 		stats_p->bips_ip++;
4364 	} else {
4365 		struct ip6_hdr *ip6;
4366 
4367 		/* IPv6 */
4368 		hlen = mac_hlen + sizeof(struct ip6_hdr);
4369 		if (m0->m_pkthdr.len < hlen) {
4370 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4371 			    "short IPv6 packet %d < %d",
4372 			    m0->m_pkthdr.len, hlen);
4373 			error = _EBADIPV6;
4374 			stats_p->bips_bad_ip6++;
4375 			goto done;
4376 		}
4377 		if (m0->m_len < hlen) {
4378 			*mp = m0 = m_pullup(m0, hlen);
4379 			if (m0 == NULL) {
4380 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4381 				    "m_pullup failed hlen %d",
4382 				    hlen);
4383 				error = ENOBUFS;
4384 				stats_p->bips_bad_ip6++;
4385 				goto done;
4386 			}
4387 		}
4388 		ip6 = (struct ip6_hdr *)(mtod(m0, uint8_t *) + mac_hlen);
4389 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
4390 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4391 			    "bad IPv6 version");
4392 			error = _EBADIPV6;
4393 			stats_p->bips_bad_ip6++;
4394 			goto done;
4395 		}
4396 		off = get_last_ip6_hdr(m0, mac_hlen, IPPROTO_IPV6, &proto,
4397 		    &info_p->ip_is_fragmented);
4398 		if (off < 0 || m0->m_pkthdr.len < off) {
4399 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4400 			    "ip6_lasthdr() returned %d",
4401 			    off);
4402 			error = _EBADIPV6;
4403 			stats_p->bips_bad_ip6++;
4404 			goto done;
4405 		}
4406 		ip_hlen = sizeof(*ip6);
4407 		opt_len = off - mac_hlen - ip_hlen;
4408 		if (opt_len < 0) {
4409 			error = _EBADIPV6;
4410 			stats_p->bips_bad_ip6++;
4411 			goto done;
4412 		}
4413 		info_p->ip_proto = proto;
4414 		info_p->ip_hdr.ip6 = ip6;
4415 		ip_pay_len = ntohs(ip6->ip6_plen);
4416 		if (ip_pay_len > (m0->m_pkthdr.len - mac_hlen - ip_hlen)) {
4417 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4418 			    "invalid IPv6 payload length %d > %d",
4419 			    ip_pay_len,
4420 			    (m0->m_pkthdr.len - mac_hlen - ip_hlen));
4421 			error = _EBADIPV6;
4422 			stats_p->bips_bad_ip6++;
4423 			goto done;
4424 		}
4425 		stats_p->bips_ip6++;
4426 	}
4427 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4428 	    "IPv%c proto %d ip %u pay %u opt %u pkt %u%s",
4429 	    is_ipv4 ? '4' : '6',
4430 	    proto, ip_hlen, ip_pay_len, opt_len,
4431 	    m0->m_pkthdr.len, info_p->ip_is_fragmented ? " frag" : "");
4432 	info_p->ip_hlen = ip_hlen;
4433 	info_p->ip_pay_len = ip_pay_len;
4434 	info_p->ip_opt_len = opt_len;
4435 	info_p->ip_is_ipv4 = is_ipv4;
4436 done:
4437 	return error;
4438 }
4439 
4440 static int
4441 bridge_get_tcp_header(struct mbuf * * mp, u_int mac_hlen, bool is_ipv4,
4442     ip_packet_info_t info_p, struct bripstats * stats_p)
4443 {
4444 	int             error;
4445 	u_int           hlen;
4446 
4447 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p, stats_p);
4448 	if (error != 0) {
4449 		goto done;
4450 	}
4451 	if (info_p->ip_proto != IPPROTO_TCP) {
4452 		/* not a TCP frame, not an error, just a bad guess */
4453 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4454 		    "non-TCP (%d) IPv%c frame %d bytes",
4455 		    info_p->ip_proto, is_ipv4 ? '4' : '6',
4456 		    (*mp)->m_pkthdr.len);
4457 		goto done;
4458 	}
4459 	if (info_p->ip_is_fragmented) {
4460 		/* both TSO and IP fragmentation don't make sense */
4461 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4462 		    "fragmented TSO packet?");
4463 		stats_p->bips_bad_tcp++;
4464 		error = _EBADTCP;
4465 		goto done;
4466 	}
4467 	hlen = mac_hlen + info_p->ip_hlen + sizeof(struct tcphdr) +
4468 	    info_p->ip_opt_len;
4469 	if ((*mp)->m_len < hlen) {
4470 		*mp = m_pullup(*mp, hlen);
4471 		if (*mp == NULL) {
4472 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4473 			    "m_pullup %d failed",
4474 			    hlen);
4475 			stats_p->bips_bad_tcp++;
4476 			error = _EBADTCP;
4477 			goto done;
4478 		}
4479 	}
4480 	info_p->ip_proto_hdr = ((caddr_t)info_p->ip_hdr.ptr) +
4481 	    info_p->ip_hlen + info_p->ip_opt_len;
4482 done:
4483 	return error;
4484 }
4485 
4486 static inline void
4487 proto_csum_stats_increment(uint8_t proto, struct brcsumstats * stats_p)
4488 {
4489 	if (proto == IPPROTO_TCP) {
4490 		stats_p->brcs_tcp_checksum++;
4491 	} else {
4492 		stats_p->brcs_udp_checksum++;
4493 	}
4494 	return;
4495 }
4496 
4497 static bool
4498 ether_header_type_is_ip(struct ether_header * eh, bool *is_ipv4)
4499 {
4500 	uint16_t        ether_type;
4501 	bool            is_ip = TRUE;
4502 
4503 	ether_type = ntohs(eh->ether_type);
4504 	switch (ether_type) {
4505 	case ETHERTYPE_IP:
4506 		*is_ipv4 = TRUE;
4507 		break;
4508 	case ETHERTYPE_IPV6:
4509 		*is_ipv4 = FALSE;
4510 		break;
4511 	default:
4512 		is_ip = FALSE;
4513 		break;
4514 	}
4515 	return is_ip;
4516 }
4517 
4518 static errno_t
4519 bridge_verify_checksum(struct mbuf * * mp, struct ifbrmstats *stats_p)
4520 {
4521 	struct brcsumstats *csum_stats_p;
4522 	struct ether_header     *eh;
4523 	errno_t         error = 0;
4524 	ip_packet_info  info;
4525 	bool            is_ipv4;
4526 	struct mbuf *   m;
4527 	u_int           mac_hlen = sizeof(struct ether_header);
4528 	uint16_t        sum;
4529 	bool            valid;
4530 
4531 	eh = mtod(*mp, struct ether_header *);
4532 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
4533 		goto done;
4534 	}
4535 	error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, &info,
4536 	    &stats_p->brms_out_ip);
4537 	m = *mp;
4538 	if (error != 0) {
4539 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4540 		    "bridge_get_ip_proto failed %d",
4541 		    error);
4542 		goto done;
4543 	}
4544 	if (is_ipv4) {
4545 		if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
4546 			/* hardware offloaded IP header checksum */
4547 			valid = (m->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0;
4548 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4549 			    "IP checksum HW %svalid",
4550 			    valid ? "" : "in");
4551 			if (!valid) {
4552 				stats_p->brms_out_cksum_bad_hw.brcs_ip_checksum++;
4553 				error = _EBADIPCHECKSUM;
4554 				goto done;
4555 			}
4556 			stats_p->brms_out_cksum_good_hw.brcs_ip_checksum++;
4557 		} else {
4558 			/* verify */
4559 			sum = inet_cksum(m, 0, mac_hlen, info.ip_hlen);
4560 			valid = (sum == 0);
4561 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4562 			    "IP checksum SW %svalid",
4563 			    valid ? "" : "in");
4564 			if (!valid) {
4565 				stats_p->brms_out_cksum_bad.brcs_ip_checksum++;
4566 				error = _EBADIPCHECKSUM;
4567 				goto done;
4568 			}
4569 			stats_p->brms_out_cksum_good.brcs_ip_checksum++;
4570 		}
4571 	}
4572 	if (info.ip_is_fragmented) {
4573 		/* can't verify checksum on fragmented packets */
4574 		goto done;
4575 	}
4576 	switch (info.ip_proto) {
4577 	case IPPROTO_TCP:
4578 		stats_p->brms_out_ip.bips_tcp++;
4579 		break;
4580 	case IPPROTO_UDP:
4581 		stats_p->brms_out_ip.bips_udp++;
4582 		break;
4583 	default:
4584 		goto done;
4585 	}
4586 	/* check for hardware offloaded UDP/TCP checksum */
4587 #define HW_CSUM         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)
4588 	if ((m->m_pkthdr.csum_flags & HW_CSUM) == HW_CSUM) {
4589 		/* checksum verified by hardware */
4590 		valid = (m->m_pkthdr.csum_rx_val == 0xffff);
4591 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4592 		    "IPv%c %s checksum HW 0x%x %svalid",
4593 		    is_ipv4 ? '4' : '6',
4594 		    (info.ip_proto == IPPROTO_TCP)
4595 		    ? "TCP" : "UDP",
4596 		    m->m_pkthdr.csum_data,
4597 		    valid ? "" : "in" );
4598 		if (!valid) {
4599 			/* bad checksum */
4600 			csum_stats_p = &stats_p->brms_out_cksum_bad_hw;
4601 			error = (info.ip_proto == IPPROTO_TCP) ? _EBADTCPCHECKSUM
4602 			    : _EBADTCPCHECKSUM;
4603 		} else {
4604 			/* good checksum */
4605 			csum_stats_p = &stats_p->brms_out_cksum_good_hw;
4606 		}
4607 		proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4608 		goto done;
4609 	}
4610 	m->m_data += mac_hlen;
4611 	m->m_len -= mac_hlen;
4612 	m->m_pkthdr.len -= mac_hlen;
4613 	if (is_ipv4) {
4614 		sum = inet_cksum(m, info.ip_proto,
4615 		    info.ip_hlen,
4616 		    info.ip_pay_len);
4617 	} else {
4618 		sum = inet6_cksum(m, info.ip_proto,
4619 		    info.ip_hlen + info.ip_opt_len,
4620 		    info.ip_pay_len - info.ip_opt_len);
4621 	}
4622 	valid = (sum == 0);
4623 	if (valid) {
4624 		csum_stats_p = &stats_p->brms_out_cksum_good;
4625 	} else {
4626 		csum_stats_p = &stats_p->brms_out_cksum_bad;
4627 		error = (info.ip_proto == IPPROTO_TCP)
4628 		    ? _EBADTCPCHECKSUM : _EBADUDPCHECKSUM;
4629 	}
4630 	proto_csum_stats_increment(info.ip_proto, csum_stats_p);
4631 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4632 	    "IPv%c %s checksum SW %svalid (0x%x) hlen %d paylen %d",
4633 	    is_ipv4 ? '4' : '6',
4634 	    (info.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4635 	    valid ? "" : "in",
4636 	    sum, info.ip_hlen, info.ip_pay_len);
4637 	m->m_data -= mac_hlen;
4638 	m->m_len += mac_hlen;
4639 	m->m_pkthdr.len += mac_hlen;
4640 done:
4641 	return error;
4642 }
4643 
4644 static errno_t
4645 bridge_offload_checksum(struct mbuf * * mp, ip_packet_info * info_p,
4646     struct ifbrmstats * stats_p)
4647 {
4648 	uint16_t *      csum_p;
4649 	errno_t         error = 0;
4650 	u_int           hlen;
4651 	struct mbuf *   m0 = *mp;
4652 	u_int           mac_hlen = sizeof(struct ether_header);
4653 	u_int           pkt_hdr_len;
4654 	struct tcphdr * tcp;
4655 	u_int           tcp_hlen;
4656 	struct udphdr * udp;
4657 
4658 	if (info_p->ip_is_ipv4) {
4659 		/* compute IP header checksum */
4660 		info_p->ip_hdr.ip->ip_sum = 0;
4661 		info_p->ip_hdr.ip->ip_sum = inet_cksum(m0, 0, mac_hlen,
4662 		    info_p->ip_hlen);
4663 		stats_p->brms_in_computed_cksum.brcs_ip_checksum++;
4664 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4665 		    "IPv4 checksum 0x%x",
4666 		    ntohs(info_p->ip_hdr.ip->ip_sum));
4667 	}
4668 	if (info_p->ip_is_fragmented) {
4669 		/* can't compute checksum on fragmented packets */
4670 		goto done;
4671 	}
4672 	pkt_hdr_len = m0->m_pkthdr.len;
4673 	switch (info_p->ip_proto) {
4674 	case IPPROTO_TCP:
4675 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len
4676 		    + sizeof(struct tcphdr);
4677 		if (m0->m_len < hlen) {
4678 			*mp = m0 = m_pullup(m0, hlen);
4679 			if (m0 == NULL) {
4680 				stats_p->brms_in_ip.bips_bad_tcp++;
4681 				error = _EBADTCP;
4682 				goto done;
4683 			}
4684 		}
4685 		tcp = (struct tcphdr *)(void *)
4686 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4687 		    + info_p->ip_opt_len);
4688 		tcp_hlen = tcp->th_off << 2;
4689 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + tcp_hlen;
4690 		if (hlen > pkt_hdr_len) {
4691 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4692 			    "bad tcp header length %u",
4693 			    tcp_hlen);
4694 			stats_p->brms_in_ip.bips_bad_tcp++;
4695 			error = _EBADTCP;
4696 			goto done;
4697 		}
4698 		csum_p = &tcp->th_sum;
4699 		stats_p->brms_in_ip.bips_tcp++;
4700 		break;
4701 	case IPPROTO_UDP:
4702 		hlen = mac_hlen + info_p->ip_hlen + info_p->ip_opt_len + sizeof(*udp);
4703 		if (m0->m_len < hlen) {
4704 			*mp = m0 = m_pullup(m0, hlen);
4705 			if (m0 == NULL) {
4706 				stats_p->brms_in_ip.bips_bad_udp++;
4707 				error = ENOBUFS;
4708 				goto done;
4709 			}
4710 		}
4711 		udp = (struct udphdr *)(void *)
4712 		    ((caddr_t)info_p->ip_hdr.ptr + info_p->ip_hlen
4713 		    + info_p->ip_opt_len);
4714 		csum_p = &udp->uh_sum;
4715 		stats_p->brms_in_ip.bips_udp++;
4716 		break;
4717 	default:
4718 		/* not TCP or UDP */
4719 		goto done;
4720 	}
4721 	*csum_p = 0;
4722 	m0->m_data += mac_hlen;
4723 	m0->m_len -= mac_hlen;
4724 	m0->m_pkthdr.len -= mac_hlen;
4725 	if (info_p->ip_is_ipv4) {
4726 		*csum_p = inet_cksum(m0, info_p->ip_proto, info_p->ip_hlen,
4727 		    info_p->ip_pay_len);
4728 	} else {
4729 		*csum_p = inet6_cksum(m0, info_p->ip_proto,
4730 		    info_p->ip_hlen + info_p->ip_opt_len,
4731 		    info_p->ip_pay_len - info_p->ip_opt_len);
4732 	}
4733 	if (info_p->ip_proto == IPPROTO_UDP && *csum_p == 0) {
4734 		/* RFC 1122 4.1.3.4 */
4735 		*csum_p = 0xffff;
4736 	}
4737 	m0->m_data -= mac_hlen;
4738 	m0->m_len += mac_hlen;
4739 	m0->m_pkthdr.len += mac_hlen;
4740 	proto_csum_stats_increment(info_p->ip_proto,
4741 	    &stats_p->brms_in_computed_cksum);
4742 
4743 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4744 	    "IPv%c %s set checksum 0x%x",
4745 	    info_p->ip_is_ipv4 ? '4' : '6',
4746 	    (info_p->ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
4747 	    ntohs(*csum_p));
4748 done:
4749 	return error;
4750 }
4751 
4752 static errno_t
4753 bridge_send(struct ifnet *src_ifp,
4754     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4755 {
4756 	switch (cksum_op) {
4757 	case CHECKSUM_OPERATION_CLEAR_OFFLOAD:
4758 		m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
4759 		break;
4760 	case CHECKSUM_OPERATION_FINALIZE:
4761 		/* the checksum might not be correct, finalize now */
4762 		bridge_finalize_cksum(dst_ifp, m);
4763 		break;
4764 	case CHECKSUM_OPERATION_COMPUTE:
4765 		bridge_compute_cksum(src_ifp, dst_ifp, m);
4766 		break;
4767 	default:
4768 		break;
4769 	}
4770 #if HAS_IF_CAP
4771 	/*
4772 	 * If underlying interface can not do VLAN tag insertion itself
4773 	 * then attach a packet tag that holds it.
4774 	 */
4775 	if ((m->m_flags & M_VLANTAG) &&
4776 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4777 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4778 		if (m == NULL) {
4779 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4780 			    "%s: unable to prepend VLAN header",
4781 			    dst_ifp->if_xname);
4782 			(void) ifnet_stat_increment_out(dst_ifp,
4783 			    0, 0, 1);
4784 			return 0;
4785 		}
4786 		m->m_flags &= ~M_VLANTAG;
4787 	}
4788 #endif /* HAS_IF_CAP */
4789 	return bridge_transmit(dst_ifp, m);
4790 }
4791 
4792 static errno_t
4793 bridge_send_tso(struct ifnet *dst_ifp, struct mbuf *m, bool is_ipv4)
4794 {
4795 	errno_t                 error;
4796 	u_int                   mac_hlen;
4797 
4798 	mac_hlen = sizeof(struct ether_header);
4799 
4800 #if HAS_IF_CAP
4801 	/*
4802 	 * If underlying interface can not do VLAN tag insertion itself
4803 	 * then attach a packet tag that holds it.
4804 	 */
4805 	if ((m->m_flags & M_VLANTAG) &&
4806 	    (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) {
4807 		m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
4808 		if (m == NULL) {
4809 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
4810 			    "%s: unable to prepend VLAN header",
4811 			    dst_ifp->if_xname);
4812 			(void) ifnet_stat_increment_out(dst_ifp,
4813 			    0, 0, 1);
4814 			error = ENOBUFS;
4815 			goto done;
4816 		}
4817 		m->m_flags &= ~M_VLANTAG;
4818 		mac_hlen += ETHER_VLAN_ENCAP_LEN;
4819 	}
4820 #endif /* HAS_IF_CAP */
4821 	error = gso_tcp(dst_ifp, &m, mac_hlen, is_ipv4, TRUE);
4822 	return error;
4823 }
4824 
4825 /*
4826  * tso_hwassist:
4827  * - determine whether the destination interface supports TSO offload
4828  * - if the packet is already marked for offload and the hardware supports
4829  *   it, just allow the packet to continue on
4830  * - if not, parse the packet headers to verify that this is a large TCP
4831  *   packet requiring segmentation; if the hardware doesn't support it
4832  *   set need_sw_tso; otherwise, mark the packet for TSO offload
4833  */
4834 static int
4835 tso_hwassist(struct mbuf **mp, bool is_ipv4, struct ifnet * ifp, u_int mac_hlen,
4836     bool * need_sw_tso, bool * supports_cksum)
4837 {
4838 	int             error = 0;
4839 	u_int32_t       if_csum;
4840 	u_int32_t       if_tso;
4841 	u_int32_t       mbuf_tso;
4842 
4843 	if (is_ipv4) {
4844 		/*
4845 		 * Enable both TCP and IP offload if the hardware supports it.
4846 		 * If the hardware doesn't support TCP offload, *supports_cksum
4847 		 * will be false so we won't set either offload.
4848 		 */
4849 		if_csum = ifp->if_hwassist & (CSUM_TCP | CSUM_IP);
4850 		*supports_cksum = (if_csum & CSUM_TCP) != 0;
4851 		if_tso = IFNET_TSO_IPV4;
4852 		mbuf_tso = CSUM_TSO_IPV4;
4853 	} else {
4854 		*supports_cksum = (ifp->if_hwassist & CSUM_TCPIPV6) != 0;
4855 		if_csum = CSUM_TCPIPV6;
4856 		if_tso = IFNET_TSO_IPV6;
4857 		mbuf_tso = CSUM_TSO_IPV6;
4858 	}
4859 	*need_sw_tso = false;
4860 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4861 	    "%s: does%s support checksum 0x%x if_csum 0x%x",
4862 	    ifp->if_xname, *supports_cksum ? "" : " not",
4863 	    ifp->if_hwassist, if_csum);
4864 	if ((ifp->if_hwassist & if_tso) != 0 &&
4865 	    ((*mp)->m_pkthdr.csum_flags & mbuf_tso) != 0) {
4866 		/* hardware TSO, mbuf already marked */
4867 	} else {
4868 		/* verify that this is a large TCP frame */
4869 		uint32_t                csum_flags;
4870 		ip_packet_info          info;
4871 		u_int                   mss;
4872 		struct bripstats        stats;
4873 		struct tcphdr *         tcp;
4874 
4875 		error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4,
4876 		    &info, &stats);
4877 		if (error != 0) {
4878 			/* bad packet */
4879 			goto done;
4880 		}
4881 		if ((info.ip_hlen + info.ip_pay_len + info.ip_opt_len) <=
4882 		    ifp->if_mtu) {
4883 			/* not actually a large packet */
4884 			goto done;
4885 		}
4886 		if (info.ip_proto_hdr == NULL) {
4887 			/* not a TCP packet */
4888 			goto done;
4889 		}
4890 		if ((ifp->if_hwassist & if_tso) == 0) {
4891 			/* hardware does not support TSO, enable sw tso */
4892 			*need_sw_tso = if_bridge_segmentation != 0;
4893 			goto done;
4894 		}
4895 		/* use hardware TSO */
4896 		(*mp)->m_pkthdr.pkt_proto = IPPROTO_TCP;
4897 		tcp = (struct tcphdr *)info.ip_proto_hdr;
4898 		mss = ifp->if_mtu - info.ip_hlen - info.ip_opt_len
4899 		    - (tcp->th_off << 2);
4900 		csum_flags = mbuf_tso;
4901 		if (*supports_cksum) {
4902 			csum_flags |= if_csum;
4903 		}
4904 		(*mp)->m_pkthdr.tso_segsz = mss;
4905 		(*mp)->m_pkthdr.csum_flags |= csum_flags;
4906 		(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
4907 	}
4908 done:
4909 	return error;
4910 }
4911 
4912 /*
4913  * bridge_enqueue:
4914  *
4915  *	Enqueue a packet on a bridge member interface.
4916  *
4917  */
4918 static errno_t
4919 bridge_enqueue(ifnet_t bridge_ifp, struct ifnet *src_ifp,
4920     struct ifnet *dst_ifp, struct mbuf *m, ChecksumOperation cksum_op)
4921 {
4922 	errno_t         error = 0;
4923 	int             len;
4924 
4925 	VERIFY(dst_ifp != NULL);
4926 
4927 	/*
4928 	 * We may be sending a fragment so traverse the mbuf
4929 	 *
4930 	 * NOTE: bridge_fragment() is called only when PFIL_HOOKS is enabled.
4931 	 */
4932 	for (struct mbuf *next_m = NULL; m != NULL; m = next_m) {
4933 		bool            need_sw_tso = false;
4934 		bool            is_ipv4 = false;
4935 		bool            is_large_pkt;
4936 		errno_t         _error = 0;
4937 
4938 		len = m->m_pkthdr.len;
4939 		m->m_flags |= M_PROTO1; /* set to avoid loops */
4940 		next_m = m->m_nextpkt;
4941 		m->m_nextpkt = NULL;
4942 		/*
4943 		 * Need to segment the packet if it is a large frame
4944 		 * and the destination interface does not support TSO.
4945 		 *
4946 		 * Note that with trailers, it's possible for a packet to
4947 		 * be large but not actually require segmentation.
4948 		 */
4949 		is_large_pkt = (len > (bridge_ifp->if_mtu + ETHER_HDR_LEN));
4950 		if (is_large_pkt) {
4951 			struct ether_header     *eh;
4952 			bool                    hw_supports_cksum = false;
4953 
4954 			eh = mtod(m, struct ether_header *);
4955 			if (ether_header_type_is_ip(eh, &is_ipv4)) {
4956 				_error = tso_hwassist(&m, is_ipv4,
4957 				    dst_ifp, sizeof(struct ether_header),
4958 				    &need_sw_tso, &hw_supports_cksum);
4959 				if (_error == 0 && hw_supports_cksum) {
4960 					cksum_op = CHECKSUM_OPERATION_NONE;
4961 				}
4962 			} else {
4963 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4964 				    "large non IP packet");
4965 			}
4966 		}
4967 		if (_error != 0) {
4968 			if (m != NULL) {
4969 				m_freem(m);
4970 			}
4971 		} else if (need_sw_tso) {
4972 			_error = bridge_send_tso(dst_ifp, m, is_ipv4);
4973 		} else {
4974 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
4975 			    "%s bridge_send(%s) len %d op %d",
4976 			    bridge_ifp->if_xname,
4977 			    dst_ifp->if_xname,
4978 			    len, cksum_op);
4979 			_error = bridge_send(src_ifp, dst_ifp, m, cksum_op);
4980 		}
4981 
4982 		/* Preserve first error value */
4983 		if (error == 0 && _error != 0) {
4984 			error = _error;
4985 		}
4986 		if (_error == 0) {
4987 			(void) ifnet_stat_increment_out(bridge_ifp, 1, len, 0);
4988 		} else {
4989 			(void) ifnet_stat_increment_out(bridge_ifp, 0, 0, 1);
4990 		}
4991 	}
4992 
4993 	return error;
4994 }
4995 
#if HAS_BRIDGE_DUMMYNET
/*
 * bridge_dummynet:
 *
 *	Take a packet that dummynet had queued and pass it on to the
 *	output interface ifp.
 *
 *	The mbuf has the Ethernet header already attached.
 */
static void
bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
{
	struct bridge_softc *sc = ifp->if_bridge;

	if (sc == NULL) {
		/*
		 * The packet didn't originate from a member interface. This
		 * should only ever happen if a member interface is removed
		 * while packets are queued for it.
		 */
		m_freem(m);
		return;
	}

	if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) {
		/* stop if the filter reports an error or leaves no packet */
		if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0 ||
		    m == NULL) {
			return;
		}
	}
	(void) bridge_enqueue(sc->sc_ifp, NULL, ifp, m, CHECKSUM_OPERATION_NONE);
}

#endif /* HAS_BRIDGE_DUMMYNET */
5034 
5035 /*
5036  * bridge_member_output:
5037  *
5038  *	Send output from a bridge member interface.  This
5039  *	performs the bridging function for locally originated
5040  *	packets.
5041  *
5042  *	The mbuf has the Ethernet header already attached.
5043  */
5044 static errno_t
5045 bridge_member_output(struct bridge_softc *sc, ifnet_t ifp, mbuf_t *data)
5046 {
5047 	ifnet_t bridge_ifp;
5048 	struct ether_header *eh;
5049 	struct ifnet *dst_if;
5050 	uint16_t vlan;
5051 	struct bridge_iflist *mac_nat_bif;
5052 	ifnet_t mac_nat_ifp;
5053 	mbuf_t m = *data;
5054 
5055 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5056 	    "ifp %s", ifp->if_xname);
5057 	if (m->m_len < ETHER_HDR_LEN) {
5058 		m = m_pullup(m, ETHER_HDR_LEN);
5059 		if (m == NULL) {
5060 			*data = NULL;
5061 			return EJUSTRETURN;
5062 		}
5063 	}
5064 
5065 	eh = mtod(m, struct ether_header *);
5066 	vlan = VLANTAGOF(m);
5067 
5068 	BRIDGE_LOCK(sc);
5069 	mac_nat_bif = sc->sc_mac_nat_bif;
5070 	mac_nat_ifp = (mac_nat_bif != NULL) ? mac_nat_bif->bif_ifp : NULL;
5071 	if (mac_nat_ifp == ifp) {
5072 		/* record the IP address used by the MAC NAT interface */
5073 		(void)bridge_mac_nat_output(sc, mac_nat_bif, data, NULL);
5074 		m = *data;
5075 		if (m == NULL) {
5076 			/* packet was deallocated */
5077 			BRIDGE_UNLOCK(sc);
5078 			return EJUSTRETURN;
5079 		}
5080 	}
5081 	bridge_ifp = sc->sc_ifp;
5082 
5083 	/*
5084 	 * APPLE MODIFICATION
5085 	 * If the packet is an 802.1X ethertype, then only send on the
5086 	 * original output interface.
5087 	 */
5088 	if (eh->ether_type == htons(ETHERTYPE_PAE)) {
5089 		dst_if = ifp;
5090 		goto sendunicast;
5091 	}
5092 
5093 	/*
5094 	 * If bridge is down, but the original output interface is up,
5095 	 * go ahead and send out that interface.  Otherwise, the packet
5096 	 * is dropped below.
5097 	 */
5098 	if ((bridge_ifp->if_flags & IFF_RUNNING) == 0) {
5099 		dst_if = ifp;
5100 		goto sendunicast;
5101 	}
5102 
5103 	/*
5104 	 * If the packet is a multicast, or we don't know a better way to
5105 	 * get there, send to all interfaces.
5106 	 */
5107 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
5108 		dst_if = NULL;
5109 	} else {
5110 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan);
5111 	}
5112 	if (dst_if == NULL) {
5113 		struct bridge_iflist *bif;
5114 		struct mbuf *mc;
5115 		int used = 0;
5116 		errno_t error;
5117 
5118 
5119 		bridge_span(sc, m);
5120 
5121 		BRIDGE_LOCK2REF(sc, error);
5122 		if (error != 0) {
5123 			m_freem(m);
5124 			return EJUSTRETURN;
5125 		}
5126 
5127 		TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) {
5128 			/* skip interface with inactive link status */
5129 			if ((bif->bif_flags & BIFF_MEDIA_ACTIVE) == 0) {
5130 				continue;
5131 			}
5132 			dst_if = bif->bif_ifp;
5133 
5134 #if 0
5135 			if (dst_if->if_type == IFT_GIF) {
5136 				continue;
5137 			}
5138 #endif
5139 			if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5140 				continue;
5141 			}
5142 			if (dst_if != ifp) {
5143 				/*
5144 				 * If this is not the original output interface,
5145 				 * and the interface is participating in spanning
5146 				 * tree, make sure the port is in a state that
5147 				 * allows forwarding.
5148 				 */
5149 				if ((bif->bif_ifflags & IFBIF_STP) &&
5150 				    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5151 					continue;
5152 				}
5153 				/*
5154 				 * If this is not the original output interface,
5155 				 * and the destination is the MAC NAT interface,
5156 				 * drop the packet. The packet can't be sent
5157 				 * if the source MAC is incorrect.
5158 				 */
5159 				if (dst_if == mac_nat_ifp) {
5160 					continue;
5161 				}
5162 			}
5163 			if (TAILQ_NEXT(bif, bif_next) == NULL) {
5164 				used = 1;
5165 				mc = m;
5166 			} else {
5167 				mc = m_dup(m, M_DONTWAIT);
5168 				if (mc == NULL) {
5169 					(void) ifnet_stat_increment_out(
5170 						bridge_ifp, 0, 0, 1);
5171 					continue;
5172 				}
5173 			}
5174 			(void) bridge_enqueue(bridge_ifp, ifp, dst_if,
5175 			    mc, CHECKSUM_OPERATION_COMPUTE);
5176 		}
5177 		if (used == 0) {
5178 			m_freem(m);
5179 		}
5180 		BRIDGE_UNREF(sc);
5181 		return EJUSTRETURN;
5182 	}
5183 
5184 sendunicast:
5185 	/*
5186 	 * XXX Spanning tree consideration here?
5187 	 */
5188 
5189 	bridge_span(sc, m);
5190 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5191 		m_freem(m);
5192 		BRIDGE_UNLOCK(sc);
5193 		return EJUSTRETURN;
5194 	}
5195 
5196 	BRIDGE_UNLOCK(sc);
5197 	if (dst_if == ifp) {
5198 		/* just let the packet continue on its way */
5199 		return 0;
5200 	}
5201 	if (dst_if != mac_nat_ifp) {
5202 		(void) bridge_enqueue(bridge_ifp, ifp, dst_if, m,
5203 		    CHECKSUM_OPERATION_COMPUTE);
5204 	} else {
5205 		/*
5206 		 * This is not the original output interface
5207 		 * and the destination is the MAC NAT interface.
5208 		 * Drop the packet because the packet can't be sent
5209 		 * if the source MAC is incorrect.
5210 		 */
5211 		m_freem(m);
5212 	}
5213 	return EJUSTRETURN;
5214 }
5215 
5216 /*
5217  * Output callback.
5218  *
5219  * This routine is called externally from above only when if_bridge_txstart
5220  * is disabled; otherwise it is called internally by bridge_start().
5221  */
5222 static int
5223 bridge_output(struct ifnet *ifp, struct mbuf *m)
5224 {
5225 	struct bridge_softc *sc = ifnet_softc(ifp);
5226 	struct ether_header *eh;
5227 	struct ifnet *dst_if = NULL;
5228 	int error = 0;
5229 
5230 	eh = mtod(m, struct ether_header *);
5231 
5232 	BRIDGE_LOCK(sc);
5233 
5234 	if (!(m->m_flags & (M_BCAST | M_MCAST))) {
5235 		dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0);
5236 	}
5237 
5238 	(void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0);
5239 
5240 #if NBPFILTER > 0
5241 	if (sc->sc_bpf_output) {
5242 		bridge_bpf_output(ifp, m);
5243 	}
5244 #endif
5245 
5246 	if (dst_if == NULL) {
5247 		/* callee will unlock */
5248 		bridge_broadcast(sc, NULL, m, 0);
5249 	} else {
5250 		ifnet_t bridge_ifp;
5251 
5252 		bridge_ifp = sc->sc_ifp;
5253 		BRIDGE_UNLOCK(sc);
5254 
5255 		error = bridge_enqueue(bridge_ifp, NULL, dst_if, m,
5256 		    CHECKSUM_OPERATION_FINALIZE);
5257 	}
5258 
5259 	return error;
5260 }
5261 
5262 static void
5263 bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m)
5264 {
5265 	struct ether_header *eh;
5266 	bool is_ipv4;
5267 	uint32_t sw_csum, hwcap;
5268 	uint32_t did_sw;
5269 	uint32_t csum_flags;
5270 
5271 	eh = mtod(m, struct ether_header *);
5272 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5273 		return;
5274 	}
5275 
5276 	/* do in software what the hardware cannot */
5277 	hwcap = (ifp->if_hwassist | CSUM_DATA_VALID);
5278 	csum_flags = m->m_pkthdr.csum_flags;
5279 	sw_csum = csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap);
5280 	sw_csum &= IF_HWASSIST_CSUM_MASK;
5281 
5282 	if (is_ipv4) {
5283 		if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) &&
5284 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) {
5285 			if (m->m_pkthdr.csum_flags & CSUM_TCP) {
5286 				uint16_t start =
5287 				    sizeof(*eh) + sizeof(struct ip);
5288 				uint16_t ulpoff =
5289 				    m->m_pkthdr.csum_data & 0xffff;
5290 				m->m_pkthdr.csum_flags |=
5291 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5292 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5293 				m->m_pkthdr.csum_tx_start = start;
5294 			} else {
5295 				sw_csum |= (CSUM_DELAY_DATA &
5296 				    m->m_pkthdr.csum_flags);
5297 			}
5298 		}
5299 		did_sw = in_finalize_cksum(m, sizeof(*eh), sw_csum);
5300 	} else {
5301 		if ((hwcap & CSUM_PARTIAL) &&
5302 		    !(sw_csum & CSUM_DELAY_IPV6_DATA) &&
5303 		    (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
5304 			if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) {
5305 				uint16_t start =
5306 				    sizeof(*eh) + sizeof(struct ip6_hdr);
5307 				uint16_t ulpoff =
5308 				    m->m_pkthdr.csum_data & 0xffff;
5309 				m->m_pkthdr.csum_flags |=
5310 				    (CSUM_DATA_VALID | CSUM_PARTIAL);
5311 				m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
5312 				m->m_pkthdr.csum_tx_start = start;
5313 			} else {
5314 				sw_csum |= (CSUM_DELAY_IPV6_DATA &
5315 				    m->m_pkthdr.csum_flags);
5316 			}
5317 		}
5318 		did_sw = in6_finalize_cksum(m, sizeof(*eh), -1, -1, sw_csum);
5319 	}
5320 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5321 	    "[%s] before 0x%x hwcap 0x%x sw_csum 0x%x did 0x%x after 0x%x",
5322 	    ifp->if_xname, csum_flags, hwcap, sw_csum,
5323 	    did_sw, m->m_pkthdr.csum_flags);
5324 }
5325 
/*
 * bridge_start:
 *
 *	Start output on a bridge: drain the interface's output queue,
 *	passing each dequeued packet to bridge_output().
 *
 * This routine is invoked by the start worker thread; because we never call
 * it directly, there is no need to deploy any serialization mechanism other
 * than what's already used by the worker thread, i.e. this is already single
 * threaded.
 *
 * This routine is called only when if_bridge_txstart is enabled.
 */
static void
bridge_start(struct ifnet *ifp)
{
	struct mbuf *pkt;

	/* keep going until ifnet_dequeue() reports a non-zero status */
	while (ifnet_dequeue(ifp, &pkt) == 0) {
		(void) bridge_output(ifp, pkt);
	}
}
5351 
5352 /*
5353  * bridge_forward:
5354  *
5355  *	The forwarding function of the bridge.
5356  *
5357  *	NOTE: Releases the lock on return.
5358  */
5359 static void
5360 bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
5361     struct mbuf *m)
5362 {
5363 	struct bridge_iflist *dbif;
5364 	ifnet_t bridge_ifp;
5365 	struct ifnet *src_if, *dst_if;
5366 	struct ether_header *eh;
5367 	uint16_t vlan;
5368 	uint8_t *dst;
5369 	int error;
5370 	struct mac_nat_record mnr;
5371 	bool translate_mac = FALSE;
5372 	uint32_t sc_filter_flags = 0;
5373 
5374 	BRIDGE_LOCK_ASSERT_HELD(sc);
5375 
5376 	bridge_ifp = sc->sc_ifp;
5377 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_OUTPUT,
5378 	    "%s m 0x%llx", bridge_ifp->if_xname,
5379 	    (uint64_t)VM_KERNEL_ADDRPERM(m));
5380 
5381 	src_if = m->m_pkthdr.rcvif;
5382 	if (src_if != sbif->bif_ifp) {
5383 		const char *    src_if_name;
5384 
5385 		src_if_name = (src_if != NULL) ? src_if->if_xname : "?";
5386 		BRIDGE_LOG(LOG_NOTICE, 0,
5387 		    "src_if %s != bif_ifp %s",
5388 		    src_if_name, sbif->bif_ifp->if_xname);
5389 		goto drop;
5390 	}
5391 
5392 	(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
5393 	vlan = VLANTAGOF(m);
5394 
5395 
5396 	if ((sbif->bif_ifflags & IFBIF_STP) &&
5397 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5398 		goto drop;
5399 	}
5400 
5401 	eh = mtod(m, struct ether_header *);
5402 	dst = eh->ether_dhost;
5403 
5404 	/* If the interface is learning, record the address. */
5405 	if (sbif->bif_ifflags & IFBIF_LEARNING) {
5406 		error = bridge_rtupdate(sc, eh->ether_shost, vlan,
5407 		    sbif, 0, IFBAF_DYNAMIC);
5408 		/*
5409 		 * If the interface has addresses limits then deny any source
5410 		 * that is not in the cache.
5411 		 */
5412 		if (error && sbif->bif_addrmax) {
5413 			goto drop;
5414 		}
5415 	}
5416 
5417 	if ((sbif->bif_ifflags & IFBIF_STP) != 0 &&
5418 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) {
5419 		goto drop;
5420 	}
5421 
5422 	/*
5423 	 * At this point, the port either doesn't participate
5424 	 * in spanning tree or it is in the forwarding state.
5425 	 */
5426 
5427 	/*
5428 	 * If the packet is unicast, destined for someone on
5429 	 * "this" side of the bridge, drop it.
5430 	 */
5431 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5432 		/* unicast */
5433 		dst_if = bridge_rtlookup(sc, dst, vlan);
5434 		if (src_if == dst_if) {
5435 			goto drop;
5436 		}
5437 	} else {
5438 		/* broadcast/multicast */
5439 
5440 		/*
5441 		 * Check if its a reserved multicast address, any address
5442 		 * listed in 802.1D section 7.12.6 may not be forwarded by the
5443 		 * bridge.
5444 		 * This is currently 01-80-C2-00-00-00 to 01-80-C2-00-00-0F
5445 		 */
5446 		if (dst[0] == 0x01 && dst[1] == 0x80 &&
5447 		    dst[2] == 0xc2 && dst[3] == 0x00 &&
5448 		    dst[4] == 0x00 && dst[5] <= 0x0f) {
5449 			goto drop;
5450 		}
5451 
5452 
5453 		/* ...forward it to all interfaces. */
5454 		atomic_add_64(&bridge_ifp->if_imcasts, 1);
5455 		dst_if = NULL;
5456 	}
5457 
5458 	/*
5459 	 * If we have a destination interface which is a member of our bridge,
5460 	 * OR this is a unicast packet, push it through the bpf(4) machinery.
5461 	 * For broadcast or multicast packets, don't bother because it will
5462 	 * be reinjected into ether_input. We do this before we pass the packets
5463 	 * through the pfil(9) framework, as it is possible that pfil(9) will
5464 	 * drop the packet, or possibly modify it, making it difficult to debug
5465 	 * firewall issues on the bridge.
5466 	 */
5467 #if NBPFILTER > 0
5468 	if (eh->ether_type == htons(ETHERTYPE_RSN_PREAUTH) ||
5469 	    dst_if != NULL || (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
5470 		m->m_pkthdr.rcvif = bridge_ifp;
5471 		BRIDGE_BPF_MTAP_INPUT(sc, m);
5472 	}
5473 #endif /* NBPFILTER */
5474 
5475 	if (dst_if == NULL) {
5476 		/* bridge_broadcast will unlock */
5477 		bridge_broadcast(sc, sbif, m, 1);
5478 		return;
5479 	}
5480 
5481 	/*
5482 	 * Unicast.
5483 	 */
5484 	/*
5485 	 * At this point, we're dealing with a unicast frame
5486 	 * going to a different interface.
5487 	 */
5488 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
5489 		goto drop;
5490 	}
5491 
5492 	dbif = bridge_lookup_member_if(sc, dst_if);
5493 	if (dbif == NULL) {
5494 		/* Not a member of the bridge (anymore?) */
5495 		goto drop;
5496 	}
5497 
5498 	/* Private segments can not talk to each other */
5499 	if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) {
5500 		goto drop;
5501 	}
5502 
5503 	if ((dbif->bif_ifflags & IFBIF_STP) &&
5504 	    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
5505 		goto drop;
5506 	}
5507 
5508 #if HAS_DHCPRA_MASK
5509 	/* APPLE MODIFICATION <rdar:6985737> */
5510 	if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) {
5511 		m = ip_xdhcpra_output(dst_if, m);
5512 		if (!m) {
5513 			++bridge_ifp.if_xdhcpra;
5514 			BRIDGE_UNLOCK(sc);
5515 			return;
5516 		}
5517 	}
5518 #endif /* HAS_DHCPRA_MASK */
5519 
5520 	if (dbif == sc->sc_mac_nat_bif) {
5521 		/* determine how to translate the packet */
5522 		translate_mac
5523 		        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
5524 		if (m == NULL) {
5525 			/* packet was deallocated */
5526 			BRIDGE_UNLOCK(sc);
5527 			return;
5528 		}
5529 	} else if (bif_has_checksum_offload(dbif) &&
5530 	    !bif_has_checksum_offload(sbif)) {
5531 		/*
5532 		 * If the destination interface has checksum offload enabled,
5533 		 * verify the checksum now, unless the source interface also has
5534 		 * checksum offload enabled. The checksum in that case has
5535 		 * already just been computed and verifying it is unnecessary.
5536 		 */
5537 		error = bridge_verify_checksum(&m, &dbif->bif_stats);
5538 		if (error != 0) {
5539 			BRIDGE_UNLOCK(sc);
5540 			if (m != NULL) {
5541 				m_freem(m);
5542 			}
5543 			return;
5544 		}
5545 	}
5546 
5547 	sc_filter_flags = sc->sc_filter_flags;
5548 
5549 	BRIDGE_UNLOCK(sc);
5550 	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
5551 		if (bridge_pf(&m, dst_if, sc_filter_flags, FALSE) != 0) {
5552 			return;
5553 		}
5554 		if (m == NULL) {
5555 			return;
5556 		}
5557 	}
5558 
5559 	/* if we need to, translate the MAC address */
5560 	if (translate_mac) {
5561 		bridge_mac_nat_translate(&m, &mnr, IF_LLADDR(dst_if));
5562 	}
5563 	/*
5564 	 * We're forwarding an inbound packet in which the checksum must
5565 	 * already have been computed and if required, verified.
5566 	 */
5567 	if (m != NULL) {
5568 		(void) bridge_enqueue(bridge_ifp, src_if, dst_if, m,
5569 		    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
5570 	}
5571 	return;
5572 
5573 drop:
5574 	BRIDGE_UNLOCK(sc);
5575 	m_freem(m);
5576 }
5577 
5578 static void
5579 inject_input_packet(ifnet_t ifp, mbuf_t m)
5580 {
5581 	mbuf_pkthdr_setrcvif(m, ifp);
5582 	mbuf_pkthdr_setheader(m, mbuf_data(m));
5583 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
5584 	    mbuf_len(m) - ETHER_HDR_LEN);
5585 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
5586 	m->m_flags |= M_PROTO1; /* set to avoid loops */
5587 	dlil_input_packet_list(ifp, m);
5588 	return;
5589 }
5590 
5591 static bool
5592 in_addr_is_ours(struct in_addr ip)
5593 {
5594 	struct in_ifaddr *ia;
5595 	bool             ours = false;
5596 
5597 	lck_rw_lock_shared(&in_ifaddr_rwlock);
5598 	TAILQ_FOREACH(ia, INADDR_HASH(ip.s_addr), ia_hash) {
5599 		if (IA_SIN(ia)->sin_addr.s_addr == ip.s_addr) {
5600 			ours = true;
5601 			break;
5602 		}
5603 	}
5604 	lck_rw_done(&in_ifaddr_rwlock);
5605 	return ours;
5606 }
5607 
5608 static bool
5609 in6_addr_is_ours(const struct in6_addr * ip6_p, uint32_t ifscope)
5610 {
5611 	struct in6_ifaddr       *ia6;
5612 	bool                    ours = false;
5613 
5614 	if (in6_embedded_scope && IN6_IS_ADDR_LINKLOCAL(ip6_p)) {
5615 		struct in6_addr         dst_ip;
5616 
5617 		/* need to embed scope ID for comparison */
5618 		bcopy(ip6_p, &dst_ip, sizeof(dst_ip));
5619 		dst_ip.s6_addr16[1] = htons(ifscope);
5620 		ip6_p = &dst_ip;
5621 	}
5622 	lck_rw_lock_shared(&in6_ifaddr_rwlock);
5623 	TAILQ_FOREACH(ia6, IN6ADDR_HASH(ip6_p), ia6_hash) {
5624 		if (in6_are_addr_equal_scoped(&ia6->ia_addr.sin6_addr, ip6_p,
5625 		    ia6->ia_addr.sin6_scope_id, ifscope)) {
5626 			ours = true;
5627 			break;
5628 		}
5629 	}
5630 	lck_rw_done(&in6_ifaddr_rwlock);
5631 	return ours;
5632 }
5633 
5634 static void
5635 bridge_interface_input(ifnet_t bridge_ifp, mbuf_t m,
5636     bpf_packet_func bpf_input_func)
5637 {
5638 	size_t                  byte_count;
5639 	struct ether_header     *eh;
5640 	errno_t                 error;
5641 	bool                    is_ipv4;
5642 	int                     len;
5643 	u_int                   mac_hlen;
5644 	int                     pkt_count;
5645 
5646 	/* segment large packets before sending them up */
5647 	if (if_bridge_segmentation == 0) {
5648 		goto done;
5649 	}
5650 	len = m->m_pkthdr.len;
5651 	if (len <= (bridge_ifp->if_mtu + ETHER_HDR_LEN)) {
5652 		goto done;
5653 	}
5654 	eh = mtod(m, struct ether_header *);
5655 	if (!ether_header_type_is_ip(eh, &is_ipv4)) {
5656 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
5657 		    "large non IPv4/IPv6 packet");
5658 		goto done;
5659 	}
5660 
5661 	/*
5662 	 * We have a large IPv4/IPv6 TCP packet. Segment it if required.
5663 	 *
5664 	 * If gso_tcp() returns success (0), the packet(s) are
5665 	 * ready to be passed up. If the destination is a local IP address,
5666 	 * the packet will be passed up as a large, single packet.
5667 	 *
5668 	 * If gso_tcp() returns an error, the packet has already
5669 	 * been freed.
5670 	 */
5671 	mac_hlen = sizeof(*eh);
5672 	error = gso_tcp(bridge_ifp, &m, mac_hlen, is_ipv4, FALSE);
5673 	if (error != 0) {
5674 		return;
5675 	}
5676 
5677 done:
5678 	pkt_count = 0;
5679 	byte_count = 0;
5680 	for (mbuf_t scan = m; scan != NULL; scan = scan->m_nextpkt) {
5681 		/* Mark the packet as arriving on the bridge interface */
5682 		mbuf_pkthdr_setrcvif(scan, bridge_ifp);
5683 		mbuf_pkthdr_setheader(scan, mbuf_data(scan));
5684 		if (bpf_input_func != NULL) {
5685 			(*bpf_input_func)(bridge_ifp, scan);
5686 		}
5687 		mbuf_setdata(scan, (char *)mbuf_data(scan) + ETHER_HDR_LEN,
5688 		    mbuf_len(scan) - ETHER_HDR_LEN);
5689 		mbuf_pkthdr_adjustlen(scan, -ETHER_HDR_LEN);
5690 		byte_count += mbuf_pkthdr_len(scan);
5691 		pkt_count++;
5692 	}
5693 	(void)ifnet_stat_increment_in(bridge_ifp, pkt_count, byte_count, 0);
5694 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
5695 	    "%s %d packet(s) %ld bytes",
5696 	    bridge_ifp->if_xname, pkt_count, byte_count);
5697 	dlil_input_packet_list(bridge_ifp, m);
5698 	return;
5699 }
5700 
5701 static bool
5702 is_our_ip(ip_packet_info_t info_p, uint32_t ifscope)
5703 {
5704 	bool    ours;
5705 
5706 	if (info_p->ip_is_ipv4) {
5707 		struct in_addr  dst_ip;
5708 
5709 		bcopy(&info_p->ip_hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
5710 		ours = in_addr_is_ours(dst_ip);
5711 	} else {
5712 		ours = in6_addr_is_ours(&info_p->ip_hdr.ip6->ip6_dst, ifscope);
5713 	}
5714 	return ours;
5715 }
5716 
5717 static inline errno_t
5718 bridge_vmnet_tag_input(ifnet_t bridge_ifp, ifnet_t ifp,
5719     const u_char * ether_dhost, mbuf_t *mp,
5720     bool is_broadcast, bool is_ip, bool is_ipv4,
5721     ip_packet_info * info_p, struct bripstats * stats_p,
5722     bool *info_initialized)
5723 {
5724 	errno_t         error = 0;
5725 	bool            is_local = false;
5726 	struct pf_mtag *pf_mtag;
5727 	u_int16_t       tag = vmnet_tag;
5728 
5729 	*info_initialized = false;
5730 	if (is_broadcast) {
5731 		if (_ether_cmp(ether_dhost, etherbroadcastaddr) == 0) {
5732 			tag = vmnet_broadcast_tag;
5733 		} else {
5734 			tag = vmnet_multicast_tag;
5735 		}
5736 	} else if (is_ip) {
5737 		unsigned int    mac_hlen = sizeof(struct ether_header);
5738 
5739 		bzero(stats_p, sizeof(*stats_p));
5740 		*info_initialized = true;
5741 		error = bridge_get_ip_proto(mp, mac_hlen, is_ipv4, info_p,
5742 		    stats_p);
5743 		if (error != 0) {
5744 			BRIDGE_LOG(LOG_NOTICE, BR_DBGF_INPUT,
5745 			    "%s(%s) bridge_get_ip_proto failed %d",
5746 			    bridge_ifp->if_xname,
5747 			    ifp->if_xname, error);
5748 			if (*mp == NULL) {
5749 				return EJUSTRETURN;
5750 			}
5751 		} else {
5752 			is_local = is_our_ip(info_p, bridge_ifp->if_index);
5753 			if (is_local) {
5754 				tag = vmnet_local_tag;
5755 			}
5756 		}
5757 	}
5758 	pf_mtag = pf_get_mtag(*mp);
5759 	if (pf_mtag != NULL) {
5760 		pf_mtag->pftag_tag = tag;
5761 	}
5762 #if DEBUG || DEVELOPMENT
5763 	{
5764 		bool forced;
5765 
5766 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_ONE);
5767 		if (forced) {
5768 			m_freem(*mp);
5769 			*mp = NULL;
5770 			error = EJUSTRETURN;
5771 			goto done;
5772 		}
5773 		BRIDGE_ERROR_GET_FORCED(forced, BRIDGE_FORCE_TWO);
5774 		if (forced) {
5775 			error = _EBADIP;
5776 			goto done;
5777 		}
5778 	}
5779 done:
5780 #endif /* DEBUG || DEVELOPMENT */
5781 	return error;
5782 }
5783 
5784 static void
5785 bripstats_apply(struct bripstats *dst_p, const struct bripstats *src_p)
5786 {
5787 	dst_p->bips_ip += src_p->bips_ip;
5788 	dst_p->bips_ip6 += src_p->bips_ip6;
5789 	dst_p->bips_udp += src_p->bips_udp;
5790 	dst_p->bips_tcp += src_p->bips_tcp;
5791 
5792 	dst_p->bips_bad_ip += src_p->bips_bad_ip;
5793 	dst_p->bips_bad_ip6 += src_p->bips_bad_ip6;
5794 	dst_p->bips_bad_udp += src_p->bips_bad_udp;
5795 	dst_p->bips_bad_tcp += src_p->bips_bad_tcp;
5796 }
5797 
5798 static void
5799 bridge_bripstats_apply(ifnet_t ifp, const struct bripstats *stats_p)
5800 {
5801 	struct bridge_iflist *bif;
5802 	struct bridge_softc *sc = ifp->if_bridge;
5803 
5804 	BRIDGE_LOCK(sc);
5805 	bif = bridge_lookup_member_if(sc, ifp);
5806 	if (bif == NULL) {
5807 		goto done;
5808 	}
5809 	if (!bif_has_checksum_offload(bif)) {
5810 		goto done;
5811 	}
5812 	bripstats_apply(&bif->bif_stats.brms_in_ip, stats_p);
5813 
5814 done:
5815 	BRIDGE_UNLOCK(sc);
5816 	return;
5817 }
5818 
/*
 * bridge_input:
 *
 *	Filter input from a member interface.  Queue the packet for
 *	bridging if it is not for us.
 *
 *	ifp	member interface the packet arrived on
 *	data	in/out pointer to the packet; helpers called from here
 *		may replace or free the mbuf, so `m' is re-read from
 *		*data after each of them
 *
 *	Returns 0 when the packet is left with the caller to continue
 *	normal input processing on `ifp' (bridge not running, `ifp' not a
 *	member, STP discarding, broadcast/multicast after copies were
 *	forwarded, or unicast to the member's own MAC address).
 *	Returns EJUSTRETURN when the bridge has taken over the packet
 *	(forwarded, injected on another interface, or dropped).
 *
 *	The bridge lock is taken after the PF member filter has run and is
 *	released on every return path; bridge_forward() releases it
 *	itself.
 */
errno_t
bridge_input(struct ifnet *ifp, mbuf_t *data)
{
	struct bridge_softc *sc = ifp->if_bridge;
	struct bridge_iflist *bif, *bif2;
	struct ether_header eh_in;
	bool is_ip = false;
	bool is_ipv4 = false;
	ifnet_t bridge_ifp;
	struct mbuf *mc, *mc2;
	unsigned int mac_hlen = sizeof(struct ether_header);
	uint16_t vlan;
	errno_t error;
	ip_packet_info info;
	struct bripstats stats;
	bool info_initialized = false;
	errno_t ip_packet_error = 0;
	bool is_broadcast;
	bool is_ip_broadcast = false;
	bool is_ifp_mac = false;
	mbuf_t m = *data;
	uint32_t sc_filter_flags = 0;

	bridge_ifp = sc->sc_ifp;
	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
	    "%s from %s m 0x%llx data 0x%llx",
	    bridge_ifp->if_xname, ifp->if_xname,
	    (uint64_t)VM_KERNEL_ADDRPERM(m),
	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
	/* a bridge that is not running leaves the packet to the caller */
	if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) {
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s not running passing along",
		    bridge_ifp->if_xname);
		return 0;
	}

	vlan = VLANTAGOF(m);

#ifdef IFF_MONITOR
	/*
	 * Implement support for bridge monitoring. If this flag has been
	 * set on this interface, discard the packet once we push it through
	 * the bpf(4) machinery, but before we do, increment the byte and
	 * packet counters associated with this interface.
	 */
	if ((bridge_ifp->if_flags & IFF_MONITOR) != 0) {
		m->m_pkthdr.rcvif = bridge_ifp;
		BRIDGE_BPF_MTAP_INPUT(sc, m);
		(void) ifnet_stat_increment_in(bridge_ifp, 1, m->m_pkthdr.len, 0);
		*data = NULL;
		m_freem(m);
		return EJUSTRETURN;
	}
#endif /* IFF_MONITOR */

	/* link-layer broadcast or multicast, as flagged on the mbuf */
	is_broadcast = (m->m_flags & (M_BCAST | M_MCAST)) != 0;

	/*
	 * Need to clear the promiscuous flag otherwise it will be
	 * dropped by DLIL after processing filters
	 */
	if ((mbuf_flags(m) & MBUF_PROMISC)) {
		mbuf_setflags_mask(m, 0, MBUF_PROMISC);
	}

	/*
	 * copy the ethernet header; all later address checks use this
	 * copy rather than the (possibly replaced) mbuf
	 */
	eh_in = *(mtod(m, struct ether_header *));

	is_ip = ether_header_type_is_ip(&eh_in, &is_ipv4);

	if (if_bridge_vmnet_pf_tagging != 0 && IFNET_IS_VMNET(ifp)) {
		/* tag packets coming from VMNET interfaces */
		ip_packet_error = bridge_vmnet_tag_input(bridge_ifp, ifp,
		    eh_in.ether_dhost, data, is_broadcast, is_ip, is_ipv4,
		    &info, &stats, &info_initialized);
		m = *data;
		if (m == NULL) {
			/* packet is gone; still account for what was seen */
			bridge_bripstats_apply(ifp, &stats);
			return EJUSTRETURN;
		}
	}

	/* run the PF member filter before taking the bridge lock */
	sc_filter_flags = sc->sc_filter_flags;
	if (PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
		error = bridge_pf(data, ifp, sc_filter_flags, TRUE);
		m = *data;
		if (error != 0 || m == NULL) {
			return EJUSTRETURN;
		}
	}

	BRIDGE_LOCK(sc);
	bif = bridge_lookup_member_if(sc, ifp);
	if (bif == NULL) {
		BRIDGE_UNLOCK(sc);
		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
		    "%s bridge_lookup_member_if failed",
		    bridge_ifp->if_xname);
		return 0;
	}
	if (is_ip && bif_has_checksum_offload(bif)) {
		if (info_initialized) {
			/* already parsed during VMNET tagging, reuse it */
			bripstats_apply(&bif->bif_stats.brms_in_ip, &stats);
		} else {
			error = bridge_get_ip_proto(data, mac_hlen, is_ipv4,
			    &info, &bif->bif_stats.brms_in_ip);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s(%s) bridge_get_ip_proto failed %d",
				    bridge_ifp->if_xname,
				    bif->bif_ifp->if_xname, error);
				ip_packet_error = error;
			}
		}
		if (ip_packet_error == 0) {
			/* need to compute IP/UDP/TCP/checksums */
			error = bridge_offload_checksum(data, &info,
			    &bif->bif_stats);
			if (error != 0) {
				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_CHECKSUM,
				    "%s(%s) bridge_offload_checksum failed %d",
				    bridge_ifp->if_xname,
				    bif->bif_ifp->if_xname, error);
				ip_packet_error = error;
			}
		}
		/* any parse/checksum failure drops the packet */
		if (ip_packet_error != 0) {
			BRIDGE_UNLOCK(sc);
			if (*data != NULL) {
				m_freem(*data);
				*data = NULL;
			}
			return EJUSTRETURN;
		}
		m = *data;
	}

	if (bif->bif_flags & BIFF_HOST_FILTER) {
		error = bridge_host_filter(bif, data);
		if (error != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_INPUT,
			    "%s bridge_host_filter failed",
			    bif->bif_ifp->if_xname);
			BRIDGE_UNLOCK(sc);
			/*
			 * NOTE(review): the packet is not freed here;
			 * presumably bridge_host_filter() disposes of it
			 * on failure -- confirm.
			 */
			return EJUSTRETURN;
		}
		m = *data;
	}

	if (!is_broadcast &&
	    _ether_cmp(eh_in.ether_dhost, IF_LLADDR(ifp)) == 0) {
		/* the packet is unicast to the interface's MAC address */
		if (is_ip && sc->sc_mac_nat_bif == bif) {
			/* doing MAC-NAT, check if destination is IP broadcast */
			is_ip_broadcast = is_broadcast_ip_packet(data);
			if (*data == NULL) {
				BRIDGE_UNLOCK(sc);
				return EJUSTRETURN;
			}
			m = *data;
		}
		if (!is_ip_broadcast) {
			is_ifp_mac = TRUE;
		}
	}

	/* copy the packet to any span ports */
	bridge_span(sc, m);

	if (is_broadcast || is_ip_broadcast) {
		if (is_broadcast && (m->m_flags & M_MCAST) != 0) {
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    " multicast: "
			    "%02x:%02x:%02x:%02x:%02x:%02x",
			    eh_in.ether_dhost[0], eh_in.ether_dhost[1],
			    eh_in.ether_dhost[2], eh_in.ether_dhost[3],
			    eh_in.ether_dhost[4], eh_in.ether_dhost[5]);
		}
		/* Tap off 802.1D packets; they do not get forwarded. */
		if (is_broadcast &&
		    _ether_cmp(eh_in.ether_dhost, bstp_etheraddr) == 0) {
#if BRIDGESTP
			m = bstp_input(&bif->bif_stp, ifp, m);
#else /* !BRIDGESTP */
			m_freem(m);
			m = NULL;
#endif /* !BRIDGESTP */
			if (m == NULL) {
				BRIDGE_UNLOCK(sc);
				return EJUSTRETURN;
			}
		}

		if ((bif->bif_ifflags & IFBIF_STP) &&
		    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
			BRIDGE_UNLOCK(sc);
			return 0;
		}

		/*
		 * Make a deep copy of the packet and enqueue the copy
		 * for bridge processing.
		 */
		mc = m_dup(m, M_DONTWAIT);
		if (mc == NULL) {
			BRIDGE_UNLOCK(sc);
			return 0;
		}

		/*
		 * Perform the bridge forwarding function with the copy.
		 *
		 * Note that bridge_forward calls BRIDGE_UNLOCK
		 */
		if (is_ip_broadcast) {
			struct ether_header *eh;

			/* make the copy look like it is actually broadcast */
			mc->m_flags |= M_BCAST;
			eh = mtod(mc, struct ether_header *);
			bcopy(etherbroadcastaddr, eh->ether_dhost,
			    ETHER_ADDR_LEN);
		}
		bridge_forward(sc, bif, mc);

		/*
		 * Reinject the mbuf as arriving on the bridge so we have a
		 * chance at claiming multicast packets. We can not loop back
		 * here from ether_input as a bridge is never a member of a
		 * bridge.
		 */
		VERIFY(bridge_ifp->if_bridge == NULL);
		mc2 = m_dup(m, M_DONTWAIT);
		if (mc2 != NULL) {
			/* Keep the layer3 header aligned */
			int i = min(mc2->m_pkthdr.len, max_protohdr);
			mc2 = m_copyup(mc2, i, ETHER_ALIGN);
		}
		/* a failed copy only skips the reinjection on the bridge */
		if (mc2 != NULL) {
			/* mark packet as arriving on the bridge */
			mc2->m_pkthdr.rcvif = bridge_ifp;
			mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2);
			BRIDGE_BPF_MTAP_INPUT(sc, mc2);
			(void) mbuf_setdata(mc2,
			    (char *)mbuf_data(mc2) + ETHER_HDR_LEN,
			    mbuf_len(mc2) - ETHER_HDR_LEN);
			(void) mbuf_pkthdr_adjustlen(mc2, -ETHER_HDR_LEN);
			(void) ifnet_stat_increment_in(bridge_ifp, 1,
			    mbuf_pkthdr_len(mc2), 0);
			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MCAST,
			    "%s mcast for us", bridge_ifp->if_xname);
			dlil_input_packet_list(bridge_ifp, mc2);
		}

		/* Return the original packet for local processing. */
		return 0;
	}

	if ((bif->bif_ifflags & IFBIF_STP) &&
	    bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
		BRIDGE_UNLOCK(sc);
		return 0;
	}

#ifdef DEV_CARP
#define CARP_CHECK_WE_ARE_DST(iface) \
	((iface)->if_carp &&\
	        carp_forus((iface)->if_carp, eh_in.ether_dhost))
#define CARP_CHECK_WE_ARE_SRC(iface) \
	((iface)->if_carp &&\
	        carp_forus((iface)->if_carp, eh_in.ether_shost))
#else
#define CARP_CHECK_WE_ARE_DST(iface) 0
#define CARP_CHECK_WE_ARE_SRC(iface) 0
#endif

#define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook)

#define PFIL_PHYS(sc, ifp, m)

/*
 * GRAB_OUR_PACKETS(iface) is meant to be expanded inside the member loop
 * below. It `continue's for IFT_GIF interfaces, and `return's
 * EJUSTRETURN from bridge_input() (after dropping the bridge lock) when
 * the frame is addressed to `iface' (packet injected on `iface') or was
 * sent by `iface' (packet freed). It uses the locals sc, bif, m, vlan,
 * error and eh_in of the enclosing function.
 */
#define GRAB_OUR_PACKETS(iface)                                         \
	if ((iface)->if_type == IFT_GIF)                                \
	        continue;                                               \
	/* It is destined for us. */                                    \
	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_dhost) == 0 ||   \
	    CARP_CHECK_WE_ARE_DST((iface))) {                           \
	        if ((iface)->if_type == IFT_BRIDGE) {                   \
	                BRIDGE_BPF_MTAP_INPUT(sc, m);                   \
	/* Filter on the physical interface. */         \
	                PFIL_PHYS(sc, iface, m);                        \
	        } else {                                                \
	                bpf_tap_in(iface, DLT_EN10MB, m, NULL, 0);      \
	        }                                                       \
	        if (bif->bif_ifflags & IFBIF_LEARNING) {                \
	                error = bridge_rtupdate(sc, eh_in.ether_shost,  \
	                    vlan, bif, 0, IFBAF_DYNAMIC);               \
	                if (error && bif->bif_addrmax) {                \
	                        BRIDGE_UNLOCK(sc);                      \
	                        m_freem(m);                             \
	                        return (EJUSTRETURN);                   \
	                }                                               \
	        }                                                       \
	        BRIDGE_UNLOCK(sc);                                      \
	        inject_input_packet(iface, m);                          \
	        return (EJUSTRETURN);                                   \
	}                                                               \
                                                                        \
	/* We just received a packet that we sent out. */               \
	if (_ether_cmp(IF_LLADDR((iface)), eh_in.ether_shost) == 0 ||   \
	    CARP_CHECK_WE_ARE_SRC((iface))) {                           \
	        BRIDGE_UNLOCK(sc);                                      \
	        m_freem(m);                                             \
	        return (EJUSTRETURN);                                   \
	}

	/*
	 * Unicast.
	 */

	/* handle MAC-NAT if enabled */
	if (is_ifp_mac && sc->sc_mac_nat_bif == bif) {
		ifnet_t dst_if;
		boolean_t is_input = FALSE;

		dst_if = bridge_mac_nat_input(sc, data, &is_input);
		m = *data;
		if (dst_if == ifp) {
			/* our input packet */
		} else if (dst_if != NULL || m == NULL) {
			/*
			 * Either MAC-NAT picked another interface for the
			 * packet or the packet is gone; in both cases we
			 * are done with it here.
			 */
			BRIDGE_UNLOCK(sc);
			if (dst_if != NULL) {
				ASSERT(m != NULL);
				if (is_input) {
					inject_input_packet(dst_if, m);
				} else {
					(void)bridge_enqueue(bridge_ifp, NULL,
					    dst_if, m,
					    CHECKSUM_OPERATION_CLEAR_OFFLOAD);
				}
			}
			return EJUSTRETURN;
		}
	}

	/*
	 * If the packet is for the bridge, pass it up for local processing.
	 */
	if (_ether_cmp(eh_in.ether_dhost, IF_LLADDR(bridge_ifp)) == 0 ||
	    CARP_CHECK_WE_ARE_DST(bridge_ifp)) {
		/* read the BPF callback while the bridge lock is still held */
		bpf_packet_func     bpf_input_func = sc->sc_bpf_input;

		/*
		 * If the interface is learning, and the source
		 * address is valid and not multicast, record
		 * the address.
		 */
		if (bif->bif_ifflags & IFBIF_LEARNING) {
			(void) bridge_rtupdate(sc, eh_in.ether_shost,
			    vlan, bif, 0, IFBAF_DYNAMIC);
		}
		BRIDGE_UNLOCK(sc);

		bridge_interface_input(bridge_ifp, m, bpf_input_func);
		return EJUSTRETURN;
	}

	/*
	 * if the destination of the packet is for the MAC address of
	 * the member interface itself, then we don't need to forward
	 * it -- just pass it back.  Note that it'll likely just be
	 * dropped by the stack, but if something else is bound to
	 * the interface directly (for example, the wireless stats
	 * protocol -- although that actually uses BPF right now),
	 * then it will consume the packet
	 *
	 * ALSO, note that we do this check AFTER checking for the
	 * bridge's own MAC address, because the bridge may be
	 * using the SAME MAC address as one of its interfaces
	 */
	if (is_ifp_mac) {

#ifdef VERY_VERY_VERY_DIAGNOSTIC
		BRIDGE_LOG(LOG_NOTICE, 0,
		    "not forwarding packet bound for member interface");
#endif

		BRIDGE_UNLOCK(sc);
		return 0;
	}

	/* Now check the remaining bridge members. */
	TAILQ_FOREACH(bif2, &sc->sc_iflist, bif_next) {
		if (bif2->bif_ifp != ifp) {
			/* may `continue' or `return' from this function */
			GRAB_OUR_PACKETS(bif2->bif_ifp);
		}
	}

#undef CARP_CHECK_WE_ARE_DST
#undef CARP_CHECK_WE_ARE_SRC
#undef GRAB_OUR_PACKETS

	/*
	 * Perform the bridge forwarding function.
	 *
	 * Note that bridge_forward calls BRIDGE_UNLOCK
	 */
	bridge_forward(sc, bif, m);

	return EJUSTRETURN;
}
6234 
6235 /*
6236  * bridge_broadcast:
6237  *
6238  *	Send a frame to all interfaces that are members of
6239  *	the bridge, except for the one on which the packet
6240  *	arrived.
6241  *
6242  *	NOTE: Releases the lock on return.
6243  */
6244 static void
6245 bridge_broadcast(struct bridge_softc *sc, struct bridge_iflist * sbif,
6246     struct mbuf *m, int runfilt)
6247 {
6248 	ifnet_t bridge_ifp;
6249 	struct bridge_iflist *dbif;
6250 	struct ifnet * src_if;
6251 	struct mbuf *mc;
6252 	struct mbuf *mc_in;
6253 	struct ifnet *dst_if;
6254 	int error = 0, used = 0;
6255 	boolean_t bridge_if_out;
6256 	ChecksumOperation cksum_op;
6257 	struct mac_nat_record mnr;
6258 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
6259 	boolean_t translate_mac = FALSE;
6260 	uint32_t sc_filter_flags = 0;
6261 
6262 	bridge_ifp = sc->sc_ifp;
6263 	if (sbif != NULL) {
6264 		bridge_if_out = FALSE;
6265 		src_if = sbif->bif_ifp;
6266 		cksum_op = CHECKSUM_OPERATION_CLEAR_OFFLOAD;
6267 		if (mac_nat_bif != NULL && sbif != mac_nat_bif) {
6268 			/* get the translation record while holding the lock */
6269 			translate_mac
6270 			        = bridge_mac_nat_output(sc, sbif, &m, &mnr);
6271 			if (m == NULL) {
6272 				/* packet was deallocated */
6273 				BRIDGE_UNLOCK(sc);
6274 				return;
6275 			}
6276 		}
6277 	} else {
6278 		/*
6279 		 * sbif is NULL when the bridge interface calls
6280 		 * bridge_broadcast().
6281 		 */
6282 		bridge_if_out = TRUE;
6283 		cksum_op = CHECKSUM_OPERATION_FINALIZE;
6284 		sbif = NULL;
6285 		src_if = NULL;
6286 	}
6287 
6288 	BRIDGE_LOCK2REF(sc, error);
6289 	if (error) {
6290 		m_freem(m);
6291 		return;
6292 	}
6293 
6294 	TAILQ_FOREACH(dbif, &sc->sc_iflist, bif_next) {
6295 		dst_if = dbif->bif_ifp;
6296 		if (dst_if == src_if) {
6297 			/* skip the interface that the packet came in on */
6298 			continue;
6299 		}
6300 
6301 		/* Private segments can not talk to each other */
6302 		if (sbif != NULL &&
6303 		    (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) {
6304 			continue;
6305 		}
6306 
6307 		if ((dbif->bif_ifflags & IFBIF_STP) &&
6308 		    dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) {
6309 			continue;
6310 		}
6311 
6312 		if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 &&
6313 		    (m->m_flags & (M_BCAST | M_MCAST)) == 0) {
6314 			continue;
6315 		}
6316 
6317 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6318 			continue;
6319 		}
6320 
6321 		if (!(dbif->bif_flags & BIFF_MEDIA_ACTIVE)) {
6322 			continue;
6323 		}
6324 
6325 		if (TAILQ_NEXT(dbif, bif_next) == NULL) {
6326 			mc = m;
6327 			used = 1;
6328 		} else {
6329 			mc = m_dup(m, M_DONTWAIT);
6330 			if (mc == NULL) {
6331 				(void) ifnet_stat_increment_out(bridge_ifp,
6332 				    0, 0, 1);
6333 				continue;
6334 			}
6335 		}
6336 
6337 		/*
6338 		 * If broadcast input is enabled, do so only if this
6339 		 * is an input packet.
6340 		 */
6341 		if (!bridge_if_out &&
6342 		    (dbif->bif_flags & BIFF_INPUT_BROADCAST) != 0) {
6343 			mc_in = m_dup(mc, M_DONTWAIT);
6344 			/* this could fail, but we continue anyways */
6345 		} else {
6346 			mc_in = NULL;
6347 		}
6348 
6349 		/* out */
6350 		if (translate_mac && mac_nat_bif == dbif) {
6351 			/* translate the packet without holding the lock */
6352 			bridge_mac_nat_translate(&mc, &mnr, IF_LLADDR(dst_if));
6353 		}
6354 
6355 		sc_filter_flags = sc->sc_filter_flags;
6356 		if (runfilt &&
6357 		    PF_IS_ENABLED && (sc_filter_flags & IFBF_FILT_MEMBER)) {
6358 			if (used == 0) {
6359 				/* Keep the layer3 header aligned */
6360 				int i = min(mc->m_pkthdr.len, max_protohdr);
6361 				mc = m_copyup(mc, i, ETHER_ALIGN);
6362 				if (mc == NULL) {
6363 					(void) ifnet_stat_increment_out(
6364 						sc->sc_ifp, 0, 0, 1);
6365 					if (mc_in != NULL) {
6366 						m_freem(mc_in);
6367 						mc_in = NULL;
6368 					}
6369 					continue;
6370 				}
6371 			}
6372 			if (bridge_pf(&mc, dst_if, sc_filter_flags, FALSE) != 0) {
6373 				if (mc_in != NULL) {
6374 					m_freem(mc_in);
6375 					mc_in = NULL;
6376 				}
6377 				continue;
6378 			}
6379 			if (mc == NULL) {
6380 				if (mc_in != NULL) {
6381 					m_freem(mc_in);
6382 					mc_in = NULL;
6383 				}
6384 				continue;
6385 			}
6386 		}
6387 
6388 		if (mc != NULL) {
6389 			/* verify checksum if necessary */
6390 			if (bif_has_checksum_offload(dbif) && sbif != NULL &&
6391 			    !bif_has_checksum_offload(sbif)) {
6392 				error = bridge_verify_checksum(&mc,
6393 				    &dbif->bif_stats);
6394 				if (error != 0) {
6395 					if (mc != NULL) {
6396 						m_freem(mc);
6397 					}
6398 					mc = NULL;
6399 				}
6400 			}
6401 			if (mc != NULL) {
6402 				(void) bridge_enqueue(bridge_ifp,
6403 				    NULL, dst_if, mc, cksum_op);
6404 			}
6405 		}
6406 
6407 		/* in */
6408 		if (mc_in == NULL) {
6409 			continue;
6410 		}
6411 		bpf_tap_in(dst_if, DLT_EN10MB, mc_in, NULL, 0);
6412 		mbuf_pkthdr_setrcvif(mc_in, dst_if);
6413 		mbuf_pkthdr_setheader(mc_in, mbuf_data(mc_in));
6414 		mbuf_setdata(mc_in, (char *)mbuf_data(mc_in) + ETHER_HDR_LEN,
6415 		    mbuf_len(mc_in) - ETHER_HDR_LEN);
6416 		mbuf_pkthdr_adjustlen(mc_in, -ETHER_HDR_LEN);
6417 		mc_in->m_flags |= M_PROTO1; /* set to avoid loops */
6418 		dlil_input_packet_list(dst_if, mc_in);
6419 	}
6420 	if (used == 0) {
6421 		m_freem(m);
6422 	}
6423 
6424 
6425 	BRIDGE_UNREF(sc);
6426 }
6427 
6428 /*
6429  * bridge_span:
6430  *
6431  *	Duplicate a packet out one or more interfaces that are in span mode,
6432  *	the original mbuf is unmodified.
6433  */
6434 static void
6435 bridge_span(struct bridge_softc *sc, struct mbuf *m)
6436 {
6437 	struct bridge_iflist *bif;
6438 	struct ifnet *dst_if;
6439 	struct mbuf *mc;
6440 
6441 	if (TAILQ_EMPTY(&sc->sc_spanlist)) {
6442 		return;
6443 	}
6444 
6445 	TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) {
6446 		dst_if = bif->bif_ifp;
6447 
6448 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
6449 			continue;
6450 		}
6451 
6452 		mc = m_copypacket(m, M_DONTWAIT);
6453 		if (mc == NULL) {
6454 			(void) ifnet_stat_increment_out(sc->sc_ifp, 0, 0, 1);
6455 			continue;
6456 		}
6457 
6458 		(void) bridge_enqueue(sc->sc_ifp, NULL, dst_if, mc,
6459 		    CHECKSUM_OPERATION_NONE);
6460 	}
6461 }
6462 
6463 
6464 /*
6465  * bridge_rtupdate:
6466  *
6467  *	Add a bridge routing entry.
6468  */
6469 static int
6470 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan,
6471     struct bridge_iflist *bif, int setflags, uint8_t flags)
6472 {
6473 	struct bridge_rtnode *brt;
6474 	int error;
6475 
6476 	BRIDGE_LOCK_ASSERT_HELD(sc);
6477 
6478 	/* Check the source address is valid and not multicast. */
6479 	if (ETHER_IS_MULTICAST(dst) ||
6480 	    (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
6481 	    dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) {
6482 		return EINVAL;
6483 	}
6484 
6485 
6486 	/* 802.1p frames map to vlan 1 */
6487 	if (vlan == 0) {
6488 		vlan = 1;
6489 	}
6490 
6491 	/*
6492 	 * A route for this destination might already exist.  If so,
6493 	 * update it, otherwise create a new one.
6494 	 */
6495 	if ((brt = bridge_rtnode_lookup(sc, dst, vlan)) == NULL) {
6496 		if (sc->sc_brtcnt >= sc->sc_brtmax) {
6497 			sc->sc_brtexceeded++;
6498 			return ENOSPC;
6499 		}
6500 		/* Check per interface address limits (if enabled) */
6501 		if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
6502 			bif->bif_addrexceeded++;
6503 			return ENOSPC;
6504 		}
6505 
6506 		/*
6507 		 * Allocate a new bridge forwarding node, and
6508 		 * initialize the expiration time and Ethernet
6509 		 * address.
6510 		 */
6511 		brt = zalloc_noblock(bridge_rtnode_pool);
6512 		if (brt == NULL) {
6513 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6514 			    "zalloc_nolock failed");
6515 			return ENOMEM;
6516 		}
6517 		bzero(brt, sizeof(struct bridge_rtnode));
6518 
6519 		if (bif->bif_ifflags & IFBIF_STICKY) {
6520 			brt->brt_flags = IFBAF_STICKY;
6521 		} else {
6522 			brt->brt_flags = IFBAF_DYNAMIC;
6523 		}
6524 
6525 		memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
6526 		brt->brt_vlan = vlan;
6527 
6528 
6529 		if ((error = bridge_rtnode_insert(sc, brt)) != 0) {
6530 			zfree(bridge_rtnode_pool, brt);
6531 			return error;
6532 		}
6533 		brt->brt_dst = bif;
6534 		bif->bif_addrcnt++;
6535 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6536 		    "added %02x:%02x:%02x:%02x:%02x:%02x "
6537 		    "on %s count %u hashsize %u",
6538 		    dst[0], dst[1], dst[2], dst[3], dst[4], dst[5],
6539 		    sc->sc_ifp->if_xname, sc->sc_brtcnt,
6540 		    sc->sc_rthash_size);
6541 	}
6542 
6543 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
6544 	    brt->brt_dst != bif) {
6545 		brt->brt_dst->bif_addrcnt--;
6546 		brt->brt_dst = bif;
6547 		brt->brt_dst->bif_addrcnt++;
6548 	}
6549 
6550 	if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6551 		unsigned long now;
6552 
6553 		now = (unsigned long) net_uptime();
6554 		brt->brt_expire = now + sc->sc_brttimeout;
6555 	}
6556 	if (setflags) {
6557 		brt->brt_flags = flags;
6558 	}
6559 
6560 
6561 	return 0;
6562 }
6563 
6564 /*
6565  * bridge_rtlookup:
6566  *
6567  *	Lookup the destination interface for an address.
6568  */
6569 static struct ifnet *
6570 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6571 {
6572 	struct bridge_rtnode *brt;
6573 
6574 	BRIDGE_LOCK_ASSERT_HELD(sc);
6575 
6576 	if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) {
6577 		return NULL;
6578 	}
6579 
6580 	return brt->brt_ifp;
6581 }
6582 
6583 /*
6584  * bridge_rttrim:
6585  *
6586  *	Trim the routine table so that we have a number
6587  *	of routing entries less than or equal to the
6588  *	maximum number.
6589  */
6590 static void
6591 bridge_rttrim(struct bridge_softc *sc)
6592 {
6593 	struct bridge_rtnode *brt, *nbrt;
6594 
6595 	BRIDGE_LOCK_ASSERT_HELD(sc);
6596 
6597 	/* Make sure we actually need to do this. */
6598 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6599 		return;
6600 	}
6601 
6602 	/* Force an aging cycle; this might trim enough addresses. */
6603 	bridge_rtage(sc);
6604 	if (sc->sc_brtcnt <= sc->sc_brtmax) {
6605 		return;
6606 	}
6607 
6608 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6609 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6610 			bridge_rtnode_destroy(sc, brt);
6611 			if (sc->sc_brtcnt <= sc->sc_brtmax) {
6612 				return;
6613 			}
6614 		}
6615 	}
6616 }
6617 
6618 /*
6619  * bridge_aging_timer:
6620  *
6621  *	Aging periodic timer for the bridge routing table.
6622  */
6623 static void
6624 bridge_aging_timer(struct bridge_softc *sc)
6625 {
6626 	BRIDGE_LOCK_ASSERT_HELD(sc);
6627 
6628 	bridge_rtage(sc);
6629 	if ((sc->sc_ifp->if_flags & IFF_RUNNING) &&
6630 	    (sc->sc_flags & SCF_DETACHING) == 0) {
6631 		sc->sc_aging_timer.bdc_sc = sc;
6632 		sc->sc_aging_timer.bdc_func = bridge_aging_timer;
6633 		sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period;
6634 		bridge_schedule_delayed_call(&sc->sc_aging_timer);
6635 	}
6636 }
6637 
6638 /*
6639  * bridge_rtage:
6640  *
6641  *	Perform an aging cycle.
6642  */
6643 static void
6644 bridge_rtage(struct bridge_softc *sc)
6645 {
6646 	struct bridge_rtnode *brt, *nbrt;
6647 	unsigned long now;
6648 
6649 	BRIDGE_LOCK_ASSERT_HELD(sc);
6650 
6651 	now = (unsigned long) net_uptime();
6652 
6653 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6654 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6655 			if (now >= brt->brt_expire) {
6656 				bridge_rtnode_destroy(sc, brt);
6657 			}
6658 		}
6659 	}
6660 	if (sc->sc_mac_nat_bif != NULL) {
6661 		bridge_mac_nat_age_entries(sc, now);
6662 	}
6663 }
6664 
6665 /*
6666  * bridge_rtflush:
6667  *
6668  *	Remove all dynamic addresses from the bridge.
6669  */
6670 static void
6671 bridge_rtflush(struct bridge_softc *sc, int full)
6672 {
6673 	struct bridge_rtnode *brt, *nbrt;
6674 
6675 	BRIDGE_LOCK_ASSERT_HELD(sc);
6676 
6677 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6678 		if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
6679 			bridge_rtnode_destroy(sc, brt);
6680 		}
6681 	}
6682 }
6683 
6684 /*
6685  * bridge_rtdaddr:
6686  *
6687  *	Remove an address from the table.
6688  */
6689 static int
6690 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan)
6691 {
6692 	struct bridge_rtnode *brt;
6693 	int found = 0;
6694 
6695 	BRIDGE_LOCK_ASSERT_HELD(sc);
6696 
6697 	/*
6698 	 * If vlan is zero then we want to delete for all vlans so the lookup
6699 	 * may return more than one.
6700 	 */
6701 	while ((brt = bridge_rtnode_lookup(sc, addr, vlan)) != NULL) {
6702 		bridge_rtnode_destroy(sc, brt);
6703 		found = 1;
6704 	}
6705 
6706 	return found ? 0 : ENOENT;
6707 }
6708 
6709 /*
6710  * bridge_rtdelete:
6711  *
6712  *	Delete routes to a specific member interface.
6713  */
6714 static void
6715 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full)
6716 {
6717 	struct bridge_rtnode *brt, *nbrt;
6718 
6719 	BRIDGE_LOCK_ASSERT_HELD(sc);
6720 
6721 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
6722 		if (brt->brt_ifp == ifp && (full ||
6723 		    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)) {
6724 			bridge_rtnode_destroy(sc, brt);
6725 		}
6726 	}
6727 }
6728 
6729 /*
6730  * bridge_rtable_init:
6731  *
6732  *	Initialize the route table for this bridge.
6733  */
6734 static int
6735 bridge_rtable_init(struct bridge_softc *sc)
6736 {
6737 	u_int32_t i;
6738 
6739 	sc->sc_rthash = kalloc_type(struct _bridge_rtnode_list,
6740 	    BRIDGE_RTHASH_SIZE, Z_WAITOK_ZERO_NOFAIL);
6741 	sc->sc_rthash_size = BRIDGE_RTHASH_SIZE;
6742 
6743 	for (i = 0; i < sc->sc_rthash_size; i++) {
6744 		LIST_INIT(&sc->sc_rthash[i]);
6745 	}
6746 
6747 	sc->sc_rthash_key = RandomULong();
6748 
6749 	LIST_INIT(&sc->sc_rtlist);
6750 
6751 	return 0;
6752 }
6753 
6754 /*
6755  * bridge_rthash_delayed_resize:
6756  *
6757  *	Resize the routing table hash on a delayed thread call.
6758  */
6759 static void
6760 bridge_rthash_delayed_resize(struct bridge_softc *sc)
6761 {
6762 	u_int32_t new_rthash_size = 0;
6763 	u_int32_t old_rthash_size = 0;
6764 	struct _bridge_rtnode_list *new_rthash = NULL;
6765 	struct _bridge_rtnode_list *old_rthash = NULL;
6766 	u_int32_t i;
6767 	struct bridge_rtnode *brt;
6768 	int error = 0;
6769 
6770 	BRIDGE_LOCK_ASSERT_HELD(sc);
6771 
6772 	/*
6773 	 * Four entries per hash bucket is our ideal load factor
6774 	 */
6775 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6776 		goto out;
6777 	}
6778 
6779 	/*
6780 	 * Doubling the number of hash buckets may be too simplistic
6781 	 * especially when facing a spike of new entries
6782 	 */
6783 	new_rthash_size = sc->sc_rthash_size * 2;
6784 
6785 	sc->sc_flags |= SCF_RESIZING;
6786 	BRIDGE_UNLOCK(sc);
6787 
6788 	new_rthash = kalloc_type(struct _bridge_rtnode_list, new_rthash_size,
6789 	    Z_WAITOK | Z_ZERO);
6790 
6791 	BRIDGE_LOCK(sc);
6792 	sc->sc_flags &= ~SCF_RESIZING;
6793 
6794 	if (new_rthash == NULL) {
6795 		error = ENOMEM;
6796 		goto out;
6797 	}
6798 	if ((sc->sc_flags & SCF_DETACHING)) {
6799 		error = ENODEV;
6800 		goto out;
6801 	}
6802 	/*
6803 	 * Fail safe from here on
6804 	 */
6805 	old_rthash = sc->sc_rthash;
6806 	old_rthash_size = sc->sc_rthash_size;
6807 	sc->sc_rthash = new_rthash;
6808 	sc->sc_rthash_size = new_rthash_size;
6809 
6810 	/*
6811 	 * Get a new key to force entries to be shuffled around to reduce
6812 	 * the likelihood they will land in the same buckets
6813 	 */
6814 	sc->sc_rthash_key = RandomULong();
6815 
6816 	for (i = 0; i < sc->sc_rthash_size; i++) {
6817 		LIST_INIT(&sc->sc_rthash[i]);
6818 	}
6819 
6820 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
6821 		LIST_REMOVE(brt, brt_hash);
6822 		(void) bridge_rtnode_hash(sc, brt);
6823 	}
6824 out:
6825 	if (error == 0) {
6826 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6827 		    "%s new size %u",
6828 		    sc->sc_ifp->if_xname, sc->sc_rthash_size);
6829 		kfree_type(struct _bridge_rtnode_list, old_rthash_size, old_rthash);
6830 	} else {
6831 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_RT_TABLE,
6832 		    "%s failed %d", sc->sc_ifp->if_xname, error);
6833 		kfree_type(struct _bridge_rtnode_list, new_rthash_size, new_rthash);
6834 	}
6835 }
6836 
6837 /*
6838  * Resize the number of hash buckets based on the load factor
6839  * Currently only grow
6840  * Failing to resize the hash table is not fatal
6841  */
6842 static void
6843 bridge_rthash_resize(struct bridge_softc *sc)
6844 {
6845 	BRIDGE_LOCK_ASSERT_HELD(sc);
6846 
6847 	if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) {
6848 		return;
6849 	}
6850 
6851 	/*
6852 	 * Four entries per hash bucket is our ideal load factor
6853 	 */
6854 	if (sc->sc_brtcnt < sc->sc_rthash_size * 4) {
6855 		return;
6856 	}
6857 	/*
6858 	 * Hard limit on the size of the routing hash table
6859 	 */
6860 	if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) {
6861 		return;
6862 	}
6863 
6864 	sc->sc_resize_call.bdc_sc = sc;
6865 	sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize;
6866 	bridge_schedule_delayed_call(&sc->sc_resize_call);
6867 }
6868 
6869 /*
6870  * bridge_rtable_fini:
6871  *
6872  *	Deconstruct the route table for this bridge.
6873  */
6874 static void
6875 bridge_rtable_fini(struct bridge_softc *sc)
6876 {
6877 	KASSERT(sc->sc_brtcnt == 0,
6878 	    ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt));
6879 	kfree_type(struct _bridge_rtnode_list, sc->sc_rthash_size,
6880 	    sc->sc_rthash);
6881 	sc->sc_rthash = NULL;
6882 	sc->sc_rthash_size = 0;
6883 }
6884 
6885 /*
6886  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
6887  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
6888  */
6889 #define mix(a, b, c)                                                    \
6890 do {                                                                    \
6891 	a -= b; a -= c; a ^= (c >> 13);                                 \
6892 	b -= c; b -= a; b ^= (a << 8);                                  \
6893 	c -= a; c -= b; c ^= (b >> 13);                                 \
6894 	a -= b; a -= c; a ^= (c >> 12);                                 \
6895 	b -= c; b -= a; b ^= (a << 16);                                 \
6896 	c -= a; c -= b; c ^= (b >> 5);                                  \
6897 	a -= b; a -= c; a ^= (c >> 3);                                  \
6898 	b -= c; b -= a; b ^= (a << 10);                                 \
6899 	c -= a; c -= b; c ^= (b >> 15);                                 \
6900 } while ( /*CONSTCOND*/ 0)
6901 
6902 static __inline uint32_t
6903 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
6904 {
6905 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
6906 
6907 	b += addr[5] << 8;
6908 	b += addr[4];
6909 	a += addr[3] << 24;
6910 	a += addr[2] << 16;
6911 	a += addr[1] << 8;
6912 	a += addr[0];
6913 
6914 	mix(a, b, c);
6915 
6916 	return c & BRIDGE_RTHASH_MASK(sc);
6917 }
6918 
6919 #undef mix
6920 
/*
 * bridge_rtnode_addr_cmp:
 *
 *	Compare two Ethernet addresses byte by byte.  Returns 0 when
 *	they are equal, otherwise the difference (a - b) of the first
 *	pair of bytes that differ.
 */
static int
bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b)
{
	int idx;

	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
		int diff = (int)a[idx] - (int)b[idx];

		if (diff != 0) {
			return diff;
		}
	}

	return 0;
}
6932 
6933 /*
6934  * bridge_rtnode_lookup:
6935  *
6936  *	Look up a bridge route node for the specified destination. Compare the
6937  *	vlan id or if zero then just return the first match.
6938  */
6939 static struct bridge_rtnode *
6940 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr,
6941     uint16_t vlan)
6942 {
6943 	struct bridge_rtnode *brt;
6944 	uint32_t hash;
6945 	int dir;
6946 
6947 	BRIDGE_LOCK_ASSERT_HELD(sc);
6948 
6949 	hash = bridge_rthash(sc, addr);
6950 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
6951 		dir = bridge_rtnode_addr_cmp(addr, brt->brt_addr);
6952 		if (dir == 0 && (brt->brt_vlan == vlan || vlan == 0)) {
6953 			return brt;
6954 		}
6955 		if (dir > 0) {
6956 			return NULL;
6957 		}
6958 	}
6959 
6960 	return NULL;
6961 }
6962 
6963 /*
6964  * bridge_rtnode_hash:
6965  *
6966  *	Insert the specified bridge node into the route hash table.
6967  *	This is used when adding a new node or to rehash when resizing
6968  *	the hash table
6969  */
6970 static int
6971 bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt)
6972 {
6973 	struct bridge_rtnode *lbrt;
6974 	uint32_t hash;
6975 	int dir;
6976 
6977 	BRIDGE_LOCK_ASSERT_HELD(sc);
6978 
6979 	hash = bridge_rthash(sc, brt->brt_addr);
6980 
6981 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
6982 	if (lbrt == NULL) {
6983 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
6984 		goto out;
6985 	}
6986 
6987 	do {
6988 		dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
6989 		if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) {
6990 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
6991 			    "%s EEXIST %02x:%02x:%02x:%02x:%02x:%02x",
6992 			    sc->sc_ifp->if_xname,
6993 			    brt->brt_addr[0], brt->brt_addr[1],
6994 			    brt->brt_addr[2], brt->brt_addr[3],
6995 			    brt->brt_addr[4], brt->brt_addr[5]);
6996 			return EEXIST;
6997 		}
6998 		if (dir > 0) {
6999 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
7000 			goto out;
7001 		}
7002 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
7003 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
7004 			goto out;
7005 		}
7006 		lbrt = LIST_NEXT(lbrt, brt_hash);
7007 	} while (lbrt != NULL);
7008 
7009 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_RT_TABLE,
7010 	    "%s impossible %02x:%02x:%02x:%02x:%02x:%02x",
7011 	    sc->sc_ifp->if_xname,
7012 	    brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2],
7013 	    brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]);
7014 out:
7015 	return 0;
7016 }
7017 
7018 /*
7019  * bridge_rtnode_insert:
7020  *
7021  *	Insert the specified bridge node into the route table.  We
7022  *	assume the entry is not already in the table.
7023  */
7024 static int
7025 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
7026 {
7027 	int error;
7028 
7029 	error = bridge_rtnode_hash(sc, brt);
7030 	if (error != 0) {
7031 		return error;
7032 	}
7033 
7034 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
7035 	sc->sc_brtcnt++;
7036 
7037 	bridge_rthash_resize(sc);
7038 
7039 	return 0;
7040 }
7041 
7042 /*
7043  * bridge_rtnode_destroy:
7044  *
7045  *	Destroy a bridge rtnode.
7046  */
7047 static void
7048 bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt)
7049 {
7050 	BRIDGE_LOCK_ASSERT_HELD(sc);
7051 
7052 	LIST_REMOVE(brt, brt_hash);
7053 
7054 	LIST_REMOVE(brt, brt_list);
7055 	sc->sc_brtcnt--;
7056 	brt->brt_dst->bif_addrcnt--;
7057 	zfree(bridge_rtnode_pool, brt);
7058 }
7059 
7060 #if BRIDGESTP
7061 /*
7062  * bridge_rtable_expire:
7063  *
7064  *	Set the expiry time for all routes on an interface.
7065  */
7066 static void
7067 bridge_rtable_expire(struct ifnet *ifp, int age)
7068 {
7069 	struct bridge_softc *sc = ifp->if_bridge;
7070 	struct bridge_rtnode *brt;
7071 
7072 	BRIDGE_LOCK(sc);
7073 
7074 	/*
7075 	 * If the age is zero then flush, otherwise set all the expiry times to
7076 	 * age for the interface
7077 	 */
7078 	if (age == 0) {
7079 		bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN);
7080 	} else {
7081 		unsigned long now;
7082 
7083 		now = (unsigned long) net_uptime();
7084 
7085 		LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
7086 			/* Cap the expiry time to 'age' */
7087 			if (brt->brt_ifp == ifp &&
7088 			    brt->brt_expire > now + age &&
7089 			    (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
7090 				brt->brt_expire = now + age;
7091 			}
7092 		}
7093 	}
7094 	BRIDGE_UNLOCK(sc);
7095 }
7096 
7097 /*
7098  * bridge_state_change:
7099  *
7100  *	Callback from the bridgestp code when a port changes states.
7101  */
7102 static void
7103 bridge_state_change(struct ifnet *ifp, int state)
7104 {
7105 	struct bridge_softc *sc = ifp->if_bridge;
7106 	static const char *stpstates[] = {
7107 		"disabled",
7108 		"listening",
7109 		"learning",
7110 		"forwarding",
7111 		"blocking",
7112 		"discarding"
7113 	};
7114 
7115 	if (log_stp) {
7116 		log(LOG_NOTICE, "%s: state changed to %s on %s",
7117 		    sc->sc_ifp->if_xname,
7118 		    stpstates[state], ifp->if_xname);
7119 	}
7120 }
7121 #endif /* BRIDGESTP */
7122 
7123 /*
7124  * bridge_set_bpf_tap:
7125  *
7126  *	Sets ups the BPF callbacks.
7127  */
7128 static errno_t
7129 bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback)
7130 {
7131 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7132 
7133 	/* TBD locking */
7134 	if (sc == NULL || (sc->sc_flags & SCF_DETACHING)) {
7135 		return ENODEV;
7136 	}
7137 	switch (mode) {
7138 	case BPF_TAP_DISABLE:
7139 		sc->sc_bpf_input = sc->sc_bpf_output = NULL;
7140 		break;
7141 
7142 	case BPF_TAP_INPUT:
7143 		sc->sc_bpf_input = bpf_callback;
7144 		break;
7145 
7146 	case BPF_TAP_OUTPUT:
7147 		sc->sc_bpf_output = bpf_callback;
7148 		break;
7149 
7150 	case BPF_TAP_INPUT_OUTPUT:
7151 		sc->sc_bpf_input = sc->sc_bpf_output = bpf_callback;
7152 		break;
7153 
7154 	default:
7155 		break;
7156 	}
7157 
7158 	return 0;
7159 }
7160 
7161 /*
7162  * bridge_detach:
7163  *
7164  *	Callback when interface has been detached.
7165  */
7166 static void
7167 bridge_detach(ifnet_t ifp)
7168 {
7169 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7170 
7171 #if BRIDGESTP
7172 	bstp_detach(&sc->sc_stp);
7173 #endif /* BRIDGESTP */
7174 
7175 	/* Tear down the routing table. */
7176 	bridge_rtable_fini(sc);
7177 
7178 	lck_mtx_lock(&bridge_list_mtx);
7179 	LIST_REMOVE(sc, sc_list);
7180 	lck_mtx_unlock(&bridge_list_mtx);
7181 
7182 	ifnet_release(ifp);
7183 
7184 	lck_mtx_destroy(&sc->sc_mtx, &bridge_lock_grp);
7185 	if_clone_softc_deallocate(&bridge_cloner, sc);
7186 }
7187 
7188 /*
7189  * bridge_bpf_input:
7190  *
7191  *	Invoke the input BPF callback if enabled
7192  */
7193 static errno_t
7194 bridge_bpf_input(ifnet_t ifp, struct mbuf *m, const char * func, int line)
7195 {
7196 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7197 	bpf_packet_func     input_func = sc->sc_bpf_input;
7198 
7199 	if (input_func != NULL) {
7200 		if (mbuf_pkthdr_rcvif(m) != ifp) {
7201 			BRIDGE_LOG(LOG_NOTICE, 0,
7202 			    "%s.%d: rcvif: 0x%llx != ifp 0x%llx", func, line,
7203 			    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)),
7204 			    (uint64_t)VM_KERNEL_ADDRPERM(ifp));
7205 		}
7206 		(*input_func)(ifp, m);
7207 	}
7208 	return 0;
7209 }
7210 
7211 /*
7212  * bridge_bpf_output:
7213  *
7214  *	Invoke the output BPF callback if enabled
7215  */
7216 static errno_t
7217 bridge_bpf_output(ifnet_t ifp, struct mbuf *m)
7218 {
7219 	struct bridge_softc *sc = (struct bridge_softc *)ifnet_softc(ifp);
7220 	bpf_packet_func     output_func = sc->sc_bpf_output;
7221 
7222 	if (output_func != NULL) {
7223 		(*output_func)(ifp, m);
7224 	}
7225 	return 0;
7226 }
7227 
7228 /*
7229  * bridge_link_event:
7230  *
7231  *	Report a data link event on an interface
7232  */
7233 static void
7234 bridge_link_event(struct ifnet *ifp, u_int32_t event_code)
7235 {
7236 	struct event {
7237 		u_int32_t ifnet_family;
7238 		u_int32_t unit;
7239 		char if_name[IFNAMSIZ];
7240 	};
7241 	_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
7242 	struct kern_event_msg *header = (struct kern_event_msg*)message;
7243 	struct event *data = (struct event *)(header + 1);
7244 
7245 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_LIFECYCLE,
7246 	    "%s event_code %u - %s", ifp->if_xname,
7247 	    event_code, dlil_kev_dl_code_str(event_code));
7248 	header->total_size   = sizeof(message);
7249 	header->vendor_code  = KEV_VENDOR_APPLE;
7250 	header->kev_class    = KEV_NETWORK_CLASS;
7251 	header->kev_subclass = KEV_DL_SUBCLASS;
7252 	header->event_code   = event_code;
7253 	data->ifnet_family   = ifnet_family(ifp);
7254 	data->unit           = (u_int32_t)ifnet_unit(ifp);
7255 	strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
7256 	ifnet_event(ifp, header);
7257 }
7258 
/*
 * BRIDGE_HF_DROP:
 *
 *	Record a host filter drop: bump the `reason' counter in
 *	bridge_hostfilter_stats, log the call site (`func'.`line')
 *	followed by the reason name, and set the caller's local `error'
 *	variable to EINVAL.  Control is not transferred; the caller
 *	still has to branch to its own exit path.
 *
 *	Wrapped in do { } while (0) so that it acts as a single statement,
 *	including in an unbraced if/else.
 */
#define BRIDGE_HF_DROP(reason, func, line) do {                         \
	        bridge_hostfilter_stats.reason++;                       \
	        BRIDGE_LOG(LOG_DEBUG, BR_DBGF_HOSTFILTER,               \
	                   "%s.%d " #reason, func, line);               \
	        error = EINVAL;                                         \
} while (0)
7265 
7266 /*
7267  * Make sure this is a DHCP or Bootp request that match the host filter
7268  */
7269 static int
7270 bridge_dhcp_filter(struct bridge_iflist *bif, struct mbuf *m, size_t offset)
7271 {
7272 	int error = EINVAL;
7273 	struct dhcp dhcp;
7274 
7275 	/*
7276 	 * Note: We use the dhcp structure because bootp structure definition
7277 	 * is larger and some vendors do not pad the request
7278 	 */
7279 	error = mbuf_copydata(m, offset, sizeof(struct dhcp), &dhcp);
7280 	if (error != 0) {
7281 		BRIDGE_HF_DROP(brhf_dhcp_too_small, __func__, __LINE__);
7282 		goto done;
7283 	}
7284 	if (dhcp.dp_op != BOOTREQUEST) {
7285 		BRIDGE_HF_DROP(brhf_dhcp_bad_op, __func__, __LINE__);
7286 		goto done;
7287 	}
7288 	/*
7289 	 * The hardware address must be an exact match
7290 	 */
7291 	if (dhcp.dp_htype != ARPHRD_ETHER) {
7292 		BRIDGE_HF_DROP(brhf_dhcp_bad_htype, __func__, __LINE__);
7293 		goto done;
7294 	}
7295 	if (dhcp.dp_hlen != ETHER_ADDR_LEN) {
7296 		BRIDGE_HF_DROP(brhf_dhcp_bad_hlen, __func__, __LINE__);
7297 		goto done;
7298 	}
7299 	if (bcmp(dhcp.dp_chaddr, bif->bif_hf_hwsrc,
7300 	    ETHER_ADDR_LEN) != 0) {
7301 		BRIDGE_HF_DROP(brhf_dhcp_bad_chaddr, __func__, __LINE__);
7302 		goto done;
7303 	}
7304 	/*
7305 	 * Client address must match the host address or be not specified
7306 	 */
7307 	if (dhcp.dp_ciaddr.s_addr != bif->bif_hf_ipsrc.s_addr &&
7308 	    dhcp.dp_ciaddr.s_addr != INADDR_ANY) {
7309 		BRIDGE_HF_DROP(brhf_dhcp_bad_ciaddr, __func__, __LINE__);
7310 		goto done;
7311 	}
7312 	error = 0;
7313 done:
7314 	return error;
7315 }
7316 
7317 static int
7318 bridge_host_filter(struct bridge_iflist *bif, mbuf_t *data)
7319 {
7320 	int error = EINVAL;
7321 	struct ether_header *eh;
7322 	static struct in_addr inaddr_any = { .s_addr = INADDR_ANY };
7323 	mbuf_t m = *data;
7324 
7325 	eh = mtod(m, struct ether_header *);
7326 
7327 	/*
7328 	 * Restrict the source hardware address
7329 	 */
7330 	if ((bif->bif_flags & BIFF_HF_HWSRC) == 0 ||
7331 	    bcmp(eh->ether_shost, bif->bif_hf_hwsrc,
7332 	    ETHER_ADDR_LEN) != 0) {
7333 		BRIDGE_HF_DROP(brhf_bad_ether_srchw_addr, __func__, __LINE__);
7334 		goto done;
7335 	}
7336 
7337 	/*
7338 	 * Restrict Ethernet protocols to ARP and IP
7339 	 */
7340 	if (eh->ether_type == htons(ETHERTYPE_ARP)) {
7341 		struct ether_arp *ea;
7342 		size_t minlen = sizeof(struct ether_header) +
7343 		    sizeof(struct ether_arp);
7344 
7345 		/*
7346 		 * Make the Ethernet and ARP headers contiguous
7347 		 */
7348 		if (mbuf_pkthdr_len(m) < minlen) {
7349 			BRIDGE_HF_DROP(brhf_arp_too_small, __func__, __LINE__);
7350 			goto done;
7351 		}
7352 		if (mbuf_len(m) < minlen && mbuf_pullup(data, minlen) != 0) {
7353 			BRIDGE_HF_DROP(brhf_arp_pullup_failed,
7354 			    __func__, __LINE__);
7355 			goto done;
7356 		}
7357 		m = *data;
7358 
7359 		/*
7360 		 * Verify this is an ethernet/ip arp
7361 		 */
7362 		eh = mtod(m, struct ether_header *);
7363 		ea = (struct ether_arp *)(eh + 1);
7364 		if (ea->arp_hrd != htons(ARPHRD_ETHER)) {
7365 			BRIDGE_HF_DROP(brhf_arp_bad_hw_type,
7366 			    __func__, __LINE__);
7367 			goto done;
7368 		}
7369 		if (ea->arp_pro != htons(ETHERTYPE_IP)) {
7370 			BRIDGE_HF_DROP(brhf_arp_bad_pro_type,
7371 			    __func__, __LINE__);
7372 			goto done;
7373 		}
7374 		/*
7375 		 * Verify the address lengths are correct
7376 		 */
7377 		if (ea->arp_hln != ETHER_ADDR_LEN) {
7378 			BRIDGE_HF_DROP(brhf_arp_bad_hw_len, __func__, __LINE__);
7379 			goto done;
7380 		}
7381 		if (ea->arp_pln != sizeof(struct in_addr)) {
7382 			BRIDGE_HF_DROP(brhf_arp_bad_pro_len,
7383 			    __func__, __LINE__);
7384 			goto done;
7385 		}
7386 
7387 		/*
7388 		 * Allow only ARP request or ARP reply
7389 		 */
7390 		if (ea->arp_op != htons(ARPOP_REQUEST) &&
7391 		    ea->arp_op != htons(ARPOP_REPLY)) {
7392 			BRIDGE_HF_DROP(brhf_arp_bad_op, __func__, __LINE__);
7393 			goto done;
7394 		}
7395 		/*
7396 		 * Verify source hardware address matches
7397 		 */
7398 		if (bcmp(ea->arp_sha, bif->bif_hf_hwsrc,
7399 		    ETHER_ADDR_LEN) != 0) {
7400 			BRIDGE_HF_DROP(brhf_arp_bad_sha, __func__, __LINE__);
7401 			goto done;
7402 		}
7403 		/*
7404 		 * Verify source protocol address:
7405 		 * May be null for an ARP probe
7406 		 */
7407 		if (bcmp(ea->arp_spa, &bif->bif_hf_ipsrc.s_addr,
7408 		    sizeof(struct in_addr)) != 0 &&
7409 		    bcmp(ea->arp_spa, &inaddr_any,
7410 		    sizeof(struct in_addr)) != 0) {
7411 			BRIDGE_HF_DROP(brhf_arp_bad_spa, __func__, __LINE__);
7412 			goto done;
7413 		}
7414 		bridge_hostfilter_stats.brhf_arp_ok += 1;
7415 		error = 0;
7416 	} else if (eh->ether_type == htons(ETHERTYPE_IP)) {
7417 		size_t minlen = sizeof(struct ether_header) + sizeof(struct ip);
7418 		struct ip iphdr;
7419 		size_t offset;
7420 
7421 		/*
7422 		 * Make the Ethernet and IP headers contiguous
7423 		 */
7424 		if (mbuf_pkthdr_len(m) < minlen) {
7425 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7426 			goto done;
7427 		}
7428 		offset = sizeof(struct ether_header);
7429 		error = mbuf_copydata(m, offset, sizeof(struct ip), &iphdr);
7430 		if (error != 0) {
7431 			BRIDGE_HF_DROP(brhf_ip_too_small, __func__, __LINE__);
7432 			goto done;
7433 		}
7434 		/*
7435 		 * Verify the source IP address
7436 		 */
7437 		if (iphdr.ip_p == IPPROTO_UDP) {
7438 			struct udphdr udp;
7439 
7440 			minlen += sizeof(struct udphdr);
7441 			if (mbuf_pkthdr_len(m) < minlen) {
7442 				BRIDGE_HF_DROP(brhf_ip_too_small,
7443 				    __func__, __LINE__);
7444 				goto done;
7445 			}
7446 
7447 			/*
7448 			 * Allow all zero addresses for DHCP requests
7449 			 */
7450 			if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr &&
7451 			    iphdr.ip_src.s_addr != INADDR_ANY) {
7452 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7453 				    __func__, __LINE__);
7454 				goto done;
7455 			}
7456 			offset = sizeof(struct ether_header) +
7457 			    (IP_VHL_HL(iphdr.ip_vhl) << 2);
7458 			error = mbuf_copydata(m, offset,
7459 			    sizeof(struct udphdr), &udp);
7460 			if (error != 0) {
7461 				BRIDGE_HF_DROP(brhf_ip_too_small,
7462 				    __func__, __LINE__);
7463 				goto done;
7464 			}
7465 			/*
7466 			 * Either it's a Bootp/DHCP packet that we like or
7467 			 * it's a UDP packet from the host IP as source address
7468 			 */
7469 			if (udp.uh_sport == htons(IPPORT_BOOTPC) &&
7470 			    udp.uh_dport == htons(IPPORT_BOOTPS)) {
7471 				minlen += sizeof(struct dhcp);
7472 				if (mbuf_pkthdr_len(m) < minlen) {
7473 					BRIDGE_HF_DROP(brhf_ip_too_small,
7474 					    __func__, __LINE__);
7475 					goto done;
7476 				}
7477 				offset += sizeof(struct udphdr);
7478 				error = bridge_dhcp_filter(bif, m, offset);
7479 				if (error != 0) {
7480 					goto done;
7481 				}
7482 			} else if (iphdr.ip_src.s_addr == INADDR_ANY) {
7483 				BRIDGE_HF_DROP(brhf_ip_bad_srcaddr,
7484 				    __func__, __LINE__);
7485 				goto done;
7486 			}
7487 		} else if (iphdr.ip_src.s_addr != bif->bif_hf_ipsrc.s_addr ||
7488 		    bif->bif_hf_ipsrc.s_addr == INADDR_ANY) {
7489 			BRIDGE_HF_DROP(brhf_ip_bad_srcaddr, __func__, __LINE__);
7490 			goto done;
7491 		}
7492 		/*
7493 		 * Allow only boring IP protocols
7494 		 */
7495 		if (iphdr.ip_p != IPPROTO_TCP &&
7496 		    iphdr.ip_p != IPPROTO_UDP &&
7497 		    iphdr.ip_p != IPPROTO_ICMP &&
7498 		    iphdr.ip_p != IPPROTO_ESP &&
7499 		    iphdr.ip_p != IPPROTO_AH &&
7500 		    iphdr.ip_p != IPPROTO_GRE) {
7501 			BRIDGE_HF_DROP(brhf_ip_bad_proto, __func__, __LINE__);
7502 			goto done;
7503 		}
7504 		bridge_hostfilter_stats.brhf_ip_ok += 1;
7505 		error = 0;
7506 	} else {
7507 		BRIDGE_HF_DROP(brhf_bad_ether_type, __func__, __LINE__);
7508 		goto done;
7509 	}
7510 done:
7511 	if (error != 0) {
7512 		if (BRIDGE_DBGF_ENABLED(BR_DBGF_HOSTFILTER)) {
7513 			if (m) {
7514 				brlog_mbuf_data(m, 0,
7515 				    sizeof(struct ether_header) +
7516 				    sizeof(struct ip));
7517 			}
7518 		}
7519 
7520 		if (m != NULL) {
7521 			m_freem(m);
7522 		}
7523 	}
7524 	return error;
7525 }
7526 
7527 /*
7528  * MAC NAT
7529  */
7530 
7531 static errno_t
7532 bridge_mac_nat_enable(struct bridge_softc *sc, struct bridge_iflist *bif)
7533 {
7534 	errno_t         error = 0;
7535 
7536 	BRIDGE_LOCK_ASSERT_HELD(sc);
7537 
7538 	if (IFNET_IS_VMNET(bif->bif_ifp)) {
7539 		error = EINVAL;
7540 		goto done;
7541 	}
7542 	if (sc->sc_mac_nat_bif != NULL) {
7543 		if (sc->sc_mac_nat_bif != bif) {
7544 			error = EBUSY;
7545 		}
7546 		goto done;
7547 	}
7548 	sc->sc_mac_nat_bif = bif;
7549 	bif->bif_ifflags |= IFBIF_MAC_NAT;
7550 	bridge_mac_nat_populate_entries(sc);
7551 
7552 done:
7553 	return error;
7554 }
7555 
7556 static void
7557 bridge_mac_nat_disable(struct bridge_softc *sc)
7558 {
7559 	struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7560 
7561 	assert(mac_nat_bif != NULL);
7562 	bridge_mac_nat_flush_entries(sc, mac_nat_bif);
7563 	mac_nat_bif->bif_ifflags &= ~IFBIF_MAC_NAT;
7564 	sc->sc_mac_nat_bif = NULL;
7565 	return;
7566 }
7567 
7568 static void
7569 mac_nat_entry_print2(struct mac_nat_entry *mne,
7570     char *ifname, const char *msg1, const char *msg2)
7571 {
7572 	int             af;
7573 	char            etopbuf[24];
7574 	char            ntopbuf[MAX_IPv6_STR_LEN];
7575 	const char      *space;
7576 
7577 	af = ((mne->mne_flags & MNE_FLAGS_IPV6) != 0) ? AF_INET6 : AF_INET;
7578 	ether_ntop(etopbuf, sizeof(etopbuf), mne->mne_mac);
7579 	(void)inet_ntop(af, &mne->mne_u, ntopbuf, sizeof(ntopbuf));
7580 	if (msg2 == NULL) {
7581 		msg2 = "";
7582 		space = "";
7583 	} else {
7584 		space = " ";
7585 	}
7586 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7587 	    "%s %s%s%s %p (%s, %s, %s)",
7588 	    ifname, msg1, space, msg2, mne, mne->mne_bif->bif_ifp->if_xname,
7589 	    ntopbuf, etopbuf);
7590 }
7591 
7592 static void
7593 mac_nat_entry_print(struct mac_nat_entry *mne,
7594     char *ifname, const char *msg)
7595 {
7596 	mac_nat_entry_print2(mne, ifname, msg, NULL);
7597 }
7598 
7599 static struct mac_nat_entry *
7600 bridge_lookup_mac_nat_entry(struct bridge_softc *sc, int af, void * ip)
7601 {
7602 	struct mac_nat_entry    *mne;
7603 	struct mac_nat_entry    *ret_mne = NULL;
7604 
7605 	if (af == AF_INET) {
7606 		in_addr_t s_addr = ((struct in_addr *)ip)->s_addr;
7607 
7608 		LIST_FOREACH(mne, &sc->sc_mne_list, mne_list) {
7609 			if (mne->mne_ip.s_addr == s_addr) {
7610 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7611 					mac_nat_entry_print(mne, sc->sc_if_xname,
7612 					    "found");
7613 				}
7614 				ret_mne = mne;
7615 				break;
7616 			}
7617 		}
7618 	} else {
7619 		const struct in6_addr *ip6 = (const struct in6_addr *)ip;
7620 
7621 		LIST_FOREACH(mne, &sc->sc_mne_list_v6, mne_list) {
7622 			if (IN6_ARE_ADDR_EQUAL(&mne->mne_ip6, ip6)) {
7623 				if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7624 					mac_nat_entry_print(mne, sc->sc_if_xname,
7625 					    "found");
7626 				}
7627 				ret_mne = mne;
7628 				break;
7629 			}
7630 		}
7631 	}
7632 	return ret_mne;
7633 }
7634 
7635 static void
7636 bridge_destroy_mac_nat_entry(struct bridge_softc *sc,
7637     struct mac_nat_entry *mne, const char *reason)
7638 {
7639 	LIST_REMOVE(mne, mne_list);
7640 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7641 		mac_nat_entry_print(mne, sc->sc_if_xname, reason);
7642 	}
7643 	zfree(bridge_mne_pool, mne);
7644 	sc->sc_mne_count--;
7645 }
7646 
7647 static struct mac_nat_entry *
7648 bridge_create_mac_nat_entry(struct bridge_softc *sc,
7649     struct bridge_iflist *bif, int af, const void *ip, uint8_t *eaddr)
7650 {
7651 	struct mac_nat_entry_list *list;
7652 	struct mac_nat_entry *mne;
7653 
7654 	if (sc->sc_mne_count >= sc->sc_mne_max) {
7655 		sc->sc_mne_allocation_failures++;
7656 		return NULL;
7657 	}
7658 	mne = zalloc_noblock(bridge_mne_pool);
7659 	if (mne == NULL) {
7660 		sc->sc_mne_allocation_failures++;
7661 		return NULL;
7662 	}
7663 	sc->sc_mne_count++;
7664 	bzero(mne, sizeof(*mne));
7665 	bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7666 	mne->mne_bif = bif;
7667 	if (af == AF_INET) {
7668 		bcopy(ip, &mne->mne_ip, sizeof(mne->mne_ip));
7669 		list = &sc->sc_mne_list;
7670 	} else {
7671 		bcopy(ip, &mne->mne_ip6, sizeof(mne->mne_ip6));
7672 		mne->mne_flags |= MNE_FLAGS_IPV6;
7673 		list = &sc->sc_mne_list_v6;
7674 	}
7675 	LIST_INSERT_HEAD(list, mne, mne_list);
7676 	mne->mne_expire = (unsigned long)net_uptime() + sc->sc_brttimeout;
7677 	if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7678 		mac_nat_entry_print(mne, sc->sc_if_xname, "created");
7679 	}
7680 	return mne;
7681 }
7682 
7683 static struct mac_nat_entry *
7684 bridge_update_mac_nat_entry(struct bridge_softc *sc,
7685     struct bridge_iflist *bif, int af, void *ip, uint8_t *eaddr)
7686 {
7687 	struct mac_nat_entry *mne;
7688 
7689 	mne = bridge_lookup_mac_nat_entry(sc, af, ip);
7690 	if (mne != NULL) {
7691 		struct bridge_iflist *mac_nat_bif = sc->sc_mac_nat_bif;
7692 
7693 		if (mne->mne_bif == mac_nat_bif) {
7694 			/* the MAC NAT interface takes precedence */
7695 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7696 				if (mne->mne_bif != bif) {
7697 					mac_nat_entry_print2(mne,
7698 					    sc->sc_if_xname, "reject",
7699 					    bif->bif_ifp->if_xname);
7700 				}
7701 			}
7702 		} else if (mne->mne_bif != bif) {
7703 			const char *old_if = mne->mne_bif->bif_ifp->if_xname;
7704 
7705 			mne->mne_bif = bif;
7706 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7707 				mac_nat_entry_print2(mne,
7708 				    sc->sc_if_xname, "replaced",
7709 				    old_if);
7710 			}
7711 			bcopy(eaddr, mne->mne_mac, sizeof(mne->mne_mac));
7712 		}
7713 		mne->mne_expire = (unsigned long)net_uptime() +
7714 		    sc->sc_brttimeout;
7715 	} else {
7716 		mne = bridge_create_mac_nat_entry(sc, bif, af, ip, eaddr);
7717 	}
7718 	return mne;
7719 }
7720 
7721 static void
7722 bridge_mac_nat_flush_entries_common(struct bridge_softc *sc,
7723     struct mac_nat_entry_list *list, struct bridge_iflist *bif)
7724 {
7725 	struct mac_nat_entry *mne;
7726 	struct mac_nat_entry *tmne;
7727 
7728 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7729 		if (bif != NULL && mne->mne_bif != bif) {
7730 			continue;
7731 		}
7732 		bridge_destroy_mac_nat_entry(sc, mne, "flushed");
7733 	}
7734 }
7735 
7736 /*
7737  * bridge_mac_nat_flush_entries:
7738  *
7739  * Flush MAC NAT entries for the specified member. Flush all entries if
7740  * the member is the one that requires MAC NAT, otherwise just flush the
7741  * ones for the specified member.
7742  */
7743 static void
7744 bridge_mac_nat_flush_entries(struct bridge_softc *sc, struct bridge_iflist * bif)
7745 {
7746 	struct bridge_iflist *flush_bif;
7747 
7748 	flush_bif = (bif == sc->sc_mac_nat_bif) ? NULL : bif;
7749 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list, flush_bif);
7750 	bridge_mac_nat_flush_entries_common(sc, &sc->sc_mne_list_v6, flush_bif);
7751 }
7752 
7753 static void
7754 bridge_mac_nat_populate_entries(struct bridge_softc *sc)
7755 {
7756 	errno_t                 error;
7757 	ifnet_t                 ifp;
7758 	ifaddr_t                *list;
7759 	struct bridge_iflist    *mac_nat_bif = sc->sc_mac_nat_bif;
7760 
7761 	assert(mac_nat_bif != NULL);
7762 	ifp = mac_nat_bif->bif_ifp;
7763 	error = ifnet_get_address_list(ifp, &list);
7764 	if (error != 0) {
7765 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7766 		    "ifnet_get_address_list(%s) failed %d",
7767 		    ifp->if_xname, error);
7768 		return;
7769 	}
7770 	for (ifaddr_t *scan = list; *scan != NULL; scan++) {
7771 		sa_family_t     af;
7772 		void            *ip;
7773 
7774 		union {
7775 			struct sockaddr         sa;
7776 			struct sockaddr_in      sin;
7777 			struct sockaddr_in6     sin6;
7778 		} u;
7779 		af = ifaddr_address_family(*scan);
7780 		switch (af) {
7781 		case AF_INET:
7782 		case AF_INET6:
7783 			error = ifaddr_address(*scan, &u.sa, sizeof(u));
7784 			if (error != 0) {
7785 				BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
7786 				    "ifaddr_address failed %d",
7787 				    error);
7788 				break;
7789 			}
7790 			if (af == AF_INET) {
7791 				ip = (void *)&u.sin.sin_addr;
7792 			} else {
7793 				if (IN6_IS_ADDR_LINKLOCAL(&u.sin6.sin6_addr)) {
7794 					/* remove scope ID */
7795 					u.sin6.sin6_addr.s6_addr16[1] = 0;
7796 				}
7797 				ip = (void *)&u.sin6.sin6_addr;
7798 			}
7799 			bridge_create_mac_nat_entry(sc, mac_nat_bif, af, ip,
7800 			    (uint8_t *)IF_LLADDR(ifp));
7801 			break;
7802 		default:
7803 			break;
7804 		}
7805 	}
7806 	ifnet_free_address_list(list);
7807 	return;
7808 }
7809 
7810 static void
7811 bridge_mac_nat_age_entries_common(struct bridge_softc *sc,
7812     struct mac_nat_entry_list *list, unsigned long now)
7813 {
7814 	struct mac_nat_entry *mne;
7815 	struct mac_nat_entry *tmne;
7816 
7817 	LIST_FOREACH_SAFE(mne, list, mne_list, tmne) {
7818 		if (now >= mne->mne_expire) {
7819 			bridge_destroy_mac_nat_entry(sc, mne, "aged out");
7820 		}
7821 	}
7822 }
7823 
7824 static void
7825 bridge_mac_nat_age_entries(struct bridge_softc *sc, unsigned long now)
7826 {
7827 	if (sc->sc_mac_nat_bif == NULL) {
7828 		return;
7829 	}
7830 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list, now);
7831 	bridge_mac_nat_age_entries_common(sc, &sc->sc_mne_list_v6, now);
7832 }
7833 
7834 static const char *
7835 get_in_out_string(boolean_t is_output)
7836 {
7837 	return is_output ? "OUT" : "IN";
7838 }
7839 
7840 /*
7841  * is_valid_arp_packet:
7842  *	Verify that this is a valid ARP packet.
7843  *
7844  *	Returns TRUE if the packet is valid, FALSE otherwise.
7845  */
7846 static boolean_t
7847 is_valid_arp_packet(mbuf_t *data, boolean_t is_output,
7848     struct ether_header **eh_p, struct ether_arp **ea_p)
7849 {
7850 	struct ether_arp *ea;
7851 	struct ether_header *eh;
7852 	size_t minlen = sizeof(struct ether_header) + sizeof(struct ether_arp);
7853 	boolean_t is_valid = FALSE;
7854 	int flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
7855 
7856 	if (mbuf_pkthdr_len(*data) < minlen) {
7857 		BRIDGE_LOG(LOG_DEBUG, flags,
7858 		    "ARP %s short frame %lu < %lu",
7859 		    get_in_out_string(is_output),
7860 		    mbuf_pkthdr_len(*data), minlen);
7861 		goto done;
7862 	}
7863 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
7864 		BRIDGE_LOG(LOG_DEBUG, flags,
7865 		    "ARP %s size %lu mbuf_pullup fail",
7866 		    get_in_out_string(is_output),
7867 		    minlen);
7868 		*data = NULL;
7869 		goto done;
7870 	}
7871 
7872 	/* validate ARP packet */
7873 	eh = mtod(*data, struct ether_header *);
7874 	ea = (struct ether_arp *)(eh + 1);
7875 	if (ntohs(ea->arp_hrd) != ARPHRD_ETHER) {
7876 		BRIDGE_LOG(LOG_DEBUG, flags,
7877 		    "ARP %s htype not ethernet",
7878 		    get_in_out_string(is_output));
7879 		goto done;
7880 	}
7881 	if (ea->arp_hln != ETHER_ADDR_LEN) {
7882 		BRIDGE_LOG(LOG_DEBUG, flags,
7883 		    "ARP %s hlen not ethernet",
7884 		    get_in_out_string(is_output));
7885 		goto done;
7886 	}
7887 	if (ntohs(ea->arp_pro) != ETHERTYPE_IP) {
7888 		BRIDGE_LOG(LOG_DEBUG, flags,
7889 		    "ARP %s ptype not IP",
7890 		    get_in_out_string(is_output));
7891 		goto done;
7892 	}
7893 	if (ea->arp_pln != sizeof(struct in_addr)) {
7894 		BRIDGE_LOG(LOG_DEBUG, flags,
7895 		    "ARP %s plen not IP",
7896 		    get_in_out_string(is_output));
7897 		goto done;
7898 	}
7899 	is_valid = TRUE;
7900 	*ea_p = ea;
7901 	*eh_p = eh;
7902 done:
7903 	return is_valid;
7904 }
7905 
7906 static struct mac_nat_entry *
7907 bridge_mac_nat_arp_input(struct bridge_softc *sc, mbuf_t *data)
7908 {
7909 	struct ether_arp        *ea;
7910 	struct ether_header     *eh;
7911 	struct mac_nat_entry    *mne = NULL;
7912 	u_short                 op;
7913 	struct in_addr          tpa;
7914 
7915 	if (!is_valid_arp_packet(data, FALSE, &eh, &ea)) {
7916 		goto done;
7917 	}
7918 	op = ntohs(ea->arp_op);
7919 	switch (op) {
7920 	case ARPOP_REQUEST:
7921 	case ARPOP_REPLY:
7922 		/* only care about REQUEST and REPLY */
7923 		break;
7924 	default:
7925 		goto done;
7926 	}
7927 
7928 	/* check the target IP address for a NAT entry */
7929 	bcopy(ea->arp_tpa, &tpa, sizeof(tpa));
7930 	if (tpa.s_addr != 0) {
7931 		mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &tpa);
7932 	}
7933 	if (mne != NULL) {
7934 		if (op == ARPOP_REPLY) {
7935 			/* translate the MAC address */
7936 			if (BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
7937 				char    mac_src[24];
7938 				char    mac_dst[24];
7939 
7940 				ether_ntop(mac_src, sizeof(mac_src),
7941 				    ea->arp_tha);
7942 				ether_ntop(mac_dst, sizeof(mac_dst),
7943 				    mne->mne_mac);
7944 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
7945 				    "%s %s ARP %s -> %s",
7946 				    sc->sc_if_xname,
7947 				    mne->mne_bif->bif_ifp->if_xname,
7948 				    mac_src, mac_dst);
7949 			}
7950 			bcopy(mne->mne_mac, ea->arp_tha, sizeof(ea->arp_tha));
7951 		}
7952 	} else {
7953 		/* handle conflicting ARP (sender matches mne) */
7954 		struct in_addr spa;
7955 
7956 		bcopy(ea->arp_spa, &spa, sizeof(spa));
7957 		if (spa.s_addr != 0 && spa.s_addr != tpa.s_addr) {
7958 			/* check the source IP for a NAT entry */
7959 			mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &spa);
7960 		}
7961 	}
7962 
7963 done:
7964 	return mne;
7965 }
7966 
7967 static boolean_t
7968 bridge_mac_nat_arp_output(struct bridge_softc *sc,
7969     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
7970 {
7971 	struct ether_arp        *ea;
7972 	struct ether_header     *eh;
7973 	struct in_addr          ip;
7974 	struct mac_nat_entry    *mne = NULL;
7975 	u_short                 op;
7976 	boolean_t               translate = FALSE;
7977 
7978 	if (!is_valid_arp_packet(data, TRUE, &eh, &ea)) {
7979 		goto done;
7980 	}
7981 	op = ntohs(ea->arp_op);
7982 	switch (op) {
7983 	case ARPOP_REQUEST:
7984 	case ARPOP_REPLY:
7985 		/* only care about REQUEST and REPLY */
7986 		break;
7987 	default:
7988 		goto done;
7989 	}
7990 
7991 	bcopy(ea->arp_spa, &ip, sizeof(ip));
7992 	if (ip.s_addr == 0) {
7993 		goto done;
7994 	}
7995 	/* XXX validate IP address: no multicast/broadcast */
7996 	mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip, ea->arp_sha);
7997 	if (mnr != NULL && mne != NULL) {
7998 		/* record the offset to do the replacement */
7999 		translate = TRUE;
8000 		mnr->mnr_arp_offset = (char *)ea->arp_sha - (char *)eh;
8001 	}
8002 
8003 done:
8004 	return translate;
8005 }
8006 
8007 #define ETHER_IPV4_HEADER_LEN   (sizeof(struct ether_header) +  \
8008 	                         + sizeof(struct ip))
8009 static struct ether_header *
8010 get_ether_ip_header(mbuf_t *data, boolean_t is_output)
8011 {
8012 	struct ether_header     *eh = NULL;
8013 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8014 	size_t          minlen = ETHER_IPV4_HEADER_LEN;
8015 
8016 	if (mbuf_pkthdr_len(*data) < minlen) {
8017 		BRIDGE_LOG(LOG_DEBUG, flags,
8018 		    "IP %s short frame %lu < %lu",
8019 		    get_in_out_string(is_output),
8020 		    mbuf_pkthdr_len(*data), minlen);
8021 		goto done;
8022 	}
8023 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8024 		BRIDGE_LOG(LOG_DEBUG, flags,
8025 		    "IP %s size %lu mbuf_pullup fail",
8026 		    get_in_out_string(is_output),
8027 		    minlen);
8028 		*data = NULL;
8029 		goto done;
8030 	}
8031 	eh = mtod(*data, struct ether_header *);
8032 done:
8033 	return eh;
8034 }
8035 
8036 static bool
8037 is_broadcast_ip_packet(mbuf_t *data)
8038 {
8039 	struct ether_header     *eh;
8040 	uint16_t                ether_type;
8041 	bool                    is_broadcast = FALSE;
8042 
8043 	eh = mtod(*data, struct ether_header *);
8044 	ether_type = ntohs(eh->ether_type);
8045 	switch (ether_type) {
8046 	case ETHERTYPE_IP:
8047 		eh = get_ether_ip_header(data, FALSE);
8048 		if (eh != NULL) {
8049 			struct in_addr  dst;
8050 			struct ip       *iphdr;
8051 
8052 			iphdr = (struct ip *)(void *)(eh + 1);
8053 			bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8054 			is_broadcast = (dst.s_addr == INADDR_BROADCAST);
8055 		}
8056 		break;
8057 	default:
8058 		break;
8059 	}
8060 	return is_broadcast;
8061 }
8062 
8063 static struct mac_nat_entry *
8064 bridge_mac_nat_ip_input(struct bridge_softc *sc, mbuf_t *data)
8065 {
8066 	struct in_addr          dst;
8067 	struct ether_header     *eh;
8068 	struct ip               *iphdr;
8069 	struct mac_nat_entry    *mne = NULL;
8070 
8071 	eh = get_ether_ip_header(data, FALSE);
8072 	if (eh == NULL) {
8073 		goto done;
8074 	}
8075 	iphdr = (struct ip *)(void *)(eh + 1);
8076 	bcopy(&iphdr->ip_dst, &dst, sizeof(dst));
8077 	/* XXX validate IP address */
8078 	if (dst.s_addr == 0) {
8079 		goto done;
8080 	}
8081 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET, &dst);
8082 done:
8083 	return mne;
8084 }
8085 
8086 static void
8087 bridge_mac_nat_udp_output(struct bridge_softc *sc,
8088     struct bridge_iflist *bif, mbuf_t m,
8089     uint8_t ip_header_len, struct mac_nat_record *mnr)
8090 {
8091 	uint16_t        dp_flags;
8092 	errno_t         error;
8093 	size_t          offset;
8094 	struct udphdr   udphdr;
8095 
8096 	/* copy the UDP header */
8097 	offset = sizeof(struct ether_header) + ip_header_len;
8098 	error = mbuf_copydata(m, offset, sizeof(struct udphdr), &udphdr);
8099 	if (error != 0) {
8100 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8101 		    "mbuf_copydata udphdr failed %d",
8102 		    error);
8103 		return;
8104 	}
8105 	if (ntohs(udphdr.uh_sport) != IPPORT_BOOTPC ||
8106 	    ntohs(udphdr.uh_dport) != IPPORT_BOOTPS) {
8107 		/* not a BOOTP/DHCP packet */
8108 		return;
8109 	}
8110 	/* check whether the broadcast bit is already set */
8111 	offset += sizeof(struct udphdr) + offsetof(struct dhcp, dp_flags);
8112 	error = mbuf_copydata(m, offset, sizeof(dp_flags), &dp_flags);
8113 	if (error != 0) {
8114 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8115 		    "mbuf_copydata dp_flags failed %d",
8116 		    error);
8117 		return;
8118 	}
8119 	if ((ntohs(dp_flags) & DHCP_FLAGS_BROADCAST) != 0) {
8120 		/* it's already set, nothing to do */
8121 		return;
8122 	}
8123 	/* broadcast bit needs to be set */
8124 	mnr->mnr_ip_dhcp_flags = dp_flags | htons(DHCP_FLAGS_BROADCAST);
8125 	mnr->mnr_ip_header_len = ip_header_len;
8126 	if (udphdr.uh_sum != 0) {
8127 		uint16_t        delta;
8128 
8129 		/* adjust checksum to take modified dp_flags into account */
8130 		delta = dp_flags - mnr->mnr_ip_dhcp_flags;
8131 		mnr->mnr_ip_udp_csum = udphdr.uh_sum + delta;
8132 	}
8133 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8134 	    "%s %s DHCP dp_flags 0x%x UDP cksum 0x%x",
8135 	    sc->sc_if_xname,
8136 	    bif->bif_ifp->if_xname,
8137 	    ntohs(mnr->mnr_ip_dhcp_flags),
8138 	    ntohs(mnr->mnr_ip_udp_csum));
8139 	return;
8140 }
8141 
8142 static boolean_t
8143 bridge_mac_nat_ip_output(struct bridge_softc *sc,
8144     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8145 {
8146 #pragma unused(mnr)
8147 	struct ether_header     *eh;
8148 	struct in_addr          ip;
8149 	struct ip               *iphdr;
8150 	uint8_t                 ip_header_len;
8151 	struct mac_nat_entry    *mne = NULL;
8152 	boolean_t               translate = FALSE;
8153 
8154 	eh = get_ether_ip_header(data, TRUE);
8155 	if (eh == NULL) {
8156 		goto done;
8157 	}
8158 	iphdr = (struct ip *)(void *)(eh + 1);
8159 	ip_header_len = IP_VHL_HL(iphdr->ip_vhl) << 2;
8160 	if (ip_header_len < sizeof(ip)) {
8161 		/* bogus IP header */
8162 		goto done;
8163 	}
8164 	bcopy(&iphdr->ip_src, &ip, sizeof(ip));
8165 	/* XXX validate the source address */
8166 	if (ip.s_addr != 0) {
8167 		mne = bridge_update_mac_nat_entry(sc, bif, AF_INET, &ip,
8168 		    eh->ether_shost);
8169 	}
8170 	if (mnr != NULL) {
8171 		if (iphdr->ip_p == IPPROTO_UDP) {
8172 			/* handle DHCP must broadcast */
8173 			bridge_mac_nat_udp_output(sc, bif, *data,
8174 			    ip_header_len, mnr);
8175 		}
8176 		translate = TRUE;
8177 	}
8178 done:
8179 	return translate;
8180 }
8181 
8182 #define ETHER_IPV6_HEADER_LEN   (sizeof(struct ether_header) +  \
8183 	                         + sizeof(struct ip6_hdr))
8184 static struct ether_header *
8185 get_ether_ipv6_header(mbuf_t *data, boolean_t is_output)
8186 {
8187 	struct ether_header     *eh = NULL;
8188 	int             flags = is_output ? BR_DBGF_OUTPUT : BR_DBGF_INPUT;
8189 	size_t          minlen = ETHER_IPV6_HEADER_LEN;
8190 
8191 	if (mbuf_pkthdr_len(*data) < minlen) {
8192 		BRIDGE_LOG(LOG_DEBUG, flags,
8193 		    "IP %s short frame %lu < %lu",
8194 		    get_in_out_string(is_output),
8195 		    mbuf_pkthdr_len(*data), minlen);
8196 		goto done;
8197 	}
8198 	if (mbuf_len(*data) < minlen && mbuf_pullup(data, minlen) != 0) {
8199 		BRIDGE_LOG(LOG_DEBUG, flags,
8200 		    "IP %s size %lu mbuf_pullup fail",
8201 		    get_in_out_string(is_output),
8202 		    minlen);
8203 		*data = NULL;
8204 		goto done;
8205 	}
8206 	eh = mtod(*data, struct ether_header *);
8207 done:
8208 	return eh;
8209 }
8210 
8211 #include <netinet/icmp6.h>
8212 #include <netinet6/nd6.h>
8213 
8214 #define ETHER_ND_LLADDR_LEN     (ETHER_ADDR_LEN + sizeof(struct nd_opt_hdr))
8215 
8216 static void
8217 bridge_mac_nat_icmpv6_output(struct bridge_softc *sc, struct bridge_iflist *bif,
8218     mbuf_t *data, struct ether_header *eh,
8219     struct ip6_hdr *ip6h, struct in6_addr *saddrp, struct mac_nat_record *mnr)
8220 {
8221 	struct icmp6_hdr *icmp6;
8222 	unsigned int    icmp6len;
8223 	int             lladdrlen = 0;
8224 	char            *lladdr = NULL;
8225 	mbuf_t          m = *data;
8226 	unsigned int    off = sizeof(*ip6h);
8227 
8228 	icmp6len = m->m_pkthdr.len - sizeof(*eh) - off;
8229 	if (icmp6len < sizeof(*icmp6)) {
8230 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8231 		    "short packet %d < %lu",
8232 		    icmp6len, sizeof(*icmp6));
8233 		return;
8234 	}
8235 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6h + off);
8236 	switch (icmp6->icmp6_type) {
8237 	case ND_NEIGHBOR_SOLICIT: {
8238 		struct nd_neighbor_solicit *nd_ns;
8239 		union nd_opts ndopts;
8240 		boolean_t is_dad_probe;
8241 		struct in6_addr taddr;
8242 
8243 		if (icmp6len < sizeof(*nd_ns)) {
8244 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8245 			    "short nd_ns %d < %lu",
8246 			    icmp6len, sizeof(*nd_ns));
8247 			return;
8248 		}
8249 
8250 		nd_ns = (struct nd_neighbor_solicit *)(void *)icmp6;
8251 		bcopy(&nd_ns->nd_ns_target, &taddr, sizeof(taddr));
8252 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8253 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8254 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8255 			    "invalid target ignored");
8256 			return;
8257 		}
8258 		/* parse options */
8259 		nd6_option_init(nd_ns + 1, icmp6len - sizeof(*nd_ns), &ndopts);
8260 		if (nd6_options(&ndopts) < 0) {
8261 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8262 			    "invalid ND6 NS option");
8263 			return;
8264 		}
8265 		if (ndopts.nd_opts_src_lladdr != NULL) {
8266 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8267 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8268 		}
8269 		is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(saddrp);
8270 		if (lladdr != NULL) {
8271 			if (is_dad_probe) {
8272 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8273 				    "bad ND6 DAD packet");
8274 				return;
8275 			}
8276 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8277 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8278 				    "source lladdrlen %d != %lu",
8279 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8280 				return;
8281 			}
8282 			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
8283 			    (uintptr_t)eh);
8284 			mnr->mnr_ip6_icmp6_len = icmp6len;
8285 			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8286 			mnr->mnr_ip6_header_len = off;
8287 		}
8288 		if (is_dad_probe) {
8289 			/* node is trying use taddr, create an mne using taddr */
8290 			*saddrp = taddr;
8291 		}
8292 		break;
8293 	}
8294 	case ND_NEIGHBOR_ADVERT: {
8295 		struct nd_neighbor_advert *nd_na;
8296 		union nd_opts ndopts;
8297 		struct in6_addr taddr;
8298 
8299 
8300 		nd_na = (struct nd_neighbor_advert *)(void *)icmp6;
8301 
8302 		if (icmp6len < sizeof(*nd_na)) {
8303 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8304 			    "short nd_na %d < %lu",
8305 			    icmp6len, sizeof(*nd_na));
8306 			return;
8307 		}
8308 
8309 		bcopy(&nd_na->nd_na_target, &taddr, sizeof(taddr));
8310 		if (IN6_IS_ADDR_MULTICAST(&taddr) ||
8311 		    IN6_IS_ADDR_UNSPECIFIED(&taddr)) {
8312 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8313 			    "invalid target ignored");
8314 			return;
8315 		}
8316 		/* parse options */
8317 		nd6_option_init(nd_na + 1, icmp6len - sizeof(*nd_na), &ndopts);
8318 		if (nd6_options(&ndopts) < 0) {
8319 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8320 			    "invalid ND6 NA option");
8321 			return;
8322 		}
8323 		if (ndopts.nd_opts_tgt_lladdr == NULL) {
8324 			/* target linklayer, nothing to do */
8325 			return;
8326 		}
8327 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
8328 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
8329 		if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8330 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8331 			    "target lladdrlen %d != %lu",
8332 			    lladdrlen, ETHER_ND_LLADDR_LEN);
8333 			return;
8334 		}
8335 		mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr - (uintptr_t)eh);
8336 		mnr->mnr_ip6_icmp6_len = icmp6len;
8337 		mnr->mnr_ip6_header_len = off;
8338 		mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8339 		break;
8340 	}
8341 	case ND_ROUTER_SOLICIT: {
8342 		struct nd_router_solicit *nd_rs;
8343 		union nd_opts ndopts;
8344 
8345 		if (icmp6len < sizeof(*nd_rs)) {
8346 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8347 			    "short nd_rs %d < %lu",
8348 			    icmp6len, sizeof(*nd_rs));
8349 			return;
8350 		}
8351 		nd_rs = (struct nd_router_solicit *)(void *)icmp6;
8352 
8353 		/* parse options */
8354 		nd6_option_init(nd_rs + 1, icmp6len - sizeof(*nd_rs), &ndopts);
8355 		if (nd6_options(&ndopts) < 0) {
8356 			BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8357 			    "invalid ND6 RS option");
8358 			return;
8359 		}
8360 		if (ndopts.nd_opts_src_lladdr != NULL) {
8361 			lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
8362 			lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
8363 		}
8364 		if (lladdr != NULL) {
8365 			if (lladdrlen != ETHER_ND_LLADDR_LEN) {
8366 				BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8367 				    "source lladdrlen %d != %lu",
8368 				    lladdrlen, ETHER_ND_LLADDR_LEN);
8369 				return;
8370 			}
8371 			mnr->mnr_ip6_lladdr_offset = (uint16_t)((uintptr_t)lladdr -
8372 			    (uintptr_t)eh);
8373 			mnr->mnr_ip6_icmp6_len = icmp6len;
8374 			mnr->mnr_ip6_icmp6_type = icmp6->icmp6_type;
8375 			mnr->mnr_ip6_header_len = off;
8376 		}
8377 		break;
8378 	}
8379 	default:
8380 		break;
8381 	}
8382 	if (mnr->mnr_ip6_lladdr_offset != 0 &&
8383 	    BRIDGE_DBGF_ENABLED(BR_DBGF_MAC_NAT)) {
8384 		const char *str;
8385 
8386 		switch (mnr->mnr_ip6_icmp6_type) {
8387 		case ND_ROUTER_SOLICIT:
8388 			str = "ROUTER SOLICIT";
8389 			break;
8390 		case ND_NEIGHBOR_ADVERT:
8391 			str = "NEIGHBOR ADVERT";
8392 			break;
8393 		case ND_NEIGHBOR_SOLICIT:
8394 			str = "NEIGHBOR SOLICIT";
8395 			break;
8396 		default:
8397 			str = "";
8398 			break;
8399 		}
8400 		BRIDGE_LOG(LOG_DEBUG, BR_DBGF_MAC_NAT,
8401 		    "%s %s %s ip6len %d icmp6len %d lladdr offset %d",
8402 		    sc->sc_if_xname, bif->bif_ifp->if_xname, str,
8403 		    mnr->mnr_ip6_header_len,
8404 		    mnr->mnr_ip6_icmp6_len, mnr->mnr_ip6_lladdr_offset);
8405 	}
8406 }
8407 
8408 static struct mac_nat_entry *
8409 bridge_mac_nat_ipv6_input(struct bridge_softc *sc, mbuf_t *data)
8410 {
8411 	struct in6_addr         dst;
8412 	struct ether_header     *eh;
8413 	struct ip6_hdr          *ip6h;
8414 	struct mac_nat_entry    *mne = NULL;
8415 
8416 	eh = get_ether_ipv6_header(data, FALSE);
8417 	if (eh == NULL) {
8418 		goto done;
8419 	}
8420 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8421 	bcopy(&ip6h->ip6_dst, &dst, sizeof(dst));
8422 	/* XXX validate IPv6 address */
8423 	if (IN6_IS_ADDR_UNSPECIFIED(&dst)) {
8424 		goto done;
8425 	}
8426 	mne = bridge_lookup_mac_nat_entry(sc, AF_INET6, &dst);
8427 
8428 done:
8429 	return mne;
8430 }
8431 
8432 static boolean_t
8433 bridge_mac_nat_ipv6_output(struct bridge_softc *sc,
8434     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8435 {
8436 	struct ether_header     *eh;
8437 	struct ip6_hdr          *ip6h;
8438 	struct in6_addr         saddr;
8439 	boolean_t               translate;
8440 
8441 	translate = (bif == sc->sc_mac_nat_bif) ? FALSE : TRUE;
8442 	eh = get_ether_ipv6_header(data, TRUE);
8443 	if (eh == NULL) {
8444 		translate = FALSE;
8445 		goto done;
8446 	}
8447 	ip6h = (struct ip6_hdr *)(void *)(eh + 1);
8448 	bcopy(&ip6h->ip6_src, &saddr, sizeof(saddr));
8449 	if (mnr != NULL && ip6h->ip6_nxt == IPPROTO_ICMPV6) {
8450 		bridge_mac_nat_icmpv6_output(sc, bif, data,
8451 		    eh, ip6h, &saddr, mnr);
8452 	}
8453 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr)) {
8454 		goto done;
8455 	}
8456 	(void)bridge_update_mac_nat_entry(sc, bif, AF_INET6, &saddr,
8457 	    eh->ether_shost);
8458 
8459 done:
8460 	return translate;
8461 }
8462 
8463 /*
8464  * bridge_mac_nat_input:
8465  * Process a packet arriving on the MAC NAT interface (sc_mac_nat_bif).
8466  * This interface is the "external" interface with respect to NAT.
8467  * The interface is only capable of receiving a single MAC address
8468  * (e.g. a Wi-Fi STA interface).
8469  *
8470  * When a packet arrives on the external interface, look up the destination
8471  * IP address in the mac_nat_entry table. If there is a match, *is_input
8472  * is set to TRUE if it's for the MAC NAT interface, otherwise *is_input
8473  * is set to FALSE and translate the MAC address if necessary.
8474  *
8475  * Returns:
8476  * The internal interface to direct the packet to, or NULL if the packet
8477  * should not be redirected.
8478  *
8479  * *data may be updated to point at a different mbuf chain, or set to NULL
8480  * if the chain was deallocated during processing.
8481  */
8482 static ifnet_t
8483 bridge_mac_nat_input(struct bridge_softc *sc, mbuf_t *data,
8484     boolean_t *is_input)
8485 {
8486 	ifnet_t                 dst_if = NULL;
8487 	struct ether_header     *eh;
8488 	uint16_t                ether_type;
8489 	boolean_t               is_unicast;
8490 	mbuf_t                  m = *data;
8491 	struct mac_nat_entry    *mne = NULL;
8492 
8493 	BRIDGE_LOCK_ASSERT_HELD(sc);
8494 	*is_input = FALSE;
8495 	assert(sc->sc_mac_nat_bif != NULL);
8496 	is_unicast = ((m->m_flags & (M_BCAST | M_MCAST)) == 0);
8497 	eh = mtod(m, struct ether_header *);
8498 	ether_type = ntohs(eh->ether_type);
8499 	switch (ether_type) {
8500 	case ETHERTYPE_ARP:
8501 		mne = bridge_mac_nat_arp_input(sc, data);
8502 		break;
8503 	case ETHERTYPE_IP:
8504 		if (is_unicast) {
8505 			mne = bridge_mac_nat_ip_input(sc, data);
8506 		}
8507 		break;
8508 	case ETHERTYPE_IPV6:
8509 		if (is_unicast) {
8510 			mne = bridge_mac_nat_ipv6_input(sc, data);
8511 		}
8512 		break;
8513 	default:
8514 		break;
8515 	}
8516 	if (mne != NULL) {
8517 		if (is_unicast) {
8518 			if (m != *data) {
8519 				/* it may have changed */
8520 				eh = mtod(*data, struct ether_header *);
8521 			}
8522 			bcopy(mne->mne_mac, eh->ether_dhost,
8523 			    sizeof(eh->ether_dhost));
8524 		}
8525 		dst_if = mne->mne_bif->bif_ifp;
8526 		*is_input = (mne->mne_bif == sc->sc_mac_nat_bif);
8527 	}
8528 	return dst_if;
8529 }
8530 
8531 /*
8532  * bridge_mac_nat_output:
8533  * Process a packet destined to the MAC NAT interface (sc_mac_nat_bif)
8534  * from the interface 'bif'.
8535  *
8536  * Create a mac_nat_entry containing the source IP address and MAC address
8537  * from the packet. Populate a mac_nat_record with information detailing
8538  * how to translate the packet. Translation takes place later when
8539  * the bridge lock is no longer held.
8540  *
8541  * If 'bif' == sc_mac_nat_bif, the stack over the MAC NAT
8542  * interface is generating an output packet. No translation is required in this
8543  * case, we just record the IP address used to prevent another bif from
8544  * claiming our IP address.
8545  *
8546  * Returns:
8547  * TRUE if the packet should be translated (*mnr updated as well),
8548  * FALSE otherwise.
8549  *
8550  * *data may be updated to point at a different mbuf chain or NULL if
8551  * the chain was deallocated during processing.
8552  */
8553 
8554 static boolean_t
8555 bridge_mac_nat_output(struct bridge_softc *sc,
8556     struct bridge_iflist *bif, mbuf_t *data, struct mac_nat_record *mnr)
8557 {
8558 	struct ether_header     *eh;
8559 	uint16_t                ether_type;
8560 	boolean_t               translate = FALSE;
8561 
8562 	BRIDGE_LOCK_ASSERT_HELD(sc);
8563 	assert(sc->sc_mac_nat_bif != NULL);
8564 
8565 	eh = mtod(*data, struct ether_header *);
8566 	ether_type = ntohs(eh->ether_type);
8567 	if (mnr != NULL) {
8568 		bzero(mnr, sizeof(*mnr));
8569 		mnr->mnr_ether_type = ether_type;
8570 	}
8571 	switch (ether_type) {
8572 	case ETHERTYPE_ARP:
8573 		translate = bridge_mac_nat_arp_output(sc, bif, data, mnr);
8574 		break;
8575 	case ETHERTYPE_IP:
8576 		translate = bridge_mac_nat_ip_output(sc, bif, data, mnr);
8577 		break;
8578 	case ETHERTYPE_IPV6:
8579 		translate = bridge_mac_nat_ipv6_output(sc, bif, data, mnr);
8580 		break;
8581 	default:
8582 		break;
8583 	}
8584 	return translate;
8585 }
8586 
8587 static void
8588 bridge_mac_nat_arp_translate(mbuf_t *data, struct mac_nat_record *mnr,
8589     const caddr_t eaddr)
8590 {
8591 	errno_t                 error;
8592 
8593 	if (mnr->mnr_arp_offset == 0) {
8594 		return;
8595 	}
8596 	/* replace the source hardware address */
8597 	error = mbuf_copyback(*data, mnr->mnr_arp_offset,
8598 	    ETHER_ADDR_LEN, eaddr,
8599 	    MBUF_DONTWAIT);
8600 	if (error != 0) {
8601 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8602 		    "mbuf_copyback failed");
8603 		m_freem(*data);
8604 		*data = NULL;
8605 	}
8606 	return;
8607 }
8608 
8609 static void
8610 bridge_mac_nat_ip_translate(mbuf_t *data, struct mac_nat_record *mnr)
8611 {
8612 	errno_t         error;
8613 	size_t          offset;
8614 
8615 	if (mnr->mnr_ip_header_len == 0) {
8616 		return;
8617 	}
8618 	/* update the UDP checksum */
8619 	offset = sizeof(struct ether_header) + mnr->mnr_ip_header_len;
8620 	error = mbuf_copyback(*data, offset + offsetof(struct udphdr, uh_sum),
8621 	    sizeof(mnr->mnr_ip_udp_csum),
8622 	    &mnr->mnr_ip_udp_csum,
8623 	    MBUF_DONTWAIT);
8624 	if (error != 0) {
8625 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8626 		    "mbuf_copyback uh_sum failed");
8627 		m_freem(*data);
8628 		*data = NULL;
8629 	}
8630 	/* update the DHCP must broadcast flag */
8631 	offset += sizeof(struct udphdr);
8632 	error = mbuf_copyback(*data, offset + offsetof(struct dhcp, dp_flags),
8633 	    sizeof(mnr->mnr_ip_dhcp_flags),
8634 	    &mnr->mnr_ip_dhcp_flags,
8635 	    MBUF_DONTWAIT);
8636 	if (error != 0) {
8637 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8638 		    "mbuf_copyback dp_flags failed");
8639 		m_freem(*data);
8640 		*data = NULL;
8641 	}
8642 }
8643 
8644 static void
8645 bridge_mac_nat_ipv6_translate(mbuf_t *data, struct mac_nat_record *mnr,
8646     const caddr_t eaddr)
8647 {
8648 	uint16_t        cksum;
8649 	errno_t         error;
8650 	mbuf_t          m = *data;
8651 
8652 	if (mnr->mnr_ip6_header_len == 0) {
8653 		return;
8654 	}
8655 	switch (mnr->mnr_ip6_icmp6_type) {
8656 	case ND_ROUTER_SOLICIT:
8657 	case ND_NEIGHBOR_SOLICIT:
8658 	case ND_NEIGHBOR_ADVERT:
8659 		if (mnr->mnr_ip6_lladdr_offset == 0) {
8660 			/* nothing to do */
8661 			return;
8662 		}
8663 		break;
8664 	default:
8665 		return;
8666 	}
8667 
8668 	/*
8669 	 * replace the lladdr
8670 	 */
8671 	error = mbuf_copyback(m, mnr->mnr_ip6_lladdr_offset,
8672 	    ETHER_ADDR_LEN, eaddr,
8673 	    MBUF_DONTWAIT);
8674 	if (error != 0) {
8675 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8676 		    "mbuf_copyback lladdr failed");
8677 		m_freem(m);
8678 		*data = NULL;
8679 		return;
8680 	}
8681 
8682 	/*
8683 	 * recompute the icmp6 checksum
8684 	 */
8685 
8686 	/* skip past the ethernet header */
8687 	mbuf_setdata(m, (char *)mbuf_data(m) + ETHER_HDR_LEN,
8688 	    mbuf_len(m) - ETHER_HDR_LEN);
8689 	mbuf_pkthdr_adjustlen(m, -ETHER_HDR_LEN);
8690 
8691 #define CKSUM_OFFSET_ICMP6      offsetof(struct icmp6_hdr, icmp6_cksum)
8692 	/* set the checksum to zero */
8693 	cksum = 0;
8694 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8695 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8696 	if (error != 0) {
8697 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8698 		    "mbuf_copyback cksum=0 failed");
8699 		m_freem(m);
8700 		*data = NULL;
8701 		return;
8702 	}
8703 	/* compute and set the new checksum */
8704 	cksum = in6_cksum(m, IPPROTO_ICMPV6, mnr->mnr_ip6_header_len,
8705 	    mnr->mnr_ip6_icmp6_len);
8706 	error = mbuf_copyback(m, mnr->mnr_ip6_header_len + CKSUM_OFFSET_ICMP6,
8707 	    sizeof(cksum), &cksum, MBUF_DONTWAIT);
8708 	if (error != 0) {
8709 		BRIDGE_LOG(LOG_NOTICE, BR_DBGF_MAC_NAT,
8710 		    "mbuf_copyback cksum failed");
8711 		m_freem(m);
8712 		*data = NULL;
8713 		return;
8714 	}
8715 	/* restore the ethernet header */
8716 	mbuf_setdata(m, (char *)mbuf_data(m) - ETHER_HDR_LEN,
8717 	    mbuf_len(m) + ETHER_HDR_LEN);
8718 	mbuf_pkthdr_adjustlen(m, ETHER_HDR_LEN);
8719 	return;
8720 }
8721 
8722 static void
8723 bridge_mac_nat_translate(mbuf_t *data, struct mac_nat_record *mnr,
8724     const caddr_t eaddr)
8725 {
8726 	struct ether_header     *eh;
8727 
8728 	/* replace the source ethernet address with the single MAC */
8729 	eh = mtod(*data, struct ether_header *);
8730 	bcopy(eaddr, eh->ether_shost, sizeof(eh->ether_shost));
8731 	switch (mnr->mnr_ether_type) {
8732 	case ETHERTYPE_ARP:
8733 		bridge_mac_nat_arp_translate(data, mnr, eaddr);
8734 		break;
8735 
8736 	case ETHERTYPE_IP:
8737 		bridge_mac_nat_ip_translate(data, mnr);
8738 		break;
8739 
8740 	case ETHERTYPE_IPV6:
8741 		bridge_mac_nat_ipv6_translate(data, mnr, eaddr);
8742 		break;
8743 
8744 	default:
8745 		break;
8746 	}
8747 	return;
8748 }
8749 
8750 /*
8751  * bridge packet filtering
8752  */
8753 
8754 /*
8755  * Perform basic checks on header size since
8756  * pfil assumes ip_input has already processed
8757  * it for it.  Cut-and-pasted from ip_input.c.
8758  * Given how simple the IPv6 version is,
8759  * does the IPv4 version really need to be
8760  * this complicated?
8761  *
8762  * XXX Should we update ipstat here, or not?
8763  * XXX Right now we update ipstat but not
8764  * XXX csum_counter.
8765  */
8766 static int
8767 bridge_ip_checkbasic(struct mbuf **mp)
8768 {
8769 	struct mbuf *m = *mp;
8770 	struct ip *ip;
8771 	int len, hlen;
8772 	u_short sum;
8773 
8774 	if (*mp == NULL) {
8775 		return -1;
8776 	}
8777 
8778 	if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8779 		/* max_linkhdr is already rounded up to nearest 4-byte */
8780 		if ((m = m_copyup(m, sizeof(struct ip),
8781 		    max_linkhdr)) == NULL) {
8782 			/* XXXJRT new stat, please */
8783 			ipstat.ips_toosmall++;
8784 			goto bad;
8785 		}
8786 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip), 0)) {
8787 		if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
8788 			ipstat.ips_toosmall++;
8789 			goto bad;
8790 		}
8791 	}
8792 	ip = mtod(m, struct ip *);
8793 	if (ip == NULL) {
8794 		goto bad;
8795 	}
8796 
8797 	if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
8798 		ipstat.ips_badvers++;
8799 		goto bad;
8800 	}
8801 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
8802 	if (hlen < (int)sizeof(struct ip)) {  /* minimum header length */
8803 		ipstat.ips_badhlen++;
8804 		goto bad;
8805 	}
8806 	if (hlen > m->m_len) {
8807 		if ((m = m_pullup(m, hlen)) == 0) {
8808 			ipstat.ips_badhlen++;
8809 			goto bad;
8810 		}
8811 		ip = mtod(m, struct ip *);
8812 		if (ip == NULL) {
8813 			goto bad;
8814 		}
8815 	}
8816 
8817 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
8818 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
8819 	} else {
8820 		if (hlen == sizeof(struct ip)) {
8821 			sum = in_cksum_hdr(ip);
8822 		} else {
8823 			sum = in_cksum(m, hlen);
8824 		}
8825 	}
8826 	if (sum) {
8827 		ipstat.ips_badsum++;
8828 		goto bad;
8829 	}
8830 
8831 	/* Retrieve the packet length. */
8832 	len = ntohs(ip->ip_len);
8833 
8834 	/*
8835 	 * Check for additional length bogosity
8836 	 */
8837 	if (len < hlen) {
8838 		ipstat.ips_badlen++;
8839 		goto bad;
8840 	}
8841 
8842 	/*
8843 	 * Check that the amount of data in the buffers
8844 	 * is as at least much as the IP header would have us expect.
8845 	 * Drop packet if shorter than we expect.
8846 	 */
8847 	if (m->m_pkthdr.len < len) {
8848 		ipstat.ips_tooshort++;
8849 		goto bad;
8850 	}
8851 
8852 	/* Checks out, proceed */
8853 	*mp = m;
8854 	return 0;
8855 
8856 bad:
8857 	*mp = m;
8858 	return -1;
8859 }
8860 
8861 /*
8862  * Same as above, but for IPv6.
8863  * Cut-and-pasted from ip6_input.c.
8864  * XXX Should we update ip6stat, or not?
8865  */
8866 static int
8867 bridge_ip6_checkbasic(struct mbuf **mp)
8868 {
8869 	struct mbuf *m = *mp;
8870 	struct ip6_hdr *ip6;
8871 
8872 	/*
8873 	 * If the IPv6 header is not aligned, slurp it up into a new
8874 	 * mbuf with space for link headers, in the event we forward
8875 	 * it.  Otherwise, if it is aligned, make sure the entire base
8876 	 * IPv6 header is in the first mbuf of the chain.
8877 	 */
8878 	if (IP6_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
8879 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8880 		/* max_linkhdr is already rounded up to nearest 4-byte */
8881 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
8882 		    max_linkhdr)) == NULL) {
8883 			/* XXXJRT new stat, please */
8884 			ip6stat.ip6s_toosmall++;
8885 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8886 			goto bad;
8887 		}
8888 	} else if (OS_EXPECT((size_t)m->m_len < sizeof(struct ip6_hdr), 0)) {
8889 		struct ifnet *inifp = m->m_pkthdr.rcvif;
8890 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
8891 			ip6stat.ip6s_toosmall++;
8892 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
8893 			goto bad;
8894 		}
8895 	}
8896 
8897 	ip6 = mtod(m, struct ip6_hdr *);
8898 
8899 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
8900 		ip6stat.ip6s_badvers++;
8901 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
8902 		goto bad;
8903 	}
8904 
8905 	/* Checks out, proceed */
8906 	*mp = m;
8907 	return 0;
8908 
8909 bad:
8910 	*mp = m;
8911 	return -1;
8912 }
8913 
8914 /*
8915  * the PF routines expect to be called from ip_input, so we
8916  * need to do and undo here some of the same processing.
8917  *
8918  * XXX : this is heavily inspired on bridge_pfil()
8919  */
8920 static int
8921 bridge_pf(struct mbuf **mp, struct ifnet *ifp, uint32_t sc_filter_flags,
8922     int input)
8923 {
8924 	/*
8925 	 * XXX : mpetit : heavily inspired by bridge_pfil()
8926 	 */
8927 
8928 	int snap, error, i, hlen;
8929 	struct ether_header *eh1, eh2;
8930 	struct ip *ip;
8931 	struct llc llc1;
8932 	u_int16_t ether_type;
8933 
8934 	snap = 0;
8935 	error = -1;     /* Default error if not error == 0 */
8936 
8937 	if ((sc_filter_flags & IFBF_FILT_MEMBER) == 0) {
8938 		return 0; /* filtering is disabled */
8939 	}
8940 	i = min((*mp)->m_pkthdr.len, max_protohdr);
8941 	if ((*mp)->m_len < i) {
8942 		*mp = m_pullup(*mp, i);
8943 		if (*mp == NULL) {
8944 			BRIDGE_LOG(LOG_NOTICE, 0, "m_pullup failed");
8945 			return -1;
8946 		}
8947 	}
8948 
8949 	eh1 = mtod(*mp, struct ether_header *);
8950 	ether_type = ntohs(eh1->ether_type);
8951 
8952 	/*
8953 	 * Check for SNAP/LLC.
8954 	 */
8955 	if (ether_type < ETHERMTU) {
8956 		struct llc *llc2 = (struct llc *)(eh1 + 1);
8957 
8958 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
8959 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
8960 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
8961 		    llc2->llc_control == LLC_UI) {
8962 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
8963 			snap = 1;
8964 		}
8965 	}
8966 
8967 	/*
8968 	 * If we're trying to filter bridge traffic, don't look at anything
8969 	 * other than IP and ARP traffic.  If the filter doesn't understand
8970 	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
8971 	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
8972 	 * but of course we don't have an AppleTalk filter to begin with.
8973 	 * (Note that since pfil doesn't understand ARP it will pass *ALL*
8974 	 * ARP traffic.)
8975 	 */
8976 	switch (ether_type) {
8977 	case ETHERTYPE_ARP:
8978 	case ETHERTYPE_REVARP:
8979 		return 0;         /* Automatically pass */
8980 
8981 	case ETHERTYPE_IP:
8982 	case ETHERTYPE_IPV6:
8983 		break;
8984 	default:
8985 		/*
8986 		 * Check to see if the user wants to pass non-ip
8987 		 * packets, these will not be checked by pf and
8988 		 * passed unconditionally so the default is to drop.
8989 		 */
8990 		if ((sc_filter_flags & IFBF_FILT_ONLYIP)) {
8991 			goto bad;
8992 		}
8993 		break;
8994 	}
8995 
8996 	/* Strip off the Ethernet header and keep a copy. */
8997 	m_copydata(*mp, 0, ETHER_HDR_LEN, (caddr_t)&eh2);
8998 	m_adj(*mp, ETHER_HDR_LEN);
8999 
9000 	/* Strip off snap header, if present */
9001 	if (snap) {
9002 		m_copydata(*mp, 0, sizeof(struct llc), (caddr_t)&llc1);
9003 		m_adj(*mp, sizeof(struct llc));
9004 	}
9005 
9006 	/*
9007 	 * Check the IP header for alignment and errors
9008 	 */
9009 	switch (ether_type) {
9010 	case ETHERTYPE_IP:
9011 		error = bridge_ip_checkbasic(mp);
9012 		break;
9013 	case ETHERTYPE_IPV6:
9014 		error = bridge_ip6_checkbasic(mp);
9015 		break;
9016 	default:
9017 		error = 0;
9018 		break;
9019 	}
9020 	if (error) {
9021 		goto bad;
9022 	}
9023 
9024 	error = 0;
9025 
9026 	/*
9027 	 * Run the packet through pf rules
9028 	 */
9029 	switch (ether_type) {
9030 	case ETHERTYPE_IP:
9031 		/*
9032 		 * before calling the firewall, swap fields the same as
9033 		 * IP does. here we assume the header is contiguous
9034 		 */
9035 		ip = mtod(*mp, struct ip *);
9036 
9037 		ip->ip_len = ntohs(ip->ip_len);
9038 		ip->ip_off = ntohs(ip->ip_off);
9039 
9040 		if (ifp != NULL) {
9041 			error = pf_af_hook(ifp, 0, mp, AF_INET, input, NULL);
9042 		}
9043 
9044 		if (*mp == NULL || error != 0) { /* filter may consume */
9045 			break;
9046 		}
9047 
9048 		/* Recalculate the ip checksum and restore byte ordering */
9049 		ip = mtod(*mp, struct ip *);
9050 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
9051 		if (hlen < (int)sizeof(struct ip)) {
9052 			goto bad;
9053 		}
9054 		if (hlen > (*mp)->m_len) {
9055 			if ((*mp = m_pullup(*mp, hlen)) == 0) {
9056 				goto bad;
9057 			}
9058 			ip = mtod(*mp, struct ip *);
9059 			if (ip == NULL) {
9060 				goto bad;
9061 			}
9062 		}
9063 		ip->ip_len = htons(ip->ip_len);
9064 		ip->ip_off = htons(ip->ip_off);
9065 		ip->ip_sum = 0;
9066 		if (hlen == sizeof(struct ip)) {
9067 			ip->ip_sum = in_cksum_hdr(ip);
9068 		} else {
9069 			ip->ip_sum = in_cksum(*mp, hlen);
9070 		}
9071 		break;
9072 
9073 	case ETHERTYPE_IPV6:
9074 		if (ifp != NULL) {
9075 			error = pf_af_hook(ifp, 0, mp, AF_INET6, input, NULL);
9076 		}
9077 
9078 		if (*mp == NULL || error != 0) { /* filter may consume */
9079 			break;
9080 		}
9081 		break;
9082 	default:
9083 		error = 0;
9084 		break;
9085 	}
9086 
9087 	if (*mp == NULL) {
9088 		return error;
9089 	}
9090 	if (error != 0) {
9091 		goto bad;
9092 	}
9093 
9094 	error = -1;
9095 
9096 	/*
9097 	 * Finally, put everything back the way it was and return
9098 	 */
9099 	if (snap) {
9100 		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT, 0);
9101 		if (*mp == NULL) {
9102 			return error;
9103 		}
9104 		bcopy(&llc1, mtod(*mp, caddr_t), sizeof(struct llc));
9105 	}
9106 
9107 	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT, 0);
9108 	if (*mp == NULL) {
9109 		return error;
9110 	}
9111 	bcopy(&eh2, mtod(*mp, caddr_t), ETHER_HDR_LEN);
9112 
9113 	return 0;
9114 
9115 bad:
9116 	m_freem(*mp);
9117 	*mp = NULL;
9118 	return error;
9119 }
9120 
9121 /*
9122  * Copyright (C) 2014, Stefano Garzarella - Universita` di Pisa.
9123  * All rights reserved.
9124  *
9125  * Redistribution and use in source and binary forms, with or without
9126  * modification, are permitted provided that the following conditions
9127  * are met:
9128  *   1. Redistributions of source code must retain the above copyright
9129  *      notice, this list of conditions and the following disclaimer.
9130  *   2. Redistributions in binary form must reproduce the above copyright
9131  *      notice, this list of conditions and the following disclaimer in the
9132  *      documentation and/or other materials provided with the distribution.
9133  *
9134  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
9135  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9136  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
9137  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
9138  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
9139  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
9140  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9141  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
9142  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
9143  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
9144  * SUCH DAMAGE.
9145  */
9146 
9147 /*
9148  * XXX-ste: Maybe this function must be moved into kern/uipc_mbuf.c
9149  *
9150  * Create a queue of packets/segments which fit the given mss + hdr_len.
9151  * m0 points to mbuf chain to be segmented.
9152  * This function splits the payload (m0-> m_pkthdr.len - hdr_len)
9153  * into segments of length MSS bytes and then copy the first hdr_len bytes
9154  * from m0 at the top of each segment.
9155  * If hdr2_buf is not NULL (hdr2_len is the buf length), it is copied
9156  * in each segment after the first hdr_len bytes
9157  *
9158  * Return the new queue with the segments on success, NULL on failure.
9159  * (the mbuf queue is freed in this case).
9160  * nsegs contains the number of segments generated.
9161  */
9162 
9163 static struct mbuf *
9164 m_seg(struct mbuf *m0, int hdr_len, int mss, int *nsegs,
9165     char * hdr2_buf, int hdr2_len)
9166 {
9167 	int off = 0, n, firstlen;
9168 	struct mbuf **mnext, *mseg;
9169 	int total_len = m0->m_pkthdr.len;
9170 
9171 	/*
9172 	 * Segmentation useless
9173 	 */
9174 	if (total_len <= hdr_len + mss) {
9175 		return m0;
9176 	}
9177 
9178 	if (hdr2_buf == NULL || hdr2_len <= 0) {
9179 		hdr2_buf = NULL;
9180 		hdr2_len = 0;
9181 	}
9182 
9183 	off = hdr_len + mss;
9184 	firstlen = mss; /* first segment stored in the original mbuf */
9185 
9186 	mnext = &(m0->m_nextpkt); /* pointer to next packet */
9187 
9188 	for (n = 1; off < total_len; off += mss, n++) {
9189 		struct mbuf *m;
9190 		/*
9191 		 * Copy the header from the original packet
9192 		 * and create a new mbuf chain
9193 		 */
9194 		if (MHLEN < hdr_len) {
9195 			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
9196 		} else {
9197 			m = m_gethdr(M_NOWAIT, MT_DATA);
9198 		}
9199 
9200 		if (m == NULL) {
9201 #ifdef GSO_DEBUG
9202 			D("MGETHDR error\n");
9203 #endif
9204 			goto err;
9205 		}
9206 
9207 		m_copydata(m0, 0, hdr_len, mtod(m, caddr_t));
9208 
9209 		m->m_len = hdr_len;
9210 		/*
9211 		 * if the optional header is present, copy it
9212 		 */
9213 		if (hdr2_buf != NULL) {
9214 			m_copyback(m, hdr_len, hdr2_len, hdr2_buf);
9215 		}
9216 
9217 		m->m_flags |= (m0->m_flags & M_COPYFLAGS);
9218 		if (off + mss >= total_len) {           /* last segment */
9219 			mss = total_len - off;
9220 		}
9221 		/*
9222 		 * Copy the payload from original packet
9223 		 */
9224 		mseg = m_copym(m0, off, mss, M_NOWAIT);
9225 		if (mseg == NULL) {
9226 			m_freem(m);
9227 #ifdef GSO_DEBUG
9228 			D("m_copym error\n");
9229 #endif
9230 			goto err;
9231 		}
9232 		m_cat(m, mseg);
9233 
9234 		m->m_pkthdr.len = hdr_len + hdr2_len + mss;
9235 		m->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
9236 		/*
9237 		 * Copy the checksum flags and data (in_cksum() need this)
9238 		 */
9239 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
9240 		m->m_pkthdr.csum_data = m0->m_pkthdr.csum_data;
9241 		m->m_pkthdr.tso_segsz = m0->m_pkthdr.tso_segsz;
9242 
9243 		*mnext = m;
9244 		mnext = &(m->m_nextpkt);
9245 	}
9246 
9247 	/*
9248 	 * Update first segment.
9249 	 * If the optional header is present, is necessary
9250 	 * to insert it into the first segment.
9251 	 */
9252 	if (hdr2_buf == NULL) {
9253 		m_adj(m0, hdr_len + firstlen - total_len);
9254 		m0->m_pkthdr.len = hdr_len + firstlen;
9255 	} else {
9256 		mseg = m_copym(m0, hdr_len, firstlen, M_NOWAIT);
9257 		if (mseg == NULL) {
9258 #ifdef GSO_DEBUG
9259 			D("m_copym error\n");
9260 #endif
9261 			goto err;
9262 		}
9263 		m_adj(m0, hdr_len - total_len);
9264 		m_copyback(m0, hdr_len, hdr2_len, hdr2_buf);
9265 		m_cat(m0, mseg);
9266 		m0->m_pkthdr.len = hdr_len + hdr2_len + firstlen;
9267 	}
9268 
9269 	if (nsegs != NULL) {
9270 		*nsegs = n;
9271 	}
9272 	return m0;
9273 err:
9274 	while (m0 != NULL) {
9275 		mseg = m0->m_nextpkt;
9276 		m0->m_nextpkt = NULL;
9277 		m_freem(m0);
9278 		m0 = mseg;
9279 	}
9280 	return NULL;
9281 }
9282 
9283 /*
9284  * Wrappers of IPv4 checksum functions
9285  */
9286 static inline void
9287 gso_ipv4_data_cksum(struct mbuf *m, struct ip *ip, int mac_hlen)
9288 {
9289 	m->m_data += mac_hlen;
9290 	m->m_len -= mac_hlen;
9291 	m->m_pkthdr.len -= mac_hlen;
9292 #if __FreeBSD_version < 1000000
9293 	ip->ip_len = ntohs(ip->ip_len); /* needed for in_delayed_cksum() */
9294 #endif
9295 
9296 	in_delayed_cksum(m);
9297 
9298 #if __FreeBSD_version < 1000000
9299 	ip->ip_len = htons(ip->ip_len);
9300 #endif
9301 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
9302 	m->m_len += mac_hlen;
9303 	m->m_pkthdr.len += mac_hlen;
9304 	m->m_data -= mac_hlen;
9305 }
9306 
9307 static inline void
9308 gso_ipv4_hdr_cksum(struct mbuf *m, struct ip *ip, int mac_hlen, int ip_hlen)
9309 {
9310 	m->m_data += mac_hlen;
9311 
9312 	ip->ip_sum = in_cksum(m, ip_hlen);
9313 
9314 	m->m_pkthdr.csum_flags &= ~CSUM_IP;
9315 	m->m_data -= mac_hlen;
9316 }
9317 
9318 /*
9319  * Structure that contains the state during the TCP segmentation
9320  */
9321 struct gso_ip_tcp_state {
9322 	void    (*update)
9323 	(struct gso_ip_tcp_state*, struct mbuf*);
9324 	void    (*internal)
9325 	(struct gso_ip_tcp_state*, struct mbuf*);
9326 	union iphdr hdr;
9327 	struct tcphdr *tcp;
9328 	int mac_hlen;
9329 	int ip_hlen;
9330 	int tcp_hlen;
9331 	int hlen;
9332 	int pay_len;
9333 	int sw_csum;
9334 	uint32_t tcp_seq;
9335 	uint16_t ip_id;
9336 	boolean_t is_tx;
9337 };
9338 
9339 /*
9340  * Update the pointers to TCP and IPv4 headers
9341  */
9342 static inline void
9343 gso_ipv4_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9344 {
9345 	state->hdr.ip = (struct ip *)(void *)(mtod(m, uint8_t *) + state->mac_hlen);
9346 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip) + state->ip_hlen);
9347 	state->pay_len = m->m_pkthdr.len - state->hlen;
9348 }
9349 
9350 /*
9351  * Set properly the TCP and IPv4 headers
9352  */
9353 static inline void
9354 gso_ipv4_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9355 {
9356 	/*
9357 	 * Update IP header
9358 	 */
9359 	state->hdr.ip->ip_id = htons((state->ip_id)++);
9360 	state->hdr.ip->ip_len = htons(m->m_pkthdr.len - state->mac_hlen);
9361 	/*
9362 	 * TCP Checksum
9363 	 */
9364 	state->tcp->th_sum = 0;
9365 	state->tcp->th_sum = in_pseudo(state->hdr.ip->ip_src.s_addr,
9366 	    state->hdr.ip->ip_dst.s_addr,
9367 	    htons(state->tcp_hlen + IPPROTO_TCP + state->pay_len));
9368 	/*
9369 	 * Checksum HW not supported (TCP)
9370 	 */
9371 	if (state->sw_csum & CSUM_DELAY_DATA) {
9372 		gso_ipv4_data_cksum(m, state->hdr.ip, state->mac_hlen);
9373 	}
9374 
9375 	state->tcp_seq += state->pay_len;
9376 	/*
9377 	 * IP Checksum
9378 	 */
9379 	state->hdr.ip->ip_sum = 0;
9380 	/*
9381 	 * Checksum HW not supported (IP)
9382 	 */
9383 	if (state->sw_csum & CSUM_IP) {
9384 		gso_ipv4_hdr_cksum(m, state->hdr.ip, state->mac_hlen, state->ip_hlen);
9385 	}
9386 }
9387 
9388 
9389 /*
9390  * Updates the pointers to TCP and IPv6 headers
9391  */
9392 static inline void
9393 gso_ipv6_tcp_update(struct gso_ip_tcp_state *state, struct mbuf *m)
9394 {
9395 	state->hdr.ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + state->mac_hlen);
9396 	state->tcp = (struct tcphdr *)(void *)((caddr_t)(state->hdr.ip6) + state->ip_hlen);
9397 	state->pay_len = m->m_pkthdr.len - state->hlen;
9398 }
9399 
9400 /*
9401  * Sets properly the TCP and IPv6 headers
9402  */
9403 static inline void
9404 gso_ipv6_tcp_internal(struct gso_ip_tcp_state *state, struct mbuf *m)
9405 {
9406 	state->hdr.ip6->ip6_plen = htons(m->m_pkthdr.len -
9407 	    state->mac_hlen - state->ip_hlen);
9408 	/*
9409 	 * TCP Checksum
9410 	 */
9411 	state->tcp->th_sum = 0;
9412 	state->tcp->th_sum = in6_pseudo(&state->hdr.ip6->ip6_src,
9413 	    &state->hdr.ip6->ip6_dst,
9414 	    htonl(state->tcp_hlen + state->pay_len + IPPROTO_TCP));
9415 	/*
9416 	 * Checksum HW not supported (TCP)
9417 	 */
9418 	if (state->sw_csum & CSUM_DELAY_IPV6_DATA) {
9419 		(void)in6_finalize_cksum(m, state->mac_hlen, -1, -1, state->sw_csum);
9420 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
9421 	}
9422 	state->tcp_seq += state->pay_len;
9423 }
9424 
9425 /*
9426  * Init the state during the TCP segmentation
9427  */
9428 static void
9429 gso_ip_tcp_init_state(struct gso_ip_tcp_state *state, struct ifnet *ifp,
9430     bool is_ipv4, int mac_hlen, int ip_hlen,
9431     void * ip_hdr, struct tcphdr * tcp_hdr)
9432 {
9433 #pragma unused(ifp)
9434 
9435 	state->hdr.ptr = ip_hdr;
9436 	state->tcp = tcp_hdr;
9437 	if (is_ipv4) {
9438 		state->ip_id = ntohs(state->hdr.ip->ip_id);
9439 		state->update = gso_ipv4_tcp_update;
9440 		state->internal = gso_ipv4_tcp_internal;
9441 		state->sw_csum = CSUM_DELAY_DATA | CSUM_IP; /* XXX */
9442 	} else {
9443 		state->update = gso_ipv6_tcp_update;
9444 		state->internal = gso_ipv6_tcp_internal;
9445 		state->sw_csum = CSUM_DELAY_IPV6_DATA; /* XXX */
9446 	}
9447 	state->mac_hlen = mac_hlen;
9448 	state->ip_hlen = ip_hlen;
9449 	state->tcp_hlen = state->tcp->th_off << 2;
9450 	state->hlen = mac_hlen + ip_hlen + state->tcp_hlen;
9451 	state->tcp_seq = ntohl(state->tcp->th_seq);
9452 	//state->sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
9453 	return;
9454 }
9455 
9456 /*
9457  * GSO on TCP/IP (v4 or v6)
9458  *
9459  * If is_tx is TRUE, segmented packets are transmitted after they are
9460  * segmented.
9461  *
9462  * If is_tx is FALSE, the segmented packets are returned as a chain in *mp.
9463  */
9464 static int
9465 gso_ip_tcp(struct ifnet *ifp, struct mbuf **mp, struct gso_ip_tcp_state *state,
9466     boolean_t is_tx)
9467 {
9468 	struct mbuf *m, *m_tx;
9469 	int error = 0;
9470 	int mss = 0;
9471 	int nsegs = 0;
9472 	struct mbuf *m0 = *mp;
9473 #ifdef GSO_STATS
9474 	int total_len = m0->m_pkthdr.len;
9475 #endif /* GSO_STATS */
9476 
9477 #if 1
9478 	mss = ifp->if_mtu - state->ip_hlen - state->tcp_hlen;
9479 #else
9480 	if (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) {/* TSO with GSO */
9481 		mss = ifp->if_hw_tsomax - state->ip_hlen - state->tcp_hlen;
9482 	} else {
9483 		mss = m0->m_pkthdr.tso_segsz;
9484 	}
9485 #endif
9486 
9487 	*mp = m0 = m_seg(m0, state->hlen, mss, &nsegs, 0, 0);
9488 	if (m0 == NULL) {
9489 		return ENOBUFS; /* XXX ok? */
9490 	}
9491 	BRIDGE_LOG(LOG_DEBUG, BR_DBGF_CHECKSUM,
9492 	    "%s %s mss %d nsegs %d",
9493 	    ifp->if_xname,
9494 	    is_tx ? "TX" : "RX",
9495 	    mss, nsegs);
9496 	/*
9497 	 * XXX-ste: can this happen?
9498 	 */
9499 	if (m0->m_nextpkt == NULL) {
9500 #ifdef GSO_DEBUG
9501 		D("only 1 segment");
9502 #endif
9503 		if (is_tx) {
9504 			error = bridge_transmit(ifp, m0);
9505 		}
9506 		return error;
9507 	}
9508 #ifdef GSO_STATS
9509 	GSOSTAT_SET_MAX(tcp.gsos_max_mss, mss);
9510 	GSOSTAT_SET_MIN(tcp.gsos_min_mss, mss);
9511 	GSOSTAT_ADD(tcp.gsos_osegments, nsegs);
9512 #endif /* GSO_STATS */
9513 
9514 	/* first pkt */
9515 	m = m0;
9516 
9517 	state->update(state, m);
9518 
9519 	do {
9520 		state->tcp->th_flags &= ~(TH_FIN | TH_PUSH);
9521 
9522 		state->internal(state, m);
9523 		m_tx = m;
9524 		m = m->m_nextpkt;
9525 		if (is_tx) {
9526 			m_tx->m_nextpkt = NULL;
9527 			if ((error = bridge_transmit(ifp, m_tx)) != 0) {
9528 				/*
9529 				 * XXX: If a segment can not be sent, discard the following
9530 				 * segments and propagate the error to the upper levels.
9531 				 * In this way the TCP retransmits all the initial packet.
9532 				 */
9533 #ifdef GSO_DEBUG
9534 				D("if_transmit error\n");
9535 #endif
9536 				goto err;
9537 			}
9538 		}
9539 		state->update(state, m);
9540 
9541 		state->tcp->th_flags &= ~TH_CWR;
9542 		state->tcp->th_seq = htonl(state->tcp_seq);
9543 	} while (m->m_nextpkt);
9544 
9545 	/* last pkt */
9546 	state->internal(state, m);
9547 
9548 	if (is_tx) {
9549 		error = bridge_transmit(ifp, m);
9550 #ifdef GSO_DEBUG
9551 		if (error) {
9552 			D("last if_transmit error\n");
9553 			D("error - type = %d \n", error);
9554 		}
9555 #endif
9556 	}
9557 #ifdef GSO_STATS
9558 	if (!error) {
9559 		GSOSTAT_INC(tcp.gsos_segmented);
9560 		GSOSTAT_SET_MAX(tcp.gsos_maxsegmented, total_len);
9561 		GSOSTAT_SET_MIN(tcp.gsos_minsegmented, total_len);
9562 		GSOSTAT_ADD(tcp.gsos_totalbyteseg, total_len);
9563 	}
9564 #endif /* GSO_STATS */
9565 	return error;
9566 
9567 err:
9568 #ifdef GSO_DEBUG
9569 	D("error - type = %d \n", error);
9570 #endif
9571 	while (m != NULL) {
9572 		m_tx = m->m_nextpkt;
9573 		m->m_nextpkt = NULL;
9574 		m_freem(m);
9575 		m = m_tx;
9576 	}
9577 	return error;
9578 }
9579 
9580 /*
9581  * GSO for TCP/IPv[46]
9582  */
9583 static int
9584 gso_tcp(struct ifnet *ifp, struct mbuf **mp, u_int mac_hlen, bool is_ipv4,
9585     boolean_t is_tx)
9586 {
9587 	int error;
9588 	ip_packet_info  info;
9589 	uint32_t csum_flags;
9590 	struct gso_ip_tcp_state state;
9591 	struct bripstats stats; /* XXX ignored */
9592 	struct tcphdr *tcp;
9593 
9594 	if (!is_tx && ipforwarding == 0) {
9595 		/* no need to segment if the packet will not be forwarded */
9596 		return 0;
9597 	}
9598 	error = bridge_get_tcp_header(mp, mac_hlen, is_ipv4, &info, &stats);
9599 	if (error != 0) {
9600 		if (*mp != NULL) {
9601 			m_freem(*mp);
9602 			*mp = NULL;
9603 		}
9604 		return error;
9605 	}
9606 	if (info.ip_proto_hdr == NULL) {
9607 		/* not a TCP packet */
9608 		return 0;
9609 	}
9610 	tcp = (struct tcphdr *)(void *)info.ip_proto_hdr;
9611 	gso_ip_tcp_init_state(&state, ifp, is_ipv4, mac_hlen,
9612 	    info.ip_hlen, info.ip_hdr.ptr, tcp);
9613 	if (is_ipv4) {
9614 		csum_flags = CSUM_DELAY_DATA; /* XXX */
9615 		if (!is_tx) {
9616 			/* if RX to our local IP address, don't segment */
9617 			struct in_addr  dst_ip;
9618 
9619 			bcopy(&state.hdr.ip->ip_dst, &dst_ip, sizeof(dst_ip));
9620 			if (in_addr_is_ours(dst_ip)) {
9621 				return 0;
9622 			}
9623 		}
9624 	} else {
9625 		csum_flags = CSUM_DELAY_IPV6_DATA; /* XXX */
9626 		if (!is_tx) {
9627 			/* if RX to our local IP address, don't segment */
9628 			if (in6_addr_is_ours(&state.hdr.ip6->ip6_dst,
9629 			    ifp->if_index)) {
9630 				/* local IP address, no need to segment */
9631 				return 0;
9632 			}
9633 		}
9634 	}
9635 	(*mp)->m_pkthdr.csum_flags = csum_flags;
9636 	(*mp)->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
9637 	return gso_ip_tcp(ifp, mp, &state, is_tx);
9638 }
9639